From 667c78dc229e9fb227776bc8f7985a1ffb7cb01f Mon Sep 17 00:00:00 2001
From: WuXianChaoPin <1029559041@qq.com>
Date: Mon, 30 Apr 2018 23:53:55 +0800
Subject: [PATCH] =?UTF-8?q?=E9=98=BF=E9=87=8C=E4=BA=91=E3=80=81=E5=BC=B9?=
=?UTF-8?q?=E5=B9=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
PixivSearch/aliyun/photo/AliyunPhoto.py | 180 ++++++++++++------------
PixivSearch/dao/Comment.py | 132 +++++++++++++++++
PixivSearch/dao/bangumi.py | 85 ++---------
PixivSearch/logging.conf | 2 +-
4 files changed, 237 insertions(+), 162 deletions(-)
create mode 100644 PixivSearch/dao/Comment.py
diff --git a/PixivSearch/aliyun/photo/AliyunPhoto.py b/PixivSearch/aliyun/photo/AliyunPhoto.py
index 69fb51e..615ffad 100644
--- a/PixivSearch/aliyun/photo/AliyunPhoto.py
+++ b/PixivSearch/aliyun/photo/AliyunPhoto.py
@@ -1,4 +1,3 @@
-import datetime
import hashlib
import json
import os
@@ -6,23 +5,27 @@ import time
import aliyunsdkcore
import oss2 as oss2
-from aliyunsdkcore.client import AcsClient, DEFAULT_SDK_CONNECTION_TIMEOUT_IN_SECONDS
+from aliyunsdkcloudphoto.request.v20170711 import ListPhotoStoresRequest, FetchLibrariesRequest, \
+ CreateTransactionRequest, CreatePhotoRequest, EditPhotosRequest, GetPublicAccessUrlsRequest, ListPhotosRequest, \
+ DeletePhotosRequest, InactivatePhotosRequest
+from aliyunsdkcore.auth.credentials import RamRoleArnCredential
+from aliyunsdkcore.client import AcsClient
+from aliyunsdkcore.profile import region_provider
from aliyunsdkcore.request import CommonRequest
+# Region shared by every client and endpoint registration in this module.
+region_id = 'cn-shanghai'
+# SECURITY: the AccessKey pair is read from the environment instead of being
+# committed to the repository.  Any key pair that was ever hard-coded in this
+# file must be treated as leaked and rotated in the RAM console.
+ak = os.environ.get('ALIBABA_CLOUD_ACCESS_KEY_ID')
+secret = os.environ.get('ALIBABA_CLOUD_ACCESS_KEY_SECRET')
+# Register the service endpoints for the region used above.
+region_provider.add_endpoint('Oss', region_id, 'oss-cn-shanghai.aliyuncs.com')
+region_provider.add_endpoint('CloudPhoto', region_id, 'cloudphoto.cn-shanghai.aliyuncs.com')
+# Every request built after this point defaults to HTTPS.
+aliyunsdkcore.request.set_default_protocol_type("https")
-class UploadPhoto(AcsClient):
- def __init__(self, ak='LTAIeS8aBuPBZxV2', secret='hyPeTaDQBQs6jetYcqY0BUdpacXTH3', region_id="cn-hongkong",
- auto_retry=True, max_retry_time=3,
- user_agent=None, port=80, timeout=DEFAULT_SDK_CONNECTION_TIMEOUT_IN_SECONDS, public_key_id=None,
- private_key=None, session_period=3600, credential=None, debug=False):
- super().__init__(ak, secret, region_id, auto_retry, max_retry_time, user_agent, port, timeout, public_key_id,
- private_key, session_period, credential, debug)
- aliyunsdkcore.request.set_default_protocol_type("https")
- self.domain = 'cloudphoto.cn-shanghai.aliyuncs.com'
- self.version = '2017-07-11'
- self.LibraryId = None
- self.StoreName = None
+class MyClient(AcsClient):
+
+    def __init__(self, arn, sessionName):
+        """Create a client that authenticates through a RAM role.
+
+        arn: ARN of the RAM role handed to ``RamRoleArnCredential``.
+        sessionName: role session name handed to ``RamRoleArnCredential``.
+        """
+        role_credential = RamRoleArnCredential(ak, secret, arn, sessionName)
+        super().__init__(region_id=region_id, credential=role_credential)
+        # Name of the selected photo store; stays None until listPhotoStores() sets it.
+        self.StoreName = None
def get_md5_01(self, file_path):
md5 = None
@@ -35,19 +38,6 @@ class UploadPhoto(AcsClient):
md5 = str(hash_code).lower()
return md5
- def assumeRole(self):
- request = CommonRequest(domain='sts.aliyuncs.com', version='2015-04-01',
- action_name='AssumeRole')
- RoleSessionName = 'pqh'
- request.add_query_param('RoleArn', 'acs:ram::1098806312754985:role/aliyuncloudphotodefaultrole')
- request.add_query_param('RoleSessionName', RoleSessionName)
- response = json.loads(self.do_action_with_exception(request).decode())
- print(response['Credentials']['SecurityToken'])
-
- def createAction(self, action):
- return CommonRequest(domain=self.domain, version=self.version,
- action_name=action)
-
def do_action_with_exception(self, acs_request):
return json.loads(super().do_action_with_exception(acs_request).decode())
@@ -58,43 +48,30 @@ class UploadPhoto(AcsClient):
print('{index}:{name}'.format(index=index + 1, name=option[key]))
return choose[(int)(input('输入{desc}\n'.format(desc=desc))) - 1]
- def getSL(self):
- request = self.createAction('ListPhotoStores')
+ def listPhotoStores(self):
+ request = ListPhotoStoresRequest.ListPhotoStoresRequest()
response = self.do_action_with_exception(request)
print('PhotoStores:')
photoStores = response['PhotoStores']
self.StoreName = self.showOption(photoStores, 'Name', 'StoreName')
- # request = self.createAction('GetPhotoStore')
- # request.add_query_param('StoreName', StoreName)
- # response = self.do_action_with_exception(request)
- # print(response)
-
- request = self.createAction('FetchLibraries')
- request.add_query_param('StoreName', self.StoreName)
+ def listLibraries(self):
+ request = FetchLibrariesRequest.FetchLibrariesRequest()
+ request.set_StoreName(self.StoreName)
response = self.do_action_with_exception(request)
- Libraries = response['Libraries']
- self.LibraryId = self.showOption(Libraries, 'LibraryId', 'LibraryId')
-
- # request = self.createAction('GetLibrary')
- # request.add_body_params('StoreName', StoreName)
- # request.add_body_params('LibraryId', LibraryId)
- # response = self.do_action_with_exception(request)
- # print(response)
+ self.Libraries = response['Libraries']
def uploadPhoto(self):
- if self.StoreName is None or self.LibraryId is None:
- return '未选择StoreName、LibraryId'
-
- request = self.createAction('CreateTransaction')
- filePath = input('输入上传文件路径')
+ if self.StoreName is None:
+ self.listPhotoStores()
+ request = CreateTransactionRequest.CreateTransactionRequest()
+ # Use the path typed by the user (a hard-coded debug path used to override it); backslashes become '/' so the split below yields the file name.
+ filePath = input('输入上传文件路径\n').replace('\\', '/')
fileName = filePath.split('/')[-1]
- request.add_query_param('Size', os.path.getsize(filePath))
- request.add_query_param('Ext', fileName[-fileName[::-1].index('.'):])
- request.add_query_param('Md5', self.get_md5_01(filePath))
- request.add_query_param('StoreName', self.StoreName)
-
- request.add_query_param('LibraryId', self.LibraryId)
+ request.set_Size(os.path.getsize(filePath))
+ request.set_Ext(fileName[-fileName[::-1].index('.'):])
+ request.set_Md5(self.get_md5_01(filePath))
+ request.set_StoreName(self.StoreName)
response = self.do_action_with_exception(request)
print(response)
Upload = response['Transaction']['Upload']
@@ -104,48 +81,71 @@ class UploadPhoto(AcsClient):
OssEndpoint = Upload['OssEndpoint']
ObjectKey = Upload['ObjectKey']
- auth = oss2.Auth(self.get_access_key(), self.get_access_secret())
+ auth = oss2.StsAuth(self._signer._session_credential[0], self._signer._session_credential[1],
+ self._signer._session_credential[2])
bucket = oss2.Bucket(auth, OssEndpoint, Bucket)
with open(filePath, 'rb') as fileobj:
result = bucket.put_object(ObjectKey, fileobj)
print('文件上传状态{status}'.format(status=result.status))
- request = self.createAction('CreatePhoto')
- request.add_query_param('FileId', FileId)
- request.add_query_param('PhotoTitle', fileName)
- request.add_query_param('SessionId', SessionId)
- request.add_query_param('StoreName', self.StoreName)
- request.add_query_param('UploadType', 'manual')
- request.add_query_param('LibraryId', self.LibraryId)
+ request = CreatePhotoRequest.CreatePhotoRequest()
+ request.set_FileId(FileId)
+ request.set_PhotoTitle(fileName)
+ request.set_SessionId(SessionId)
+ request.set_StoreName(self.StoreName)
+ request.set_UploadType('manual')
+ response = self.do_action_with_exception(request)
+ print(response)
+
+    def listPhotos(self):
+        """Return the ``Photos`` list of the selected store (request state ``'all'``).
+
+        When no store has been selected yet, listPhotoStores() is called first
+        so the user can pick one.  The raw response is printed before the
+        ``Photos`` entry is returned.
+        """
+        # Identity check, consistent with the same guard in uploadPhoto().
+        if self.StoreName is None:
+            self.listPhotoStores()
+        request = ListPhotosRequest.ListPhotosRequest()
+        request.set_StoreName(self.StoreName)
+        request.set_State('all')
+        response = self.do_action_with_exception(request)
+        print(response)
+        return response['Photos']
+
+    def getPhoto(self):
+        """Let the user pick one photo and return the value chosen via showOption().
+
+        listPhotos() already returns the ``Photos`` list itself, so it is passed
+        to showOption() directly; indexing it with ``'Photos'`` again would raise
+        TypeError.
+        """
+        return self.showOption(self.listPhotos(), 'IdStr', '照片Id')
+
+    def sharePhoto(self):
+        """Share one user-selected photo for an hour and print its public access URL response.
+
+        Both responses (EditPhotos and GetPublicAccessUrls) are printed; nothing
+        is returned.
+        """
+        # Pick a single photo id.  listPhotos() returns the whole photo list, so
+        # it must go through showOption() before being used as a PhotoId.
+        IdStr = self.showOption(self.listPhotos(), 'IdStr', '照片Id')
+
+        # Expiry is "now + 1 hour", converted to milliseconds.
+        expire_at_ms = (int(round(time.time())) + 60 * 60) * 1000
+        request = EditPhotosRequest.EditPhotosRequest()
+        request.set_PhotoIds([IdStr])
+        request.set_StoreName(self.StoreName)
+        request.set_ShareExpireTime(expire_at_ms)
+        response = self.do_action_with_exception(request)
+        print(response)
+
+        request = GetPublicAccessUrlsRequest.GetPublicAccessUrlsRequest()
+        request.set_DomainType('OSS')
+        request.set_PhotoIds([IdStr])
+        request.set_StoreName(self.StoreName)
+        request.set_ZoomType('style/2')
+        response = self.do_action_with_exception(request)
+        print(response)
+
+def client(arn, sessionName):
+    """Return a plain ``AcsClient`` authenticated through the RAM role *arn*.
+
+    arn: ARN of the RAM role handed to ``RamRoleArnCredential``.
+    sessionName: role session name handed to ``RamRoleArnCredential``.
+
+    The module-level ``ak``, ``secret`` and ``region_id`` are reused so the
+    credentials and region are defined in exactly one place.
+    """
+    ram_role_arn_credential = RamRoleArnCredential(ak, secret, arn, sessionName)
+    return AcsClient(region_id=region_id, credential=ram_role_arn_credential)
if __name__ == '__main__':
- client = UploadPhoto()
- client.getSL()
- request = client.createAction('ListPhotos')
- request.add_query_param('StoreName', client.StoreName)
- request.add_query_param('LibraryId', client.LibraryId)
- request.add_query_param('State', 'all')
- response = client.do_action_with_exception(request)
- print(response)
-
- IdStr = client.showOption(response['Photos'], 'IdStr', '照片Id')
-
- request = client.createAction('EditPhotos')
- request.add_query_param('PhotoId.1', IdStr)
- request.add_query_param('StoreName', client.StoreName)
- request.add_query_param('LibraryId', client.LibraryId)
- request.add_query_param('ShareExpireTime', int(round(time.time() * 1000)) + 60 * 60)
- response = client.do_action_with_exception(request)
- print(response)
-
- request = client.createAction('GetPublicAccessUrls')
- request.add_query_param('DomainType', 'OSS')
- request.add_query_param('PhotoId.1', IdStr)
- request.add_query_param('StoreName', client.StoreName)
- request.add_query_param('ZoomType', 'style/1')
- request.add_query_param('LibraryId', client.LibraryId)
- response = client.do_action_with_exception(request)
- print(response)
\ No newline at end of file
+
+    # Maintenance script: sends an InactivatePhotos request for every photo of
+    # every library in the store chosen interactively below.
+    roleArn = 'acs:ram::1098806312754985:role/aliyunosstokengeneratorrole'
+    myClient = MyClient(roleArn, 'pqh001')
+    myClient.listPhotoStores()
+    # listLibraries() builds its own FetchLibraries request from StoreName.
+    myClient.listLibraries()
+    storeName = myClient.StoreName
+    for Library in myClient.Libraries:
+        # One client per library, using the LibraryId as the role session name.
+        # NOTE(review): presumably the session name selects the library - confirm.
+        libraryClient = MyClient(roleArn, Library['LibraryId'])
+        # Reuse the store chosen above; a fresh client has StoreName None, which
+        # would make listPhotos() prompt for a store again on every library.
+        libraryClient.StoreName = storeName
+        for photo in libraryClient.listPhotos():
+            request = InactivatePhotosRequest.InactivatePhotosRequest()
+            request.set_StoreName(storeName)
+            request.set_PhotoIds([photo['IdStr']])
+            response = libraryClient.do_action_with_exception(request)
+            print(response)
diff --git a/PixivSearch/dao/Comment.py b/PixivSearch/dao/Comment.py
new file mode 100644
index 0000000..ad893f4
--- /dev/null
+++ b/PixivSearch/dao/Comment.py
@@ -0,0 +1,132 @@
+import json
+import threading
+from concurrent import futures
+
+import requests
+import xlrd
+from lxml import etree
+
+from PixivSearch.dao.bangumi import episodeIdToCid
+
+
+class Comment:
+    """Count how often each danmaku (bullet comment) text occurs.
+
+    ``obj['data']`` maps a comment text to the number of times it was seen.
+    ``obj['flag']`` becomes True once any comment contains one of ``keywords``.
+    """
+
+    lock = threading.Lock()  # guards updates of ``obj['data']`` across worker threads
+
+    def __init__(self, keywords_=None) -> None:
+        """Store the keywords to look for; ``None`` means no keyword matching."""
+        super().__init__()
+        self.obj = {'data': {}, 'flag': False}
+        self.keywords = keywords_
+
+    # Print the 50 most frequent comments over a collection of cids.
+    def getCommentSort(self, cids):
+        """Download every comment file of *cids* and print the top-50 ranking."""
+        urls = []
+        for cid in cids:
+            urls.extend(getCidUrls(cid))
+        # Leaving the ``with`` block waits until every download has finished.
+        # NOTE: the results of map() are never consumed, so an exception raised
+        # inside count() is not re-raised here.
+        with futures.ThreadPoolExecutor(32) as executor:
+            executor.map(self.count, urls)
+        ranking = sorted(self.obj["data"].items(), key=lambda d: d[1], reverse=True)[:50]
+        for index, data in enumerate(ranking):
+            print('{index}:{data}'.format(index=index + 1, data=data))
+
+    # Tally the comments of a single comment-file URL.
+    def count(self, url, desc=None):
+        """Fetch *url* and add each of its comment texts to ``obj['data']``.
+
+        desc: optional label printed before the URL.
+        """
+        content = requests.get(url).content
+        comment_selector = etree.HTML(content)
+        if desc is not None:
+            print(desc)
+        print("url=%s" % url)
+        # The constructor default is None; treat it as "no keywords" instead of
+        # failing when iterating over it.
+        keywords = self.keywords or ()
+        counts = self.obj["data"]
+        for comment in comment_selector.xpath('//i//d/text()'):
+            # Read-modify-write under the lock so concurrent workers cannot
+            # lose an increment between the lookup and the store.
+            with self.lock:
+                counts[comment] = counts.get(comment, 0) + 1
+            if not self.obj["flag"]:
+                for keyword in keywords:
+                    if keyword in comment:
+                        self.obj["flag"] = True
+
+
+# Build the comment-file URLs for a cid.
+def getCidUrls(cid):
+    """Return the list of comment URLs for *cid*.
+
+    When the ``rolldate`` endpoint answers with a non-empty body, one
+    ``dmroll`` URL is produced per entry of its JSON payload (using the
+    entry's ``timestamp``); otherwise the single ``<cid>.xml`` URL is returned.
+    """
+    rolldate = requests.get("https://comment.bilibili.com/rolldate,%d" % cid)
+    if len(rolldate.text) == 0:
+        return ["https://comment.bilibili.com/%d.xml" % cid]
+    return ["https://comment.bilibili.com/dmroll,%s,%d" % (entry['timestamp'], cid)
+            for entry in json.loads(rolldate.text)]
+
+
+# Download the comment history of one cid into a file.
+def parseXmlByHistory(path, cid, size=None):
+    """Collect the comments of *cid* and write them to *path*.
+
+    path: output file, opened in binary write mode.
+    cid: id passed to getCidUrls() to obtain the comment URLs.
+    size: optional percentage string forwarded to parseXml(); when given, the
+        file is only written if parseXml() reports the threshold was reached.
+    """
+    dlist = set()
+    flag = parseXml(getCidUrls(cid), dlist, size)
+    # No threshold: always write.  With a threshold: write only when reached.
+    if size is None or flag:
+        # ``with`` closes the file even if one of the writes fails.
+        with open(path, 'wb') as f:
+            f.write(b'')
+            for i in dlist:
+                f.write(('\r\n' + i).encode())
+            f.write(b'\r\n')
+
+
+def parseXml(urls, dlist, size=None):
+    """Add the comment texts found at *urls* to *dlist*.
+
+    urls: one URL string or an iterable of URL strings.
+    dlist: collection with an ``add`` method; receives every non-empty
+        comment text.
+    size: optional percentage string such as ``'50%'``.
+
+    Returns True as soon as ``len(dlist) / maxlimit`` reaches *size*, where
+    ``maxlimit`` comes from the document just processed; otherwise False.
+    Documents without comments are skipped.  A document without a usable
+    ``maxlimit`` still contributes its comments, but no fill rate is computed
+    for it (this used to raise IndexError / ZeroDivisionError).
+    """
+    if isinstance(urls, str):
+        urls = [urls]
+    if size is not None:
+        size = float(size.strip('%')) / 100.0
+    for url in urls:
+        content = requests.get(url).content
+        comment_selector = etree.HTML(content)
+        texts = comment_selector.xpath('//i//d/text()')
+        if len(texts) == 0:
+            continue
+
+        limits = comment_selector.xpath('//i//maxlimit/text()')
+        maxlimit = int(limits[0]) if limits else 0
+        if maxlimit > 0:
+            print('弹幕数:{list},最大弹幕数:{maxlimit},弹幕池填充:{p}'.format(list=len(texts), maxlimit=maxlimit,
+                                                                  p='%.2f%%' % (len(texts) / maxlimit * 100)))
+        for element in comment_selector.xpath('//i//d'):
+            if len(element.xpath("text()")) > 0:
+                # NOTE(review): the template only uses {content}; ``p`` is still
+                # evaluated, so an element without a ``p`` attribute raises
+                # IndexError exactly as before.
+                fstr = '{content}'.format(p=str(element.xpath("@p")[0]),
+                                          content=str(element.xpath("text()")[0]))
+                dlist.add(fstr)
+
+        if maxlimit <= 0:
+            # Pool capacity unknown: the fill rate cannot be computed.
+            continue
+        currentSize = len(dlist) / maxlimit
+        print('填充率:{l}'.format(l='%.2f%%' % (currentSize * 100)))
+        if size is not None and currentSize >= size:
+            return True
+    return False
+
+if __name__ == '__main__':
+    # parseXmlByHistory('10815558.xml', 10815558)
+    # Comment('').getCommentSort(episodeIdToCid(172095))
+    # Each cid listed here is dumped to '<cid>.xml' in the working directory.
+    cids = [
+        11664778,
+        11662541,
+        11661412,
+        11664304,
+        11666093,
+    ]
+
+    for cid in cids:
+        outputPath = '{cid}.xml'.format(cid=cid)
+        parseXmlByHistory(outputPath, cid)
+
+    # Disabled spreadsheet-driven variant, kept for reference:
+    # path = 'D:/QQ/1029559041/FileRecv/tmp001.xlsx'
+    # ExcelFile = xlrd.open_workbook(path)
+    #
+    #
+    # for sheetName in ExcelFile.sheet_names():
+    #     sheet = ExcelFile.sheet_by_name(sheetName)
+    #     for row in sheet.get_rows():
+    #         try:
+    #             row[5].value.index('日剧')
+    #             cid = int(row[2].value)
+    #             print('aid={aid},cid={cid},title={title}'.format(aid=int(row[1].value),cid=cid,title=row[5].value))
+    #             parseXmlByHistory('{cid}.xml'.format(cid=cid),cid,'50%')
+    #         except BaseException as e:
+    #             repr(e)
diff --git a/PixivSearch/dao/bangumi.py b/PixivSearch/dao/bangumi.py
index 7fc523b..6b02cce 100644
--- a/PixivSearch/dao/bangumi.py
+++ b/PixivSearch/dao/bangumi.py
@@ -92,7 +92,7 @@ def listen():
_thread.start_new_thread(listen, ())
-
+# Walk through all seasons and collect their favourites-count information
def getIds():
seasonIdList = []
page = 1
@@ -140,22 +140,12 @@ def getIds():
logger.error(repr(e))
continue
-
-# def testA():
-# req = requests.post('https://api.bilibili.com/x/report/web/heartbeat',
-# data={"aid": 29416,"cid":49052,"csrf": "c0d296db7e33085f9f4730cfee66660b"},
-# cookies=_cookies)
-# print(req.status_code)
-
-_cookies = {'DedeUserID': '4372744', 'DedeUserID__ckMd5': 'e8179b74444cae8e',
- 'SESSDATA': '919b17d2%2C1524917631%2C3eede719'}
-
-
-def getCid(aid, type=True):
+# Look up the cid of the video with the given aid
+def getCid(aid, type=None):
while True and aid > 0:
url = "https://api.bilibili.com/x/web-interface/archive/stat?aid=%d" % aid
print(url)
- req = requests.get(url, cookies=_cookies)
+ req = requests.get(url)
code = json.loads(req.text)["code"]
if code == 0:
req = requests.get("https://www.bilibili.com/video/av%d" % aid)
@@ -166,17 +156,20 @@ def getCid(aid, type=True):
cid = json_obj['videoData']['pages'][0]['cid']
print('cid=%s' % cid)
return cid
- if type:
- aid = aid - 1
+ if type is None:
+ break
else:
- aid = aid + 1
-
+ if type:
+ aid = aid - 1
+ else:
+ aid = aid + 1
+# Get the cids found by searching downwards ("min") and upwards ("max") from the given aid
def getCids(aid):
s = {"min": getCid(aid, True), "max": getCid(aid, False)}
return s
-
+# Collect every cid of the season that the given episode id belongs to
def episodeIdToCid(episode_id):
cids = []
url = "https://www.bilibili.com/bangumi/play/ep%d" % episode_id
@@ -189,59 +182,9 @@ def episodeIdToCid(episode_id):
return cids
-def parseXml(url):
- print("url=%s" % url)
- comment_selector = etree.HTML(requests.get(url).content)
- comment_content = comment_selector.xpath('//i')
- for comment_each in comment_content:
- comments = comment_each.xpath('//d/text()')
- if comments:
- for comment in comments:
- if comment in obj["data"]:
- with lock:
- obj["data"][comment] = obj["data"][comment] + 1
- else:
- with lock:
- obj["data"][comment] = 1
- if not obj["flag"]:
- for keyword in keywords:
- if keyword in comment:
- obj["flag"] = True
-
-
-lock = threading.Lock() # 多线程全局资源锁
-
-
-def loadData(cids):
- params = []
- for cid in cids:
- url = "https://comment.bilibili.com/rolldate,%d" % cid
- req = requests.get(url)
- urls = ["https://comment.bilibili.com/%d.xml" % cid]
- if len(req.text) > 0:
- for i in json.loads(req.text):
- urls.append("https://comment.bilibili.com/dmroll,%s,%d" % (i['timestamp'], cid))
- for url in urls:
- params.append(url)
- with futures.ThreadPoolExecutor(32) as executor:
- executor.map(parseXml, params)
- return obj
-
-
-def getCommentSort(cids, keywords_):
- global keywords, obj
- keywords = keywords_
- obj = {"data": {}, "flag": False}
- return loadData(cids)
-
if __name__ == '__main__':
- # print(getCids(29416))
- # obj = loadData(
- # [49052, 49053, 51525, 51526, 53407, 54180, 55295, 55296, 57255, 57256, 59288, 59289, 61559, 61560, 64034, 64035,
- # 67024, 67025, 69284, 73333, 73334, 74024, 74025], ['穹'])
- f = getCommentSort(episodeIdToCid(172095), [])
+ print(getCids(29416))
# obj = loadData([34807341], [])
- for i in sorted(f["data"].items(), key=lambda d: d[1], reverse=True)[:50]:
- print(i)
+
diff --git a/PixivSearch/logging.conf b/PixivSearch/logging.conf
index dc3325b..82c59e0 100644
--- a/PixivSearch/logging.conf
+++ b/PixivSearch/logging.conf
@@ -18,7 +18,7 @@ handlers=fileHandler
[handler_consoleHandler]
class=StreamHandler
-level=DEBUG
+level=INFO
formatter=fmt
args=(sys.stdout,)