From 667c78dc229e9fb227776bc8f7985a1ffb7cb01f Mon Sep 17 00:00:00 2001 From: WuXianChaoPin <1029559041@qq.com> Date: Mon, 30 Apr 2018 23:53:55 +0800 Subject: [PATCH] =?UTF-8?q?=E9=98=BF=E9=87=8C=E4=BA=91=E3=80=81=E5=BC=B9?= =?UTF-8?q?=E5=B9=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- PixivSearch/aliyun/photo/AliyunPhoto.py | 180 ++++++++++++------------ PixivSearch/dao/Comment.py | 132 +++++++++++++++++ PixivSearch/dao/bangumi.py | 85 ++--------- PixivSearch/logging.conf | 2 +- 4 files changed, 237 insertions(+), 162 deletions(-) create mode 100644 PixivSearch/dao/Comment.py diff --git a/PixivSearch/aliyun/photo/AliyunPhoto.py b/PixivSearch/aliyun/photo/AliyunPhoto.py index 69fb51e..615ffad 100644 --- a/PixivSearch/aliyun/photo/AliyunPhoto.py +++ b/PixivSearch/aliyun/photo/AliyunPhoto.py @@ -1,4 +1,3 @@ -import datetime import hashlib import json import os @@ -6,23 +5,27 @@ import time import aliyunsdkcore import oss2 as oss2 -from aliyunsdkcore.client import AcsClient, DEFAULT_SDK_CONNECTION_TIMEOUT_IN_SECONDS +from aliyunsdkcloudphoto.request.v20170711 import ListPhotoStoresRequest, FetchLibrariesRequest, \ + CreateTransactionRequest, CreatePhotoRequest, EditPhotosRequest, GetPublicAccessUrlsRequest, ListPhotosRequest, \ + DeletePhotosRequest, InactivatePhotosRequest +from aliyunsdkcore.auth.credentials import RamRoleArnCredential +from aliyunsdkcore.client import AcsClient +from aliyunsdkcore.profile import region_provider from aliyunsdkcore.request import CommonRequest +region_id = 'cn-shanghai' +ak = 'LTAIWzPnmkJs2qpL' +secret = 'LIIq3HumctXPp0WT8c06yDiFbKKiVe' +region_provider.add_endpoint('Oss', region_id, 'oss-cn-shanghai.aliyuncs.com') +region_provider.add_endpoint('CloudPhoto', region_id, 'cloudphoto.cn-shanghai.aliyuncs.com') +aliyunsdkcore.request.set_default_protocol_type("https") -class UploadPhoto(AcsClient): - def __init__(self, ak='LTAIeS8aBuPBZxV2', secret='hyPeTaDQBQs6jetYcqY0BUdpacXTH3', region_id="cn-hongkong", - auto_retry=True, max_retry_time=3, - user_agent=None, port=80, timeout=DEFAULT_SDK_CONNECTION_TIMEOUT_IN_SECONDS, public_key_id=None, - private_key=None, session_period=3600, credential=None, debug=False): - super().__init__(ak, secret, region_id, auto_retry, max_retry_time, user_agent, port, timeout, public_key_id, - private_key, session_period, credential, debug) - aliyunsdkcore.request.set_default_protocol_type("https") - self.domain = 'cloudphoto.cn-shanghai.aliyuncs.com' - self.version = '2017-07-11' - self.LibraryId = None - self.StoreName = None +class MyClient(AcsClient): + + def __init__(self, arn, sessionName): + super().__init__(region_id=region_id, credential=RamRoleArnCredential(ak, secret, arn, sessionName)) + self.StoreName=None def get_md5_01(self, file_path): md5 = None @@ -35,19 +38,6 @@ class UploadPhoto(AcsClient): md5 = str(hash_code).lower() return md5 - def assumeRole(self): - request = CommonRequest(domain='sts.aliyuncs.com', version='2015-04-01', - action_name='AssumeRole') - RoleSessionName = 'pqh' - request.add_query_param('RoleArn', 'acs:ram::1098806312754985:role/aliyuncloudphotodefaultrole') - request.add_query_param('RoleSessionName', RoleSessionName) - response = json.loads(self.do_action_with_exception(request).decode()) - print(response['Credentials']['SecurityToken']) - - def createAction(self, action): - return CommonRequest(domain=self.domain, version=self.version, - action_name=action) - def do_action_with_exception(self, acs_request): return json.loads(super().do_action_with_exception(acs_request).decode()) @@ -58,43 +48,30 @@ class UploadPhoto(AcsClient): print('{index}:{name}'.format(index=index + 1, name=option[key])) return choose[(int)(input('输入{desc}\n'.format(desc=desc))) - 1] - def getSL(self): - request = self.createAction('ListPhotoStores') + def listPhotoStores(self): + request = ListPhotoStoresRequest.ListPhotoStoresRequest() response = self.do_action_with_exception(request) print('PhotoStores:') photoStores = response['PhotoStores'] self.StoreName = self.showOption(photoStores, 'Name', 'StoreName') - # request = self.createAction('GetPhotoStore') - # request.add_query_param('StoreName', StoreName) - # response = self.do_action_with_exception(request) - # print(response) - - request = self.createAction('FetchLibraries') - request.add_query_param('StoreName', self.StoreName) + def listLibraries(self): + request = FetchLibrariesRequest.FetchLibrariesRequest() + request.set_StoreName(self.StoreName) response = self.do_action_with_exception(request) - Libraries = response['Libraries'] - self.LibraryId = self.showOption(Libraries, 'LibraryId', 'LibraryId') - - # request = self.createAction('GetLibrary') - # request.add_body_params('StoreName', StoreName) - # request.add_body_params('LibraryId', LibraryId) - # response = self.do_action_with_exception(request) - # print(response) + self.Libraries = response['Libraries'] def uploadPhoto(self): - if self.StoreName is None or self.LibraryId is None: - return '未选择StoreName、LibraryId' - - request = self.createAction('CreateTransaction') - filePath = input('输入上传文件路径') + if self.StoreName is None: + self.listPhotoStores() + request = CreateTransactionRequest.CreateTransactionRequest() + filePath = input('输入上传文件路径\n').replace('\\', '/') + filePath = 'C:/Users/47418/Desktop/照片/IMG_20170218_212837.jpg' fileName = filePath.split('/')[-1] - request.add_query_param('Size', os.path.getsize(filePath)) - request.add_query_param('Ext', fileName[-fileName[::-1].index('.'):]) - request.add_query_param('Md5', self.get_md5_01(filePath)) - request.add_query_param('StoreName', self.StoreName) - - request.add_query_param('LibraryId', self.LibraryId) + request.set_Size(os.path.getsize(filePath)) + request.set_Ext(fileName[-fileName[::-1].index('.'):]) + request.set_Md5(self.get_md5_01(filePath)) + request.set_StoreName(self.StoreName) response = self.do_action_with_exception(request) print(response) Upload = response['Transaction']['Upload'] @@ -104,48 +81,71 @@ class UploadPhoto(AcsClient): OssEndpoint = Upload['OssEndpoint'] ObjectKey = Upload['ObjectKey'] - auth = oss2.Auth(self.get_access_key(), self.get_access_secret()) + auth = oss2.StsAuth(self._signer._session_credential[0], self._signer._session_credential[1], + self._signer._session_credential[2]) bucket = oss2.Bucket(auth, OssEndpoint, Bucket) with open(filePath, 'rb') as fileobj: result = bucket.put_object(ObjectKey, fileobj) print('文件上传状态{status}'.format(status=result.status)) - request = self.createAction('CreatePhoto') - request.add_query_param('FileId', FileId) - request.add_query_param('PhotoTitle', fileName) - request.add_query_param('SessionId', SessionId) - request.add_query_param('StoreName', self.StoreName) - request.add_query_param('UploadType', 'manual') - request.add_query_param('LibraryId', self.LibraryId) + request = CreatePhotoRequest.CreatePhotoRequest() + request.set_FileId(FileId) + request.set_PhotoTitle(fileName) + request.set_SessionId(SessionId) + request.set_StoreName(self.StoreName) + request.set_UploadType('manual') + response = self.do_action_with_exception(request) + print(response) + + def listPhotos(self): + if self.StoreName == None: + self.listPhotoStores() + request = ListPhotosRequest.ListPhotosRequest() + request.set_StoreName(self.StoreName) + request.set_State('all') response = self.do_action_with_exception(request) print(response) + return response['Photos'] + + def getPhoto(self): + return self.showOption(self.listPhotos()['Photos'], 'IdStr', '照片Id') + + def sharePhoto(self): + IdStr = self.listPhotos() + request = EditPhotosRequest.EditPhotosRequest() + request.set_PhotoIds([IdStr]) + request.set_StoreName(self.StoreName) + request.set_ShareExpireTime((int(round(time.time())) + 60 * 60) * 1000) + response = self.do_action_with_exception(request) + print(response) + + request = GetPublicAccessUrlsRequest.GetPublicAccessUrlsRequest() + request.set_DomainType('OSS') + request.set_PhotoIds([IdStr]) + request.set_StoreName(self.StoreName) + request.set_ZoomType('style/2') + response = self.do_action_with_exception(request) + print(response) + +def client(arn, sessionName): + ram_role_arn_credential = RamRoleArnCredential('LTAIWzPnmkJs2qpL', 'LIIq3HumctXPp0WT8c06yDiFbKKiVe', + arn, sessionName) + return AcsClient(region_id='cn-shanghai', credential=ram_role_arn_credential) if __name__ == '__main__': - client = UploadPhoto() - client.getSL() - request = client.createAction('ListPhotos') - request.add_query_param('StoreName', client.StoreName) - request.add_query_param('LibraryId', client.LibraryId) - request.add_query_param('State', 'all') - response = client.do_action_with_exception(request) - print(response) - - IdStr = client.showOption(response['Photos'], 'IdStr', '照片Id') - - request = client.createAction('EditPhotos') - request.add_query_param('PhotoId.1', IdStr) - request.add_query_param('StoreName', client.StoreName) - request.add_query_param('LibraryId', client.LibraryId) - request.add_query_param('ShareExpireTime', int(round(time.time() * 1000)) + 60 * 60) - response = client.do_action_with_exception(request) - print(response) - - request = client.createAction('GetPublicAccessUrls') - request.add_query_param('DomainType', 'OSS') - request.add_query_param('PhotoId.1', IdStr) - request.add_query_param('StoreName', client.StoreName) - request.add_query_param('ZoomType', 'style/1') - request.add_query_param('LibraryId', client.LibraryId) - response = client.do_action_with_exception(request) - print(response) \ No newline at end of file + + myClient = MyClient('acs:ram::1098806312754985:role/aliyunosstokengeneratorrole', 'pqh001') + myClient.listPhotoStores() + request = FetchLibrariesRequest.FetchLibrariesRequest() + request.set_StoreName(myClient.StoreName) + myClient.listLibraries() + storeName = myClient.StoreName + for Library in myClient.Libraries: + myClient = MyClient('acs:ram::1098806312754985:role/aliyunosstokengeneratorrole', Library['LibraryId']) + for id in myClient.listPhotos(): + request=InactivatePhotosRequest.InactivatePhotosRequest() + request.set_StoreName(storeName) + request.set_PhotoIds([id['IdStr']]) + response=myClient.do_action_with_exception(request) + print(response) diff --git a/PixivSearch/dao/Comment.py b/PixivSearch/dao/Comment.py new file mode 100644 index 0000000..ad893f4 --- /dev/null +++ b/PixivSearch/dao/Comment.py @@ -0,0 +1,132 @@ +import json +import threading +from concurrent import futures + +import requests +import xlrd +from lxml import etree + +from PixivSearch.dao.bangumi import episodeIdToCid + + +class Comment: + lock = threading.Lock() # 多线程全局资源锁 + + def __init__(self, keywords_=None) -> None: + super().__init__() + self.obj = {'data': {}, 'flag': False} + self.keywords = keywords_ + + # 获取番剧合集弹幕排行榜 + def getCommentSort(self, cids): + + urls = [] + for cid in cids: + urls.extend(getCidUrls(cid)) + with futures.ThreadPoolExecutor(32) as executor: + executor.map(self.count, urls) + for index, data in enumerate( + sorted(self.obj["data"].items(), key=lambda d: d[1], reverse=True)[ + :50]): + print('{index}:{data}'.format(index=index+1, data=data)) + + # 获取番剧合集弹幕排行榜 + def count(self, url, desc=None): + bytes = requests.get(url).content + comment_selector = etree.HTML(bytes) + if not desc is None: + print(desc) + print("url=%s" % url) + for comment in comment_selector.xpath('//i//d/text()'): + if comment in self.obj["data"]: + with self.lock: + self.obj["data"][comment] = self.obj["data"][comment] + 1 + else: + with self.lock: + self.obj["data"][comment] = 1 + if not self.obj["flag"]: + for keyword in self.keywords: + if keyword in comment: + self.obj["flag"] = True + + +# 根据cid获取历史弹幕地址 +def getCidUrls(cid): + urls = [] + url = "https://comment.bilibili.com/rolldate,%d" % cid + req = requests.get(url) + if len(req.text) > 0: + for i in json.loads(req.text): + urls.append("https://comment.bilibili.com/dmroll,%s,%d" % (i['timestamp'], cid)) + else: + urls.append("https://comment.bilibili.com/%d.xml" % cid) + return urls + + +# 下载历史弹幕 +def parseXmlByHistory(path, cid,size=None): + dlist = set() + flag=parseXml(getCidUrls(cid), dlist, size) + if size is None or not size is None and flag: + f = open(path, 'wb') + f.write(b'') + for i in dlist: + f.write(('\r\n' + i).encode()) + f.write(b'\r\n') + f.close() + + +def parseXml(urls, dlist, size=None): + if isinstance(urls, str): + urls = [urls] + if not size is None: + size = float(size.strip('%')) / 100.0 + for url in urls: + bytes = requests.get(url).content + comment_selector = etree.HTML(bytes) + list = comment_selector.xpath('//i//d/text()') + maxlimit = int(comment_selector.xpath('//i//maxlimit/text()')[0]) + + if len(list) > 0: + print('弹幕数:{list},最大弹幕数:{maxlimit},弹幕池填充:{p}'.format(list=len(list), maxlimit=maxlimit, + p='%.2f%%' % (len(list) / maxlimit * 100))) + for element in comment_selector.xpath('//i//d'): + if len(element.xpath("text()")) > 0: + fstr = '{content}'.format(p=str(element.xpath("@p")[0]), + content=str(element.xpath("text()")[0])) + dlist.add(fstr) + + currentSize = len(dlist) / maxlimit + print('填充率:{l}'.format(l='%.2f%%' % (currentSize * 100))) + if not size is None and currentSize >= size: + return True + return False + +if __name__ == '__main__': + # parseXmlByHistory('10815558.xml', 10815558) + # Comment('').getCommentSort(episodeIdToCid(172095)) + cids=[11664778, +11662541, +11661412, +11664304, +11666093, + +] + + for cid in cids: + parseXmlByHistory('{cid}.xml'.format(cid=cid),cid) + + # path = 'D:/QQ/1029559041/FileRecv/tmp001.xlsx' + # ExcelFile = xlrd.open_workbook(path) + # + # + # for sheetName in ExcelFile.sheet_names(): + # sheet = ExcelFile.sheet_by_name(sheetName) + # for row in sheet.get_rows(): + # try: + # row[5].value.index('日剧') + # cid = int(row[2].value) + # print('aid={aid},cid={cid},title={title}'.format(aid=int(row[1].value),cid=cid,title=row[5].value)) + # parseXmlByHistory('{cid}.xml'.format(cid=cid),cid,'50%') + # except BaseException as e: + # repr(e) diff --git a/PixivSearch/dao/bangumi.py b/PixivSearch/dao/bangumi.py index 7fc523b..6b02cce 100644 --- a/PixivSearch/dao/bangumi.py +++ b/PixivSearch/dao/bangumi.py @@ -92,7 +92,7 @@ def listen(): _thread.start_new_thread(listen, ()) - +#遍历所有专题视频收藏数信息 def getIds(): seasonIdList = [] page = 1 @@ -140,22 +140,12 @@ def getIds(): logger.error(repr(e)) continue - -# def testA(): -# req = requests.post('https://api.bilibili.com/x/report/web/heartbeat', -# data={"aid": 29416,"cid":49052,"csrf": "c0d296db7e33085f9f4730cfee66660b"}, -# cookies=_cookies) -# print(req.status_code) - -_cookies = {'DedeUserID': '4372744', 'DedeUserID__ckMd5': 'e8179b74444cae8e', - 'SESSDATA': '919b17d2%2C1524917631%2C3eede719'} - - -def getCid(aid, type=True): +#根据aid获取cid +def getCid(aid, type=None): while True and aid > 0: url = "https://api.bilibili.com/x/web-interface/archive/stat?aid=%d" % aid print(url) - req = requests.get(url, cookies=_cookies) + req = requests.get(url) code = json.loads(req.text)["code"] if code == 0: req = requests.get("https://www.bilibili.com/video/av%d" % aid) @@ -166,17 +156,20 @@ def getCid(aid, type=True): cid = json_obj['videoData']['pages'][0]['cid'] print('cid=%s' % cid) return cid - if type: - aid = aid - 1 + if type is None: + break else: - aid = aid + 1 - + if type: + aid = aid - 1 + else: + aid = aid + 1 +#根据aid获取cid def getCids(aid): s = {"min": getCid(aid, True), "max": getCid(aid, False)} return s - +#获取专题所有cid def episodeIdToCid(episode_id): cids = [] url = "https://www.bilibili.com/bangumi/play/ep%d" % episode_id @@ -189,59 +182,9 @@ def episodeIdToCid(episode_id): return cids -def parseXml(url): - print("url=%s" % url) - comment_selector = etree.HTML(requests.get(url).content) - comment_content = comment_selector.xpath('//i') - for comment_each in comment_content: - comments = comment_each.xpath('//d/text()') - if comments: - for comment in comments: - if comment in obj["data"]: - with lock: - obj["data"][comment] = obj["data"][comment] + 1 - else: - with lock: - obj["data"][comment] = 1 - if not obj["flag"]: - for keyword in keywords: - if keyword in comment: - obj["flag"] = True - - -lock = threading.Lock() # 多线程全局资源锁 - - -def loadData(cids): - params = [] - for cid in cids: - url = "https://comment.bilibili.com/rolldate,%d" % cid - req = requests.get(url) - urls = ["https://comment.bilibili.com/%d.xml" % cid] - if len(req.text) > 0: - for i in json.loads(req.text): - urls.append("https://comment.bilibili.com/dmroll,%s,%d" % (i['timestamp'], cid)) - for url in urls: - params.append(url) - with futures.ThreadPoolExecutor(32) as executor: - executor.map(parseXml, params) - return obj - - -def getCommentSort(cids, keywords_): - global keywords, obj - keywords = keywords_ - obj = {"data": {}, "flag": False} - return loadData(cids) - if __name__ == '__main__': - # print(getCids(29416)) - # obj = loadData( - # [49052, 49053, 51525, 51526, 53407, 54180, 55295, 55296, 57255, 57256, 59288, 59289, 61559, 61560, 64034, 64035, - # 67024, 67025, 69284, 73333, 73334, 74024, 74025], ['穹']) - f = getCommentSort(episodeIdToCid(172095), []) + print(getCids(29416)) # obj = loadData([34807341], []) - for i in sorted(f["data"].items(), key=lambda d: d[1], reverse=True)[:50]: - print(i) + diff --git a/PixivSearch/logging.conf b/PixivSearch/logging.conf index dc3325b..82c59e0 100644 --- a/PixivSearch/logging.conf +++ b/PixivSearch/logging.conf @@ -18,7 +18,7 @@ handlers=fileHandler [handler_consoleHandler] class=StreamHandler -level=DEBUG +level=INFO formatter=fmt args=(sys.stdout,)