阿里云、弹幕

master
WuXianChaoPin 7 years ago
parent 4dbbb2aac2
commit 667c78dc22
  1. 180
      PixivSearch/aliyun/photo/AliyunPhoto.py
  2. 132
      PixivSearch/dao/Comment.py
  3. 85
      PixivSearch/dao/bangumi.py
  4. 2
      PixivSearch/logging.conf

@ -1,4 +1,3 @@
import datetime
import hashlib import hashlib
import json import json
import os import os
@ -6,23 +5,27 @@ import time
import aliyunsdkcore import aliyunsdkcore
import oss2 as oss2 import oss2 as oss2
from aliyunsdkcore.client import AcsClient, DEFAULT_SDK_CONNECTION_TIMEOUT_IN_SECONDS from aliyunsdkcloudphoto.request.v20170711 import ListPhotoStoresRequest, FetchLibrariesRequest, \
CreateTransactionRequest, CreatePhotoRequest, EditPhotosRequest, GetPublicAccessUrlsRequest, ListPhotosRequest, \
DeletePhotosRequest, InactivatePhotosRequest
from aliyunsdkcore.auth.credentials import RamRoleArnCredential
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.profile import region_provider
from aliyunsdkcore.request import CommonRequest from aliyunsdkcore.request import CommonRequest
region_id = 'cn-shanghai'
ak = 'LTAIWzPnmkJs2qpL'
secret = 'LIIq3HumctXPp0WT8c06yDiFbKKiVe'
region_provider.add_endpoint('Oss', region_id, 'oss-cn-shanghai.aliyuncs.com')
region_provider.add_endpoint('CloudPhoto', region_id, 'cloudphoto.cn-shanghai.aliyuncs.com')
aliyunsdkcore.request.set_default_protocol_type("https")
class UploadPhoto(AcsClient):
def __init__(self, ak='LTAIeS8aBuPBZxV2', secret='hyPeTaDQBQs6jetYcqY0BUdpacXTH3', region_id="cn-hongkong", class MyClient(AcsClient):
auto_retry=True, max_retry_time=3,
user_agent=None, port=80, timeout=DEFAULT_SDK_CONNECTION_TIMEOUT_IN_SECONDS, public_key_id=None, def __init__(self, arn, sessionName):
private_key=None, session_period=3600, credential=None, debug=False): super().__init__(region_id=region_id, credential=RamRoleArnCredential(ak, secret, arn, sessionName))
super().__init__(ak, secret, region_id, auto_retry, max_retry_time, user_agent, port, timeout, public_key_id, self.StoreName=None
private_key, session_period, credential, debug)
aliyunsdkcore.request.set_default_protocol_type("https")
self.domain = 'cloudphoto.cn-shanghai.aliyuncs.com'
self.version = '2017-07-11'
self.LibraryId = None
self.StoreName = None
def get_md5_01(self, file_path): def get_md5_01(self, file_path):
md5 = None md5 = None
@ -35,19 +38,6 @@ class UploadPhoto(AcsClient):
md5 = str(hash_code).lower() md5 = str(hash_code).lower()
return md5 return md5
def assumeRole(self):
request = CommonRequest(domain='sts.aliyuncs.com', version='2015-04-01',
action_name='AssumeRole')
RoleSessionName = 'pqh'
request.add_query_param('RoleArn', 'acs:ram::1098806312754985:role/aliyuncloudphotodefaultrole')
request.add_query_param('RoleSessionName', RoleSessionName)
response = json.loads(self.do_action_with_exception(request).decode())
print(response['Credentials']['SecurityToken'])
def createAction(self, action):
return CommonRequest(domain=self.domain, version=self.version,
action_name=action)
def do_action_with_exception(self, acs_request): def do_action_with_exception(self, acs_request):
return json.loads(super().do_action_with_exception(acs_request).decode()) return json.loads(super().do_action_with_exception(acs_request).decode())
@ -58,43 +48,30 @@ class UploadPhoto(AcsClient):
print('{index}:{name}'.format(index=index + 1, name=option[key])) print('{index}:{name}'.format(index=index + 1, name=option[key]))
return choose[(int)(input('输入{desc}\n'.format(desc=desc))) - 1] return choose[(int)(input('输入{desc}\n'.format(desc=desc))) - 1]
def getSL(self): def listPhotoStores(self):
request = self.createAction('ListPhotoStores') request = ListPhotoStoresRequest.ListPhotoStoresRequest()
response = self.do_action_with_exception(request) response = self.do_action_with_exception(request)
print('PhotoStores:') print('PhotoStores:')
photoStores = response['PhotoStores'] photoStores = response['PhotoStores']
self.StoreName = self.showOption(photoStores, 'Name', 'StoreName') self.StoreName = self.showOption(photoStores, 'Name', 'StoreName')
# request = self.createAction('GetPhotoStore') def listLibraries(self):
# request.add_query_param('StoreName', StoreName) request = FetchLibrariesRequest.FetchLibrariesRequest()
# response = self.do_action_with_exception(request) request.set_StoreName(self.StoreName)
# print(response)
request = self.createAction('FetchLibraries')
request.add_query_param('StoreName', self.StoreName)
response = self.do_action_with_exception(request) response = self.do_action_with_exception(request)
Libraries = response['Libraries'] self.Libraries = response['Libraries']
self.LibraryId = self.showOption(Libraries, 'LibraryId', 'LibraryId')
# request = self.createAction('GetLibrary')
# request.add_body_params('StoreName', StoreName)
# request.add_body_params('LibraryId', LibraryId)
# response = self.do_action_with_exception(request)
# print(response)
def uploadPhoto(self): def uploadPhoto(self):
if self.StoreName is None or self.LibraryId is None: if self.StoreName is None:
return '未选择StoreName、LibraryId' self.listPhotoStores()
request = CreateTransactionRequest.CreateTransactionRequest()
request = self.createAction('CreateTransaction') filePath = input('输入上传文件路径\n').replace('\\', '/')
filePath = input('输入上传文件路径') filePath = 'C:/Users/47418/Desktop/照片/IMG_20170218_212837.jpg'
fileName = filePath.split('/')[-1] fileName = filePath.split('/')[-1]
request.add_query_param('Size', os.path.getsize(filePath)) request.set_Size(os.path.getsize(filePath))
request.add_query_param('Ext', fileName[-fileName[::-1].index('.'):]) request.set_Ext(fileName[-fileName[::-1].index('.'):])
request.add_query_param('Md5', self.get_md5_01(filePath)) request.set_Md5(self.get_md5_01(filePath))
request.add_query_param('StoreName', self.StoreName) request.set_StoreName(self.StoreName)
request.add_query_param('LibraryId', self.LibraryId)
response = self.do_action_with_exception(request) response = self.do_action_with_exception(request)
print(response) print(response)
Upload = response['Transaction']['Upload'] Upload = response['Transaction']['Upload']
@ -104,48 +81,71 @@ class UploadPhoto(AcsClient):
OssEndpoint = Upload['OssEndpoint'] OssEndpoint = Upload['OssEndpoint']
ObjectKey = Upload['ObjectKey'] ObjectKey = Upload['ObjectKey']
auth = oss2.Auth(self.get_access_key(), self.get_access_secret()) auth = oss2.StsAuth(self._signer._session_credential[0], self._signer._session_credential[1],
self._signer._session_credential[2])
bucket = oss2.Bucket(auth, OssEndpoint, Bucket) bucket = oss2.Bucket(auth, OssEndpoint, Bucket)
with open(filePath, 'rb') as fileobj: with open(filePath, 'rb') as fileobj:
result = bucket.put_object(ObjectKey, fileobj) result = bucket.put_object(ObjectKey, fileobj)
print('文件上传状态{status}'.format(status=result.status)) print('文件上传状态{status}'.format(status=result.status))
request = self.createAction('CreatePhoto') request = CreatePhotoRequest.CreatePhotoRequest()
request.add_query_param('FileId', FileId) request.set_FileId(FileId)
request.add_query_param('PhotoTitle', fileName) request.set_PhotoTitle(fileName)
request.add_query_param('SessionId', SessionId) request.set_SessionId(SessionId)
request.add_query_param('StoreName', self.StoreName) request.set_StoreName(self.StoreName)
request.add_query_param('UploadType', 'manual') request.set_UploadType('manual')
request.add_query_param('LibraryId', self.LibraryId) response = self.do_action_with_exception(request)
print(response)
def listPhotos(self):
if self.StoreName == None:
self.listPhotoStores()
request = ListPhotosRequest.ListPhotosRequest()
request.set_StoreName(self.StoreName)
request.set_State('all')
response = self.do_action_with_exception(request) response = self.do_action_with_exception(request)
print(response) print(response)
return response['Photos']
def getPhoto(self):
    """Let the user pick one photo of the current store and return the choice.

    ``listPhotos()`` already returns the ``'Photos'`` entry of the response,
    so its result is handed to ``showOption`` as-is; indexing it with
    ``'Photos'`` a second time would subscript a list with a string.
    """
    return self.showOption(self.listPhotos(), 'IdStr', '照片Id')
def sharePhoto(self):
    """Share one user-selected photo for an hour and print its public URL response.

    Sends an EditPhotos request that sets the share expiry, then a
    GetPublicAccessUrls request for the same photo; both responses are printed.
    """
    # Pick a single photo id interactively. ``listPhotos()`` returns the whole
    # photo list, which must not be sent as if it were one photo id.
    IdStr = self.showOption(self.listPhotos(), 'IdStr', '照片Id')

    request = EditPhotosRequest.EditPhotosRequest()
    request.set_PhotoIds([IdStr])
    request.set_StoreName(self.StoreName)
    # Expiry is "now + 1 hour", converted from seconds to milliseconds.
    request.set_ShareExpireTime((int(round(time.time())) + 60 * 60) * 1000)
    response = self.do_action_with_exception(request)
    print(response)

    request = GetPublicAccessUrlsRequest.GetPublicAccessUrlsRequest()
    request.set_DomainType('OSS')
    request.set_PhotoIds([IdStr])
    request.set_StoreName(self.StoreName)
    request.set_ZoomType('style/2')
    response = self.do_action_with_exception(request)
    print(response)
def client(arn, sessionName):
    """Return an AcsClient that assumes the RAM role ``arn`` under ``sessionName``.

    Uses the module-level ``ak``, ``secret`` and ``region_id`` (the same values
    ``MyClient`` is built from) instead of repeating the literals here.
    """
    # NOTE(review): the access key pair itself is still hard-coded at module
    # level; it should be loaded from configuration and the committed key rotated.
    ram_role_arn_credential = RamRoleArnCredential(ak, secret, arn, sessionName)
    return AcsClient(region_id=region_id, credential=ram_role_arn_credential)
if __name__ == '__main__': if __name__ == '__main__':
client = UploadPhoto()
client.getSL() myClient = MyClient('acs:ram::1098806312754985:role/aliyunosstokengeneratorrole', 'pqh001')
request = client.createAction('ListPhotos') myClient.listPhotoStores()
request.add_query_param('StoreName', client.StoreName) request = FetchLibrariesRequest.FetchLibrariesRequest()
request.add_query_param('LibraryId', client.LibraryId) request.set_StoreName(myClient.StoreName)
request.add_query_param('State', 'all') myClient.listLibraries()
response = client.do_action_with_exception(request) storeName = myClient.StoreName
print(response) for Library in myClient.Libraries:
myClient = MyClient('acs:ram::1098806312754985:role/aliyunosstokengeneratorrole', Library['LibraryId'])
IdStr = client.showOption(response['Photos'], 'IdStr', '照片Id') for id in myClient.listPhotos():
request=InactivatePhotosRequest.InactivatePhotosRequest()
request = client.createAction('EditPhotos') request.set_StoreName(storeName)
request.add_query_param('PhotoId.1', IdStr) request.set_PhotoIds([id['IdStr']])
request.add_query_param('StoreName', client.StoreName) response=myClient.do_action_with_exception(request)
request.add_query_param('LibraryId', client.LibraryId) print(response)
request.add_query_param('ShareExpireTime', int(round(time.time() * 1000)) + 60 * 60)
response = client.do_action_with_exception(request)
print(response)
request = client.createAction('GetPublicAccessUrls')
request.add_query_param('DomainType', 'OSS')
request.add_query_param('PhotoId.1', IdStr)
request.add_query_param('StoreName', client.StoreName)
request.add_query_param('ZoomType', 'style/1')
request.add_query_param('LibraryId', client.LibraryId)
response = client.do_action_with_exception(request)
print(response)

@ -0,0 +1,132 @@
import json
import threading
from concurrent import futures
import requests
import xlrd
from lxml import etree
from PixivSearch.dao.bangumi import episodeIdToCid
class Comment:
    """Count how often each danmaku (bullet comment) text occurs across comment pages.

    Results accumulate in ``self.obj``: ``obj['data']`` maps a comment text to
    its number of occurrences, and ``obj['flag']`` is set to True once any
    counted comment contains one of ``self.keywords``.
    """

    lock = threading.Lock()  # shared lock guarding updates to ``obj`` from worker threads

    def __init__(self, keywords_=None) -> None:
        """Store the keywords to look for (``None`` means none) and reset the tally."""
        super().__init__()
        self.obj = {'data': {}, 'flag': False}
        self.keywords = keywords_

    def getCommentSort(self, cids):
        """Print the 50 most frequent comments over all comment pages of ``cids``."""
        urls = []
        for cid in cids:
            urls.extend(getCidUrls(cid))
        with futures.ThreadPoolExecutor(32) as executor:
            # NOTE: the iterator returned by map() is never consumed, so an
            # exception raised inside a worker is not re-raised here.
            executor.map(self.count, urls)
        ranking = sorted(self.obj["data"].items(), key=lambda d: d[1], reverse=True)[:50]
        for index, data in enumerate(ranking):
            print('{index}:{data}'.format(index=index+1, data=data))

    def count(self, url, desc=None):
        """Download one comment page and add its comment texts to the tally.

        ``desc``, when given, is printed before the URL.
        """
        content = requests.get(url).content
        comment_selector = etree.HTML(content)
        if desc is not None:
            print(desc)
        print("url=%s" % url)
        self._tally(comment_selector.xpath('//i//d/text()'))

    def _tally(self, comments):
        """Add each text in ``comments`` to the counts and update the keyword flag."""
        # A Comment built without keywords must not fail when checking them.
        keywords = self.keywords if self.keywords is not None else ()
        data = self.obj["data"]
        for comment in comments:
            # The lookup and the write happen under one lock acquisition so two
            # workers seeing the same new comment cannot both store 1.
            with self.lock:
                data[comment] = data.get(comment, 0) + 1
                if not self.obj["flag"] and any(keyword in comment for keyword in keywords):
                    self.obj["flag"] = True
# Build the comment-page URLs (history pages when available) for a cid
def getCidUrls(cid):
    """Return the list of comment XML URLs to fetch for ``cid``.

    The ``rolldate`` endpoint is queried first. When it answers with a
    non-empty body, one ``dmroll`` URL is produced per entry, using the
    entry's ``timestamp``; otherwise the single ``<cid>.xml`` URL is returned.
    """
    response = requests.get("https://comment.bilibili.com/rolldate,%d" % cid)
    if len(response.text) == 0:
        return ["https://comment.bilibili.com/%d.xml" % cid]
    return [
        "https://comment.bilibili.com/dmroll,%s,%d" % (entry['timestamp'], cid)
        for entry in json.loads(response.text)
    ]
# Download the historical danmaku of a cid into one XML file
def parseXmlByHistory(path, cid, size=None):
    """Collect the comments of ``cid`` and write them to ``path`` as XML.

    ``size`` is passed through to ``parseXml`` as the target fill rate
    (e.g. ``'50%'``). Without a target the file is always written; with a
    target it is written only when ``parseXml`` reports the target was reached.
    """
    dlist = set()
    flag = parseXml(getCidUrls(cid), dlist, size)
    if size is None or flag:
        # ``with`` guarantees the handle is closed even if a write fails.
        with open(path, 'wb') as f:
            f.write(b'<?xml version="1.0" encoding="UTF-8"?><i>')
            for i in dlist:
                f.write(('\r\n' + i).encode())
            f.write(b'\r\n</i>')
def parseXml(urls, dlist, size=None):
    """Fetch comment pages and add their ``<d>`` elements to the set ``dlist``.

    Args:
        urls: one URL string or an iterable of URL strings.
        dlist: set that receives one serialized ``<d p="...">text</d>`` string
            per comment element that has text.
        size: optional target fill rate as a percent string such as ``'50%'``.

    Returns:
        True as soon as ``len(dlist) / maxlimit`` reaches ``size`` after a page
        that contained comments; False when all pages were processed without
        reaching it (always False when ``size`` is None).
    """
    from xml.sax.saxutils import escape

    if isinstance(urls, str):
        urls = [urls]
    if size is not None:
        size = float(size.strip('%')) / 100.0
    for url in urls:
        content = requests.get(url).content
        comment_selector = etree.HTML(content)
        texts = comment_selector.xpath('//i//d/text()')
        maxlimit = int(comment_selector.xpath('//i//maxlimit/text()')[0])
        if len(texts) > 0:
            print('弹幕数:{list},最大弹幕数:{maxlimit},弹幕池填充:{p}'.format(list=len(texts), maxlimit=maxlimit,
                                                                 p='%.2f%%' % (len(texts) / maxlimit * 100)))
            for element in comment_selector.xpath('//i//d'):
                element_texts = element.xpath("text()")
                if len(element_texts) > 0:
                    # The parser hands back decoded text, so markup characters
                    # must be escaped again before being written into XML.
                    fstr = '<d p="{p}">{content}</d>'.format(
                        p=escape(str(element.xpath("@p")[0]), {'"': '&quot;'}),
                        content=escape(str(element_texts[0])))
                    dlist.add(fstr)
            currentSize = len(dlist) / maxlimit
            print('填充率:{l}'.format(l='%.2f%%' % (currentSize * 100)))
            if size is not None and currentSize >= size:
                return True
    return False
if __name__ == '__main__':
    # Earlier one-off invocations, kept for reference:
    # parseXmlByHistory('10815558.xml', 10815558)
    # Comment('').getCommentSort(episodeIdToCid(172095))

    # Dump the comment history of each cid into "<cid>.xml" in the working directory.
    cids = [11664778, 11662541, 11661412, 11664304, 11666093]
    for cid in cids:
        target = '{cid}.xml'.format(cid=cid)
        parseXmlByHistory(target, cid)

    # Disabled batch mode: read (aid, cid, title) rows from an Excel sheet and
    # dump every row whose title contains the marker, up to a 50% fill rate.
    # path = 'D:/QQ/1029559041/FileRecv/tmp001.xlsx'
    # ExcelFile = xlrd.open_workbook(path)
    #
    #
    # for sheetName in ExcelFile.sheet_names():
    #     sheet = ExcelFile.sheet_by_name(sheetName)
    #     for row in sheet.get_rows():
    #         try:
    #             row[5].value.index('日剧')
    #             cid = int(row[2].value)
    #             print('aid={aid},cid={cid},title={title}'.format(aid=int(row[1].value),cid=cid,title=row[5].value))
    #             parseXmlByHistory('{cid}.xml'.format(cid=cid),cid,'50%')
    #         except BaseException as e:
    #             repr(e)

@ -92,7 +92,7 @@ def listen():
_thread.start_new_thread(listen, ()) _thread.start_new_thread(listen, ())
#遍历所有专题视频收藏数信息
def getIds(): def getIds():
seasonIdList = [] seasonIdList = []
page = 1 page = 1
@ -140,22 +140,12 @@ def getIds():
logger.error(repr(e)) logger.error(repr(e))
continue continue
#根据aid获取cid
# def testA(): def getCid(aid, type=None):
# req = requests.post('https://api.bilibili.com/x/report/web/heartbeat',
# data={"aid": 29416,"cid":49052,"csrf": "c0d296db7e33085f9f4730cfee66660b"},
# cookies=_cookies)
# print(req.status_code)
_cookies = {'DedeUserID': '4372744', 'DedeUserID__ckMd5': 'e8179b74444cae8e',
'SESSDATA': '919b17d2%2C1524917631%2C3eede719'}
def getCid(aid, type=True):
while True and aid > 0: while True and aid > 0:
url = "https://api.bilibili.com/x/web-interface/archive/stat?aid=%d" % aid url = "https://api.bilibili.com/x/web-interface/archive/stat?aid=%d" % aid
print(url) print(url)
req = requests.get(url, cookies=_cookies) req = requests.get(url)
code = json.loads(req.text)["code"] code = json.loads(req.text)["code"]
if code == 0: if code == 0:
req = requests.get("https://www.bilibili.com/video/av%d" % aid) req = requests.get("https://www.bilibili.com/video/av%d" % aid)
@ -166,17 +156,20 @@ def getCid(aid, type=True):
cid = json_obj['videoData']['pages'][0]['cid'] cid = json_obj['videoData']['pages'][0]['cid']
print('cid=%s' % cid) print('cid=%s' % cid)
return cid return cid
if type: if type is None:
aid = aid - 1 break
else: else:
aid = aid + 1 if type:
aid = aid - 1
else:
aid = aid + 1
#根据aid获取cid
def getCids(aid): def getCids(aid):
s = {"min": getCid(aid, True), "max": getCid(aid, False)} s = {"min": getCid(aid, True), "max": getCid(aid, False)}
return s return s
#获取专题所有cid
def episodeIdToCid(episode_id): def episodeIdToCid(episode_id):
cids = [] cids = []
url = "https://www.bilibili.com/bangumi/play/ep%d" % episode_id url = "https://www.bilibili.com/bangumi/play/ep%d" % episode_id
@ -189,59 +182,9 @@ def episodeIdToCid(episode_id):
return cids return cids
def parseXml(url):
print("url=%s" % url)
comment_selector = etree.HTML(requests.get(url).content)
comment_content = comment_selector.xpath('//i')
for comment_each in comment_content:
comments = comment_each.xpath('//d/text()')
if comments:
for comment in comments:
if comment in obj["data"]:
with lock:
obj["data"][comment] = obj["data"][comment] + 1
else:
with lock:
obj["data"][comment] = 1
if not obj["flag"]:
for keyword in keywords:
if keyword in comment:
obj["flag"] = True
lock = threading.Lock() # 多线程全局资源锁
def loadData(cids):
params = []
for cid in cids:
url = "https://comment.bilibili.com/rolldate,%d" % cid
req = requests.get(url)
urls = ["https://comment.bilibili.com/%d.xml" % cid]
if len(req.text) > 0:
for i in json.loads(req.text):
urls.append("https://comment.bilibili.com/dmroll,%s,%d" % (i['timestamp'], cid))
for url in urls:
params.append(url)
with futures.ThreadPoolExecutor(32) as executor:
executor.map(parseXml, params)
return obj
def getCommentSort(cids, keywords_):
global keywords, obj
keywords = keywords_
obj = {"data": {}, "flag": False}
return loadData(cids)
if __name__ == '__main__': if __name__ == '__main__':
# print(getCids(29416)) print(getCids(29416))
# obj = loadData(
# [49052, 49053, 51525, 51526, 53407, 54180, 55295, 55296, 57255, 57256, 59288, 59289, 61559, 61560, 64034, 64035,
# 67024, 67025, 69284, 73333, 73334, 74024, 74025], ['穹'])
f = getCommentSort(episodeIdToCid(172095), [])
# obj = loadData([34807341], []) # obj = loadData([34807341], [])
for i in sorted(f["data"].items(), key=lambda d: d[1], reverse=True)[:50]:
print(i)

@ -18,7 +18,7 @@ handlers=fileHandler
[handler_consoleHandler] [handler_consoleHandler]
class=StreamHandler class=StreamHandler
level=DEBUG level=INFO
formatter=fmt formatter=fmt
args=(sys.stdout,) args=(sys.stdout,)

Loading…
Cancel
Save