"""Crawler for bilibili bangumi (series) metadata.

``getIds`` walks the global season index page by page and puts each page's
``(bangumi_id, season_id, media_id)`` tuples on a bounded queue.  A background
thread (``listen``) drains that queue and stores every entry through the Django
models ``mediaInfo`` and ``stat`` (see ``save``).

The module also offers helpers that map a video ``aid`` or an episode id to the
``cid`` values embedded in the corresponding bilibili pages.
"""
import _thread
import json
import math
import os
from concurrent import futures
from queue import Queue

import django
import requests
from bs4 import BeautifulSoup

from PixivSearch.settings import logger

# Django must be configured before the model module can be imported.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PixivSearch.settings")
django.setup()
from PixivSearch.model.config import mediaInfo, stat

# Most recently stored mediaInfo row (exposed through get_()).
current_mediaInfo = None
# Cooperative stop flag: set by stop_(), reset by getIds().
isStop = None

# Timeout (seconds) applied to every outgoing HTTP request.
_REQUEST_TIMEOUT = 10
# Upper bound on consecutive attempts for one URL / one index page.
_MAX_ATTEMPTS = 3


def stop_():
    """Ask the running crawl (``getIds`` / ``save``) to stop."""
    global isStop
    isStop = True


def _get_with_retry(url):
    """GET *url*, retrying a bounded number of times on request errors.

    Returns the response, or ``None`` when every attempt failed or a stop was
    requested in the meantime.
    """
    for _ in range(_MAX_ATTEMPTS):
        if isStop:
            return None
        try:
            return requests.get(url, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as e:
            logger.error(repr(e))
    return None


def save(params):
    """Fetch one media page and store its name and statistics.

    :param params: sequence ``(bangumi_id, season_id, media_id)``.

    Does nothing once a stop has been requested.  Request failures are retried
    a bounded number of times; parse and database errors are logged, never
    raised, because the function runs inside a thread pool whose results are
    not inspected.
    """
    global current_mediaInfo
    if isStop:
        return
    logger.info(params)
    bangumi_id = params[0]
    season_id = params[1]
    media_id = params[2]

    url = "https://www.bilibili.com/bangumi/media/md%d" % media_id
    req = _get_with_retry(url)
    if req is None:
        # All attempts failed (or stop requested): there is no response to use.
        return
    logger.info("request_url=%s,status_code=%d" % (url, req.status_code))
    if req.status_code != 200:
        return

    try:
        # Parsing lives inside the try so a changed page layout gets logged.
        json_obj = getJsonText(req, 3)
        if 'mediaInfo' in json_obj and 'stat' in json_obj['mediaInfo'] and 'chn_name' in json_obj['mediaInfo']:
            stat_info = json_obj['mediaInfo']['stat']
            print(stat_info)
            mediaInfo(bangumi_id=bangumi_id, season_id=season_id, media_id=media_id,
                      chn_name=json_obj['mediaInfo']['chn_name']).save()
            current_mediaInfo = mediaInfo.objects.get(pk=season_id)
            stat(id=season_id, danmakus=int(stat_info['danmakus']), favorites=stat_info['favorites'],
                 views=stat_info['views']).save()
    except Exception as e:
        logger.error(repr(e))


def getJsonText(req, index):
    """Extract the JSON object assigned inside the *index*-th ``<script>`` tag.

    The script text is expected to look like ``name = {...};(function ...``:
    everything between the first ``=`` and two characters before the first
    ``function`` is parsed as JSON.

    :param req: response object exposing ``.text`` (HTML).
    :param index: zero-based position of the script tag.
    :raises IndexError: the page has no script tag at *index*.
    :raises ValueError: the markers are missing or the payload is not JSON.
    """
    soup = BeautifulSoup(req.text, 'lxml')
    script = soup.select("script")[index].text
    start = script.index("=") + 1
    end = script.index("function") - 2
    return json.loads(script[start:end])


def get_():
    """Return the most recently stored ``mediaInfo`` row (or ``None``)."""
    return current_mediaInfo


page_size = 10
queue = Queue(page_size)


def listen():
    """Forever take id batches from ``queue`` and store them with a thread pool."""
    while True:
        ids = queue.get()
        try:
            # Leaving the with-block waits for every save() call to finish.
            with futures.ThreadPoolExecutor(page_size) as executor:
                executor.map(save, ids)
            logger.info('结束爬虫')
        except Exception as e:
            logger.error(repr(e))


_thread.start_new_thread(listen, ())


def _strip_jsonp(text, callback):
    """Return the JSON payload of a ``callback(...);`` JSONP response.

    Only the leading ``callback(`` and the trailing ``);`` / ``)`` are removed,
    so the same characters occurring inside the payload are left untouched.
    """
    body = text.strip()
    prefix = callback + '('
    if body.startswith(prefix):
        body = body[len(prefix):]
        if body.endswith(');'):
            body = body[:-2]
        elif body.endswith(')'):
            body = body[:-1]
    return body


def _fetch_season_ids(season_id, callback):
    """Look up the ids belonging to one season.

    Returns ``(bangumi_id, season_id, media_id)``, or ``None`` when the
    response is not 200 or lacks the ``bangumi_id`` / ``media`` fields.
    Request and parse errors propagate to the caller.
    """
    url = 'https://bangumi.bilibili.com/jsonp/seasoninfo/%d.ver?callback=%s&jsonp=jsonp' % (
        season_id, callback)
    logger.info(url)
    req = requests.get(url, timeout=_REQUEST_TIMEOUT)
    if req.status_code != 200:
        return None
    child_json_obj = json.loads(_strip_jsonp(req.text, callback))
    if 'result' not in child_json_obj or 'bangumi_id' not in child_json_obj['result']:
        return None
    result = child_json_obj['result']
    if 'media' not in result:
        return None
    return int(result['bangumi_id']), season_id, int(result['media']['media_id'])


# Walk every bangumi in the season index and queue its ids for saving.
def getIds():
    """Crawl the global season index and enqueue the ids found on each page.

    Resets the stop flag, then requests index pages until the last page is
    reached or ``stop_()`` is called.  Each page yields one list of
    ``(bangumi_id, season_id, media_id)`` tuples that is put on ``queue``.

    A page that cannot be fetched or parsed is retried at most
    ``_MAX_ATTEMPTS`` times in a row; after that the crawl is aborted instead
    of requesting the same page forever.  A failure for a single season is
    logged and skipped so the ids already gathered for that page are kept.
    """
    global isStop
    isStop = False
    seen_season_ids = set()
    page = 1
    pages = None
    failures = 0
    name = 'seasonListCallback'

    while not isStop and (pages is None or page <= pages):
        url = 'https://bangumi.bilibili.com/web_api/season/index_global?page=%d&page_size=%d' % (page, page_size)
        logger.info(url)
        result = None
        try:
            req = requests.get(url, timeout=_REQUEST_TIMEOUT)
            if req.status_code == 200:
                json_obj = json.loads(req.text)
                if 'result' in json_obj and 'list' in json_obj['result']:
                    result = json_obj['result']
                    if pages is None and 'count' in result:
                        pages = int(math.ceil(int(result['count']) / page_size))
        except Exception as e:
            logger.error(repr(e))
            result = None

        if result is None:
            # The page was not usable: retry it, but only a bounded number of times.
            failures += 1
            if failures >= _MAX_ATTEMPTS:
                logger.error('giving up on %s after %d failed attempts' % (url, failures))
                break
            continue
        failures = 0

        ids = []
        for bangumi in result['list']:
            if isStop:
                break
            if 'season_id' not in bangumi:
                continue
            try:
                season_id = int(bangumi['season_id'])
                if season_id in seen_season_ids:
                    continue
                entry = _fetch_season_ids(season_id, name)
            except Exception as e:
                # One bad season must not discard the rest of the page.
                logger.error(repr(e))
                continue
            if entry is not None:
                ids.append(entry)
                seen_season_ids.add(season_id)

        page = page + 1
        logger.info('获取id数量%d' % len(ids))
        queue.put(ids)


# Get the cid for an aid.
def getCid(aid, type=None):
    """Return the first page's ``cid`` of video *aid*, or ``None``.

    :param aid: video id; nothing is requested unless it is positive.
    :param type: search direction when *aid* itself has no usable video —
        ``None`` tries *aid* only, a truthy value walks downwards
        (``aid - 1``), a falsy non-``None`` value walks upwards (``aid + 1``).
        The name shadows the builtin but is kept for existing callers.
    """
    while aid > 0:
        url = "https://api.bilibili.com/x/web-interface/archive/stat?aid=%d" % aid
        print(url)
        req = requests.get(url, timeout=_REQUEST_TIMEOUT)
        code = json.loads(req.text)["code"]
        if code == 0:
            req = requests.get("https://www.bilibili.com/video/av%d" % aid, timeout=_REQUEST_TIMEOUT)
            if req.status_code == 200:
                json_obj = getJsonText(req, 9)
                if "videoData" in json_obj and "pages" in json_obj['videoData'] and len(
                        json_obj['videoData']['pages']) > 0 and "cid" in json_obj['videoData']['pages'][0]:
                    cid = json_obj['videoData']['pages'][0]['cid']
                    print('cid=%s' % cid)
                    return cid
        if type is None:
            break
        if type:
            aid = aid - 1
        else:
            aid = aid + 1
    return None


# Get the nearest cids below and above an aid.
def getCids(aid):
    """Return ``{"min": ..., "max": ...}``: cids found searching down and up from *aid*."""
    return {"min": getCid(aid, True), "max": getCid(aid, False)}


# Get every cid of a series.
def episodeIdToCid(episode_id):
    """Return the ``cid`` of every entry in the episode page's ``epList``.

    Returns an empty list when the page data has no ``epList`` key.
    """
    url = "https://www.bilibili.com/bangumi/play/ep%d" % episode_id
    print("url=%s" % url)
    req = requests.get(url, timeout=_REQUEST_TIMEOUT)
    json_obj = getJsonText(req, 8)
    if "epList" not in json_obj:
        return []
    return [episode['cid'] for episode in json_obj["epList"]]


if __name__ == '__main__':
    # print(getCids(29416))
    req = requests.post('https://api.bilibili.com/x/v2/history/shadow/set', '')
    # obj = loadData([34807341], [])