"""Crawler thread for bilibili bangumi media pages.

For every id in a half-open range the crawler fetches
``https://www.bilibili.com/bangumi/media/md<id>``, extracts the JSON state
embedded in one of the page's ``<script>`` tags and stores the title and
statistics through the ``mediaInfo`` and ``stat`` models.
"""
import json
import os
import threading
import time

import django
import requests
from bs4 import BeautifulSoup

from PixivSearch.settings import logger

# Django must be configured before any model module can be imported.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PixivSearch.settings")
django.setup()
from PixivSearch.model.config import mediaInfo, stat  # noqa: E402

# Seconds to wait for bilibili before giving up on a single request.
# Without a timeout a stalled connection would block the crawler forever.
REQUEST_TIMEOUT = 10

# Most recently stored media record; placeholder until the first save.
current_mediaInfo = mediaInfo(id=0, chn_name='null')
# Module-wide run flag: cleared by ``bangumi.stop()``, re-armed by ``go()``.
flag = True


class bangumi(threading.Thread):
    """Thread that crawls media ids in ``[begin, end)`` one per second."""

    # Class-level defaults; ``begin``/``end`` are overwritten per instance
    # in ``__init__`` and ``id`` is set to the id currently being crawled.
    begin = 0
    end = 0
    id = 0
    flag = True  # not read by this class; kept for backward compatibility

    def __init__(self, begin, end):
        """Create a crawler for the half-open id range ``[begin, end)``."""
        super().__init__()
        self.begin = begin
        self.end = end

    def save(self):
        """Fetch the page for ``self.id`` and store its title and stats.

        Does nothing when the server answers with a status other than 200.
        On success the module-level ``current_mediaInfo`` is replaced by the
        freshly stored record.

        Raises:
            requests.RequestException: on network failure or timeout.
            IndexError, ValueError, KeyError, TypeError: when the page does
                not have the expected script/JSON layout.
        """
        global current_mediaInfo

        response = requests.get(
            "https://www.bilibili.com/bangumi/media/md%d" % self.id,
            timeout=REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            return

        page = BeautifulSoup(response.text, 'lxml')
        # The state JSON lives in the fourth <script> tag of the page.
        script = page.select("script")[3].text
        # Keep the text between the first "=" and two characters before the
        # first "function" (presumably a `name={...};(function` layout --
        # TODO confirm against a live page).
        json_str = script[script.index("=") + 1:script.index("function") - 2]
        media = json.loads(json_str)['mediaInfo']
        stat_info = media['stat']
        print(media['chn_name'])
        print(stat_info)

        mediaInfo(id=self.id, chn_name=media['chn_name']).save()
        current_mediaInfo = mediaInfo.objects.get(pk=self.id)
        stat(
            id=self.id,
            danmakus=int(stat_info['danmakus']),
            favorites=stat_info['favorites'],
            views=stat_info['views'],
        ).save()

    def run(self) -> None:
        """Thread entry point: crawl the configured range."""
        self.go(self.begin, self.end)

    def get(self):
        """Return the most recently stored ``mediaInfo`` record."""
        return current_mediaInfo

    def stop(self):
        """Ask the running crawl to stop before its next id."""
        global flag
        flag = False

    def go(self, start, end):
        """Crawl ids ``start`` .. ``end - 1``, pausing one second before each.

        Re-arms the module-level run flag, then checks it before every id so
        that ``stop()`` ends the loop. A failure on one id is logged and that
        id is skipped, so a single bad page or network hiccup no longer
        aborts the whole crawl.
        """
        global flag
        flag = True
        for num in range(start, end):
            if not flag:
                logger.info("停止爬虫")
                break
            time.sleep(1)
            logger.info("爬虫进度:%d" % num)
            self.id = num
            try:
                self.save()
            except (requests.RequestException, LookupError,
                    ValueError, TypeError):
                # LookupError covers a missing <script> tag or JSON key;
                # ValueError covers a missing marker and invalid JSON.
                logger.exception("Failed to crawl media id %d; skipping", num)