You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
62 lines
1.8 KiB
62 lines
1.8 KiB
7 years ago
|
import json
|
||
|
import os
|
||
|
import time
|
||
|
|
||
|
import django
|
||
|
import requests
|
||
|
import threading
|
||
|
from bs4 import BeautifulSoup
|
||
|
|
||
|
from PixivSearch.settings import logger
|
||
|
|
||
|
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PixivSearch.settings")
|
||
|
django.setup()
|
||
|
from PixivSearch.model.config import mediaInfo, stat
|
||
|
|
||
|
# Module-wide record returned by bangumi.get(); bangumi.save() rebinds it to the
# row it has just stored. It starts as a placeholder instance (id=0,
# chn_name='null') that is constructed here but not saved.
current_mediaInfo = mediaInfo(id=0, chn_name='null')
|
||
|
# Crawl switch read by bangumi.go() on every iteration: while truthy the loop
# keeps crawling; once falsy the loop logs a stop message and exits.
flag = True
|
||
|
|
||
|
|
||
|
class bangumi(threading.Thread):
    """Worker thread that crawls bilibili bangumi media pages over an ID range.

    For every ID in ``range(begin, end)`` the thread requests
    ``https://www.bilibili.com/bangumi/media/md<ID>``, pulls the JSON embedded
    in the page and stores the title and statistics through the ``mediaInfo``
    and ``stat`` models.  The loop stops early once the module-level ``flag``
    becomes falsy.
    """

    # Class-level defaults: ``begin``/``end`` are overridden per instance in
    # ``__init__``; ``id`` is set by ``go`` before each ``save`` call.
    begin = 0
    end = 0
    id = 0
    # Seconds to wait on a single HTTP request.  Without a timeout a stalled
    # connection would block the crawler thread indefinitely.
    request_timeout = 10

    def __init__(self, begin, end):
        """Create a crawler for IDs from ``begin`` (inclusive) to ``end`` (exclusive)."""
        super().__init__()
        self.begin = begin
        self.end = end

    def save(self):
        """Fetch the media page for ``self.id`` and persist its title and stats.

        Nothing is stored when the response status is not 200.  On success the
        module-level ``current_mediaInfo`` is rebound to the stored record.

        Raises:
            requests.RequestException: the request failed or timed out.
            IndexError: the page has fewer than four ``<script>`` tags.
            ValueError: the script text lacks the expected ``=``/``function``
                markers, or the extracted text is not valid JSON.
            KeyError: the JSON lacks the expected ``mediaInfo`` fields.
        """
        global current_mediaInfo

        req = requests.get(
            "https://www.bilibili.com/bangumi/media/md%d" % self.id,
            timeout=self.request_timeout,
        )
        if req.status_code != 200:
            return

        tag = BeautifulSoup(req.text, 'lxml')
        # The data is read from the fourth <script> tag: keep the text after
        # the first "=" and stop two characters before the first "function".
        script = tag.select("script")[3].text
        json_str = script[script.index("=") + 1:script.index("function") - 2]
        media = json.loads(json_str)['mediaInfo']
        stat_info = media['stat']
        print(media['chn_name'])
        print(stat_info)

        mediaInfo(id=self.id, chn_name=media['chn_name']).save()
        # Re-read the row so the shared record reflects what was stored.
        current_mediaInfo = mediaInfo.objects.get(pk=self.id)
        stat(
            id=self.id,
            danmakus=int(stat_info['danmakus']),
            favorites=stat_info['favorites'],
            views=stat_info['views'],
        ).save()

    def run(self) -> None:
        """Thread entry point: crawl the range given to the constructor."""
        self.go(self.begin, self.end)

    def get(self):
        """Return the module-level ``current_mediaInfo`` record."""
        return current_mediaInfo

    def go(self, start, end):
        """Crawl every ID in ``range(start, end)``, one request per second.

        The module-level ``flag`` is checked before each ID; when it is falsy
        the loop logs a stop message and exits.  A network or page-parsing
        failure for one ID is logged and that ID is skipped, so a single bad
        page does not end the whole crawl.
        """
        for num in range(start, end):
            if not flag:
                logger.info("停止爬虫")
                break

            # Pause between requests (one second per ID).
            time.sleep(1)
            logger.info("爬虫进度:%d" % num)
            self.id = num
            try:
                self.save()
            except (requests.RequestException, IndexError, KeyError,
                    TypeError, ValueError) as exc:
                logger.info("爬取失败 md%d: %r" % (num, exc))
|