diff --git a/PixivSearch/dao/bangumi.py b/PixivSearch/dao/bangumi.py
index 6a3c3eb..863709f 100644
--- a/PixivSearch/dao/bangumi.py
+++ b/PixivSearch/dao/bangumi.py
@@ -1,10 +1,12 @@
+import _thread
 import json
 import os
+import random
 import time
+from concurrent import futures
 
 import django
 import requests
-import threading
 from bs4 import BeautifulSoup
 
 from PixivSearch.settings import logger
@@ -15,56 +17,84 @@ from PixivSearch.model.config import mediaInfo, stat
 
 current_mediaInfo = mediaInfo(id=0, chn_name='null')
 
-flag = True
+isStop = False
+executors = None
 
 
-class bangumi(threading.Thread):
-    begin = 0
-    end = 0
-    id = 0
-    flag = True
+def check():
+    """Poll the stop flag once a second; shut the pool down when it is set."""
+    while True:
+        if isStop:
+            logger.info('停止多线程爬虫')
+            # The pool only exists once threadSave() has been started.
+            if executors is not None:
+                executors.shutdown()
+            break
+        time.sleep(1)
 
-    def __init__(self, begin, end):
-        threading.Thread.__init__(self)
-        self.begin = begin
-        self.end = end
 
-    def save(self):
-        req = requests.get("https://www.bilibili.com/bangumi/media/md%d" % self.id)
-        if (req.status_code == 200):
-            tag = BeautifulSoup(req.text, 'lxml')
-            script = tag.select("script")[3].text
-            json_str = script[script.index("=") + 1:script.index("function") - 2]
-            json_obj = json.loads(json_str)
+def save(md):
+    """Fetch bangumi media page ``md`` and store its mediaInfo and stat rows.
+
+    Does nothing once stop_() has been called.  Request, parsing and
+    database errors are logged instead of raised, because the pool never
+    reads the results of executor.map() and would drop them silently.
+    """
+    if isStop:
+        return
+    # Random 1-3 second delay before every request.
+    time.sleep(random.randint(1, 3))
+    url = "https://www.bilibili.com/bangumi/media/md%d" % md
+    try:
+        # Without a timeout a stalled connection would block this worker forever.
+        req = requests.get(url, timeout=10)
+    except requests.RequestException:
+        logger.exception("request failed: %s" % url)
+        return
+
+    logger.info("request_url=%s,status_code=%d" % (url, req.status_code))
+    if req.status_code == 200:
+        try:
+            tag = BeautifulSoup(req.text, 'lxml')
+            script = tag.select("script")[3].text
+            json_str = script[script.index("=") + 1:script.index("function") - 2]
+            json_obj = json.loads(json_str)
             stat_info = json_obj['mediaInfo']['stat']
             print(json_obj['mediaInfo']['chn_name'])
             print(stat_info)
-            mediaInfo(id=self.id, chn_name=json_obj['mediaInfo']['chn_name']).save()
+            mediaInfo(id=md, chn_name=json_obj['mediaInfo']['chn_name']).save()
             global current_mediaInfo
-            current_mediaInfo = mediaInfo.objects.get(pk=self.id)
-            stat(id=self.id, danmakus=int(stat_info['danmakus']), favorites=stat_info['favorites'],
+            current_mediaInfo = mediaInfo.objects.get(pk=md)
+            stat(id=md, danmakus=int(stat_info['danmakus']), favorites=stat_info['favorites'],
                  views=stat_info['views']).save()
+        except Exception:
+            # logger.exception records the traceback along with the message.
+            logger.exception("发生异常")
 
+
+def get_():
+    """Return the mediaInfo row most recently stored by save()."""
+    return current_mediaInfo
+
+
+def threadSave(start, end):
+    """Crawl media ids in range(start, end) on a pool of 32 worker threads.
+
+    Blocks until the pool has finished, so run it off the request thread.
+    """
+    global executors, isStop
+    try:
+        executors = futures.ThreadPoolExecutor(32)
+        isStop = False
+        with executors as executor:
+            # Leaving the with-block waits for every submitted task.
+            executor.map(save, range(start, end))
+        logger.info('结束爬虫')
+    except Exception:
+        logger.exception("crawler pool failed")
-    def run(self) -> None:
-        self.go(self.begin, self.end)
-
-    def get(self):
-        global current_mediaInfo
-        return current_mediaInfo
-
-    def stop(self):
-        global flag
-        flag = False
-
-    def go(self, start, end):
-        global flag
-        flag = True
-        for num in range(start, end):
-            if flag:
-                time.sleep(1)
-                logger.info("爬虫进度:%d" % num)
-                self.id = num
-                self.save()
-            else:
-                logger.info("停止爬虫")
-                break
+
+
+def stop_():
+    """Ask the crawler to stop; save() skips every id it has not started yet."""
+    global isStop
+    isStop = True
diff --git a/PixivSearch/logging.conf b/PixivSearch/logging.conf
index 9203d73..005e412 100644
--- a/PixivSearch/logging.conf
+++ b/PixivSearch/logging.conf
@@ -29,4 +29,5 @@ formatter=fmt
 args=('pixiv.log','a','utf-8',False)
 
 [formatter_fmt]
-format=%(asctime)s - %(module)s:%(lineno)d - %(levelname)s - %(message)s
\ No newline at end of file
+format=%(asctime)s - %(name)s - %(levelname)s - %(module)s :%(message)s
+datefmt=%Y-%m-%d %H:%M:%S
\ No newline at end of file
diff --git a/PixivSearch/settings.py b/PixivSearch/settings.py
index 928f83d..c1f0004 100644
--- a/PixivSearch/settings.py
+++ b/PixivSearch/settings.py
@@ -76,8 +76,13 @@ WSGI_APPLICATION = 'PixivSearch.wsgi.application'
 
 DATABASES = {
     'default': {
-        'ENGINE': 'django.db.backends.sqlite3',
-        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
+        'ENGINE': 'django.db.backends.mysql',
+        'NAME': 'bangumi',
+        'USER': 'bilibili',
+        # Read the password from the environment; '2233' is only the local default.
+        'PASSWORD': os.environ.get('BANGUMI_DB_PASSWORD', '2233'),
+        'HOST': '127.0.0.1',
+        'PORT': '3306',
     }
 }
 
@@ -106,13 +111,13 @@ AUTH_PASSWORD_VALIDATORS = [
 
 LANGUAGE_CODE = 'en-us'
 
-TIME_ZONE = 'UTC'
+TIME_ZONE = 'Asia/Shanghai'
 
 USE_I18N = True
 
 USE_L10N = True
 
-USE_TZ = True
+USE_TZ = False
 
 
 # Static files (CSS, JavaScript, Images)
diff --git a/PixivSearch/urls.py b/PixivSearch/urls.py
index 0b174fd..ea77a9d 100644
--- a/PixivSearch/urls.py
+++ b/PixivSearch/urls.py
@@ -14,14 +14,13 @@ Including another URLconf
     2. Add a URL to urlpatterns:  url(r'^blog/', include('blog.urls'))
 """
 from django.conf.urls import url
-from PixivSearch.view import search, index, download, saveConfig, save, get, start, stop
+from PixivSearch.view import search, index, download, saveConfig, get, start, stop
 
 urlpatterns = [
     url(r'^$', index),
     url(r'^pixiv/search', search),
     url(r'^pixiv/download', download),
     url(r'^tsdm', saveConfig),
-    url(r'^bangumi/save', save),
     url(r'^bangumi/get', get),
     url(r'^bangumi/start', start),
     url(r'^bangumi/stop', stop)
diff --git a/PixivSearch/view.py b/PixivSearch/view.py
index b2baba1..e34dc73 100644
--- a/PixivSearch/view.py
+++ b/PixivSearch/view.py
@@ -1,11 +1,12 @@
 # coding=utf-8
 import os
+import threading
 
 import django
 from django.http import Http404, StreamingHttpResponse, HttpResponse
 from django.shortcuts import render
 
-from PixivSearch.dao.bangumi import bangumi
+from PixivSearch.dao.bangumi import threadSave, get_, stop_
 from PixivSearch.settings import logger
 
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PixivSearch.settings")
@@ -82,23 +83,18 @@ def saveConfig(request):
     return render(request, 'addConfig.html')
 
 
-def save(request):
-    media_id = int(request.GET.get('id'))
-    bangumi.save(media_id)
-    return HttpResponse("save success")
-
-
 def get(request):
-    return HttpResponse(str(bangumi.get(bangumi).__str__()))
+    return HttpResponse(str(get_()))
 
 
+# Test endpoint: crawl ids start..end-1 on a background daemon thread.
 def start(request):
     begin = int(request.GET.get('start'))
     end = int(request.GET.get('end'))
-    bangumi(begin=begin, end=end).start()
-
+    threading.Thread(target=threadSave, args=(begin, end), daemon=True).start()
     return HttpResponse("start success")
 
+
 def stop(request):
-    bangumi.stop(bangumi)
-    return HttpResponse("stop success")
\ No newline at end of file
+    stop_()
+    return HttpResponse("stop success")