提交

7 years ago · bc06a1a5fe
commit bc06a1a5fe
13 changed files with 494 additions and 0 deletions
--- a/PixivSearch/init.py
+++ b/PixivSearch/init.py
--- a/PixivSearch/logging.conf
+++ b/PixivSearch/logging.conf
@ -0,0 +1,32 @@
+# Logging configuration loaded through logging.config.fileConfig().
+# Two loggers are declared: the root logger and one section named "main".
+[loggers]
+keys=root,main
+
+[handlers]
+keys=consoleHandler,fileHandler
+
+[formatters]
+keys=fmt
+
+# Root logger: everything at DEBUG and above goes to the console handler.
+[logger_root]
+level=DEBUG
+handlers=consoleHandler
+
+# The "main" section is registered under the runtime name given by qualname,
+# i.e. it is the logger returned by logging.getLogger('file').
+# No "propagate" key is set here, so the library default applies.
+[logger_main]
+level=DEBUG
+qualname=file
+handlers=fileHandler
+
+# Console output written to standard output.
+[handler_consoleHandler]
+class=StreamHandler
+level=DEBUG
+formatter=fmt
+args=(sys.stdout,)
+
+# File output; args are the positional FileHandler arguments
+# (filename, mode, encoding, delay). The relative filename is resolved
+# against the process working directory.
+[handler_fileHandler]
+class=FileHandler
+level=DEBUG
+formatter=fmt
+args=('pixiv.log','a','utf-8',False)
+
+# Shared line layout: timestamp - module:line - level - message.
+[formatter_fmt]
+format=%(asctime)s - %(module)s:%(lineno)d - %(levelname)s - %(message)s
--- a/PixivSearch/module/init.py
+++ b/PixivSearch/module/init.py
--- a/PixivSearch/module/config.py
+++ b/PixivSearch/module/config.py
@ -0,0 +1,5 @@
+from django.db import models
+
+class param(models.Model):
+    """Key/value row; ``param_name`` is the primary key of the table."""
+    # Parameter name: at most 10 characters, used as the primary key.
+    param_name = models.CharField(max_length=10,primary_key=True)
+    # Parameter value: stored as a string of at most 128 characters.
+    param_value = models.CharField(max_length=128)
--- a/PixivSearch/pixiv.py
+++ b/PixivSearch/pixiv.py
@ -0,0 +1,132 @@
+#!/usr/bin/env python
+#coding:utf-8
+from concurrent import futures
+import threading
+import json
+import requests
+from bs4 import BeautifulSoup
+import sys
+from datetime import datetime
+import os
+import zipfile
+import logging.config
+
+# Base HTTP headers sent with every request made by this module.
+headers = {
+    'X-Requested-With': 'XMLHttpRequest',
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
+                  'Chrome/56.0.2924.87 Safari/537.36'
+}
+
+lock = threading.Lock()     # global lock guarding state shared between worker threads
+total = 1
+# The configuration path is relative, so it is resolved against the process
+# working directory, and the file is loaded as a side effect of importing
+# this module.
+logging.config.fileConfig('PixivSearch/logging.conf')
+logger = logging.getLogger('file')
+
+def get_cookies():
+    """Return the hard-coded Pixiv cookies as a ``{name: value}`` dict.
+
+    The cookie header is split on ``;``; every piece is stripped and then
+    split on its first ``=`` only, so values that themselves contain ``=``
+    are kept intact.
+    """
+    # NOTE(review): this literal embeds what looks like a real session
+    # (PHPSESSID, device_token); consider loading it from configuration.
+    cookie_header = "p_ab_id=5; p_ab_id_2=9; login_ever=yes; device_token=c8c37fdf24b917b4e7fb191fe11c5ca5; search_tools_toggle=1; _ga=GA1.2.887334537.1498628532; PHPSESSID=25745470_75a76e86ff3145b53e21b440183b4822; a_type=0; is_sensei_service_user=1; module_orders_mypage=%5B%7B%22name%22%3A%22recommended_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22everyone_new_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22following_new_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22mypixiv_new_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22fanbox%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22featured_tags%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22contests%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22sensei_courses%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22spotlight%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22booth_follow_items%22%2C%22visible%22%3Atrue%7D%5D; __utmt=1; __utma=235335808.887334537.1498628532.1498720739.1498819600.5; __utmb=235335808.1.10.1498819600; __utmc=235335808; __utmz=235335808.1498713152.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmv=235335808.|2=login%20ever=yes=1^3=plan=normal=1^5=gender=male=1^6=user_id=25745470=1^9=p_ab_id=5=1^10=p_ab_id_2=9=1^11=lang=zh_tw=1"
+    pairs = (piece.strip().split('=', 1) for piece in cookie_header.split(';'))
+    return {name: value for name, value in pairs}
+
+
+
+
+def crawl(url):
+    """Fetch one search-result page and add its image entries to ``nodes``.
+
+    The page is expected to contain an element with id
+    ``js-mount-point-search-result-list`` whose ``data-items`` attribute holds
+    a JSON array describing the images. Failures are logged and the page is
+    skipped: this function runs through ``executor.map`` whose results are
+    never read, so a raised exception would otherwise vanish silently.
+
+    :param url: search page URL to download.
+    """
+    try:
+        # A timeout keeps one stalled connection from blocking a worker forever.
+        resp = requests.get(url, headers=headers, cookies=get_cookies(), timeout=30)
+    except requests.RequestException as e:
+        logger.error('页面请求失败：%s (%s)', url, e)
+        return
+
+    mount = BeautifulSoup(resp.text, 'lxml').select('#js-mount-point-search-result-list')
+    if not mount or 'data-items' not in mount[0].attrs:
+        # Happens when the layout changed or the session cookie is rejected.
+        logger.error('页面中未找到搜索结果数据：%s', url)
+        return
+
+    imageNodes = json.loads(mount[0].attrs['data-items'])
+    # One lock acquisition per page instead of one per image.
+    with lock:
+        nodes.extend(imageNodes)
+
+def get_urls(search, page):
+    """Build the search URLs for result pages ``1`` up to ``page - 1``.
+
+    :param search: search keyword, inserted into the query string as given.
+    :param page: exclusive upper bound of the page numbers.
+    :return: list of URLs ordered by page number (empty when ``page <= 1``).
+    """
+    urls = []
+    for page_no in range(1, page):
+        urls.append('https://www.pixiv.net/search.php?word={}&order=date_d&p={}'.format(search, page_no))
+    return urls
+
+def get_Img(params):
+    """Download the original image for one search result into ``imgPath``.
+
+    The ``.jpg`` URL is tried first and ``.png`` is used as a fallback when the
+    server answers 404. On success the node gains ``imgUrl`` and ``localName``
+    entries; on failure an error is logged and nothing is written.
+
+    :param params: ``(rank, node)`` pair, where ``node`` is one image dict
+        produced by the crawl step (reads ``url``, ``illustId``,
+        ``illustTitle``, ``bookmarkCount`` and ``tags``).
+    """
+    rank, node = params[0], params[1]
+    node['imgUrl'] = 'https://i.pximg.net/img-original/img/' + node['url'][-46:-15]
+
+    # Per-request copy: this function runs on many threads at once, and writing
+    # the referer into the shared module-level dict let one download send
+    # another illustration's referer.
+    req_headers = dict(headers)
+    req_headers['referer'] = 'https://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + node['illustId']
+
+    suffix = ".jpg"
+    logger.info('开始下载图片：%s%s' % (node['imgUrl'], suffix))
+
+    s = requests.get(node['imgUrl'] + suffix, headers=req_headers, cookies=get_cookies())
+    if s.status_code == 404:
+        suffix = '.png'
+        s = requests.get(node['imgUrl'] + suffix, headers=req_headers, cookies=get_cookies())
+    if s.status_code != 200:
+        # Any non-success answer (not only 404) must not be saved as an image.
+        logger.error('无法下载图片：%s' % (node['illustTitle']))
+        return
+
+    target = imgPath + node['illustId'] + suffix
+    logger.info('下载图片："%s"到%s' % (node['illustTitle'], os.getcwd().replace('\\', '/') + '/' + target))
+    # Binary mode and the raw response bytes are required for image data.
+    with open(target, 'wb') as f:
+        f.write(s.content)
+
+    node['localName'] = node['illustId'] + suffix
+    logger.info('排行第%d名，收藏数%d，标题：%s，标签：%s，(%s)前投稿，链接：%s' % (rank, node['bookmarkCount'], node['illustTitle'], ','.join(node['tags']), '', node['imgUrl']))
+
+def zip(inputFile,outFile):
+    """Pack every file found under ``inputFile`` into the archive ``outFile``.
+
+    Files are stored under their bare file name (the directory structure is
+    flattened) using DEFLATE compression.
+
+    :param inputFile: directory that is walked recursively.
+    :param outFile: path of the zip archive to create or overwrite.
+    """
+    # NOTE(review): this name shadows the builtin zip() inside the module; it
+    # is kept because callers use it.
+    # The context manager closes the archive even when a write fails.
+    with zipfile.ZipFile(outFile, 'w', zipfile.ZIP_DEFLATED) as archive:
+        for dirpath, _dirnames, filenames in os.walk(inputFile):
+            for filename in filenames:
+                archive.write(os.path.join(dirpath, filename), filename)
+
+fsize=''
+def get_nodes(param):
+    """Crawl search pages, keep the most bookmarked images, optionally download them.
+
+    :param param: sequence ``[_, keyword, pages, top_n, result_type]``.
+        ``pages`` and ``top_n`` are passed through ``int()``; when
+        ``result_type`` is ``'img'`` the selected images are downloaded and
+        zipped.
+    :return: ``[nodes, tip, fsize]`` - the selected image dicts, a summary
+        message, and the archive size text (``''`` when nothing was zipped).
+    """
+    # NOTE(review): nodes/fsize/imgPath are module globals shared with the
+    # worker functions, so two overlapping calls would interfere.
+    global nodes,fsize,imgPath
+    nodes=[]
+    fsize=''    # reset so an earlier run's archive size is never returned
+    start = datetime.now()
+    urls = get_urls(param[1], int(param[2])+1)
+    logger.info('开始从P站获取图片数据')
+    with futures.ThreadPoolExecutor(32) as executor:
+        executor.map(crawl, urls)
+
+    length=len(nodes)
+    logger.info('获取到%d张图片'%(length))
+    logger.info('对图片收藏数进行排序')
+    # Sort by bookmark count, descending; entries without a count sort as 0
+    # instead of making the comparison fail.
+    nodes=sorted(nodes, key=lambda v: v.get('bookmarkCount') or 0, reverse=True)[:int(param[3])]
+    if param[4]=='img':
+        imgPath='PixivSearch/static/images/'
+        # Clear the images left over from the previous search.
+        for file in os.listdir(imgPath):
+            os.remove(imgPath+file)
+        # Pair each image with its 1-based rank for the download workers.
+        nodes_tup=[(index+1,img) for index,img in enumerate(nodes)]
+        start_d=datetime.now()
+
+        with futures.ThreadPoolExecutor(32) as executor:
+            executor.map(get_Img, nodes_tup)
+        logger.info('下载图片花费时间：%s' % (datetime.now() - start_d))
+        logger.info('%s张图片下载完毕'%(len(os.listdir(imgPath))))
+
+        # NOTE(review): the keyword goes into the archive path unsanitized;
+        # confirm callers cannot pass path separators.
+        zipPath='PixivSearch/static/download/' + param[1] + '.zip'
+        logger.info('图片打包到：%s' % (zipPath))
+        zip(imgPath,zipPath)
+        fsize = str(round(os.path.getsize(zipPath)/float(1024*1024),2))+'MB'
+        logger.info('图包大小：%s'%(fsize))
+
+    tip='从%d张图片中筛选出收藏数前%s的图片,处理耗时：%s'%(length,param[3],datetime.now()-start)
+    logger.info(tip)
+    return [nodes,tip,fsize]
+
+
+
+if __name__ == "__main__":
+    # Command line: <script> <keyword> <pages> <top_n> <result_type>
+    if len(sys.argv) == 5 and sys.argv[2].isdigit() and sys.argv[3].isdigit():
+        try:
+            get_nodes(sys.argv)
+        except Exception:
+            # The failure used to be discarded by a bare repr(e); record it
+            # with its traceback. Ctrl-C is no longer swallowed.
+            logger.exception('执行失败')
+    else:
+        logger.error('参数不合法')
--- a/PixivSearch/settings.py
+++ b/PixivSearch/settings.py
@ -0,0 +1,124 @@
+"""
+Django settings for PixivSearch project.
+
+Generated by 'django-admin startproject' using Django 1.11.7.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/1.11/topics/settings/
+
+For the full list of settings and their values, see
+https://docs.djangoproject.com/en/1.11/ref/settings/
+"""
+
+import os
+
+# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+# Quick-start development settings - unsuitable for production
+# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/
+
+# SECURITY WARNING: keep the secret key used in production secret!
+# The DJANGO_SECRET_KEY environment variable takes precedence; the literal is
+# only the development fallback and is public in version control.
+SECRET_KEY = os.environ.get('DJANGO_SECRET_KEY', 'dh3^+=iugoo*+p_ea4u3dh&b!_zlgs8*m9kc+#*f2eozglsqjh')
+
+# SECURITY WARNING: don't run with debug turned on in production!
+# Defaults to True as before; set DJANGO_DEBUG to 0/false/no/off to disable.
+DEBUG = os.environ.get('DJANGO_DEBUG', 'true').lower() not in ('0', 'false', 'no', 'off', '')
+
+# NOTE(review): '*' accepts any Host header; restrict this before deploying.
+ALLOWED_HOSTS = ['*']
+
+
+# Application definition
+
+# Stock Django contrib apps plus the project's own ``PixivSearch.module``
+# package (presumably listed so the model defined in module/config.py
+# belongs to an installed app - confirm).
+INSTALLED_APPS = [
+    'django.contrib.admin',
+    'django.contrib.auth',
+    'django.contrib.contenttypes',
+    'django.contrib.sessions',
+    'django.contrib.messages',
+    'django.contrib.staticfiles',
+    'PixivSearch.module',
+]
+
+MIDDLEWARE = [
+    'django.middleware.security.SecurityMiddleware',
+    'django.contrib.sessions.middleware.SessionMiddleware',
+    'django.middleware.common.CommonMiddleware',
+    'django.middleware.csrf.CsrfViewMiddleware',
+    'django.contrib.auth.middleware.AuthenticationMiddleware',
+    'django.contrib.messages.middleware.MessageMiddleware',
+    'django.middleware.clickjacking.XFrameOptionsMiddleware',
+]
+
+ROOT_URLCONF = 'PixivSearch.urls'
+
+# Django template engine. Besides per-app template directories (APP_DIRS),
+# templates are looked up in the ``templates`` directory next to this
+# settings file; backslashes are replaced so the path uses forward slashes
+# on Windows too.
+TEMPLATES = [
+    {
+        'BACKEND': 'django.template.backends.django.DjangoTemplates',
+        'DIRS': [os.path.join(os.path.dirname(__file__), 'templates').replace('\\','/')],
+        'APP_DIRS': True,
+        'OPTIONS': {
+            'context_processors': [
+                'django.template.context_processors.debug',
+                'django.template.context_processors.request',
+                'django.contrib.auth.context_processors.auth',
+                'django.contrib.messages.context_processors.messages',
+            ],
+        },
+    },
+]
+
+WSGI_APPLICATION = 'PixivSearch.wsgi.application'
+
+
+# Database
+# https://docs.djangoproject.com/en/1.11/ref/settings/#databases
+
+DATABASES = {
+    'default': {
+        'ENGINE': 'django.db.backends.sqlite3',
+        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
+    }
+}
+
+
+# Password validation
+# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators
+
+AUTH_PASSWORD_VALIDATORS = [
+    {
+        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
+    },
+]
+
+
+# Internationalization
+# https://docs.djangoproject.com/en/1.11/topics/i18n/
+
+LANGUAGE_CODE = 'en-us'
+
+TIME_ZONE = 'UTC'
+
+USE_I18N = True
+
+USE_L10N = True
+
+USE_TZ = True
+
+
+# Static files (CSS, JavaScript, Images)
+# https://docs.djangoproject.com/en/1.11/howto/static-files/
+
+# URL prefix under which static files are served.
+STATIC_URL = '/static/'
+# Extra static directory: the ``static`` folder next to this settings file,
+# with backslashes normalized to forward slashes.
+STATICFILES_DIRS = [
+    os.path.join(os.path.dirname(__file__), 'static').replace('\\', '/'),
+]
--- a/PixivSearch/static/js/jquery-3.2.1.min.js
+++ b/PixivSearch/static/js/jquery-3.2.1.min.js
--- a/PixivSearch/templates/test.html
+++ b/PixivSearch/templates/test.html
@ -0,0 +1,63 @@
+<html xmlns="http://www.w3.org/1999/html">
+<head>
+    <style>
+        div{
+            text-align:center;
+        }
+        /* "decimal" is the valid keyword for numbered markers */
+        ol{list-style-type:decimal;}
+        ol li{ list-style-position:outside;}
+    </style>
+    {% load staticfiles %}
+    <script src="{% static "js/jquery-3.2.1.min.js"%}"></script>
+    <script>
+        $(function () {
+
+        });
+
+        // Validate the numeric inputs before the form is submitted.
+        // Returns false (which cancels the submit through the form's
+        // onsubmit="return check()") when a filled-in value is not positive;
+        // empty fields are allowed.
+        function check() {
+            var valid = true;
+            $("[name=pageSize],[name=order]").each(function () {
+                var raw = $(this).val();
+                if (raw != '' && Number(raw) <= 0) {
+                    $(this).val('');
+                    alert($(this).parent().text()+'不能小于等于0');
+                    $(this).focus();
+                    valid = false;
+                }
+            });
+            return valid;
+        }
+    </script>
+</head>
+<body>
+<form  action="/pixiv/search"  method="post" onsubmit="return check()">
+    {% csrf_token %}
+    <label>关键字<input name="word" placeholder="R-18"/></label>
+    <label>页数<input name="pageSize" type="number" placeholder="10"/></label>
+    <label>排行数<input name="order" type="number" placeholder="10"/></label>
+    <label>结果显示类型：文本信息<input type="radio" value="" name="type" checked/></label>
+    <label>图文信息<input type="radio" value="img" name="type"/></label>
+
+    <input type="submit" />
+    {% if  download %}
+        <a href="{% static "/download/" %}{{download}}">图包提取(文件大小:{{ size}})</a>
+{#        <button id="download" onclick="window.open('{% static "/download/" %}{{download}}')">图包提取(文件大小:{{ size}})</button>#}
+    {% endif %}
+</form>
+
+<div>
+    <h1>{{ msg}}</h1>
+</div>
+<ol>
+    {% for imageNode in imageNodes %}
+        <li>
+            <div>
+                <h1>标题：<a href="https://www.pixiv.net/member_illust.php?mode=medium&illust_id={{imageNode.illustId}}" target="_blank">{{imageNode.illustTitle}}</a>，画师：<a href="https://www.pixiv.net/member.php?id={{ imageNode.userId}}">{{imageNode.userName}}</a>，收藏数：{{imageNode.bookmarkCount}}</h1>
+                {% if imageNode.localName %}
+                    <img src="{% static "images/"%}{{imageNode.localName}}">
+                {% endif %}
+            </div>
+        </li>
+    {% empty %}
+        <p>{{tip}}</p>
+    {% endfor %}
+
+</ol>
+</body>
+</html>
--- a/PixivSearch/tsdm.py
+++ b/PixivSearch/tsdm.py
--- a/PixivSearch/urls.py
+++ b/PixivSearch/urls.py
@ -0,0 +1,24 @@
+"""PixivSearch URL Configuration
+
+The `urlpatterns` list routes URLs to views. For more information please see:
+    https://docs.djangoproject.com/en/1.11/topics/http/urls/
+Examples:
+Function views
+    1. Add an import:  from my_app import views
+    2. Add a URL to urlpatterns:  url(r'^$', views.home, name='home')
+Class-based views
+    1. Add an import:  from other_app.views import Home
+    2. Add a URL to urlpatterns:  url(r'^$', Home.as_view(), name='home')
+Including another URLconf
+    1. Import the include() function: from django.conf.urls import url, include
+    2. Add a URL to urlpatterns:  url(r'^blog/', include('blog.urls'))
+"""
+from django.conf.urls import url
+from PixivSearch.view import search, index, download, haha
+
+# Route table. Only the first pattern is anchored at both ends; the other
+# three have no trailing ``$``, so they match any path that starts with the
+# given prefix.
+urlpatterns = [
+    url(r'^$', index),
+    url(r'^pixiv/search', search),
+    url(r'^pixiv/download', download),
+    url(r'^tsdm',haha)
+]
--- a/PixivSearch/view.py
+++ b/PixivSearch/view.py
@ -0,0 +1,72 @@
+# coding=utf-8
+import os
+
+from django.http import Http404, StreamingHttpResponse, HttpResponse
+from django.shortcuts import render
+
+from PixivSearch import pixiv
+from PixivSearch.module import config
+from PixivSearch.pixiv import get_nodes
+
+# Reuse the 'file' logger; ``logging`` is reached through the pixiv module,
+# which imports ``logging.config`` and loads the logging configuration.
+logger = pixiv.logging.getLogger('file')
+
+
+def search(request):
+    """Run a search from the submitted form and render the result page.
+
+    Missing or empty fields fall back to their defaults (keyword ``R-18``,
+    10 pages, top 10, text-only results). A non-numeric page count or rank
+    count, or a ``ValueError`` raised while searching, is answered with 404.
+    """
+    # ``or`` covers both an empty field and a field absent from the request.
+    word = request.POST.get('word') or 'R-18'
+    result_type = request.POST.get('type') or None
+    try:
+        # Convert once: the values are logged with %d, and form values arrive
+        # as strings.
+        pageSize = int(request.POST.get('pageSize') or 10)
+        order = int(request.POST.get('order') or 10)
+    except ValueError:
+        raise Http404()
+    try:
+        logger.info("word:%s,pageSize:%d,order:%d,type:%s", word, pageSize, order, result_type)
+        array = get_nodes([0, word, pageSize, order, result_type])
+        if len(array[0]) > 0:
+            if result_type == 'img':
+                nodes = {'imageNodes': array[0], 'msg': array[1], 'download': word + '.zip', 'size': array[2]}
+            else:
+                nodes = {'imageNodes': array[0], 'msg': array[1]}
+        else:
+            nodes = {'tip': '没有返回结果'}
+        return render(request, 'test.html', nodes)
+    except ValueError:
+        raise Http404()
+
+
+def index(request):
+    """Render the search page with only the initial prompt text."""
+    context = {'tip': '输入参数进行搜索'}
+    return render(request, 'test.html', context)
+
+
+def download(request):
+    """Stream a file from ``static/download`` as an attachment.
+
+    The file is chosen by the ``fileName`` query parameter. Only its final
+    path component is used, so the request cannot reach outside the download
+    directory. A missing parameter or unknown file is answered with 404.
+    """
+    def file_iterator(file_name, chunk_size=512):
+        # Binary mode: the archives are not text and must not be decoded.
+        with open(file_name, 'rb') as f:
+            while True:
+                c = f.read(chunk_size)
+                if not c:
+                    break
+                yield c
+
+    requested = request.GET.get('fileName')
+    if not requested:
+        raise Http404()
+    # Strip directory parts (either separator style) to block path traversal.
+    safe_name = os.path.basename(requested.replace('\\', '/'))
+    fileName = os.path.join(os.path.dirname(__file__), 'static/download/').replace('\\', '/') + safe_name
+    if not safe_name or not os.path.isfile(fileName):
+        raise Http404()
+    response = StreamingHttpResponse(file_iterator(fileName))
+    response['Content-Type'] = 'application/octet-stream'
+    response['Content-Disposition'] = 'attachment;filename="%s"' % safe_name
+    return response
+
+
+def haha(request):
+    """Save a fixed ``param`` row and return its name in a minimal HTML page."""
+    p=config.param(param_name='123',param_value='456')
+    p.save()
+    # Render the saved instance's value; the class attribute
+    # ``config.param.param_name`` is the field descriptor, not the value.
+    return HttpResponse("<html><body>%s</body></html>" % (p.param_name))
--- a/PixivSearch/wsgi.py
+++ b/PixivSearch/wsgi.py
@ -0,0 +1,16 @@
+"""
+WSGI config for PixivSearch project.
+
+It exposes the WSGI callable as a module-level variable named ``application``.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/
+"""
+
+import os
+
+from django.core.wsgi import get_wsgi_application
+
+# Use the project's settings module unless DJANGO_SETTINGS_MODULE is already set.
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PixivSearch.settings")
+
+# Module-level WSGI callable, referenced by WSGI_APPLICATION in settings.py.
+application = get_wsgi_application()
--- a/manage.py
+++ b/manage.py
@ -0,0 +1,22 @@
+#!/usr/bin/env python
+import os
+import sys
+
+if __name__ == "__main__":
+    # Use the project's settings module unless the environment overrides it.
+    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PixivSearch.settings")
+    try:
+        from django.core.management import execute_from_command_line
+    except ImportError:
+        # The above import may fail for some other reason. Ensure that the
+        # issue is really that Django is missing to avoid masking other
+        # exceptions on Python 2.
+        try:
+            import django
+        except ImportError:
+            raise ImportError(
+                "Couldn't import Django. Are you sure it's installed and "
+                "available on your PYTHONPATH environment variable? Did you "
+                "forget to activate a virtual environment?"
+            )
+        # Django itself imports fine, so re-raise the original ImportError.
+        raise
+    # Hand the command-line arguments to Django's management command runner.
+    execute_from_command_line(sys.argv)