master
10295 7 years ago
commit bc06a1a5fe
  1. 0
      PixivSearch/__init__.py
  2. 32
      PixivSearch/logging.conf
  3. 0
      PixivSearch/module/__init__.py
  4. 5
      PixivSearch/module/config.py
  5. 132
      PixivSearch/pixiv.py
  6. 124
      PixivSearch/settings.py
  7. 4
      PixivSearch/static/js/jquery-3.2.1.min.js
  8. 63
      PixivSearch/templates/test.html
  9. 0
      PixivSearch/tsdm.py
  10. 24
      PixivSearch/urls.py
  11. 72
      PixivSearch/view.py
  12. 16
      PixivSearch/wsgi.py
  13. 22
      manage.py

@ -0,0 +1,32 @@
# Logging configuration loaded by PixivSearch/pixiv.py through
# logging.config.fileConfig('PixivSearch/logging.conf').

# Loggers declared in this file; each key has a matching [logger_<key>] section.
[loggers]
keys=root,main
# Handlers declared in this file; each key has a matching [handler_<key>] section.
[handlers]
keys=consoleHandler,fileHandler
# Formatters declared in this file.
[formatters]
keys=fmt
# Root logger: DEBUG and above, written to the console handler.
[logger_root]
level=DEBUG
handlers=consoleHandler
# The "main" entry configures the logger whose name is "file" (qualname below),
# i.e. the one the code obtains with logging.getLogger('file').
[logger_main]
level=DEBUG
qualname=file
handlers=fileHandler
# Console output goes to sys.stdout.
[handler_consoleHandler]
class=StreamHandler
level=DEBUG
formatter=fmt
args=(sys.stdout,)
# File output: FileHandler arguments are (filename, mode, encoding, delay).
# The file name is relative, so pixiv.log is created in the working directory.
[handler_fileHandler]
class=FileHandler
level=DEBUG
formatter=fmt
args=('pixiv.log','a','utf-8',False)
# Line layout: timestamp - module:line - level - message.
[formatter_fmt]
format=%(asctime)s - %(module)s:%(lineno)d - %(levelname)s - %(message)s

@ -0,0 +1,5 @@
from django.db import models
class param(models.Model):
    """Name/value parameter record, keyed by ``param_name``."""

    # Parameter name; also the table's primary key (at most 10 characters).
    param_name = models.CharField(primary_key=True, max_length=10)
    # Value stored for the parameter (at most 128 characters).
    param_value = models.CharField(max_length=128)

@ -0,0 +1,132 @@
#!/usr/bin/env python
#coding:utf-8
from concurrent import futures
import threading
import json
import requests
from bs4 import BeautifulSoup
import sys
from datetime import datetime
import os
import zipfile
import logging.config
# Request headers sent with every pixiv HTTP request made by this module.
headers = {
'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/56.0.2924.87 Safari/537.36'
}
lock = threading.Lock() # global lock guarding resources shared between worker threads
# Declared ``global`` inside crawl() but not otherwise read or written in this module.
total = 1
# The configuration path is relative, so it is resolved against the current
# working directory of the process that imports this module.
logging.config.fileConfig('PixivSearch/logging.conf')
# 'file' is the qualname of the file-backed logger declared in logging.conf.
logger = logging.getLogger('file')
def get_cookies():
    """Return the hard-coded pixiv cookies as a ``{name: value}`` dict.

    The cookie header below is split on ``;`` and each piece on its first
    ``=`` only, because several values contain ``=`` themselves.
    """
    # NOTE(review): this literal embeds a logged-in session (PHPSESSID) in
    # source control - consider loading it from configuration instead.
    raw_cookie_header = "p_ab_id=5; p_ab_id_2=9; login_ever=yes; device_token=c8c37fdf24b917b4e7fb191fe11c5ca5; search_tools_toggle=1; _ga=GA1.2.887334537.1498628532; PHPSESSID=25745470_75a76e86ff3145b53e21b440183b4822; a_type=0; is_sensei_service_user=1; module_orders_mypage=%5B%7B%22name%22%3A%22recommended_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22everyone_new_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22following_new_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22mypixiv_new_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22fanbox%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22featured_tags%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22contests%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22sensei_courses%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22spotlight%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22booth_follow_items%22%2C%22visible%22%3Atrue%7D%5D; __utmt=1; __utma=235335808.887334537.1498628532.1498720739.1498819600.5; __utmb=235335808.1.10.1498819600; __utmc=235335808; __utmz=235335808.1498713152.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmv=235335808.|2=login%20ever=yes=1^3=plan=normal=1^5=gender=male=1^6=user_id=25745470=1^9=p_ab_id=5=1^10=p_ab_id_2=9=1^11=lang=zh_tw=1"
    name_value_pairs = (
        piece.strip().split('=', 1) for piece in raw_cookie_header.split(';')
    )
    return {name: value for name, value in name_value_pairs}
def crawl(url):
    """Fetch one search-result page and add its illustrations to ``nodes``.

    The page is expected to carry a ``#js-mount-point-search-result-list``
    element whose ``data-items`` attribute is a JSON array of illustration
    dicts. When that element or attribute is absent the page is skipped with
    a warning instead of failing with an IndexError/KeyError that the thread
    pool would silently discard.

    Args:
        url: Search page URL, as produced by ``get_urls``.
    """
    page_html = requests.get(url, headers=headers, cookies=get_cookies()).text
    mount_points = BeautifulSoup(page_html, 'lxml').select('#js-mount-point-search-result-list')
    raw_items = mount_points[0].attrs.get('data-items') if mount_points else None
    if raw_items is None:
        logger.warning('no search-result data found at %s', url)
        return
    image_nodes = json.loads(raw_items)
    # Take the lock once per page rather than once per illustration.
    with lock:
        nodes.extend(image_nodes)
def get_urls(search, page):
    """Build the pixiv search URLs for result pages ``1 .. page - 1``.

    Args:
        search: Search keyword. It is percent-encoded before being placed in
            the query string, so characters such as ``&``, ``#`` or spaces
            cannot break the URL or inject extra parameters.
        page: Exclusive upper bound of the page numbers; pass ``n + 1`` to
            get the first ``n`` pages.

    Returns:
        A list of URL strings, newest-first ordering (``order=date_d``).
    """
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2

    fmt = 'https://www.pixiv.net/search.php?word={}&order=date_d&p={}'
    # '{}'.format(...) mirrors the original's stringification of the keyword.
    encoded_word = quote('{}'.format(search), safe='')
    return [fmt.format(encoded_word, p) for p in range(1, page)]
def get_Img(params):
    """Download the original-size image for one ranked illustration.

    Tries the ``.jpg`` URL first and falls back to ``.png`` when the server
    answers 404. On success the image is written to ``imgPath`` as
    ``<illustId><suffix>`` and the node gains a ``localName`` entry.

    Args:
        params: ``(rank, node)`` pair. ``node`` is an illustration dict from
            the search results; its ``url``, ``illustId``, ``illustTitle``,
            ``bookmarkCount`` and ``tags`` entries are read, and ``imgUrl``
            (plus ``localName`` on success) are written.
    """
    rank, node = params[0], params[1]
    # NOTE(review): the slice assumes the thumbnail URL ends with a
    # fixed-width date/id path followed by a 15-character tail - confirm.
    node['imgUrl'] = 'https://i.pximg.net/img-original/img/' + node['url'][-46:-15]
    # Work on a per-call copy: this function runs on many threads, and writing
    # the referer into the shared module-level dict let one thread's referer
    # be sent with another thread's request.
    request_headers = dict(headers)
    request_headers['referer'] = 'https://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + node['illustId']
    cookies = get_cookies()
    suffix = ".jpg"
    logger.info('开始下载图片:%s%s' % (node['imgUrl'], suffix))
    response = requests.get(node['imgUrl'] + suffix, headers=request_headers, cookies=cookies)
    if response.status_code == 404:
        suffix = '.png'
        response = requests.get(node['imgUrl'] + suffix, headers=request_headers, cookies=cookies)
        if response.status_code == 404:
            logger.error('无法下载图片:%s' % (node['illustTitle']))
            return
    local_name = node['illustId'] + suffix
    logger.info('下载图片:"%s"%s' % (node['illustTitle'], os.getcwd().replace('\\', '/') + '/' + imgPath + local_name))
    # Binary mode is required for image data; ``with`` closes the file even
    # when the write fails.
    with open(imgPath + local_name, 'wb') as image_file:
        image_file.write(response.content)  # raw bytes, not decoded text
    node['localName'] = local_name
    logger.info('排行第%d名,收藏数%d,标题:%s,标签:%s,(%s)前投稿,链接:%s' % (rank, node['bookmarkCount'], node['illustTitle'], ','.join(node['tags']), '', node['imgUrl']))
def zip(inputFile, outFile):
    """Pack every file under ``inputFile`` into a deflate-compressed archive.

    Files are stored flat, under their bare file name, so files with the
    same name in different sub-directories end up as duplicate entries.

    Args:
        inputFile: Directory to walk recursively.
        outFile: Path of the zip archive to create (overwritten if present).
    """
    # The context manager finalises and closes the archive even if a write
    # raises part-way through.
    with zipfile.ZipFile(outFile, 'w', zipfile.ZIP_DEFLATED) as archive:
        for dirpath, _dirnames, filenames in os.walk(inputFile):
            for filename in filenames:
                archive.write(os.path.join(dirpath, filename), filename)
fsize=''
def _run_in_pool(func, items, workers=32):
    """Run ``func`` over ``items`` on a thread pool, logging (not raising) failures."""
    with futures.ThreadPoolExecutor(workers) as executor:
        jobs = [executor.submit(func, item) for item in items]
    # Leaving the ``with`` block waits for all jobs, so exception() returns at once.
    for job in jobs:
        error = job.exception()
        if error is not None:
            logger.error('%s failed: %r', getattr(func, '__name__', func), error)


def get_nodes(param):
    """Search pixiv, keep the most-bookmarked results and optionally download them.

    Args:
        param: argv-style sequence ``[_, word, pages, top_n, result_type]``.
            ``pages`` and ``top_n`` may be ints or digit strings. When
            ``result_type`` is ``'img'`` the selected images are downloaded
            into ``PixivSearch/static/images/`` (emptied first) and zipped to
            ``PixivSearch/static/download/<word>.zip``.

    Returns:
        ``[nodes, tip, fsize]``: the selected illustration dicts, a summary
        message, and the archive size (``''`` when nothing was downloaded).

    Raises:
        ValueError: If ``pages``/``top_n`` are not integers, or if images are
            requested and ``word`` contains a path separator and so cannot be
            used as the archive file name.
    """
    global nodes, fsize, imgPath
    nodes = []
    fsize = ''  # do not report the size of a previous call's archive
    start = datetime.now()
    word = param[1]
    page_count = int(param[2])
    top_n = int(param[3])
    download_images = param[4] == 'img'
    # The word becomes part of a file path; refuse separators up front so it
    # cannot escape the download directory (and fail before any network work).
    if download_images and ('/' in word or '\\' in word):
        raise ValueError('search word cannot be used as a file name: %r' % (word,))
    urls = get_urls(word, page_count + 1)
    logger.info('开始从P站获取图片数据')
    _run_in_pool(crawl, urls)
    length = len(nodes)
    logger.info('获取到%d张图片' % (length))
    logger.info('对图片收藏数进行排序')
    # Sort by bookmark count, descending; a missing count is treated as 0.
    nodes = sorted(nodes, key=lambda v: v.get('bookmarkCount') or 0, reverse=True)[:top_n]
    if download_images:
        imgPath = 'PixivSearch/static/images/'
        download_dir = 'PixivSearch/static/download/'
        for directory in (imgPath, download_dir):
            if not os.path.isdir(directory):
                os.makedirs(directory)
        # Start from an empty image directory so the archive only holds this search.
        for stale_file in os.listdir(imgPath):
            os.remove(imgPath + stale_file)
        start_d = datetime.now()
        ranked_nodes = list(enumerate(nodes, 1))
        _run_in_pool(get_Img, ranked_nodes)
        logger.info('下载图片花费时间:%s' % (datetime.now() - start_d))
        logger.info('%s张图片下载完毕' % (len(os.listdir(imgPath))))
        zipPath = download_dir + word + '.zip'
        logger.info('图片打包到:%s' % (zipPath))
        zip(imgPath, zipPath)
        fsize = str(round(os.path.getsize(zipPath) / float(1024 * 1024), 2)) + 'MB'
        logger.info('图包大小:%s' % (fsize))
    tip = '%d张图片中筛选出收藏数前%s的图片,处理耗时:%s' % (length, param[3], datetime.now() - start)
    logger.info(tip)
    return [nodes, tip, fsize]
if __name__ == "__main__":
    # Command-line use: <script> <word> <pages> <top_n> <type>
    if len(sys.argv) == 5 and sys.argv[2].isdigit() and sys.argv[3].isdigit():
        try:
            get_nodes(sys.argv)
        except Exception:
            # Record the failure with its traceback instead of discarding it;
            # KeyboardInterrupt/SystemExit are deliberately left to propagate.
            logger.exception('get_nodes failed')
    else:
        logger.error('参数不合法')

@ -0,0 +1,124 @@
"""
Django settings for PixivSearch project.
Generated by 'django-admin startproject' using Django 1.11.7.
For more information on this file, see
https://docs.djangoproject.com/en/1.11/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.11/ref/settings/
"""
import os
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
# DJANGO_SECRET_KEY overrides the literal below, which is kept only as the
# development fallback so existing setups keep working unchanged.
SECRET_KEY = os.environ.get('DJANGO_SECRET_KEY', 'dh3^+=iugoo*+p_ea4u3dh&b!_zlgs8*m9kc+#*f2eozglsqjh')
# SECURITY WARNING: don't run with debug turned on in production!
# Set DJANGO_DEBUG to 0/false/no/off to disable; the default stays enabled.
DEBUG = os.environ.get('DJANGO_DEBUG', 'true').strip().lower() in ('1', 'true', 'yes', 'on')
# Comma-separated host names in DJANGO_ALLOWED_HOSTS; the default accepts any host.
ALLOWED_HOSTS = [
    host.strip()
    for host in os.environ.get('DJANGO_ALLOWED_HOSTS', '*').split(',')
    if host.strip()
]
# Application definition
# Django's stock contrib apps plus this project's own 'PixivSearch.module' package.
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'PixivSearch.module',
]
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
ROOT_URLCONF = 'PixivSearch.urls'
# Templates are looked up in the 'templates' directory next to this settings
# file (backslashes normalised to '/'), and in each installed app's templates.
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [os.path.join(os.path.dirname(__file__), 'templates').replace('\\','/')],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
WSGI_APPLICATION = 'PixivSearch.wsgi.application'
# Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases
# A single SQLite database stored as db.sqlite3 directly under BASE_DIR.
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
}
}
# Password validation
# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
# Internationalization
# https://docs.djangoproject.com/en/1.11/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_L10N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.11/howto/static-files/
STATIC_URL = '/static/'
# Static assets are served from the 'static' directory next to this settings file.
STATICFILES_DIRS = [
os.path.join(os.path.dirname(__file__), 'static').replace('\\', '/'),
]

File diff suppressed because one or more lines are too long

@ -0,0 +1,63 @@
<html xmlns="http://www.w3.org/1999/html">
<head>
    <style>
        div{
            text-align:center;
        }
        ol{list-style-type:decimal;}
        ol li{ list-style-position:outside;}
    </style>
    {% load staticfiles %}
    <script src="{% static "js/jquery-3.2.1.min.js"%}"></script>
    <script>
        $(function () {
        });
        // Validate the numeric inputs before submission. Every filled-in field
        // with a value <= 0 is cleared and reported. Returns false when any
        // field was rejected so the form's onsubmit handler can cancel the
        // submission, true otherwise.
        function check() {
            var valid = true;
            $("[name=pageSize],[name=order]").each(function () {
                if($(this).val()!=''&&$(this).val()<=0){
                    $(this).val('');
                    alert($(this).parent().text()+'不能小于等于0');
                    $(this).focus();
                    valid = false;
                }
            });
            return valid;
        }
    </script>
</head>
<body>
{# "return check()" is required: without "return" the validation result is ignored. #}
<form action="/pixiv/search" method="post" onsubmit="return check()">
    {% csrf_token %}
    <label>关键字<input name="word" placeholder="R-18"/></label>
    <label>页数<input name="pageSize" type="number" placeholder="10"/></label>
    <label>排行数<input name="order" type="number" placeholder="10"/></label>
    <label>结果显示类型:文本信息<input type="radio" value="" name="type" checked/></label>
    <label>图文信息<input type="radio" value="img" name="type"/></label>
    <input type="submit" />
    {% if download %}
    <a href="{% static "/download/" %}{{download}}">图包提取(文件大小:{{ size}})</a>
    {# <button id="download" onclick="window.open('{% static "/download/" %}{{download}}')">图包提取(文件大小:{{ size}})</button>#}
    {% endif %}
</form>
<div>
    <h1>{{ msg}}</h1>
</div>
<ol>
    {% for imageNode in imageNodes %}
    <li>
        <div>
            <h1>标题:<a href="https://www.pixiv.net/member_illust.php?mode=medium&illust_id={{imageNode.illustId}}" target="_blank">{{imageNode.illustTitle}}</a>,画师:<a href="https://www.pixiv.net/member.php?id={{ imageNode.userId}}">{{imageNode.userName}}</a>,收藏数:{{imageNode.bookmarkCount}}</h1>
            {% if imageNode.localName %}
            <img src="{% static "images/"%}{{imageNode.localName}}">
            {% endif %}
        </div>
    </li>
    {% empty %}
    <p>{{tip}}</p>
    {% endfor %}
</ol>
</body>
</html>

@ -0,0 +1,24 @@
"""PixivSearch URL Configuration
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/1.11/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: url(r'^$', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.conf.urls import url, include
2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls'))
"""
from django.conf.urls import url
from PixivSearch.view import search, index, download, haha
# Route table. Only the first pattern is anchored at both ends; the others
# have no trailing '$', so each also matches any longer path with that prefix.
urlpatterns = [
url(r'^$', index),
url(r'^pixiv/search', search),
url(r'^pixiv/download', download),
url(r'^tsdm',haha)
]

@ -0,0 +1,72 @@
# coding=utf-8
import os
from django.http import Http404, StreamingHttpResponse, HttpResponse
from django.shortcuts import render
from PixivSearch import pixiv
from PixivSearch.module import config
from PixivSearch.pixiv import get_nodes
logger = pixiv.logging.getLogger('file')
def search(request):
    """Run a pixiv search from the submitted form and render the results.

    Reads ``word``, ``pageSize``, ``order`` and ``type`` from the POST data.
    A missing or empty field falls back to its default (``'R-18'``, 10, 10
    and ``None``), so a request without form data no longer searches for the
    literal ``None``.

    Raises:
        Http404: If ``pageSize``/``order`` are not integers, or ``get_nodes``
            raises ``ValueError``.
    """
    word = request.POST.get('word') or 'R-18'
    result_type = request.POST.get('type') or None
    try:
        # Convert inside the ``try`` so malformed numbers produce a 404, and so
        # the %d placeholders in the log call below receive real integers.
        pageSize = int(request.POST.get('pageSize') or 10)
        order = int(request.POST.get('order') or 10)
        logger.info("word:%s,pageSize:%d,order:%d,type:%s", word, pageSize, order, result_type)
        array = get_nodes([0, word, pageSize, order, result_type])
        if array[0]:
            nodes = {'imageNodes': array[0], 'msg': array[1]}
            if result_type == 'img':
                # Image mode also produced an archive; expose its name and size.
                nodes['download'] = word + '.zip'
                nodes['size'] = array[2]
        else:
            nodes = {'tip': '没有返回结果'}
        return render(request, 'test.html', nodes)
    except ValueError:
        raise Http404()
def index(request):
    """Render the empty search page with the initial prompt as its tip."""
    context = {'tip': '输入参数进行搜索'}
    return render(request, 'test.html', context)
def download(request):
    """Stream an archive from ``static/download`` as a file attachment.

    The ``fileName`` query parameter selects the file. Only its final path
    component is used, so the request cannot reach files outside the
    download directory.

    Raises:
        Http404: If ``fileName`` is missing or names no existing file.
    """
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2

    def file_iterator(file_name, chunk_size=512):
        # Archives are binary data: read bytes, never decoded text.
        with open(file_name, 'rb') as f:
            while True:
                c = f.read(chunk_size)
                if not c:
                    break
                yield c

    requested = request.GET.get('fileName')
    if not requested:
        raise Http404()
    # Drop any directory part (either separator style) supplied by the client.
    safe_name = os.path.basename(requested.replace('\\', '/'))
    file_path = os.path.join(os.path.dirname(__file__), 'static', 'download', safe_name)
    if not safe_name or not os.path.isfile(file_path):
        raise Http404()
    response = StreamingHttpResponse(file_iterator(file_path))
    response['Content-Type'] = 'application/octet-stream'
    # RFC 5987 form keeps non-ASCII file names intact in the header.
    response['Content-Disposition'] = "attachment; filename*=UTF-8''{0}".format(
        quote(safe_name.encode('utf-8')))
    return response
def haha(request):
    """Save a fixed sample ``param`` row and echo its name in a tiny HTML page."""
    p = config.param(param_name='123', param_value='456')
    p.save()
    # Render the saved instance's value; the class attribute
    # ``config.param.param_name`` is the field accessor, not the stored name.
    return HttpResponse("<html><body>%s</body></html>" % (p.param_name))

@ -0,0 +1,16 @@
"""
WSGI config for PixivSearch project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
# Use the project's settings module unless DJANGO_SETTINGS_MODULE is already set.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PixivSearch.settings")
# Module-level WSGI callable referenced by WSGI_APPLICATION in the settings.
application = get_wsgi_application()

@ -0,0 +1,22 @@
#!/usr/bin/env python
import os
import sys
if __name__ == "__main__":
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PixivSearch.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError:
        # Distinguish "Django is not installed" from any other import failure
        # so that unrelated errors are not masked on Python 2.
        missing_django_message = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        try:
            import django
        except ImportError:
            raise ImportError(missing_django_message)
        # Django itself imports fine, so surface the original error untouched.
        raise
    execute_from_command_line(sys.argv)
Loading…
Cancel
Save