master
commit
bc06a1a5fe
@ -0,0 +1,32 @@ |
||||
# Logging configuration in the format read by logging.config.fileConfig.
# Only full-line comments are used here.

[loggers]
keys=root,main

[handlers]
keys=consoleHandler,fileHandler

[formatters]
keys=fmt

# Root logger: DEBUG and above, sent to the console handler.
[logger_root]
level=DEBUG
handlers=consoleHandler

# Logger registered under the name "file" (see qualname); obtain it with
# logging.getLogger('file'). Its records are sent to the file handler.
[logger_main]
level=DEBUG
qualname=file
handlers=fileHandler

# Stream handler writing to sys.stdout.
[handler_consoleHandler]
class=StreamHandler
level=DEBUG
formatter=fmt
args=(sys.stdout,)

# File handler; positional args are (filename, mode, encoding, delay).
[handler_fileHandler]
class=FileHandler
level=DEBUG
formatter=fmt
args=('pixiv.log','a','utf-8',False)

# Shared record layout: timestamp - module:line - level - message.
[formatter_fmt]
format=%(asctime)s - %(module)s:%(lineno)d - %(levelname)s - %(message)s
@ -0,0 +1,5 @@ |
||||
from django.db import models |
||||
|
||||
class param(models.Model):
    """Name/value pair stored as a database row; ``param_name`` is the primary key."""

    # Primary-key name, limited to 10 characters.
    param_name = models.CharField(max_length=10,primary_key=True)
    # Associated value, limited to 128 characters.
    param_value = models.CharField(max_length=128)
@ -0,0 +1,132 @@ |
||||
#!/usr/bin/env python |
||||
#coding:utf-8 |
||||
from concurrent import futures |
||||
import threading |
||||
import json |
||||
import requests |
||||
from bs4 import BeautifulSoup |
||||
import sys |
||||
from datetime import datetime |
||||
import os |
||||
import zipfile |
||||
import logging.config |
||||
|
||||
# Default HTTP headers sent with the requests made by this module.
headers = {
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/56.0.2924.87 Safari/537.36'
    ),
}

# Global lock protecting resources shared between worker threads.
lock = threading.Lock()
total = 1

# Load the logging setup from the INI file (the path is relative to the
# current working directory) and grab the logger registered as 'file'.
logging.config.fileConfig('PixivSearch/logging.conf')
logger = logging.getLogger('file')
||||
|
||||
def _parse_cookie_header(raw):
    """Convert a ``name=value; name2=value2`` cookie string into a dict.

    Only the first ``=`` of each segment separates name from value, so values
    may themselves contain ``=``. Empty segments (e.g. from a trailing ``;``)
    and segments without ``=`` are skipped instead of raising.
    """
    parsed = {}
    for segment in raw.split(';'):
        name, separator, value = segment.strip().partition('=')
        if not separator or not name:
            continue
        parsed[name] = value
    return parsed


def get_cookies():
    """Return the cookies sent with every Pixiv request as a ``{name: value}`` dict.

    NOTE(security): the cookie string below embeds a real-looking session
    (PHPSESSID / device_token). It should be moved out of source control into
    configuration or the environment; it is kept here unchanged so existing
    behavior is preserved.
    """
    raw = "p_ab_id=5; p_ab_id_2=9; login_ever=yes; device_token=c8c37fdf24b917b4e7fb191fe11c5ca5; search_tools_toggle=1; _ga=GA1.2.887334537.1498628532; PHPSESSID=25745470_75a76e86ff3145b53e21b440183b4822; a_type=0; is_sensei_service_user=1; module_orders_mypage=%5B%7B%22name%22%3A%22recommended_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22everyone_new_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22following_new_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22mypixiv_new_illusts%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22fanbox%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22featured_tags%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22contests%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22sensei_courses%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22spotlight%22%2C%22visible%22%3Atrue%7D%2C%7B%22name%22%3A%22booth_follow_items%22%2C%22visible%22%3Atrue%7D%5D; __utmt=1; __utma=235335808.887334537.1498628532.1498720739.1498819600.5; __utmb=235335808.1.10.1498819600; __utmc=235335808; __utmz=235335808.1498713152.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmv=235335808.|2=login%20ever=yes=1^3=plan=normal=1^5=gender=male=1^6=user_id=25745470=1^9=p_ab_id=5=1^10=p_ab_id_2=9=1^11=lang=zh_tw=1"
    return _parse_cookie_header(raw)
||||
|
||||
|
||||
|
||||
|
||||
def crawl(url):
    """Fetch one search-result page and add its illustration records to ``nodes``.

    The page is expected to contain an element with id
    ``js-mount-point-search-result-list`` whose ``data-items`` attribute holds
    a JSON array; every entry of that array is appended to the module-level
    ``nodes`` list while holding ``lock``.

    :param url: search page URL to download.
    """
    page = requests.get(url, headers=headers, cookies=get_cookies()).text
    mounts = BeautifulSoup(page, 'lxml').select('#js-mount-point-search-result-list')
    if not mounts:
        # Without this guard a missing element raised IndexError inside a
        # worker thread, where it went unnoticed.
        logger.error('页面中未找到搜索结果数据:%s' % (url))
        return
    imageNodes = json.loads(mounts[0].attrs['data-items'])
    # Take the lock once per page instead of once per record.
    with lock:
        nodes.extend(imageNodes)
||||
|
||||
def get_urls(search, page):
    """Build the Pixiv search URLs for result pages ``1 .. page - 1``.

    :param search: search keyword; it is percent-encoded so characters such as
        ``&``, ``#`` or spaces cannot corrupt the query string.
    :param page: exclusive upper bound of the page numbers (pass ``n + 1`` to
        get ``n`` pages).
    :return: list of URL strings ordered by page number; empty when
        ``page <= 1``.
    """
    from urllib.parse import quote

    fmt = 'https://www.pixiv.net/search.php?word={}&order=date_d&p={}'
    word = quote(str(search), safe='')
    return [fmt.format(word, p) for p in range(1, page)]
||||
|
||||
def get_Img(params):
    """Download the original image for one illustration and record its local name.

    :param params: ``(rank, node)`` pair. ``node`` is an illustration dict; the
        keys read here are ``url``, ``illustId``, ``illustTitle``,
        ``bookmarkCount`` and ``tags``. On success ``node['imgUrl']`` and
        ``node['localName']`` are set.

    The image is saved as ``<imgPath><illustId><suffix>`` where ``imgPath`` is
    the module-level directory set by ``get_nodes``. ``.jpg`` is tried first
    and ``.png`` is used as a fallback when the server answers 404.
    """
    rank = params[0]
    node = params[1]
    # The slice cuts a fixed-width part out of the thumbnail URL.
    # NOTE(review): presumably this is the dated path plus the "<id>_p0"
    # stem - confirm against a sample URL.
    node['imgUrl'] = 'https://i.pximg.net/img-original/img/' + node['url'][-46:-15]

    # Per-call copy: this function runs in many threads, so writing the
    # referer into the shared module-level dict would let one thread's
    # referer leak into another thread's request.
    request_headers = dict(headers)
    request_headers['referer'] = 'https://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + node['illustId']
    cookies = get_cookies()

    suffix = ".jpg"
    logger.info('开始下载图片:%s%s' % (node['imgUrl'], suffix))

    s = requests.get(node['imgUrl'] + suffix, headers=request_headers, cookies=cookies)
    if s.status_code == 404:
        suffix = '.png'
        s = requests.get(node['imgUrl'] + suffix, headers=request_headers, cookies=cookies)
    if s.status_code != 200:
        # Covers the second 404 as well as other failures (403, 5xx, ...), so
        # an error page is never written to disk as if it were an image.
        logger.error('无法下载图片:%s' % (node['illustTitle']))
        return

    logger.info('下载图片:"%s"到%s' % (node['illustTitle'], os.getcwd().replace('\\', '/') + '/' + imgPath + node['illustId'] + suffix))
    # Binary mode and .content are required for image data.
    with open(imgPath + node['illustId'] + suffix, 'wb') as f:
        f.write(s.content)

    node['localName'] = node['illustId'] + suffix
    logger.info('排行第%d名,收藏数%d,标题:%s,标签:%s,(%s)前投稿,链接:%s' % (rank, node['bookmarkCount'], node['illustTitle'], ','.join(node['tags']), '', node['imgUrl']))
||||
|
||||
def zip(inputFile,outFile):
    """Pack every file found under ``inputFile`` into the zip archive ``outFile``.

    :param inputFile: directory that is walked recursively.
    :param outFile: path of the archive to create (overwritten if it exists).

    Files are stored under their bare file name, without directory
    components, so same-named files from different sub-directories end up as
    duplicate archive entries. The archive is closed even if adding a file
    fails.
    """
    with zipfile.ZipFile(outFile, 'w', zipfile.ZIP_DEFLATED) as archive:
        for dirpath, _dirnames, filenames in os.walk(inputFile):
            for filename in filenames:
                archive.write(os.path.join(dirpath, filename), filename)
||||
|
||||
fsize=''


def _run_in_pool(func, items, workers=32):
    """Call ``func(item)`` for every item on a thread pool and wait for all of them.

    A failure in one worker is logged and does not stop the others; nothing is
    re-raised to the caller.
    """
    with futures.ThreadPoolExecutor(workers) as executor:
        pending = [executor.submit(func, item) for item in items]
        for finished in futures.as_completed(pending):
            error = finished.exception()
            if error is not None:
                logger.error('%s 执行失败:%r' % (func.__name__, error))


def get_nodes(param):
    """Crawl Pixiv search pages, rank the results by bookmark count and optionally download them.

    :param param: indexable with the layout of ``sys.argv``:
        ``param[1]`` search word, ``param[2]`` number of result pages,
        ``param[3]`` how many top-ranked entries to keep, ``param[4]`` result
        type (``'img'`` additionally downloads the images and zips them).
    :return: ``[nodes, tip, fsize]`` - the ranked illustration dicts, a summary
        message and the zip size string (only refreshed in ``'img'`` mode).
    :raises ValueError: if ``param[2]`` or ``param[3]`` is not an integer.

    NOTE(review): ``nodes``, ``fsize`` and ``imgPath`` are module-level state
    shared with ``crawl`` / ``get_Img``, so two concurrent calls would
    interfere with each other.
    NOTE(security): ``param[1]`` is used verbatim in the zip path; a word
    containing path separators would write outside the download directory.
    """
    global nodes, fsize, imgPath
    nodes = []
    start = datetime.now()
    urls = get_urls(param[1], int(param[2]) + 1)
    logger.info('开始从P站获取图片数据')
    _run_in_pool(crawl, urls)

    length = len(nodes)
    logger.info('获取到%d张图片' % (length))
    logger.info('对图片收藏数进行排序')
    # Sort by bookmark count, descending; a missing/None count ranks as 0
    # instead of raising TypeError during comparison.
    nodes = sorted(nodes, key=lambda v: v.get('bookmarkCount') or 0, reverse=True)[:int(param[3])]
    if param[4] == 'img':
        imgPath = 'PixivSearch/static/images/'
        # Clear out the images left over from the previous run.
        for stale in os.listdir(imgPath):
            os.remove(imgPath + stale)
        start_d = datetime.now()
        # Each worker receives (1-based rank, illustration dict).
        _run_in_pool(get_Img, list(enumerate(nodes, 1)))
        logger.info('下载图片花费时间:%s' % (datetime.now() - start_d))
        logger.info('%s张图片下载完毕' % (len(os.listdir(imgPath))))

        zipPath = 'PixivSearch/static/download/' + param[1] + '.zip'
        logger.info('图片打包到:%s' % (zipPath))
        zip(imgPath, zipPath)
        fsize = str(round(os.path.getsize(zipPath) / float(1024 * 1024), 2)) + 'MB'
        logger.info('图包大小:%s' % (fsize))

    tip = '从%d张图片中筛选出收藏数前%s的图片,处理耗时:%s' % (length, param[3], datetime.now() - start)
    logger.info(tip)
    return [nodes, tip, fsize]
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Usage: <script> <word> <pages> <top-n> <type>; pages and top-n must be
    # non-negative integers.
    if len(sys.argv) == 5 and sys.argv[2].isdigit() and sys.argv[3].isdigit():
        try:
            get_nodes(sys.argv)
        except Exception:
            # Previously the exception was swallowed without any output
            # (and BaseException also caught Ctrl-C); log the traceback.
            logger.exception('抓取失败')
    else:
        logger.error('参数不合法')
@ -0,0 +1,124 @@ |
||||
""" |
||||
Django settings for PixivSearch project. |
||||
|
||||
Generated by 'django-admin startproject' using Django 1.11.7. |
||||
|
||||
For more information on this file, see |
||||
https://docs.djangoproject.com/en/1.11/topics/settings/ |
||||
|
||||
For the full list of settings and their values, see |
||||
https://docs.djangoproject.com/en/1.11/ref/settings/ |
||||
""" |
||||
|
||||
import os |
||||
|
||||
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# Set DJANGO_SECRET_KEY in the environment for real deployments; the literal
# below is only the development fallback.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    'dh3^+=iugoo*+p_ea4u3dh&b!_zlgs8*m9kc+#*f2eozglsqjh',
)

# SECURITY WARNING: don't run with debug turned on in production!
# Defaults to True (unchanged behavior); set DJANGO_DEBUG=false to disable.
DEBUG = os.environ.get('DJANGO_DEBUG', 'True').strip().lower() in ('1', 'true', 'yes', 'on')

# Accepts any Host header; restrict this list before exposing the site.
ALLOWED_HOSTS = ['*']
||||
|
||||
|
||||
# Application definition

# Django contrib apps plus the project's own 'PixivSearch.module' package.
INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'PixivSearch.module',
]

# Middleware, listed in the order Django applies it to requests.
MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'PixivSearch.urls'

# Templates are looked up in the 'templates' directory next to this settings
# file (with forward slashes on every platform) and in each app's templates.
TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [
            os.path.join(os.path.dirname(__file__), 'templates').replace('\\', '/'),
        ],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'PixivSearch.wsgi.application'
||||
|
||||
|
||||
# Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases

# Single SQLite database stored as db.sqlite3 under BASE_DIR.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
    },
}


# Password validation
# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators

AUTH_PASSWORD_VALIDATORS = [
    {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
    {'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
    {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'},
    {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
]
||||
|
||||
|
||||
# Internationalization
# https://docs.djangoproject.com/en/1.11/topics/i18n/

LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'

USE_I18N = True
USE_L10N = True
USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.11/howto/static-files/

STATIC_URL = '/static/'

# Extra static directory: the 'static' folder next to this settings file,
# written with forward slashes on every platform.
STATICFILES_DIRS = [
    os.path.join(os.path.dirname(__file__), 'static').replace('\\', '/'),
]
File diff suppressed because one or more lines are too long
@ -0,0 +1,63 @@ |
||||
<html xmlns="http://www.w3.org/1999/html">
<head>
    <style>
        div{
            text-align:center;
        }
        ol{list-style-type:decimal;}
        ol li{ list-style-position:outside;}
    </style>
    {% load staticfiles %}
    <script src="{% static "js/jquery-3.2.1.min.js"%}"></script>
    <script>
        // Validate the numeric fields; returning false cancels the submit.
        function check() {
            var valid = true;
            $("[name=pageSize],[name=order]").each(function () {
                var field = $(this);
                if (field.val() != '' && field.val() <= 0) {
                    field.val('');
                    alert(field.parent().text() + '不能小于等于0');
                    field.focus();
                    valid = false;
                }
            });
            return valid;
        }
    </script>
</head>
<body>
<form action="/pixiv/search" method="post" onsubmit="return check()">
    {% csrf_token %}
    <label>关键字<input name="word" placeholder="R-18"/></label>
    <label>页数<input name="pageSize" type="number" placeholder="10"/></label>
    <label>排行数<input name="order" type="number" placeholder="10"/></label>
    <label>结果显示类型:文本信息<input type="radio" value="" name="type" checked/></label>
    <label>图文信息<input type="radio" value="img" name="type"/></label>

    <input type="submit" />
    {% if download %}
        <a href="{% static "/download/" %}{{download}}">图包提取(文件大小:{{ size}})</a>
        {# <button id="download" onclick="window.open('{% static "/download/" %}{{download}}')">图包提取(文件大小:{{ size}})</button>#}
    {% endif %}
</form>

<div>
    <h1>{{ msg}}</h1>
</div>
<ol>
    {% for imageNode in imageNodes %}
        <li>
            <div>
                <h1>标题:<a href="https://www.pixiv.net/member_illust.php?mode=medium&amp;illust_id={{imageNode.illustId}}" target="_blank">{{imageNode.illustTitle}}</a>,画师:<a href="https://www.pixiv.net/member.php?id={{ imageNode.userId}}">{{imageNode.userName}}</a>,收藏数:{{imageNode.bookmarkCount}}</h1>
                {% if imageNode.localName %}
                    <img src="{% static "images/"%}{{imageNode.localName}}">
                {% endif %}
            </div>
        </li>
    {% empty %}
        <p>{{tip}}</p>
    {% endfor %}

</ol>
</body>
</html>
@ -0,0 +1,24 @@ |
||||
"""PixivSearch URL Configuration |
||||
|
||||
The `urlpatterns` list routes URLs to views. For more information please see: |
||||
https://docs.djangoproject.com/en/1.11/topics/http/urls/ |
||||
Examples: |
||||
Function views |
||||
1. Add an import: from my_app import views |
||||
2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') |
||||
Class-based views |
||||
1. Add an import: from other_app.views import Home |
||||
2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') |
||||
Including another URLconf |
||||
1. Import the include() function: from django.conf.urls import url, include |
||||
2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) |
||||
""" |
||||
from django.conf.urls import url |
||||
from PixivSearch.view import search, index, download, haha |
||||
|
||||
# Routes are matched in order; each regex is anchored at the start only.
urlpatterns = [
    url(r'^$', index),                   # landing page
    url(r'^pixiv/search', search),       # search form target
    url(r'^pixiv/download', download),   # streamed file download
    url(r'^tsdm', haha),
]
@ -0,0 +1,72 @@ |
||||
# coding=utf-8 |
||||
import os |
||||
|
||||
from django.http import Http404, StreamingHttpResponse, HttpResponse |
||||
from django.shortcuts import render |
||||
|
||||
from PixivSearch import pixiv |
||||
from PixivSearch.module import config |
||||
from PixivSearch.pixiv import get_nodes |
||||
|
||||
# Reuse the 'file' logger configured when PixivSearch.pixiv is imported.
logger = pixiv.logging.getLogger('file')


def _post_value(request, key, default):
    """Return the POST field ``key``, or ``default`` when it is missing or blank."""
    value = request.POST.get(key)
    if value is None or value == '':
        return default
    return value


def search(request):
    """Run a Pixiv search from the submitted form and render the result page.

    POST fields: ``word`` (default ``'R-18'``), ``pageSize`` and ``order``
    (both default ``10``), ``type`` (``'img'`` also downloads and zips the
    images; blank means text only).

    :raises Http404: when ``pageSize`` or ``order`` is not an integer (or
        ``get_nodes`` raises ``ValueError``).
    """
    word = _post_value(request, 'word', 'R-18')
    result_type = _post_value(request, 'type', None)
    try:
        # Form values arrive as strings; convert them up front so the %d
        # placeholders below receive real integers.
        pageSize = int(_post_value(request, 'pageSize', 10))
        order = int(_post_value(request, 'order', 10))
        logger.info("word:%s,pageSize:%d,order:%d,type:%s", word, pageSize, order, result_type)
        array = get_nodes([0, word, pageSize, order, result_type])
        if len(array[0]) > 0:
            if result_type == 'img':
                nodes = {'imageNodes': array[0], 'msg': array[1], 'download': word + '.zip', 'size': array[2]}
            else:
                nodes = {'imageNodes': array[0], 'msg': array[1]}
        else:
            nodes = {'tip': '没有返回结果'}
        return render(request, 'test.html', nodes)
    except ValueError:
        raise Http404()
||||
|
||||
|
||||
def index(request):
    """Render the empty search page with a hint asking for search parameters."""
    context = {'tip': '输入参数进行搜索'}
    return render(request, 'test.html', context)
||||
|
||||
|
||||
def download(request):
    """Stream a file from ``static/download/`` as an attachment.

    GET parameter ``fileName`` selects the file. Only its base name is used,
    so the request cannot escape the download directory.

    :raises Http404: when ``fileName`` is missing or names no existing file.
    """
    from urllib.parse import quote

    def file_iterator(file_name, chunk_size=512):
        # Binary mode: the payload is a zip archive, not text.
        with open(file_name, 'rb') as f:
            while True:
                c = f.read(chunk_size)
                if c:
                    yield c
                else:
                    break

    requested = request.GET.get('fileName')
    if not requested:
        raise Http404()
    # Drop any directory components (either slash style) supplied by the client.
    safe_name = os.path.basename(requested.replace('\\', '/'))
    download_dir = os.path.join(os.path.dirname(__file__), 'static/download/').replace('\\', '/')
    fileName = download_dir + safe_name
    if not safe_name or not os.path.isfile(fileName):
        raise Http404()

    response = StreamingHttpResponse(file_iterator(fileName))
    response['Content-Type'] = 'application/octet-stream'
    # Percent-encode the name so non-ASCII file names survive the header;
    # filename* carries the UTF-8 form for clients that understand it.
    encoded_name = quote(safe_name)
    response['Content-Disposition'] = 'attachment;filename="{0}"; filename*=UTF-8\'\'{0}'.format(encoded_name)
    return response
||||
|
||||
|
||||
def haha(request):
    """Save a fixed ``param`` row ('123' -> '456') and echo its name in a tiny HTML page."""
    p = config.param(param_name='123', param_value='456')
    p.save()
    # Read the value from the saved instance; the class attribute
    # ``config.param.param_name`` is the field descriptor, not the data.
    return HttpResponse("<html><body>%s</body></html>" % (p.param_name))
@ -0,0 +1,16 @@ |
||||
""" |
||||
WSGI config for PixivSearch project. |
||||
|
||||
It exposes the WSGI callable as a module-level variable named ``application``. |
||||
|
||||
For more information on this file, see |
||||
https://docs.djangoproject.com/en/1.11/howto/deployment/wsgi/ |
||||
""" |
||||
|
||||
import os |
||||
|
||||
from django.core.wsgi import get_wsgi_application |
||||
|
||||
# Use this project's settings unless DJANGO_SETTINGS_MODULE is already set.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PixivSearch.settings")

# Module-level WSGI callable.
application = get_wsgi_application()
@ -0,0 +1,22 @@ |
||||
#!/usr/bin/env python |
||||
import os |
||||
import sys |
||||
|
||||
if __name__ == "__main__":
    # Use this project's settings unless DJANGO_SETTINGS_MODULE is already set.
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "PixivSearch.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError:
        # The above import may fail for some other reason. Ensure that the
        # issue is really that Django is missing to avoid masking other
        # exceptions on Python 2.
        try:
            import django
        except ImportError:
            raise ImportError(
                "Couldn't import Django. Are you sure it's installed and "
                "available on your PYTHONPATH environment variable? Did you "
                "forget to activate a virtual environment?"
            )
        # Django itself imports fine, so re-raise the original error.
        raise
    # Hand the command-line arguments to Django's management entry point.
    execute_from_command_line(sys.argv)
Loading…
Reference in new issue