parent
a8962d052d
commit
a777f835ec
@ -0,0 +1,112 @@ |
||||
import json
import os
import re
from concurrent import futures
from datetime import datetime
from urllib.parse import quote, urlsplit

import requests
||||
|
||||
|
||||
def get_cookies(cookie_header=None):
    """Parse a ``Cookie`` header string into a ``{name: value}`` dict.

    Args:
        cookie_header: Raw header text of the form ``"k1=v1; k2=v2"``. When
            omitted (or ``None``) the built-in Baidu cookie string is parsed,
            which keeps the original zero-argument behaviour.

    Returns:
        A dict mapping each cookie name to its value. Only the first ``=`` of
        a segment separates name from value, so values may themselves contain
        ``=``. Blank segments and segments without a name or without ``=``
        are skipped instead of raising.
    """
    if cookie_header is None:
        cookie_header = (
            "BDqhfp=fate%26%26NaN%26%260%26%261; "
            "BIDUPSID=8689C23BFD1526702A4EF173F3A809DD; "
            "BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; "
            "userFrom=null; "
            "BAIDUID=8689C23BFD152670722FAAEB4DDC55FA:FG=1; "
            "BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm"
        )

    cookies = {}
    for segment in cookie_header.split(';'):
        # partition never raises, unlike unpacking the result of split('=', 1).
        name, separator, value = segment.strip().partition('=')
        if separator and name:
            cookies[name] = value
    return cookies
||||
|
||||
|
||||
# Number of worker threads used for downloading; getBaiduImage also uses it
# as the page size and paging step of the search requests.
threadNum = 10

# Directory the images are written to. Filled in from user input by the
# script entry point and extended with the search keyword by getBaiduImage.
savePath = None

# Time at which the current getBaiduImage run began; used to report the
# elapsed time once the run has finished.
startTime = None
||||
|
||||
|
||||
def getBaiduImage(word):
    """Queue every Baidu image-search hit for *word* and download the queue.

    Result pages of ``threadNum`` entries are requested one after another.
    Every entry exposing ``replaceUrl[0]['ObjURL']`` is queued as a
    ``(url, index)`` pair. Paging ends when a page carries at most one entry
    or after three consecutive failed page requests, so a persistent error
    can no longer spin forever. The queue is then passed to ``downImage`` on
    a pool of ``threadNum`` threads.

    Side effects: sets the global ``startTime`` to the call time, rewrites the
    global ``savePath`` to ``<savePath>/<word>`` (creating that directory) and
    maintains an empty progress-marker file there while the queue is filled.

    Args:
        word: Search keyword. It is percent-encoded before being inserted
            into the request URL.
    """
    global startTime, savePath
    startTime = datetime.now()
    savePath = r'{savePath}/{word}'.format(savePath=savePath, word=word)
    os.makedirs(savePath, exist_ok=True)

    queue = []
    marker = None
    # NOTE(review): the first request uses offset threadNum rather than 0, as
    # the original did; confirm whether skipping the first page is intended.
    offset = threadNum
    failures = 0
    max_failures = 3

    while failures < max_failures:
        try:
            entries = _fetch_result_page(word, offset, threadNum)
        except Exception as e:
            # Best effort: report, retry the same page a bounded number of
            # times, then continue with whatever has been queued so far.
            failures += 1
            print(repr(e))
            continue
        failures = 0

        # A page with a single entry was the original end-of-results test;
        # an empty page is treated the same way.
        if len(entries) <= 1:
            break

        for entry in entries:
            if not isinstance(entry, dict):
                continue
            if 'fromPageTitle' in entry:
                print('图片:%s\t添加到下载队列' % entry.get('fromPageTitleEnc'))
            # NOTE(review): the source listing has lost its indentation, so it
            # cannot be confirmed whether this check was nested under the
            # title check; entries are queued whenever a URL is present.
            image_url = _extract_image_url(entry)
            if image_url is not None:
                queue.append((image_url, len(queue)))

        marker = _update_progress_marker(marker, len(queue))
        offset += threadNum

    _remove_progress_marker(marker)

    try:
        # Leaving the with-block waits for every submitted download.
        with futures.ThreadPoolExecutor(threadNum) as executor:
            executor.map(downImage, queue)
    except Exception as e:
        print(repr(e))


def _fetch_result_page(word, offset, count):
    """Return the ``data`` list of one search-result page; raise on failure."""
    url = (r"https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord={"
           r"queryWord}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&word={"
           r"word}&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&pn={pn}&rn={rn}&gsm=3c&1523890541764= ")
    # Encode the keyword so characters such as '&', '#' or spaces cannot
    # break the query string.
    encoded = quote(str(word), safe='')
    url = url.format(queryWord=encoded, word=encoded, pn=offset, rn=count)
    print('request url:%s' % url)

    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        raise RuntimeError('unexpected HTTP status %s' % response.status_code)
    response.encoding = 'utf-8'
    # The body may contain \' sequences, which are not valid JSON escapes;
    # they are stripped before parsing, as in the original.
    payload = json.loads(response.text.replace("\\'", ''))
    return payload.get('data') or []


def _extract_image_url(entry):
    """Return ``entry['replaceUrl'][0]['ObjURL']`` or None when unavailable."""
    try:
        return entry['replaceUrl'][0]['ObjURL']
    except (KeyError, IndexError, TypeError):
        return None


def _update_progress_marker(previous, queued):
    """Replace the empty progress-marker file with one named after *queued*."""
    # NOTE(review): if the ':' in this name is an ASCII colon, it is not a
    # valid file-name character on Windows - confirm on the target platform.
    path = r'{savePath}/图片下载队列填充:{i}'.format(savePath=savePath, i=queued)
    try:
        if previous is not None and os.path.exists(previous):
            os.remove(previous)
        with open(path, 'w'):
            pass
    except OSError as e:
        # The marker is purely informational; never let it stop the paging.
        print(repr(e))
    return path


def _remove_progress_marker(marker):
    """Delete the progress-marker file if it exists; errors are only printed."""
    try:
        if marker is not None and os.path.exists(marker):
            os.remove(marker)
    except OSError as e:
        print(repr(e))
||||
|
||||
|
||||
def downImage(params):
    """Download one image and store it as ``<savePath>/fate_<index>.<ext>``.

    Any failure is printed and swallowed so that one bad URL does not stop
    the other downloads running on the thread pool. Responses with a status
    other than 200 are skipped silently, as before.

    Args:
        params: ``(url, index)`` pair; ``index`` becomes part of the file
            name. The target directory is read from the global ``savePath``.
    """
    try:
        url = params[0]
        index = params[1]
        print(r'开始下载图片{url}'.format(url=url))
        response = requests.get(url, headers={"Referer": "image.baidu.com"}, timeout=30)
        if response.status_code != 200:
            return

        imgPath = r'{savePath}/fate_{index}.{format}'.format(savePath=savePath,
                                                             index=index,
                                                             format=_image_extension(url))
        # The context manager closes the file even if the write fails.
        with open(imgPath, 'wb') as f:
            f.write(response.content)
        print(r'图片{url}成功下载到{imgPath}'.format(url=url, imgPath=imgPath))
    except Exception as e:
        print(repr(e))


def _image_extension(url, default='jpg'):
    """Return the file extension of the URL's path, or *default* if unusable."""
    # Only the path component is inspected, so query strings, fragments and
    # dots in the host name can no longer leak into the file name.
    extension = os.path.splitext(urlsplit(url).path)[1].lstrip('.')
    if extension.isalnum() and len(extension) <= 5:
        return extension
    return default
||||
|
||||
|
||||
if __name__ == '__main__':
    # Interactive loop: read a keyword, read and validate a target directory,
    # run the download and print a summary; the keyword "exit" ends the loop.
    while True:
        keyword = input('输入要下载图片的关键字,输入 exit 退出程序\n')
        if keyword == 'exit':
            print('exit')
            break

        while True:
            savePath = input('输入图片存放目录:例如 E:/图片,注意目录之间使用正斜杠隔开"/"\n')
            # Drive letter, then one or more "/name" parts. The drive class
            # is [a-zA-Z]; the former [a-zA-z] range also matched the
            # punctuation characters between 'Z' and 'a'.
            if re.fullmatch(r"[a-zA-Z]:(/[\u4e00-\u9fa5_a-zA-Z0-9]+)+", savePath) is not None:
                break
            print(r'图片目录{savePath}不合法请重新输入'.format(savePath=savePath))

        getBaiduImage(keyword)
        # getBaiduImage has rewritten savePath to the keyword sub-directory,
        # so the count below covers the files of this run's directory.
        print(r'使用{threadNum}线程成功下载{count}张图片到目录{path}下,耗时:{second}'.format(threadNum=threadNum,
                                                                       count=len(os.listdir(savePath)),
                                                                       path=savePath,
                                                                       second=datetime.now() - startTime))
Loading…
Reference in new issue