From a777f835ec488b918dbaac92785bff03354a8140 Mon Sep 17 00:00:00 2001
From: 10295 <1029559041@qq.com>
Date: Wed, 25 Apr 2018 00:10:12 +0800
Subject: [PATCH] =?UTF-8?q?=E9=98=BF=E9=87=8C=E4=BA=91=E6=99=BA=E8=83=BD?=
 =?UTF-8?q?=E4=BA=91=E7=9B=B8=E5=86=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 PixivSearch/baidu/downLoadBduImg.py | 112 ++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 PixivSearch/baidu/downLoadBduImg.py

diff --git a/PixivSearch/baidu/downLoadBduImg.py b/PixivSearch/baidu/downLoadBduImg.py
new file mode 100644
index 0000000..5085c44
--- /dev/null
+++ b/PixivSearch/baidu/downLoadBduImg.py
@@ -0,0 +1,112 @@
import json
import os
import re
from concurrent import futures
from datetime import datetime

import requests


def get_cookies():
    # Parse a captured browser cookie string into a dict that requests can use.
    _cookies = {}
    array = "BDqhfp=fate%26%26NaN%26%260%26%261; BIDUPSID=8689C23BFD1526702A4EF173F3A809DD; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; userFrom=null; BAIDUID=8689C23BFD152670722FAAEB4DDC55FA:FG=1; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm".split(
        ';')
    for row in array:
        k, v = row.strip().split('=', 1)
        _cookies[k] = v
    return _cookies


# Directory that downloaded images are saved into
savePath = None
# Number of download threads; also used as the page size for each search request
threadNum = 10
startTime = None


def getBaiduImage(word):
    # Page through Baidu's image-search JSON API, queue every image URL found,
    # then download the queue with a thread pool.
    global startTime, savePath
    params = []
    startTime = datetime.now()
    start = threadNum
    i = 0
    filepath = None
    savePath = r'{savePath}/{word}'.format(savePath=savePath, word=word)
    if not os.path.exists(savePath):
        os.makedirs(savePath)
    while True:
        try:
            url = r"https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord={" \
                  r"queryWord}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&word={" \
                  r"word}&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&pn={pn}&rn={rn}&gsm=3c&1523890541764="
            url = url.format(queryWord=word, word=word, pn=start, rn=threadNum)
            print('request url:%s' % url)
            req = requests.get(url)
            if req.status_code == 200:
                req.encoding = 'utf-8'
                # Strip the escaped quotes that otherwise break json.loads.
                obj = json.loads(req.text.replace('\\\'', ''))
                # The last page contains a single placeholder entry; stop paging there.
                if len(obj['data']) == 1:
                    break
                for img in obj['data']:
                    if 'fromPageTitleEnc' in img:
                        print('Image %s\tadded to the download queue' % img['fromPageTitleEnc'])
                    if 'replaceUrl' in img:
                        img_url = img['replaceUrl'][0]['ObjURL']
                        params.append((img_url, i))
                        i += 1
                # An empty marker file in the save directory shows how many URLs are queued so far.
                if filepath is not None and os.path.exists(filepath):
                    os.remove(filepath)
                filepath = r'{savePath}/download_queue_{i}'.format(savePath=savePath, i=i)
                open(filepath, 'w').close()
                start += threadNum
        except BaseException as e:
            # Stop paging on a request or parse error instead of retrying forever.
            print(repr(e))
            break
    # Remove the last progress marker before the downloads start.
    if filepath is not None and os.path.exists(filepath):
        os.remove(filepath)
    try:
        with futures.ThreadPoolExecutor(threadNum) as executor:
            executor.map(downImage, params)
    except BaseException as e:
        print(repr(e))


def downImage(params):
    try:
        url, index = params
        print(r'Start downloading image {url}'.format(url=url))
        resp = requests.get(url, headers={"Referer": "image.baidu.com"})
        if resp.status_code == 200:
            # Treat everything after the last '.' as the file extension.
            ext = url[-url[::-1].index('.'):]
            # Files are saved as <savePath>/fate_<index>.<ext>.
            imgPath = r'{savePath}/fate_{index}.{ext}'.format(savePath=savePath,
                                                              index=index,
                                                              ext=ext)
            with open(imgPath, 'wb') as f:
                f.write(resp.content)
            print(r'Image {url} downloaded to {imgPath}'.format(url=url, imgPath=imgPath))
    except BaseException as e:
        print(repr(e))


if __name__ == '__main__':
    while True:
        keyword = input('Enter a keyword to download images for, or "exit" to quit\n')
        if keyword != 'exit':
            while True:
                savePath = input('Enter the directory to save images into, e.g. E:/images (separate directories with forward slashes "/")\n')
                if re.fullmatch(r"[a-zA-Z]:(/[\u4e00-\u9fa5_a-zA-Z0-9]+)+", savePath) is None:
                    print(r'Image directory {savePath} is not valid, please enter it again'.format(savePath=savePath))
                else:
                    break
            getBaiduImage(keyword)
            print(r'Downloaded {count} images to {path} using {threadNum} threads, elapsed time: {second}'.format(
                threadNum=threadNum,
                count=len(os.listdir(savePath)),
                path=savePath,
                second=datetime.now() - startTime))
        else:
            print('exit')
            break
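
Note on get_cookies(): the patch defines it but never calls it. If the acjson endpoint ever
requires the captured session cookies, a minimal sketch of wiring it in (assuming the hard-coded
cookie string is still valid) is to pass the parsed dict to the search request:

    # Hypothetical tweak inside getBaiduImage(); requests.get accepts a cookies dict.
    req = requests.get(url, cookies=get_cookies())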