阿里云智能云相册

7 years ago · a777f835ec
parent a8962d052d
commit a777f835ec
1 changed files with 112 additions and 0 deletions
--- a/PixivSearch/baidu/downLoadBduImg.py
+++ b/PixivSearch/baidu/downLoadBduImg.py
@@ -0,0 +1,112 @@
+import json
+import os
+import re
+from concurrent import futures
+from datetime import datetime
+
+import requests
+
+
+def get_cookies():
+    """Return the hard-coded Baidu cookie header parsed into a dict.
+
+    The header is split on ``;``; each segment is stripped and split on its
+    first ``=`` into a cookie name and value.
+    """
+    raw_header = "BDqhfp=fate%26%26NaN%26%260%26%261; BIDUPSID=8689C23BFD1526702A4EF173F3A809DD; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; userFrom=null; BAIDUID=8689C23BFD152670722FAAEB4DDC55FA:FG=1; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm"
+    pairs = (segment.strip().split('=', 1) for segment in raw_header.split(';'))
+    return {name: value for name, value in pairs}
+
+
+# Directory images are saved into. Assigned from user input in the __main__
+# block and rewritten to "<savePath>/<word>" by getBaiduImage; downImage reads it.
+savePath = None
+# Used both as the number of results requested per search page and as the
+# size of the download thread pool.
+threadNum = 10
+# Set by getBaiduImage when a search starts; the __main__ block subtracts it
+# from the current time to report the elapsed duration.
+startTime = None
+
+
+def getBaiduImage(word):
+    """Collect Baidu image-search results for ``word`` and download them.
+
+    Pages through the ``acjson`` search endpoint ``threadNum`` results at a
+    time, queues every result that carries a ``replaceUrl`` entry, and then
+    hands the queue to a pool of ``threadNum`` threads running ``downImage``.
+
+    Side effects: sets the module globals ``startTime`` (to the current time)
+    and ``savePath`` (to ``<savePath>/<word>``, created if missing), and keeps
+    a zero-byte progress marker file in that directory while the queue is
+    being filled. The marker is removed before downloading starts.
+
+    Paging stops when a page holds at most one entry, or after three
+    consecutive failed pages (non-200 status or an exception); whatever was
+    queued up to that point is still downloaded.
+
+    :param word: search keyword; also used as the sub-directory name.
+    """
+    global startTime, savePath
+    # Consecutive failed pages tolerated before paging is abandoned, so a
+    # persistent error can no longer spin the loop forever.
+    max_failures = 3
+    # Seconds to wait for the search endpoint; without a timeout a stalled
+    # connection would block indefinitely.
+    request_timeout = 10
+    search_url = (
+        r"https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord={"
+        r"queryWord}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&word={"
+        r"word}&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&pn={pn}&rn={rn}&gsm=3c&1523890541764= "
+    )
+
+    startTime = datetime.now()
+    savePath = r'{savePath}/{word}'.format(savePath=savePath, word=word)
+    if not os.path.exists(savePath):
+        os.makedirs(savePath)
+
+    params = []
+    marker = None
+    failures = 0
+    # NOTE(review): paging starts at offset threadNum, so offsets
+    # 0..threadNum-1 are never requested -- confirm this is intended.
+    start = threadNum
+    while failures < max_failures:
+        url = search_url.format(queryWord=word, word=word, pn=start, rn=threadNum)
+        # Advance before the request: a page that fails is skipped instead of
+        # retried, which also avoids queueing its first entries twice.
+        start += threadNum
+        print('request url:%s' % url)
+        try:
+            req = requests.get(url, timeout=request_timeout)
+            if req.status_code != 200:
+                print('request failed, status code:%s' % req.status_code)
+                failures += 1
+                continue
+            req.encoding = 'utf-8'
+            # Drop \' sequences, which are not valid JSON escapes.
+            obj = json.loads(req.text.replace('\\\'', ''))
+            results = obj['data']
+            # A page with at most one entry is treated as the end of the
+            # results; "<=" also stops on an empty list.
+            if len(results) <= 1:
+                break
+            for img in results:
+                if 'fromPageTitle' not in img:
+                    continue
+                # Fall back to the plain title when the encoded one is absent.
+                title = img.get('fromPageTitleEnc', img['fromPageTitle'])
+                print('图片:%s\t添加到下载队列' % title)
+                if 'replaceUrl' not in img:
+                    continue
+                params.append((img['replaceUrl'][0]['ObjURL'], len(params)))
+                # Replace the progress marker so its name shows the queue size.
+                if marker is not None and os.path.exists(marker):
+                    os.remove(marker)
+                marker = r'{savePath}/图片下载队列填充：{i}'.format(savePath=savePath, i=len(params))
+                with open(marker, 'w'):
+                    pass
+            failures = 0
+        except Exception as e:
+            # Exception rather than BaseException, so Ctrl-C still interrupts.
+            print(repr(e))
+            failures += 1
+
+    if marker is not None and os.path.exists(marker):
+        os.remove(marker)
+    try:
+        # Leaving the with-block waits for every queued download to finish.
+        with futures.ThreadPoolExecutor(threadNum) as executor:
+            executor.map(downImage, params)
+    except Exception as e:
+        print(repr(e))
+
+
+def downImage(params):
+    """Download one queued image into the module-level ``savePath`` directory.
+
+    The file is written as ``fate_<index>.<ext>``. ``<ext>`` comes from the
+    path component of the URL only, so query strings and dots elsewhere in
+    the URL do not leak into the file name; when the path has no usable
+    alphanumeric extension, ``jpg`` is used.
+
+    Nothing is returned. Responses other than HTTP 200 are skipped, and any
+    exception is printed instead of raised so one failed download does not
+    affect the others.
+
+    :param params: ``(url, index)`` pair as queued by ``getBaiduImage``.
+    """
+    from urllib.parse import urlsplit
+
+    try:
+        url, index = params[0], params[1]
+        print(r'开始下载图片{url}'.format(url=url))
+        # The timeout keeps a stalled server from blocking this worker, and
+        # with it the pool shutdown, indefinitely.
+        response = requests.get(url, headers={"Referer": "image.baidu.com"}, timeout=30)
+        if response.status_code != 200:
+            return
+        extension = os.path.splitext(urlsplit(url).path)[1].lstrip('.')
+        if not extension.isalnum():
+            # Covers both a missing extension and one with odd characters.
+            extension = 'jpg'
+        imgPath = r'{savePath}/fate_{index}.{format}'.format(savePath=savePath,
+                                                             index=index,
+                                                             format=extension)
+        with open(imgPath, 'wb') as f:
+            f.write(response.content)
+        print(r'图片{url}成功下载到{imgPath}'.format(url=url, imgPath=imgPath))
+    except Exception as e:
+        print(repr(e))
+
+
+if __name__ == '__main__':
+    # Interactive loop: read a keyword, read and validate a target directory,
+    # run the search and download, then report what ended up in the directory.
+    # Typing "exit" as the keyword ends the program.
+    while True:
+        keyword = input('输入要下载图片的关键字，输入 exit  退出程序\n')
+        if keyword == 'exit':
+            print('exit')
+            break
+        while True:
+            savePath = input('输入图片存放目录：例如 E:/图片,注意目录之间使用正斜杠隔开"/"\n')
+            # Drive letter, colon, then one or more "/segment" parts made of
+            # CJK characters, letters, digits or underscores. The class is
+            # [a-zA-Z]; the former "A-z" range also matched [ \ ] ^ _ and `.
+            if re.fullmatch(r"[a-zA-Z]:(/[\u4e00-\u9fa5_a-zA-Z0-9]+)+", savePath) is not None:
+                break
+            print(r'图片目录{savePath}不合法请重新输入'.format(savePath=savePath))
+        getBaiduImage(keyword)
+        # getBaiduImage rewrote savePath to the keyword sub-directory and set
+        # startTime; the count is every entry currently in that directory.
+        print(r'使用{threadNum}线程成功下载{count}张图片到目录{path}下，耗时：{second}'.format(threadNum=threadNum,
+                                                                            count=len(os.listdir(savePath)),
+                                                                            path=savePath,
+                                                                            second=datetime.now() - startTime))