账户有效性判断

4 years ago · ceeba74df5
parent a64425575f
commit ceeba74df5
1 changed file with 26 additions and 21 deletions
--- a/main.py
+++ b/main.py
@@ -135,7 +135,7 @@ def parse(content):
    return params_list


-suffix = '77726476706e69737468656265737421f7b9569d2936695e790c88b8991b203a18454272'
+suffix = '77726476706e69737468656265737421e7e056d2303166567f068ea89941227bfcd3ca21bd0c'
 base_url = f'https://libcon.bupt.edu.cn/http/{suffix}'
 profession = "计算机软件与理论"
 keyword = f'(专业%3A"{profession}")'
@@ -351,31 +351,36 @@ def run(max=10, last_page=100, page_size=20):
                    **params)
                writeInfo(f'下载接口={url}')
                res = session.get(url, headers=headers)
-                if check(res) and 'downloadliterature.do' in res.url:
+                if check(res):
                    res_html = BeautifulSoup(res.content, "html.parser")
-                    downloadIframe = res_html.select_one('#downloadIframe')
-                    if downloadIframe:
-                        res = session.get(downloadIframe["src"])
-                        if check(res) and 'download.ashx' in res.url:
-                            writeInfo("成功获取真实下载地址={path}".format(path=res.url))
-                            res = session.get(res.url, headers=headers, stream=True)
-                            if check(res) and 'pdf' in res.headers['Content-Type']:
-                                des = res.headers['Content-Disposition'].split(';')
-                                if len(des) == 2 and len(des[1].split('=')) == 2:
-                                    count = count + 1
-                                    writeInfo(f'当前采集进度{count}/{max},{round(count / max, 4) * 100}%')
-                                    if count <= max:
-                                        save(des, res, params)
-                                        if count == max:
-                                            break
+                    if 'downloadliterature.do' in res.url:
+                        downloadIframe = res_html.select_one('#downloadIframe')
+                        if downloadIframe:
+                            res = session.get(downloadIframe["src"])
+                            if check(res) and 'download.ashx' in res.url:
+                                writeInfo("成功获取真实下载地址={path}".format(path=res.url))
+                                res = session.get(res.url, headers=headers, stream=True)
+                                if check(res) and 'pdf' in res.headers['Content-Type']:
+                                    des = res.headers['Content-Disposition'].split(';')
+                                    if len(des) == 2 and len(des[1].split('=')) == 2:
+                                        count = count + 1
+                                        writeInfo(f'当前采集进度{count}/{max},{round(count / max, 4) * 100}%')
+                                        if count <= max:
+                                            save(des, res, params)
+                                            if count == max:
+                                                break
+                                    else:
+                                        writeError("非法响应类型")
                                else:
-                                    writeError("非法响应类型")
+                                    writeError("无法获取文档信息")
                            else:
-                                writeError("无法获取文档信息")
+                                writeError("无法获取文档真实下载地址")
                        else:
-                            writeError("无法获取文档真实下载地址")
+                            writeError("无法获取真实下载地址")
+                    elif res_html.select_one('title').text == '交易':
+                        raise Exception(res_html.select_one('div.NotWork>span').text)
                    else:
-                        writeError("无法获取真实下载地址")
+                        raise Exception('发生未知错误！！！')
                else:
                    writeError('error code={code}'.format(code=res.status_code))
            break