账户有效性判断

master
pan 4 years ago
parent a64425575f
commit ceeba74df5
  1. 47
      main.py

@ -135,7 +135,7 @@ def parse(content):
return params_list
suffix = '77726476706e69737468656265737421f7b9569d2936695e790c88b8991b203a18454272'
suffix = '77726476706e69737468656265737421e7e056d2303166567f068ea89941227bfcd3ca21bd0c'
base_url = f'https://libcon.bupt.edu.cn/http/{suffix}'
profession = "计算机软件与理论"
keyword = f'(专业%3A"{profession}")'
@ -351,31 +351,36 @@ def run(max=10, last_page=100, page_size=20):
**params)
writeInfo(f'下载接口={url}')
res = session.get(url, headers=headers)
if check(res) and 'downloadliterature.do' in res.url:
if check(res):
res_html = BeautifulSoup(res.content, "html.parser")
downloadIframe = res_html.select_one('#downloadIframe')
if downloadIframe:
res = session.get(downloadIframe["src"])
if check(res) and 'download.ashx' in res.url:
writeInfo("成功获取真实下载地址={path}".format(path=res.url))
res = session.get(res.url, headers=headers, stream=True)
if check(res) and 'pdf' in res.headers['Content-Type']:
des = res.headers['Content-Disposition'].split(';')
if len(des) == 2 and len(des[1].split('=')) == 2:
count = count + 1
writeInfo(f'当前采集进度{count}/{max},{round(count / max, 4) * 100}%')
if count <= max:
save(des, res, params)
if count == max:
break
if 'downloadliterature.do' in res.url:
downloadIframe = res_html.select_one('#downloadIframe')
if downloadIframe:
res = session.get(downloadIframe["src"])
if check(res) and 'download.ashx' in res.url:
writeInfo("成功获取真实下载地址={path}".format(path=res.url))
res = session.get(res.url, headers=headers, stream=True)
if check(res) and 'pdf' in res.headers['Content-Type']:
des = res.headers['Content-Disposition'].split(';')
if len(des) == 2 and len(des[1].split('=')) == 2:
count = count + 1
writeInfo(f'当前采集进度{count}/{max},{round(count / max, 4) * 100}%')
if count <= max:
save(des, res, params)
if count == max:
break
else:
writeError("非法响应类型")
else:
writeError("非法响应类型")
writeError("无法获取文档信息")
else:
writeError("无法获取文档信息")
writeError("无法获取文档真实下载地址")
else:
writeError("无法获取文档真实下载地址")
writeError("无法获取真实下载地址")
elif res_html.select_one('title').text == '交易':
raise Exception(res_html.select_one('div.NotWork>span').text)
else:
writeError("无法获取真实下载地址")
raise Exception('发生未知错误!!!')
else:
writeError('error code={code}'.format(code=res.status_code))
break

Loading…
Cancel
Save