账户有效性判断

master
pan 4 years ago
parent a64425575f
commit ceeba74df5
  1. 47
      main.py

@ -135,7 +135,7 @@ def parse(content):
return params_list return params_list
suffix = '77726476706e69737468656265737421f7b9569d2936695e790c88b8991b203a18454272' suffix = '77726476706e69737468656265737421e7e056d2303166567f068ea89941227bfcd3ca21bd0c'
base_url = f'https://libcon.bupt.edu.cn/http/{suffix}' base_url = f'https://libcon.bupt.edu.cn/http/{suffix}'
profession = "计算机软件与理论" profession = "计算机软件与理论"
keyword = f'(专业%3A"{profession}")' keyword = f'(专业%3A"{profession}")'
@ -351,31 +351,36 @@ def run(max=10, last_page=100, page_size=20):
**params) **params)
writeInfo(f'下载接口={url}') writeInfo(f'下载接口={url}')
res = session.get(url, headers=headers) res = session.get(url, headers=headers)
if check(res) and 'downloadliterature.do' in res.url: if check(res):
res_html = BeautifulSoup(res.content, "html.parser") res_html = BeautifulSoup(res.content, "html.parser")
downloadIframe = res_html.select_one('#downloadIframe') if 'downloadliterature.do' in res.url:
if downloadIframe: downloadIframe = res_html.select_one('#downloadIframe')
res = session.get(downloadIframe["src"]) if downloadIframe:
if check(res) and 'download.ashx' in res.url: res = session.get(downloadIframe["src"])
writeInfo("成功获取真实下载地址={path}".format(path=res.url)) if check(res) and 'download.ashx' in res.url:
res = session.get(res.url, headers=headers, stream=True) writeInfo("成功获取真实下载地址={path}".format(path=res.url))
if check(res) and 'pdf' in res.headers['Content-Type']: res = session.get(res.url, headers=headers, stream=True)
des = res.headers['Content-Disposition'].split(';') if check(res) and 'pdf' in res.headers['Content-Type']:
if len(des) == 2 and len(des[1].split('=')) == 2: des = res.headers['Content-Disposition'].split(';')
count = count + 1 if len(des) == 2 and len(des[1].split('=')) == 2:
writeInfo(f'当前采集进度{count}/{max},{round(count / max, 4) * 100}%') count = count + 1
if count <= max: writeInfo(f'当前采集进度{count}/{max},{round(count / max, 4) * 100}%')
save(des, res, params) if count <= max:
if count == max: save(des, res, params)
break if count == max:
break
else:
writeError("非法响应类型")
else: else:
writeError("非法响应类型") writeError("无法获取文档信息")
else: else:
writeError("无法获取文档信息") writeError("无法获取文档真实下载地址")
else: else:
writeError("无法获取文档真实下载地址") writeError("无法获取真实下载地址")
elif res_html.select_one('title').text == '交易':
raise Exception(res_html.select_one('div.NotWork>span').text)
else: else:
writeError("无法获取真实下载地址") raise Exception('发生未知错误!!!')
else: else:
writeError('error code={code}'.format(code=res.status_code)) writeError('error code={code}'.format(code=res.status_code))
break break

Loading…
Cancel
Save