|
|
|
@ -135,7 +135,7 @@ def parse(content): |
|
|
|
|
return params_list |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
suffix = '77726476706e69737468656265737421f7b9569d2936695e790c88b8991b203a18454272' |
|
|
|
|
suffix = '77726476706e69737468656265737421e7e056d2303166567f068ea89941227bfcd3ca21bd0c' |
|
|
|
|
base_url = f'https://libcon.bupt.edu.cn/http/{suffix}' |
|
|
|
|
profession = "计算机软件与理论" |
|
|
|
|
keyword = f'(专业%3A"{profession}")' |
|
|
|
@ -351,8 +351,9 @@ def run(max=10, last_page=100, page_size=20): |
|
|
|
|
**params) |
|
|
|
|
writeInfo(f'下载接口={url}') |
|
|
|
|
res = session.get(url, headers=headers) |
|
|
|
|
if check(res) and 'downloadliterature.do' in res.url: |
|
|
|
|
if check(res): |
|
|
|
|
res_html = BeautifulSoup(res.content, "html.parser") |
|
|
|
|
if 'downloadliterature.do' in res.url: |
|
|
|
|
downloadIframe = res_html.select_one('#downloadIframe') |
|
|
|
|
if downloadIframe: |
|
|
|
|
res = session.get(downloadIframe["src"]) |
|
|
|
@ -376,6 +377,10 @@ def run(max=10, last_page=100, page_size=20): |
|
|
|
|
writeError("无法获取文档真实下载地址") |
|
|
|
|
else: |
|
|
|
|
writeError("无法获取真实下载地址") |
|
|
|
|
elif res_html.select_one('title').text == '交易': |
|
|
|
|
raise Exception(res_html.select_one('div.NotWork>span').text) |
|
|
|
|
else: |
|
|
|
|
raise Exception('发生未知错误!!!') |
|
|
|
|
else: |
|
|
|
|
writeError('error code={code}'.format(code=res.status_code)) |
|
|
|
|
break |
|
|
|
|