"""List cheap domains from Aliyun's on-sale search and print their ICP status.

Flow, as implemented below:

1. Open the Aliyun marketplace page in Chrome and read ``window.umToken``.
2. Page through the ``onsale/search`` API (price filter 0-50) with that token.
3. For every returned domain, POST it to the chinaz ICP lookup page and print
   whether a filing record appears to exist.
"""
import json
import math
import time

import requests
from bs4 import BeautifulSoup
from selenium.webdriver.chrome import webdriver

start_url = 'https://mi.aliyun.com/?spm=5176.8070854.az_625vhq.1.22242c4fIPqBfQ'

# NOTE(review): the URL path names a fixed domain (hklt.net) while the domain
# being queried travels in the form field ``s`` -- confirm this is intended.
ICP_LOOKUP_URL = 'http://icp.chinaz.com/hklt.net'

# Presumably the notice chinaz renders when no filing record is found; its
# absence is treated as "filed". TODO confirm against the live page.
ICP_NOT_FOUND_SELECTOR = "p[class='tc col-red fz18 YaHei pb20']"

# ``&currentPage`` had been corrupted to ``¤tPage`` (``&curren`` decoded as an
# HTML entity), so the page number was never sent under the right name.
SEARCH_URL_TEMPLATE = (
    'https://domainapi.aliyun.com/onsale/search?fetchSearchTotal=true'
    '&token={0}&currentPage={1}&pageSize={2}&productType=2&minPrice=0'
    '&maxPrice=50&searchIntro=false&keywordAsPrefix=false'
    '&keywordAsSuffix=false&exKeywordAsPrefix=false'
    '&exKeywordAsSuffix=false&exKeywordAsPrefix2=false'
    '&exKeywordAsSuffix2=false'
)

SEARCH_HEADERS = {
    'Referer': start_url,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
    'Sec-Fetch-Mode': 'no-cors'
}

# Upper bound on pages fetched; also bounds the loop when the API never
# reports ``searchTotal``.
MAX_PAGES = 100


def getReq(domain, max_retries=None):
    """POST *domain* to the ICP lookup page, retrying on request failures.

    Args:
        domain: Domain name sent in the ``s`` form field.
        max_retries: Maximum number of retries after the first attempt.
            ``None`` (the default) retries indefinitely, as the script has
            always done.

    Returns:
        The ``requests.Response`` of the first attempt that did not raise.

    Raises:
        requests.RequestException: Only when ``max_retries`` is set and
            exhausted.
    """
    failures = 0
    # Iterative retry: the former recursive retry could exhaust the stack,
    # and its bare ``except`` also swallowed KeyboardInterrupt/SystemExit.
    while True:
        try:
            return requests.post(
                ICP_LOOKUP_URL,
                {"type": "host", "s": domain, "guid": "%24guid"},
                timeout=10)
        except requests.RequestException:
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            time.sleep(3)


def _print_icp_status(domain, price):
    """Look up *domain* and print its filing status (and *price* if filed)."""
    icp_res = getReq(domain)
    if icp_res.status_code != 200:
        print('{0}备案信息获取失败'.format(domain))
        return
    icp_html = BeautifulSoup(icp_res.content, 'html.parser')
    if icp_html.select_one(ICP_NOT_FOUND_SELECTOR) is None:
        print('{0}已备案,价格{1}'.format(domain, price))
    else:
        print('{0}未备案'.format(domain))


# Create a new instance of the Chrome driver; it is only needed to obtain the
# token that the marketplace page exposes as ``window.umToken``.
driver = webdriver.WebDriver(
    executable_path=r"D:\JetBrains\PycharmProjects\python_requirements_summary\chromedriver.exe")
try:
    driver.get(start_url)
    token = driver.execute_script('return window.umToken')
finally:
    # Always shut the browser down, even if loading the page or reading the
    # token fails.
    driver.quit()

pageSize = 50
current_page = 1
last_page = None

# Until the first successful response reveals the real page count, fall back
# to MAX_PAGES so repeated error responses cannot loop forever.
while current_page <= (MAX_PAGES if last_page is None else last_page):
    url = SEARCH_URL_TEMPLATE.format(token, current_page, pageSize)
    # One session per page (as before), now closed once the page is fetched.
    with requests.Session() as session:
        search_res = session.get(url, headers=SEARCH_HEADERS)
    res_json = json.loads(search_res.content)

    if 'data' in res_json:
        payload = res_json['data']
        if last_page is None and 'searchTotal' in payload:
            last_page = min(
                MAX_PAGES, math.ceil(payload['searchTotal'] / pageSize))
        if 'pageResult' in payload and 'data' in payload['pageResult']:
            for item in payload['pageResult']['data']:
                _print_icp_status(item['domainName'], item['price'])
    else:
        # No ``data`` key: dump the raw response so the failure is visible.
        print(res_json)

    current_page += 1