bilibili动态爬虫

master
pan 4 years ago
parent 873dadc628
commit 06108b8b6b
  1. 1
      .gitignore
  2. 1
      bilibili/.gitignore
  3. 35
      bilibili/space.py
  4. 52
      domain/checkDomain.py
  5. 4
      domaintest.py
  6. 15
      requirement.txt
  7. 3
      test.py

1
.gitignore vendored

@ -206,3 +206,4 @@ fabric.properties
*.xlsx
/log/
/Scripts/

@ -0,0 +1 @@
*.json

@ -0,0 +1,35 @@
import json
import time
import requests
# Crawl one user's bilibili "space_history" feed page by page and collect the
# description of every dynamic whose text contains `keyword`. The collected
# descriptions are written to test.json as a JSON array once paging ends.
host_uid = 207539637
keyword = "弹幕彩蛋"
cards = []
# The first request uses offset 0; each later request uses the id of the last
# dynamic seen on the previous page.
offset_dynamic_id = 0
while True:
    url = f"https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history?visitor_uid=0&host_uid={host_uid}&offset_dynamic_id={offset_dynamic_id}&need_top=1&platform=web"
    print(f'url={url}')
    # A timeout keeps a stalled connection from hanging the crawl forever.
    res = requests.get(url, timeout=10)
    if res.status_code != 200:
        raise Exception(f"status_code={res.status_code}")
    res_json = json.loads(res.content)
    print('解析成功')

    # A missing or empty "cards" list ends the crawl. Without this check an
    # empty page would be requested again and again with the same offset.
    page = (res_json.get("data") or {}).get("cards")
    if not page:
        break

    for card in page:
        raw_card = card.get("card")
        if not raw_card:
            continue
        # The "card" field is itself a JSON document serialized as a string.
        card_json = json.loads(raw_card)
        if "item" in card_json and "description" in card_json["item"]:
            description = card_json["item"]["description"]
            if keyword in description:
                print(description)
                cards.append(description)

    # Stop when the last card carries no id to continue from, or when the
    # offset would not advance (which would refetch the same page forever).
    next_offset = (page[-1].get("desc") or {}).get("dynamic_id_str")
    if not next_offset or str(next_offset) == str(offset_dynamic_id):
        break
    offset_dynamic_id = next_offset
    # Pause between pages so the API is not hit in a tight loop.
    time.sleep(3)
    print('添加动态')

with open(file='test.json', mode='w', encoding='UTF-8') as f:
    # json.dump emits valid JSON; str(cards) would write a Python list repr
    # (single-quoted strings) that JSON parsers reject.
    json.dump(cards, f, ensure_ascii=False)

@ -1,3 +1,4 @@
from ntpath import join
import os
import string
import time
@ -23,28 +24,21 @@ def colname_to_num(colname):
power *= len(string.ascii_lowercase)
return col
return col-1
def toCode(x):
    """Return the character located ``x`` code points after ``'a'``.

    An offset of 0 gives ``'a'``, 1 gives ``'b'``, and so on.
    """
    base = ord('a')
    return chr(base + x)
def column_to_name(colnum):
if type(colnum) is not int:
return colnum
def getKey(index):
    """Return the lowercase letter label for a zero-based ``index``.

    Labels run like spreadsheet column names: 0 -> 'a', 25 -> 'z',
    26 -> 'aa', 27 -> 'ab', 701 -> 'zz', 702 -> 'aaa'.
    """
    alphabet = string.ascii_lowercase
    base = len(alphabet)
    letters = []
    while True:
        quotient, remainder = divmod(index, base)
        # The remainder selects the current (rightmost remaining) letter.
        letters.append(alphabet[remainder])
        if quotient <= 0:
            break
        # There is no "zero" letter, so the next position is offset by one.
        index = quotient - 1
    # Letters were gathered right-to-left; flip them into reading order.
    return ''.join(reversed(letters))
str = ''
str_l = len(string.ascii_lowercase)
while (not (colnum // str_l == 0 and colnum % str_l == 0)):
temp = str_l - 1
if (colnum % str_l == 0):
str += chr(temp + ord('a'))
else:
str += chr(colnum % str_l - 1 + ord('a'))
colnum //= str_l
# 倒序输出拼写的字符串
return str[::-1]
def aliyun(domain: str, error_file: str, datas: list):
@ -93,13 +87,13 @@ def check(top_domain: list, start: str, end: str, supportMany: bool, todo):
ws.cell(1, 2, '价格')
if os.path.exists(error_file):
os.remove(error_file)
for i in range(colname_to_num(start), colname_to_num(end)+1):
for i in range(colname_to_num(start), colname_to_num(end) + 1):
datas = list()
if supportMany:
todo([f'{column_to_name(i)}.{x}' for x in top_domain], error_file, datas)
todo([f'{getKey(i)}.{x}' for x in top_domain], error_file, datas)
else:
for j in top_domain:
domain = f'{column_to_name(i)}.{j}'
domain = f'{getKey(i)}.{j}'
todo(domain, error_file, datas)
for index, data in enumerate(datas):
@ -111,26 +105,28 @@ def check(top_domain: list, start: str, end: str, supportMany: bool, todo):
def namesilo(domains: list, error_file: str, datas: list):
# 通过元组构造表单内容,解决参数名重名问题
data = tuple([('tlds[]', x.split('.')[1]) for x in domains] + [('domains[]', x) for x in domains])
# 使用MultipartEncoder把表单内容转换为multipart/form-data类型
m = MultipartEncoder(
fields=data)
res = requests.post('https://www.namesilo.com/public/api/domains/bulk-check', data=m,
headers={'Content-Type': m.content_type})
headers={'Content-Type': m.content_type},timeout=10)
if res.status_code == 200:
json_res = json.loads(res.content)
if 'result' in json_res and json_res['result'] == 'success' and 'data' in json_res and 'checkId' in json_res[
'data']:
checkId = json_res['data']['checkId']
url = f'https://www.namesilo.com/public/api/domains/results/{checkId}'
res = requests.get(url)
res = requests.get(url,timeout=10)
if res.status_code == 200:
json_res = json.loads(res.content)
for domain in json_res['data']['domains']:
d = domain['domain']
if domain['available']:
price = domain['currentPrice']
writeInfo(f'{d}可注册,价格{price}')
datas.append((d, f'{round(price, 2)}$'))
price = float(domain["currentPrice"])
writeInfo(f'{d}可注册,价格{price}$')
datas.append((d, price))
else:
writeInfo(f'{d}已注册')
@ -144,6 +140,4 @@ def namesilo(domains: list, error_file: str, datas: list):
f.write(f'{domain}\n')
raise Exception('请求异常')
# check(['top', 'fun', 'online', 'ink', 'ren', 'site', 'asia'], 'aaa', 'aag', False, aliyun)

@ -0,0 +1,4 @@
import string
from domain.checkDomain import check, colname_to_num, getKey, namesilo
# Scan every label from 'a' through 'zzzz' under each listed TLD. With
# supportMany=True, check() passes each label's candidate domains to the
# namesilo checker as one list.
check(
    top_domain=['com', 'top', 'xyz', 'buzz'],
    start='a',
    end='zzzz',
    supportMany=True,
    todo=namesilo,
)

@ -1,14 +1,29 @@
beautifulsoup4==4.9.1
bs4==0.0.1
certifi==2019.11.28
chardet==3.0.4
deprecation==2.0.7
et-xmlfile==1.0.1
huaweicloud-sdk-python==1.0.21
idna==2.8
iso8601==0.1.12
keystoneauth1==3.4.0
lxml==4.5.2
openpyxl==3.0.7
packaging==20.1
pbr==5.4.4
peewee==3.13.3
pinger==0.1.3
pip==21.1.3
pyparsing==2.4.6
requests==2.22.0
requests-toolbelt==0.9.1
setuptools==57.1.0
six==1.14.0
soupsieve==2.0.1
stevedore==1.32.0
urllib3==1.25.8
xbox==0.1.3
xlrd==2.0.1
xlutils==2.0.0
xlwt==1.3.0

@ -1,3 +0,0 @@
from domain.checkDomain import check, namesilo
# Scan the labels 'aaa' through 'zzz' under each listed TLD, using the
# namesilo checker in its batched (supportMany=True) mode.
check(
    top_domain=['icu', 'buzz', 'xyz'],
    start='aaa',
    end='zzz',
    supportMany=True,
    todo=namesilo,
)
Loading…
Cancel
Save