You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
48 lines
1.0 KiB
48 lines
1.0 KiB
5 years ago
|
# 手机爬虫
|
||
|
import gzip
|
||
|
import zlib
|
||
|
|
||
|
|
||
|
class MobilePhoneCrawler:
    """Skeleton of a mobile-phone crawler.

    Holds a record limit and a list of collected phones, declares no-op
    placeholder methods for the crawl steps, and provides a helper that
    decompresses downloaded page bytes.
    """

    def __init__(self) -> None:
        super().__init__()
        # Limit on the number of records (5000 by default).
        self._max_count = 5000
        # Phone list data.
        self._mobile_list = []

    @property
    def max_count(self):
        """Return the current record limit."""
        return self._max_count

    @max_count.setter
    def max_count(self, value):
        # Stored as given; no validation is applied.
        self._max_count = value

    @property
    def mobile_list(self):
        """Return the internal phone list (the list object itself, not a copy)."""
        return self._mobile_list

    def get_page(self):
        """Fetch the list data.

        No-op placeholder here; returns None.
        """

    def get_mobile(self, base_url, param_url, **kwargs):
        """Fetch the detail data of a phone.

        No-op placeholder here; the arguments are ignored and None is returned.
        """

    def save_mobile(self, mobile):
        """Save the data of a phone.

        No-op placeholder here; the argument is ignored and None is returned.
        """

    def get_req(self, url, **kwargs):
        """No-op placeholder; the arguments are ignored and None is returned.

        NOTE(review): presumably meant to issue a request for ``url`` --
        confirm against the concrete implementations.
        """

    def uzipData(self, data):
        """Decompress web page data based on its leading bytes.

        Data starting with the bytes 0x1f 0x8b is decompressed with gzip.
        Data starting with the bytes 0xec 0xbd is decompressed with zlib as
        a headerless (raw) deflate stream. Anything else is returned
        unchanged.
        """
        gzip_prefix = b'\x1f\x8b'
        raw_deflate_prefix = b'\xec\xbd'

        if data.startswith(gzip_prefix):
            return gzip.decompress(data)

        if data.startswith(raw_deflate_prefix):
            # A negative wbits value makes zlib read a raw deflate stream,
            # i.e. one without a zlib header or trailer.
            return zlib.decompress(data, -zlib.MAX_WBITS)

        return data
|