diff --git a/Crawler.py b/Crawler.py index 4a17481..21400b6 100644 --- a/Crawler.py +++ b/Crawler.py @@ -3,13 +3,13 @@ import gzip import zlib -class MobilePhoneCrawler(): +class MobiePhoneCrawler(): def __init__(self) -> None: super().__init__() # 限制5000条数据 self._max_count = 5000 # 手机列表数据 - self._mobile_list = [] + self._mobie_list = [] @property def max_count(self): @@ -20,19 +20,19 @@ class MobilePhoneCrawler(): self._max_count = value @property - def mobile_list(self): - return self._mobile_list + def mobie_list(self): + return self._mobie_list # 获取列表数据 def get_page(self): pass # 获取手机详情数据 - def get_mobile(self, base_url,param_url,**kwargs): + def get_mobie(self, base_url, param_url, **kwargs): pass # 保存手机数据 - def save_mobile(self, mobile): + def save_mobie(self, mobie): pass def get_req(self,url,**kwargs): diff --git a/main.py b/main.py index 89325cd..ba74c7e 100644 --- a/main.py +++ b/main.py @@ -9,14 +9,14 @@ from bs4 import BeautifulSoup # 手机实体类 from openpyxl import load_workbook, Workbook -from Crawler import MobilePhoneCrawler +from Crawler import MobiePhoneCrawler from config.config import cf, config_path from config.log import writeInfo, writeError # 评测中心手机爬虫 # http://product.cnmo.com/all/product.html -class CnmoCrawler(MobilePhoneCrawler): +class CnmoCrawler(MobiePhoneCrawler): def __init__(self) -> None: super().__init__() self.threads = [] @@ -62,7 +62,7 @@ class CnmoCrawler(MobilePhoneCrawler): # 解析列表数据 li_s = res_html.select("ul.all-con-con-ul.cf>li") for li in li_s: - if len(self.mobile_list) > self.max_count: + if len(self.mobie_list) > self.max_count: return p = li.select_one('p.red') # 多线程获取手机详情参数 @@ -95,7 +95,7 @@ class CnmoCrawler(MobilePhoneCrawler): self.clear_data() writeInfo('清洗脏数据完毕') - def get_mobile(self, base_url, param_url, **kwargs): + def get_mobie(self, base_url, param_url, **kwargs): # 字典存储手机详细参数 param_dict = {} writeInfo("开始解析手机详情参数页{0}".format(param_url)) @@ -106,28 +106,28 @@ class CnmoCrawler(MobilePhoneCrawler): score_res_html = BeautifulSoup(self.uzipData(score_res.content), 'html.parser') param_dict['网友综合评分'] = score_res_html.select_one('div.pro-comm-stars').find_next('span', {'class': 'red'}).text - mobile_res = self.get_req(param_url) + mobie_res = self.get_req(param_url) # 判断响应状态码,200正常返回 - if mobile_res is not None and mobile_res.status_code == 200: + if mobie_res is not None and mobie_res.status_code == 200: # 调用解析器解析网页请求体 try: - mobile_res_html = BeautifulSoup(self.uzipData(mobile_res.content), 'html.parser') - phone_name = mobile_res_html.select_one('#proName>a').text + mobie_res_html = BeautifulSoup(self.uzipData(mobie_res.content), 'html.parser') + phone_name = mobie_res_html.select_one('#proName>a').text param_dict['手机名称'] = phone_name writeInfo("开始解析手机{0}详细参数".format(phone_name)) # 参考价格 - param_dict['参考价格'] = mobile_res_html.select_one('span:contains(参考价格)').find_next().text + param_dict['参考价格'] = mobie_res_html.select_one('span:contains(参考价格)').find_next().text # 电商报价 - param_dict['电商报价'] = mobile_res_html.select_one('span:contains(电商报价)').next_sibling.strip() + param_dict['电商报价'] = mobie_res_html.select_one('span:contains(电商报价)').next_sibling.strip() # 获取参数名 - param_name_list = mobile_res_html.select('div.right>p') + param_name_list = mobie_res_html.select('div.right>p') for param_name in param_name_list: # 获取参数值 param_dict[param_name['paramname']] = param_name['paramvalue'] # 获取锁,用于线程同步 self.threadLock.acquire() - self.save_mobile(dict(param_dict, **kwargs)) + self.save_mobie(dict(param_dict, **kwargs)) # 释放锁,开启下一个线程 self.threadLock.release() except Exception as e: @@ -135,13 +135,13 @@ class CnmoCrawler(MobilePhoneCrawler): else: writeError("解析手机详情参数页{0}失败".format(param_url)) - def save_mobile(self, mobile, ingore=False): - self.mobile_list.append(mobile) - writeInfo("当前已爬取{0}台手机".format(len(self.mobile_list))) - if not ingore and len(self.mobile_list) % self.data_size == 0: - self.save_excel(self.mobile_list[-self.data_size:]) - elif ingore and len(self.mobile_list) % self.data_size != 0: - self.save_excel(self.mobile_list[-(len(self.mobile_list) % self.data_size):]) + def save_mobie(self, mobie, ingore=False): + self.mobie_list.append(mobie) + writeInfo("当前已爬取{0}台手机".format(len(self.mobie_list))) + if not ingore and len(self.mobie_list) % self.data_size == 0: + self.save_excel(self.mobie_list[-self.data_size:]) + elif ingore and len(self.mobie_list) % self.data_size != 0: + self.save_excel(self.mobie_list[-(len(self.mobie_list) % self.data_size):]) else: writeInfo('缓存数据不足{0}条或没有剩余数据,不需要写入'.format(self.data_size)) @@ -221,4 +221,4 @@ class myThread(threading.Thread): self.kwargs = kwargs def run(self) -> None: - self.crawler.get_mobile(self.base_url, self.param_url, **self.kwargs) + self.crawler.get_mobie(self.base_url, self.param_url, **self.kwargs)