From 55ad14771d71018cd43fed31f8b1a3ca0ff18801 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=BD=98=E5=95=9F=E5=8D=8E?= <1029559041@qq.com>
Date: Mon, 23 Sep 2019 00:44:43 +0800
Subject: [PATCH] init

---
Review notes (this section is ignored when the patch is applied):
 * CnmoCrawler.run now includes the caught exception in the error log;
   previously `e` was bound but discarded, so the cause of an aborted
   crawl was lost. Cleanup via clear_data() still runs afterwards.
 * NOTE(review): leading whitespace of the main.py context lines was
   reconstructed assuming 4-space indents (the `raise` line depth is a
   guess) -- confirm against main.py before applying.
 * NOTE(review): the main.py `index` line still carries the original
   post-image hash; regenerate the patch if an exact blob id is needed.

 config.ini | 4 ++--
 main.py    | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/config.ini b/config.ini
index 9f34528..8decee5 100644
--- a/config.ini
+++ b/config.ini
@@ -16,8 +16,8 @@ param_name=手机名称,参考价格,电商报价,上市时间,网友综合评
 ;非空参数个数(从左到右保留指定个数的参数)
 param_required_index=8
 ;达到指定缓冲数据量写入一次报表
-data_size=10
+data_size=100
 ;线程池大小
 thread_count=5
 ;采集数据量
-max_count=30
\ No newline at end of file
+max_count=5000
\ No newline at end of file
diff --git a/main.py b/main.py
index 90f9f06..abf0a03 100644
--- a/main.py
+++ b/main.py
@@ -368,8 +368,11 @@ class CnmoCrawler(MobilePhoneCrawler):
             raise Exception("列表页:{0}解析失败".format(current_page_url))
 
     def run(self):
-        self.get_page()
-        writeInfo('采集数据完毕,开始清洗脏数据')
+        try:
+            self.get_page()
+            writeInfo('采集数据完毕,开始清洗脏数据')
+        except Exception as e:
+            writeError("采集数据出现异常,开始清洗脏数据:{0!r}".format(e))
         self.clear_data()
         writeInfo('清洗脏数据完毕')
 