修复浙江爬虫日期过滤:添加客户端二次校验防止获取非目标日期数据
This commit is contained in:
@@ -295,6 +295,11 @@ class ZhejiangSpider(BaseSpider):
 
         count = 0
         for rec in records:
+            # 客户端日期二次过滤:跳过不在目标日期范围内的记录
+            rec_date = rec.get("webdate", "").split(" ")[0]
+            if date_filter and rec_date and rec_date != start_date:
+                continue
+
             link = rec.get("linkurl", "")
             if link and not link.startswith("http"):
                 link = self.config["base_url"] + link
Reference in New Issue
Block a user