修复浙江爬虫日期过滤:添加客户端二次校验防止获取非目标日期数据
This commit is contained in:
@@ -295,6 +295,11 @@ class ZhejiangSpider(BaseSpider):
 
         count = 0
         for rec in records:
+            # 客户端日期二次过滤:跳过不在目标日期范围内的记录
+            rec_date = rec.get("webdate", "").split(" ")[0]
+            if date_filter and rec_date and rec_date != start_date:
+                continue
+
             link = rec.get("linkurl", "")
             if link and not link.startswith("http"):
                 link = self.config["base_url"] + link
Reference in New Issue
Block a user