fix: 发布时间从详情页内容提取,避免pipeline二次请求失败
This commit is contained in:
@@ -149,10 +149,10 @@ class ProcessingPipeline:
|
||||
|
||||
# 基础字段映射:优先使用项目名称(已处理掉批准文号的名称)
|
||||
record["名称"] = item.get("项目名称", item.get("标题", ""))
|
||||
pub_date = item.get("发布日期", item.get("项目发布时间", ""))
|
||||
record["发布时间"] = pub_date
|
||||
# 项目发布时间修复:使用与发布时间相同的值,确保格式一致
|
||||
record["项目发布时间"] = pub_date # 台州招标计划 JDY 使用此字段名
|
||||
# 优先使用爬虫从详情页内容提取的发布时间(含时分秒)
|
||||
pub_time = item.get("详情页发布时间", "")
|
||||
record["发布时间"] = pub_time
|
||||
record["项目发布时间"] = pub_time
|
||||
record["地区"] = item.get("地区", "")
|
||||
record["招标阶段"] = item.get("公告类型", notice_type)
|
||||
record["来源"] = item.get("来源", "")
|
||||
|
||||
Reference in New Issue
Block a user