feat: 添加招标估算金额字段并修复项目名称显示

- 移除台州招标文件公示相关配置
- 添加浙江招标公告招标估算金额字段
- 修复项目名称匹配规则，优先使用处理后的项目名称
- 更新简道云字段映射
- 添加测试文件
This commit is contained in:
ztb-system
2026-02-24 19:55:56 +08:00
parent d2fa06801f
commit d823936436
6 changed files with 171 additions and 49 deletions

View File

@@ -50,7 +50,6 @@ TAIZHOU_CONFIG = {
# 公告类型(工程建设) # 公告类型(工程建设)
"notice_types": { "notice_types": {
"招标计划公示": "002001014", "招标计划公示": "002001014",
"招标文件公示": "002001001",
"招标公告": "002001002", "招标公告": "002001002",
"资格预审公告": "002001003", "资格预审公告": "002001003",
"中标候选人公示": "002001005", "中标候选人公示": "002001005",
@@ -106,7 +105,7 @@ REGION_CONFIGS = {
"zhejiang:招标公告": { "zhejiang:招标公告": {
"region_name": "浙江招标公告", "region_name": "浙江招标公告",
"link_field": "公告链接", "link_field": "公告链接",
"ai_fields": ["批准文号", "投标截止日"], "ai_fields": ["批准文号", "投标截止日", "招标估算金额"],
}, },
"zhejiang:澄清修改": { "zhejiang:澄清修改": {
"region_name": "浙江澄清修改", "region_name": "浙江澄清修改",
@@ -118,13 +117,7 @@ REGION_CONFIGS = {
"link_field": "公告链接", "link_field": "公告链接",
"ai_fields": ["批准文号", "类型", "地区", "招标时间"], "ai_fields": ["批准文号", "类型", "地区", "招标时间"],
}, },
"taizhou:招标文件公示": {
"region_name": "台州招标文件公示",
"link_field": "招标文件链接",
"ai_fields": [
"类型", "地区", "批准文号", "投标截止日", "预估金额",
],
},
"taizhou:招标计划公示": { "taizhou:招标计划公示": {
"region_name": "台州招标计划", "region_name": "台州招标计划",
"link_field": "公告链接", "link_field": "公告链接",
@@ -374,6 +367,15 @@ DEEPSEEK_PROMPTS = {
请直接返回招标时间,不要其他解释。 请直接返回招标时间,不要其他解释。
如果未找到,请返回"文档未提及"""", 如果未找到,请返回"文档未提及"""",
"招标估算金额": """请从招标公告中提取项目的估算金额。
查找关键词:估算金额、预计投资、预算金额、项目总投资、招标估算价、投资估算
请直接返回金额,带上单位(万元或元)。
示例1234.56万元、2466285元
如果未提及,请返回"文档未提及"""",
} }
# ============ 简道云配置 ============ # ============ 简道云配置 ============
@@ -381,20 +383,7 @@ DEEPSEEK_PROMPTS = {
JDY_CONFIG = { JDY_CONFIG = {
"api_key": "JmxuXmkew33mvQttRD3ftSfQoOEX6R9J", "api_key": "JmxuXmkew33mvQttRD3ftSfQoOEX6R9J",
"forms": { "forms": {
"台州招标文件公示": {
"app_id": "6965f35749afd00072b33c4a",
"entry_id": "6965f35a962fab0113b87876",
"field_mapping": {
"项目发布时间": "_widget_1768289120174",
"批准文号": "_widget_1768289120166",
"名称": "_widget_1768289120167",
"类型": "_widget_1768289120168",
"投标截止日": "_widget_1768289120169",
"预估金额": "_widget_1768289120170",
"招标文件链接": "_widget_1768349415371",
"招标阶段": "_widget_1768289432065",
},
},
"台州招标计划": { "台州招标计划": {
"app_id": "6965f35749afd00072b33c4a", "app_id": "6965f35749afd00072b33c4a",
"entry_id": "6965f35a962fab0113b87876", "entry_id": "6965f35a962fab0113b87876",
@@ -439,6 +428,7 @@ JDY_CONFIG = {
"批准文号": "_widget_1768289557665", "批准文号": "_widget_1768289557665",
"名称": "_widget_1768349686082", "名称": "_widget_1768349686082",
"投标截止日": "_widget_1768289557654", "投标截止日": "_widget_1768289557654",
"招标估算金额": "_widget_1771910059524",
"公告链接": "_widget_1768290058232", "公告链接": "_widget_1768290058232",
"招标阶段": "_widget_1768289909408", "招标阶段": "_widget_1768289909408",
}, },

View File

@@ -50,7 +50,6 @@ TAIZHOU_CONFIG = {
# 公告类型(工程建设) # 公告类型(工程建设)
"notice_types": { "notice_types": {
"招标计划公示": "002001014", "招标计划公示": "002001014",
"招标文件公示": "002001001",
"招标公告": "002001002", "招标公告": "002001002",
"资格预审公告": "002001003", "资格预审公告": "002001003",
"中标候选人公示": "002001005", "中标候选人公示": "002001005",
@@ -106,20 +105,14 @@ REGION_CONFIGS = {
"zhejiang:招标公告": { "zhejiang:招标公告": {
"region_name": "浙江招标公告", "region_name": "浙江招标公告",
"link_field": "公告链接", "link_field": "公告链接",
"ai_fields": ["批准文号", "投标截止日"], "ai_fields": ["批准文号", "投标截止日", "招标估算金额"],
}, },
"zhejiang:澄清修改": { "zhejiang:澄清修改": {
"region_name": "浙江澄清修改", "region_name": "浙江澄清修改",
"link_field": "澄清文件链接", "link_field": "澄清文件链接",
"ai_fields": ["批准文号"], "ai_fields": ["批准文号"],
}, },
"taizhou:招标文件公示": {
"region_name": "台州招标文件公示",
"link_field": "招标文件链接",
"ai_fields": [
"类型", "地区", "批准文号", "投标截止日", "预估金额",
],
},
} }
# DeepSeek 提示词模板 # DeepSeek 提示词模板
@@ -356,6 +349,15 @@ DEEPSEEK_PROMPTS = {
如果某项未提及用""代替。 如果某项未提及用""代替。
如果确实未找到付款相关内容,请返回"文档未提及"""", 如果确实未找到付款相关内容,请返回"文档未提及"""",
"招标估算金额": """请从招标公告中提取项目的估算金额。
查找关键词:估算金额、预计投资、预算金额、项目总投资、招标估算价、投资估算
请直接返回金额,带上单位(万元或元)。
示例1234.56万元、2466285元
如果未提及,请返回"文档未提及"""",
} }
# ============ 简道云配置 ============ # ============ 简道云配置 ============
@@ -363,20 +365,7 @@ DEEPSEEK_PROMPTS = {
JDY_CONFIG = { JDY_CONFIG = {
"api_key": "JmxuXmkew33mvQttRD3ftSfQoOEX6R9J", "api_key": "JmxuXmkew33mvQttRD3ftSfQoOEX6R9J",
"forms": { "forms": {
"台州招标文件公示": {
"app_id": "6965f35749afd00072b33c4a",
"entry_id": "6965f35a962fab0113b87876",
"field_mapping": {
"项目发布时间": "_widget_1768289120174",
"批准文号": "_widget_1768289120166",
"名称": "_widget_1768289120167",
"类型": "_widget_1768289120168",
"投标截止日": "_widget_1768289120169",
"预估金额": "_widget_1768289120170",
"招标文件链接": "_widget_1768349415371",
"招标阶段": "_widget_1768289432065",
},
},
"浙江招标文件公示": { "浙江招标文件公示": {
"app_id": "6965f35749afd00072b33c4a", "app_id": "6965f35749afd00072b33c4a",
"entry_id": "6965f50e955c9b638888e7d2", "entry_id": "6965f50e955c9b638888e7d2",
@@ -409,6 +398,7 @@ JDY_CONFIG = {
"批准文号": "_widget_1768289557665", "批准文号": "_widget_1768289557665",
"名称": "_widget_1768349686082", "名称": "_widget_1768349686082",
"投标截止日": "_widget_1768289557654", "投标截止日": "_widget_1768289557654",
"招标估算金额": "_widget_1771910059524",
"公告链接": "_widget_1768290058232", "公告链接": "_widget_1768290058232",
"招标阶段": "_widget_1768289909408", "招标阶段": "_widget_1768289909408",
}, },

View File

@@ -20,7 +20,7 @@ class JiandaoyunUploader:
BASE_URL = "https://api.jiandaoyun.com/api/v5" BASE_URL = "https://api.jiandaoyun.com/api/v5"
# 需要转换为数字的字段 # 需要转换为数字的字段
NUMERIC_FIELDS = {"最高限价", "最高投标限价", "预估金额"} NUMERIC_FIELDS = {"最高限价", "最高投标限价", "预估金额", "招标估算金额"}
def __init__(self, api_key: str = None): def __init__(self, api_key: str = None):
self.api_key = api_key or JDY_CONFIG["api_key"] self.api_key = api_key or JDY_CONFIG["api_key"]

View File

@@ -147,8 +147,8 @@ class ProcessingPipeline:
"""将爬虫输出字段映射为处理所需字段""" """将爬虫输出字段映射为处理所需字段"""
record = {} record = {}
# 基础字段映射 # 基础字段映射:优先使用项目名称(已处理掉批准文号的名称)
record["名称"] = item.get("标题", item.get("项目名称", "")) record["名称"] = item.get("项目名称", item.get("标题", ""))
pub_date = item.get("发布日期", item.get("项目发布时间", "")) pub_date = item.get("发布日期", item.get("项目发布时间", ""))
record["发布时间"] = pub_date record["发布时间"] = pub_date
# 项目发布时间修复:使用与发布时间相同的值,确保格式一致 # 项目发布时间修复:使用与发布时间相同的值,确保格式一致

66
test_name_fix.py Normal file
View File

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
"""
测试项目名称修复效果
"""
import json
import csv
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from processors.pipeline import ProcessingPipeline
def test_name_fix():
    """Run the pipeline's field mapping on a few crawled rows and dump the names.

    Reads a fixed crawler CSV export, passes the first five rows through
    ``ProcessingPipeline._map_fields`` and prints, for each row, the original
    title next to the mapped "名称" and "项目名称" values. The collected values
    are written to a JSON file under ``data/`` together with the run date.

    Returns:
        None. When the CSV export does not exist, a message is printed and the
        function returns before the pipeline is created.
    """
    from datetime import date

    # 1. Load the raw CSV data
    csv_file = "data/浙江省公共资源交易中心_20260224_132741.csv"
    if not os.path.exists(csv_file):
        print(f"原始数据文件不存在: {csv_file}")
        return

    print(f"读取原始数据: {csv_file}")
    # newline='' is what the csv module requires so that quoted fields
    # containing line breaks are parsed correctly.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
        raw_items = list(csv.DictReader(f))
    print(f"原始数据条数: {len(raw_items)}")

    # 2. Create the pipeline instance (only its field mapping is exercised)
    pipeline = ProcessingPipeline()

    # 3. Exercise the field mapping on the first 5 rows only
    print("\n=== 测试字段映射 ===")
    test_results = []
    for idx, item in enumerate(raw_items[:5], start=1):
        # DictReader fills missing trailing columns with None, so fall back
        # to an empty string before slicing.
        title = item.get('标题') or ''
        print(f"\n--- 第 {idx} 条 ---")
        print(f"原始标题: {title[:60]}")

        record = pipeline._map_fields(item, "公告链接", "招标公告")
        mapped_name = record.get('名称', '')
        project_name = record.get('项目名称', '')
        print(f"处理后名称: {mapped_name}")
        print(f"项目名称: {project_name}")

        test_results.append({
            '原始标题': title,
            '处理后名称': mapped_name,
            '项目名称': project_name,
        })

    # 4. Save the results, stamped with the actual run date
    output_file = "data/项目名称修复测试结果.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump({
            '测试时间': date.today().isoformat(),
            '测试结果': test_results,
        }, f, ensure_ascii=False, indent=2)
    print(f"\n测试结果已保存到: {output_file}")


if __name__ == "__main__":
    test_name_fix()

76
test_upload_jdy.py Normal file
View File

@@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
"""
测试上传招标公告数据到简道云
"""
import json
import logging
import os
# 添加当前目录到模块搜索路径
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# 导入简道云上传器
from processors.jiandaoyun import JiandaoyunUploader
# Configure logging: INFO level, each line prefixed with timestamp and severity
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def upload_test_data():
    """Upload records from a fixed AI-processed JSON file to Jiandaoyun.

    Loads the hard-coded JSON result file, takes the list stored under its
    ``data`` key and hands it to ``JiandaoyunUploader.upload_records`` for the
    "浙江招标公告" form, then logs the success/failure counts.

    Returns:
        The value returned by ``upload_records`` (read here for its
        ``success``, ``failed`` and optional ``errors`` entries), or ``None``
        when the file is missing, cannot be read or parsed, or contains no
        records.
    """
    # AI-processed result file to upload
    json_file = "data/浙江招标公告_AI处理_20260224_133102.json"
    region_name = "浙江招标公告"

    logger.info(f"开始上传 {json_file} 到简道云")

    # 1. Read the JSON file. Opening directly and handling the failure avoids
    #    the race between an existence check and the open call.
    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        logger.error(f"JSON文件不存在: {json_file}")
        return None
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.error(f"读取JSON文件失败: {e}")
        return None

    # 2. Extract the records; a top-level value that is not an object has no
    #    "data" key and is treated as holding no records.
    records = data.get('data', []) if isinstance(data, dict) else []
    if not records:
        logger.error("JSON文件中没有数据")
        return None

    logger.info(f"读取完成,共 {len(records)} 条记录")

    # 3. Upload to Jiandaoyun
    uploader = JiandaoyunUploader()
    result = uploader.upload_records(region_name, records)

    # 4. Report the outcome
    logger.info(f"上传完成: 成功 {result['success']}, 失败 {result['failed']}")
    if result['failed'] > 0:
        logger.error("上传失败的记录:")
        for error in result.get('errors', []):
            logger.error(f" - {error}")

    return result
def main():
    """Script entry point: run the Jiandaoyun upload test between log banners."""
    logger.info("=== 测试简道云上传 ===")
    # The upload result is already logged inside upload_test_data, so it is
    # not kept here.
    upload_test_data()
    logger.info("=== 测试完成 ===")


if __name__ == "__main__":
    main()