From d823936436595d9d449b7f882aa290630d65b953 Mon Sep 17 00:00:00 2001 From: ztb-system Date: Tue, 24 Feb 2026 19:55:56 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E6=8B=9B=E6=A0=87?= =?UTF-8?q?=E4=BC=B0=E7=AE=97=E9=87=91=E9=A2=9D=E5=AD=97=E6=AE=B5=E5=B9=B6?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=A1=B9=E7=9B=AE=E5=90=8D=E7=A7=B0=E6=98=BE?= =?UTF-8?q?=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 移除台州招标文件公示相关配置 - 添加浙江招标公告招标估算金额字段 - 修复项目名称匹配规则,优先使用处理后的项目名称 - 更新简道云字段映射 - 添加测试文件 --- config.py | 36 +++++++------------ config_fixed.py | 36 +++++++------------ processors/jiandaoyun.py | 2 +- processors/pipeline.py | 4 +-- test_name_fix.py | 66 ++++++++++++++++++++++++++++++++++ test_upload_jdy.py | 76 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 171 insertions(+), 49 deletions(-) create mode 100644 test_name_fix.py create mode 100644 test_upload_jdy.py diff --git a/config.py b/config.py index dc94fe4..1cf7ad9 100644 --- a/config.py +++ b/config.py @@ -50,7 +50,6 @@ TAIZHOU_CONFIG = { # 公告类型(工程建设) "notice_types": { "招标计划公示": "002001014", - "招标文件公示": "002001001", "招标公告": "002001002", "资格预审公告": "002001003", "中标候选人公示": "002001005", @@ -106,7 +105,7 @@ REGION_CONFIGS = { "zhejiang:招标公告": { "region_name": "浙江招标公告", "link_field": "公告链接", - "ai_fields": ["批准文号", "投标截止日"], + "ai_fields": ["批准文号", "投标截止日", "招标估算金额"], }, "zhejiang:澄清修改": { "region_name": "浙江澄清修改", @@ -118,13 +117,7 @@ REGION_CONFIGS = { "link_field": "公告链接", "ai_fields": ["批准文号", "类型", "地区", "招标时间"], }, - "taizhou:招标文件公示": { - "region_name": "台州招标文件公示", - "link_field": "招标文件链接", - "ai_fields": [ - "类型", "地区", "批准文号", "投标截止日", "预估金额", - ], - }, + "taizhou:招标计划公示": { "region_name": "台州招标计划", "link_field": "公告链接", @@ -374,6 +367,15 @@ DEEPSEEK_PROMPTS = { 请直接返回招标时间,不要其他解释。 如果未找到,请返回"文档未提及"。""", + + "招标估算金额": """请从招标公告中提取项目的估算金额。 + +查找关键词:估算金额、预计投资、预算金额、项目总投资、招标估算价、投资估算 + +请直接返回金额,带上单位(万元或元)。 +示例:1234.56万元、2466285元 + +如果未提及,请返回"文档未提及"。""", } # ============ 简道云配置 ============ @@ -381,20 +383,7 @@ DEEPSEEK_PROMPTS = { JDY_CONFIG = { "api_key": "JmxuXmkew33mvQttRD3ftSfQoOEX6R9J", "forms": { - "台州招标文件公示": { - "app_id": "6965f35749afd00072b33c4a", - "entry_id": "6965f35a962fab0113b87876", - "field_mapping": { - "项目发布时间": "_widget_1768289120174", - "批准文号": "_widget_1768289120166", - "名称": "_widget_1768289120167", - "类型": "_widget_1768289120168", - "投标截止日": "_widget_1768289120169", - "预估金额": "_widget_1768289120170", - "招标文件链接": "_widget_1768349415371", - "招标阶段": "_widget_1768289432065", - }, - }, + "台州招标计划": { "app_id": "6965f35749afd00072b33c4a", "entry_id": "6965f35a962fab0113b87876", @@ -439,6 +428,7 @@ JDY_CONFIG = { "批准文号": "_widget_1768289557665", "名称": "_widget_1768349686082", "投标截止日": "_widget_1768289557654", + "招标估算金额": "_widget_1771910059524", "公告链接": "_widget_1768290058232", "招标阶段": "_widget_1768289909408", }, diff --git a/config_fixed.py b/config_fixed.py index 312561b..6905d85 100644 --- a/config_fixed.py +++ b/config_fixed.py @@ -50,7 +50,6 @@ TAIZHOU_CONFIG = { # 公告类型(工程建设) "notice_types": { "招标计划公示": "002001014", - "招标文件公示": "002001001", "招标公告": "002001002", "资格预审公告": "002001003", "中标候选人公示": "002001005", @@ -106,20 +105,14 @@ REGION_CONFIGS = { "zhejiang:招标公告": { "region_name": "浙江招标公告", "link_field": "公告链接", - "ai_fields": ["批准文号", "投标截止日"], + "ai_fields": ["批准文号", "投标截止日", "招标估算金额"], }, "zhejiang:澄清修改": { "region_name": "浙江澄清修改", "link_field": "澄清文件链接", "ai_fields": ["批准文号"], }, - "taizhou:招标文件公示": { - "region_name": "台州招标文件公示", - "link_field": "招标文件链接", - "ai_fields": [ - "类型", "地区", "批准文号", "投标截止日", "预估金额", - ], - }, + } # DeepSeek 提示词模板 @@ -356,6 +349,15 @@ DEEPSEEK_PROMPTS = { 如果某项未提及用"无"代替。 如果确实未找到付款相关内容,请返回"文档未提及"。""", + + "招标估算金额": """请从招标公告中提取项目的估算金额。 + +查找关键词:估算金额、预计投资、预算金额、项目总投资、招标估算价、投资估算 + +请直接返回金额,带上单位(万元或元)。 +示例:1234.56万元、2466285元 + +如果未提及,请返回"文档未提及"。""", } # ============ 简道云配置 ============ @@ -363,20 +365,7 @@ DEEPSEEK_PROMPTS = { JDY_CONFIG = { "api_key": "JmxuXmkew33mvQttRD3ftSfQoOEX6R9J", "forms": { - "台州招标文件公示": { - "app_id": "6965f35749afd00072b33c4a", - "entry_id": "6965f35a962fab0113b87876", - "field_mapping": { - "项目发布时间": "_widget_1768289120174", - "批准文号": "_widget_1768289120166", - "名称": "_widget_1768289120167", - "类型": "_widget_1768289120168", - "投标截止日": "_widget_1768289120169", - "预估金额": "_widget_1768289120170", - "招标文件链接": "_widget_1768349415371", - "招标阶段": "_widget_1768289432065", - }, - }, + "浙江招标文件公示": { "app_id": "6965f35749afd00072b33c4a", "entry_id": "6965f50e955c9b638888e7d2", @@ -409,6 +398,7 @@ JDY_CONFIG = { "批准文号": "_widget_1768289557665", "名称": "_widget_1768349686082", "投标截止日": "_widget_1768289557654", + "招标估算金额": "_widget_1771910059524", "公告链接": "_widget_1768290058232", "招标阶段": "_widget_1768289909408", }, diff --git a/processors/jiandaoyun.py b/processors/jiandaoyun.py index 2fb6f42..6ac20b8 100644 --- a/processors/jiandaoyun.py +++ b/processors/jiandaoyun.py @@ -20,7 +20,7 @@ class JiandaoyunUploader: BASE_URL = "https://api.jiandaoyun.com/api/v5" # 需要转换为数字的字段 - NUMERIC_FIELDS = {"最高限价", "最高投标限价", "预估金额"} + NUMERIC_FIELDS = {"最高限价", "最高投标限价", "预估金额", "招标估算金额"} def __init__(self, api_key: str = None): self.api_key = api_key or JDY_CONFIG["api_key"] diff --git a/processors/pipeline.py b/processors/pipeline.py index 61bb470..fcf390e 100644 --- a/processors/pipeline.py +++ b/processors/pipeline.py @@ -147,8 +147,8 @@ class ProcessingPipeline: """将爬虫输出字段映射为处理所需字段""" record = {} - # 基础字段映射 - record["名称"] = item.get("标题", item.get("项目名称", "")) + # 基础字段映射:优先使用项目名称(已处理掉批准文号的名称) + record["名称"] = item.get("项目名称", item.get("标题", "")) pub_date = item.get("发布日期", item.get("项目发布时间", "")) record["发布时间"] = pub_date # 项目发布时间修复:使用与发布时间相同的值,确保格式一致 diff --git a/test_name_fix.py b/test_name_fix.py new file mode 100644 index 0000000..e6f4c32 --- /dev/null +++ b/test_name_fix.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +""" +测试项目名称修复效果 +""" +import json +import csv +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from processors.pipeline import ProcessingPipeline + +def test_name_fix(): + """测试项目名称修复""" + + # 1. 读取原始CSV数据 + csv_file = "data/浙江省公共资源交易中心_20260224_132741.csv" + if not os.path.exists(csv_file): + print(f"原始数据文件不存在: {csv_file}") + return + + print(f"读取原始数据: {csv_file}") + + # 读取CSV文件,转换为字典列表 + with open(csv_file, 'r', encoding='utf-8-sig') as f: + reader = csv.DictReader(f) + raw_items = list(reader) + + print(f"原始数据条数: {len(raw_items)}") + + # 2. 创建处理管道实例(只需要字段映射部分) + pipeline = ProcessingPipeline() + + # 3. 测试字段映射 + print("\n=== 测试字段映射 ===") + test_results = [] + + for i, item in enumerate(raw_items[:5]): # 只测试前5条 + print(f"\n--- 第 {i+1} 条 ---") + print(f"原始标题: {item.get('标题', '')[:60]}") + + # 调用字段映射方法 + record = pipeline._map_fields(item, "公告链接", "招标公告") + + print(f"处理后名称: {record.get('名称', '')}") + print(f"项目名称: {record.get('项目名称', '')}") + + test_results.append({ + '原始标题': item.get('标题', ''), + '处理后名称': record.get('名称', ''), + '项目名称': record.get('项目名称', '') + }) + + # 4. 保存测试结果 + output_file = "data/项目名称修复测试结果.json" + with open(output_file, 'w', encoding='utf-8') as f: + json.dump({ + '测试时间': '2026-02-24', + '测试结果': test_results + }, f, ensure_ascii=False, indent=2) + + print(f"\n测试结果已保存到: {output_file}") + +if __name__ == "__main__": + test_name_fix() diff --git a/test_upload_jdy.py b/test_upload_jdy.py new file mode 100644 index 0000000..ac02546 --- /dev/null +++ b/test_upload_jdy.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +""" +测试上传招标公告数据到简道云 +""" +import json +import logging +import os + +# 添加当前目录到模块搜索路径 +import sys +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +# 导入简道云上传器 +from processors.jiandaoyun import JiandaoyunUploader + +# 配置日志 +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +def upload_test_data(): + """ + 上传测试数据到简道云 + """ + # 最新的AI处理结果文件 + json_file = "data/浙江招标公告_AI处理_20260224_133102.json" + region_name = "浙江招标公告" + + logger.info(f"开始上传 {json_file} 到简道云") + + # 1. 读取JSON文件 + if not os.path.exists(json_file): + logger.error(f"JSON文件不存在: {json_file}") + return + + try: + with open(json_file, 'r', encoding='utf-8') as f: + data = json.load(f) + except Exception as e: + logger.error(f"读取JSON文件失败: {e}") + return + + # 2. 提取记录数据 + records = data.get('data', []) + if not records: + logger.error("JSON文件中没有数据") + return + + logger.info(f"读取完成,共 {len(records)} 条记录") + + # 3. 上传到简道云 + uploader = JiandaoyunUploader() + result = uploader.upload_records(region_name, records) + + # 4. 输出结果 + logger.info(f"上传完成: 成功 {result['success']}, 失败 {result['failed']}") + + if result['failed'] > 0: + logger.error("上传失败的记录:") + for error in result.get('errors', []): + logger.error(f" - {error}") + + return result + +def main(): + """ + 主函数 + """ + logger.info("=== 测试简道云上传 ===") + result = upload_test_data() + logger.info("=== 测试完成 ===") + +if __name__ == "__main__": + main()