# -*- coding: utf-8 -*-
|
|||
|
|
"""
Test the effect of the project-name fix.
"""
|
|||
|
|
import json
|
|||
|
|
import csv
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
|
|||
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|||
|
|
|
|||
|
|
from processors.pipeline import ProcessingPipeline
|
|||
|
|
|
|||
|
|
def test_name_fix():
    """Run the field-mapping step on a few raw CSV rows and save the names it yields.

    Reads the raw export ``data/浙江省公共资源交易中心_20260224_132741.csv``,
    passes the first five rows through ``ProcessingPipeline._map_fields`` and
    prints each original title next to the mapped ``名称`` and ``项目名称``
    values. The collected results are written as JSON to
    ``data/项目名称修复测试结果.json``.

    Both paths are relative to the current working directory. When the CSV
    file does not exist, a message is printed and the function returns early
    without creating a pipeline or writing any output.

    The function deliberately takes no parameters: its name starts with
    ``test_``, so a test runner that collects it would interpret parameters
    as fixtures.

    Returns:
        None.
    """
    # 1. Read the raw CSV data.
    csv_file = "data/浙江省公共资源交易中心_20260224_132741.csv"
    if not os.path.exists(csv_file):
        print(f"原始数据文件不存在: {csv_file}")
        return

    print(f"读取原始数据: {csv_file}")

    # Load every row as a dict keyed by the header line. newline='' is what
    # the csv module requires so that quoted fields containing line breaks
    # are parsed correctly.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
        raw_items = list(csv.DictReader(f))

    print(f"原始数据条数: {len(raw_items)}")

    # 2. Create the pipeline instance (only its field-mapping part is used).
    pipeline = ProcessingPipeline()

    # 3. Exercise the field mapping.
    print("\n=== 测试字段映射 ===")
    test_results = []

    for index, item in enumerate(raw_items[:5], start=1):  # first 5 rows only
        # DictReader fills columns missing from a short row with None, so
        # .get('标题', '') may still return None; normalise it to '' before
        # slicing to avoid a TypeError.
        title = item.get('标题') or ''

        print(f"\n--- 第 {index} 条 ---")
        print(f"原始标题: {title[:60]}")

        # Call the field-mapping method. The two string arguments are passed
        # through unchanged; their meaning is defined by ProcessingPipeline,
        # which is not visible here.
        record = pipeline._map_fields(item, "公告链接", "招标公告")

        mapped_name = record.get('名称', '')
        project_name = record.get('项目名称', '')
        print(f"处理后名称: {mapped_name}")
        print(f"项目名称: {project_name}")

        test_results.append({
            '原始标题': title,
            '处理后名称': mapped_name,
            '项目名称': project_name,
        })

    # 4. Save the results.
    output_file = "data/项目名称修复测试结果.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump({
            # NOTE(review): the test date is a fixed literal, not the actual
            # run date — confirm whether that is intended.
            '测试时间': '2026-02-24',
            '测试结果': test_results,
        }, f, ensure_ascii=False, indent=2)

    print(f"\n测试结果已保存到: {output_file}")
|
|||
|
|
|
|||
|
|
# Run the check only when this file is executed directly as a script.
if __name__ == "__main__":
    test_name_fix()
|