67 lines
2.0 KiB
Python
67 lines
2.0 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
测试项目名称修复效果
|
||
"""
|
||
import json
|
||
import csv
|
||
import os
|
||
import sys
|
||
|
||
# Put the directory containing this script at the front of the module search
# path; presumably so the `processors` import below resolves regardless of the
# current working directory (confirm against the project layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||
|
||
from processors.pipeline import ProcessingPipeline
|
||
|
||
def test_name_fix():
    """Run the first rows of the raw CSV export through field mapping.

    Reads the hard-coded CSV export under ``data/``, passes the first five
    rows to ``ProcessingPipeline._map_fields`` and prints each original title
    next to the mapped ``名称`` and ``项目名称`` values. The collected
    comparisons are then written to ``data/项目名称修复测试结果.json``
    together with the date of the run.

    Takes no arguments and returns ``None``. When the CSV file does not
    exist, a message is printed and the function returns before a pipeline
    is created or any output file is written.
    """
    # Function-local import: the run date below is the only use of datetime.
    from datetime import date

    # 1. Load the raw CSV data.
    csv_file = "data/浙江省公共资源交易中心_20260224_132741.csv"
    if not os.path.exists(csv_file):
        print(f"原始数据文件不存在: {csv_file}")
        return

    print(f"读取原始数据: {csv_file}")

    # Read the CSV into a list of dicts (one per row).
    # "utf-8-sig" strips a leading BOM if one is present; newline='' is what
    # the csv module requires so quoted fields containing line breaks are
    # parsed correctly.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
        raw_items = list(csv.DictReader(f))

    print(f"原始数据条数: {len(raw_items)}")

    # 2. Create the processing pipeline; only its field-mapping step is used.
    pipeline = ProcessingPipeline()

    # 3. Exercise the field mapping.
    print("\n=== 测试字段映射 ===")
    test_results = []

    # Only the first 5 rows are checked.
    for index, item in enumerate(raw_items[:5], start=1):
        # DictReader fills the missing columns of a short row with None, so a
        # .get() default alone is not enough; normalise to '' before slicing.
        title = item.get('标题') or ''

        print(f"\n--- 第 {index} 条 ---")
        print(f"原始标题: {title[:60]}")

        # Call the field-mapping method. The two string arguments are passed
        # through unchanged; their meaning is defined by _map_fields, which is
        # not visible from this file.
        record = pipeline._map_fields(item, "公告链接", "招标公告")
        mapped_name = record.get('名称', '')
        project_name = record.get('项目名称', '')

        print(f"处理后名称: {mapped_name}")
        print(f"项目名称: {project_name}")

        test_results.append({
            '原始标题': title,
            '处理后名称': mapped_name,
            '项目名称': project_name,
        })

    # 4. Save the results, stamped with the actual run date (YYYY-MM-DD)
    # rather than a hard-coded one.
    output_file = "data/项目名称修复测试结果.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump({
            '测试时间': date.today().isoformat(),
            '测试结果': test_results,
        }, f, ensure_ascii=False, indent=2)

    print(f"\n测试结果已保存到: {output_file}")
|
||
|
||
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_name_fix()
|