Files
ztb/test_fix_verification.py
2026-02-13 18:15:20 +08:00

71 lines
2.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
测试修复后的项目名称和批准文号提取逻辑
"""
import logging
import sys
import os
# Put this script's own directory first on the module search path so that
# modules living next to it (such as the "spiders" package imported further
# down) can be resolved regardless of the current working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Logging setup: INFO and above, one "timestamp - level - message" line each.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def test_project_name_extraction_fix():
    """Verify that the approval number is split out of the project name.

    For every sample title a minimal API-style record is built and passed to
    ``ZhejiangSpider._parse_record(record, "测试")``. The ``项目名称`` and
    ``项目批准文号`` fields of the result are logged and then checked.

    A title is counted as a failure when:

    * the parser returns ``None``;
    * no approval number was extracted (every sample title ends with a
      bracketed code, so an empty value means the extraction did not work);
    * the extracted approval number still occurs inside the project name.

    Raises:
        AssertionError: If at least one sample title failed. It is raised only
            after all titles have been processed and logged, so a single run
            shows the complete picture.
    """
    logger.info("开始测试修复后的项目名称提取逻辑")

    # Sample announcement titles; each one ends with a bracketed code.
    test_titles = [
        "湖堤生态修复工程[A3306010720060234001001]",
        "[招标文件](测-试临海市房建施工0212-2招标文件公示[A3300000090000695005001]",
        "[招标文件]通途路(大闸路-湖西路)拓宽改造工程(监理)项目招标文件预公示[A3302010220026373001001]",
        "[招标文件]集成电路链主企业配套产业园南片B、H、FG地块及配套项目-B地块建设工程01地块施工招标文件公示[A3306021280001738001001]",
        "[招标文件]临海市副中心城市片区基础设施更新改造工程—沿河路、前王路及镇政府停车场改造提升招标文件公示[A3300000090000689001001]",
        "[招标文件]宁波市海曙绿道提升工程(施工)招标文件预公示[A3302030230026386001001]",
        "[招标文件]嘉科微二号园一号楼改造提升工程设计采购施工总承包(EPC)招标文件公示[A3304010550007317001001]",
    ]

    # Imported inside the function, as in the original script.
    from spiders.zhejiang import ZhejiangSpider

    failed_titles = []
    for title in test_titles:
        logger.info(f"\n测试标题: {title}")

        # Stand-in for a raw API record; only "title" carries real data.
        # NOTE(review): "infod" may be a typo for "infoid" - confirm against
        # the fields _parse_record actually reads.
        api_record = {
            "title": title,
            "linkurl": "",
            "webdate": "2026-02-13",
            "infod": "",
            "categoryname": "",
        }

        # Called on the class with (record, source); this presumes
        # _parse_record is a static/class method - TODO confirm.
        parsed_item = ZhejiangSpider._parse_record(api_record, "测试")
        if parsed_item is None:
            logger.error(" ❌ 错误: 解析结果为空")
            failed_titles.append(title)
            continue

        logger.info(" 提取结果:")
        logger.info(f" 项目名称: {parsed_item.get('项目名称', '未提取')}")
        logger.info(f" 项目批准文号: {parsed_item.get('项目批准文号', '未提取')}")

        # "or ''" also covers fields that are present but set to None, which
        # would otherwise break the substring test below.
        project_name = parsed_item.get('项目名称') or ''
        approval_number = parsed_item.get('项目批准文号') or ''

        if not approval_number:
            # Previously this case was reported as a success.
            logger.error(" ❌ 错误: 未提取到项目批准文号")
            failed_titles.append(title)
        elif approval_number in project_name:
            logger.error(f" ❌ 错误: 批准文号 '{approval_number}' 仍在项目名称中")
            failed_titles.append(title)
        else:
            logger.info(" ✅ 正确: 批准文号已从项目名称中删除")

    # Make the outcome visible to pytest / the exit code, not only to the log.
    if failed_titles:
        raise AssertionError(
            f"{len(failed_titles)} 个标题未通过验证: {failed_titles}"
        )
def main():
    """Script entry point: run the extraction verification once."""
    test_project_name_extraction_fix()
# Run the verification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()