Files
ztb/test_project_name_extraction.py
2026-02-13 18:15:20 +08:00

62 lines
2.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
Test the project-name extraction logic.

Runs a set of sample notice titles through the title regular expression and
logs the project name and project approval number extracted from each one.
"""
import logging
import sys
import os
# Put this script's own directory at the front of the module search path so
# that modules located next to it can be imported.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _SCRIPT_DIR)

# Logging setup: INFO level, one "timestamp - level - message" line per record.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)
def test_project_name_extraction():
    """Check that project names and approval numbers parse out of notice titles.

    Each sample title is matched against a pattern of the form
    ``[招标文件|招标公告] <project name> [<approval number>]``. For every
    title the extracted project name (with any trailing notice suffix such as
    "招标文件公示" removed) and the approval number are logged; a title that
    does not match is logged as a warning and recorded as a failure.

    Raises:
        AssertionError: If at least one sample title fails to parse, so the
            test cannot pass silently when the pattern stops matching.
    """
    # Imported locally so the function does not rely on a module-level import.
    import re

    logger.info("开始测试项目名称提取逻辑")

    # Sample titles to run through the parser.
    test_titles = [
        "[招标文件](测-试临海市房建施工0212-2招标文件公示[A3300000090000695005001]",
        "[招标文件]通途路(大闸路-湖西路)拓宽改造工程(监理)项目招标文件预公示[A3302010220026373001001]",
        "[招标文件]集成电路链主企业配套产业园南片B、H、FG地块及配套项目-B地块建设工程01地块施工招标文件公示[A3306021280001738001001]",
        "[招标文件]临海市副中心城市片区基础设施更新改造工程—沿河路、前王路及镇政府停车场改造提升招标文件公示[A3300000090000689001001]",
        "[招标文件]宁波市海曙绿道提升工程(施工)招标文件预公示[A3302030230026386001001]",
        "[招标文件]嘉科微二号园一号楼改造提升工程设计采购施工总承包(EPC)招标文件公示[A3304010550007317001001]",
    ]

    # Loop invariants, built once instead of on every iteration.
    # Group 1 is the raw project name, group 2 the bracketed approval number
    # that must end the title.
    title_pattern = re.compile(
        r"\[(?:招标文件|招标公告)\]\s*(.*?)\s*\[([A-Z0-9]+)\]\s*$"
    )
    # Notice-type suffixes that are not part of the project name itself.
    suffixes = ("招标文件公示", "招标文件预公示", "招标公告", "招标预公告")

    failed_titles = []
    for title in test_titles:
        logger.info(f"\n测试标题: {title}")

        match = title_pattern.search(title)
        if match is None:
            logger.warning(" 标题解析失败")
            failed_titles.append(title)
            continue

        project_name = match.group(1).strip()
        # Every suffix is tried in list order with no early exit, so a name
        # that still ends with a later suffix after one removal is trimmed again.
        for suffix in suffixes:
            if project_name.endswith(suffix):
                project_name = project_name[:-len(suffix)].strip()
        project_approval = match.group(2).strip()

        logger.info(" 提取结果:")
        logger.info(f" 项目名称: {project_name}")
        logger.info(f" 项目批准文号: {project_approval}")

    # Raised explicitly (rather than via `assert`) so the check also runs
    # under `python -O`.
    if failed_titles:
        raise AssertionError(
            f"{len(failed_titles)} title(s) failed to parse: {failed_titles}"
        )
def main():
    """Script entry point: run the project-name extraction check once."""
    test_project_name_extraction()
# Run the extraction check only when this file is executed directly as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()