Files
ztb/test_single_item.py
2026-02-13 18:15:20 +08:00

101 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
Test the processing of a single data item.
"""
import logging
import sys
import os
# Put the directory containing this file at the front of the module search
# path (presumably so the project-local imports below resolve when the script
# is launched from another working directory -- confirm against the layout).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# Import the configuration and the processing components
from config import REGION_CONFIGS, PROCESSING_CONFIG
from spiders.zhejiang import ZhejiangSpider
from processors.pipeline import ProcessingPipeline
# Logging: INFO level, each record rendered as "<time> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# URL of the notice page used as the test fixture.
TEST_URL = (
    "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/"
    "9a7966d8-80f4-475b-897e-f7631bc64d0c.html"
)

# Hand-written stand-in for one item as a spider would return it.
TEST_ITEM = {
    "标题": "[招标文件](测-试临海市房建施工0212-2招标文件公示[A3300000090000695005001]",
    "发布日期": "2026-02-12",
    "地区": "临海市",
    "公告类型": "招标文件公示",
    "链接": TEST_URL,
    "来源": "浙江省公共资源交易中心",
}
def _parse_title(title):
    """Split a notice title into ``(project_name, approval_number)``.

    The title must contain a ``[招标文件]`` or ``[招标公告]`` tag, followed by
    the project name, and end with a bracketed code made of upper-case ASCII
    letters and digits (trailing whitespace is tolerated).

    Args:
        title: Raw title string of a notice.

    Returns:
        A ``(project_name, approval_number)`` tuple with surrounding
        whitespace stripped, or ``None`` when the title does not match.
    """
    # Function-scope import: the module has no top-level ``import re``.
    import re

    match = re.search(
        r"\[(?:招标文件|招标公告)\]\s*(.*?)\s*\[([A-Z0-9]+)\]\s*$",
        title,
    )
    if match is None:
        return None
    return match.group(1).strip(), match.group(2).strip()


def main():
    """Run the single-item check: parse the title, then run the pipeline.

    Steps:
      1. Parse the title of ``TEST_ITEM`` and log the extracted project name
         and approval number (or a warning when the title does not match).
      2. Copy ``TEST_ITEM``, add the parsed fields to the copy when parsing
         succeeded, pass it as a one-element list to
         ``ProcessingPipeline.process_results`` with ``upload=False``, and
         log selected fields of the first returned record.

    Returns:
        None. All results are reported through ``logger``.
    """
    logger.info("开始单条数据测试")

    # 1. Title parsing. The result is reused in step 2 instead of running
    #    the same regular expression a second time.
    parsed = _parse_title(TEST_ITEM["标题"])
    if parsed is not None:
        project_name, project_approval = parsed
        logger.info("标题解析结果:")
        logger.info(f" 项目名称: {project_name}")
        logger.info(f" 项目批准文号: {project_approval}")
    else:
        logger.warning("标题解析失败")

    # 2. Processing pipeline.
    logger.info("\n测试处理管道:")
    pipeline = ProcessingPipeline()

    # Add the project name and approval number to a copy of the item, so the
    # module-level fixture is left untouched.
    test_item_with_fields = TEST_ITEM.copy()
    if parsed is not None:
        (
            test_item_with_fields["项目名称"],
            test_item_with_fields["项目批准文号"],
        ) = parsed
    logger.info("添加字段后的测试项:")
    logger.info(f" 项目名称: {test_item_with_fields.get('项目名称', '')}")
    logger.info(f" 项目批准文号: {test_item_with_fields.get('项目批准文号', '')}")

    # The pipeline takes a list of results; wrap the single item.
    processed = pipeline.process_results(
        [test_item_with_fields],
        site="zhejiang",
        notice_type="招标文件公示",
        upload=False,
    )

    # Report the fields of interest from the first processed record.
    if processed:
        record = processed[0]
        logger.info("\n处理结果:")
        logger.info(f" 项目名称: {record.get('项目名称', '文档未提及')}")
        logger.info(f" 项目批准文号: {record.get('项目批准文号', '文档未提及')}")
        logger.info(f" 批准文号: {record.get('批准文号', '文档未提及')}")
        logger.info(f" 最高投标限价: {record.get('最高投标限价', '文档未提及')}")
        logger.info(f" 最高限价: {record.get('最高限价', '文档未提及')}")
    else:
        logger.error("处理失败")


if __name__ == "__main__":
    main()