# -*- coding: utf-8 -*-
"""
|
||
测试单条数据处理
|
||
"""
|
||
import logging
import os
import re
import sys

# Add this file's directory to the front of the module search path.
# This must run before the project-local imports below.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Import the configuration and the processors.
from config import REGION_CONFIGS, PROCESSING_CONFIG
from spiders.zhejiang import ZhejiangSpider
from processors.pipeline import ProcessingPipeline

# Configure logging: INFO level, "<time> - <level> - <message>" line format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
# Module-level logger used by everything in this script.
logger = logging.getLogger(__name__)

# URL of the notice page used for this test.
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/9a7966d8-80f4-475b-897e-f7631bc64d0c.html"

# Simulated spider result: one scraped item, keyed by the Chinese field
# names that main() reads and extends before handing it to the pipeline.
TEST_ITEM = {
    "标题": "[招标文件](测-试)临海市房建施工0212-2招标文件公示[A3300000090000695005001]",
    "发布日期": "2026-02-12",
    "地区": "临海市",
    "公告类型": "招标文件公示",
    "链接": TEST_URL,
    "来源": "浙江省公共资源交易中心",
}

# Title layout: "[招标文件|招标公告] <project name> [<approval number>]", where the
# approval number is a trailing run of upper-case letters and digits.
_TITLE_PATTERN = re.compile(
    r"\[(?:招标文件|招标公告)\]\s*(.*?)\s*\[([A-Z0-9]+)\]\s*$"
)


def _parse_title(title):
    """Split a notice title into ``(project_name, approval_number)``.

    Returns ``None`` when the title does not match ``_TITLE_PATTERN``.
    """
    match = _TITLE_PATTERN.search(title)
    if match is None:
        return None
    return match.group(1).strip(), match.group(2).strip()


def main():
    """Run the single-item test.

    Step 1 parses the title of ``TEST_ITEM`` and logs the extracted project
    name and approval number. Step 2 copies the item, adds those two fields
    to the copy when parsing succeeded, sends the copy through
    ``ProcessingPipeline.process_results`` and logs selected fields of the
    first processed record.
    """
    logger.info("开始单条数据测试")

    # 1. Title parsing.
    parsed = _parse_title(TEST_ITEM["标题"])
    if parsed is not None:
        project_name, project_approval = parsed
        logger.info("标题解析结果:")
        logger.info(" 项目名称: %s", project_name)
        logger.info(" 项目批准文号: %s", project_approval)
    else:
        logger.warning("标题解析失败")

    # 2. Processing pipeline.
    logger.info("\n测试处理管道:")
    pipeline = ProcessingPipeline()

    # Imitate the spider's own enrichment step: work on a copy so TEST_ITEM
    # itself is left untouched, and add the two parsed fields only when the
    # title could be parsed.
    test_item_with_fields = TEST_ITEM.copy()
    parsed = _parse_title(test_item_with_fields["标题"])
    if parsed is not None:
        (
            test_item_with_fields["项目名称"],
            test_item_with_fields["项目批准文号"],
        ) = parsed

    logger.info("添加字段后的测试项:")
    logger.info(" 项目名称: %s", test_item_with_fields.get('项目名称', '无'))
    logger.info(" 项目批准文号: %s", test_item_with_fields.get('项目批准文号', '无'))

    # The pipeline takes a list of spider results; here it holds one item.
    results = [test_item_with_fields]

    # NOTE(review): upload=False presumably disables any upload step inside
    # the pipeline - confirm against ProcessingPipeline.process_results.
    processed = pipeline.process_results(
        results,
        site="zhejiang",
        notice_type="招标文件公示",
        upload=False,
    )

    # Report the fields of interest from the first processed record.
    if processed:
        record = processed[0]
        logger.info("\n处理结果:")
        logger.info(" 项目名称: %s", record.get('项目名称', '文档未提及'))
        logger.info(" 项目批准文号: %s", record.get('项目批准文号', '文档未提及'))
        logger.info(" 批准文号: %s", record.get('批准文号', '文档未提及'))
        logger.info(" 最高投标限价: %s", record.get('最高投标限价', '文档未提及'))
        logger.info(" 最高限价: %s", record.get('最高限价', '文档未提及'))
    else:
        logger.error("处理失败")

# Run the test only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()