Initial commit: 招标信息爬虫与分析系统
This commit is contained in:
98
test_real_with_fixed_config.py
Normal file
98
test_real_with_fixed_config.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
使用修复后的配置文件测试真实提取功能
|
||||
"""
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Put this script's own directory first on the module search path so that
# modules located next to it take precedence for the imports below.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

import importlib

# Detach any already-loaded ``config`` module, but keep a reference to it.
# (Previously the entry was deleted before a backup was taken, so it could
# never be restored afterwards.)
_previous_config = sys.modules.pop('config', None)

# Load the replacement configuration module.
import config_fixed

# Module to restore once the test is finished: whatever is registered as
# ``config`` after importing config_fixed, otherwise the module detached
# above, otherwise None.
original_config = sys.modules.get('config', _previous_config)

# Register config_fixed under the name ``config`` so that later
# ``import config`` / ``from config import ...`` statements receive it.
sys.modules['config'] = config_fixed
|
||||
|
||||
# 现在导入处理器
|
||||
from processors.content_fetcher import ContentFetcher
|
||||
from processors.deepseek import DeepSeekProcessor
|
||||
|
||||
# Logging setup: INFO level and above, "time - level - message" layout.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Page that the live extraction test is run against.
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/9a7966d8-80f4-475b-897e-f7631bc64d0c.html"
|
||||
|
||||
def main():
    """Fetch TEST_URL, run AI field extraction on it and log the outcome.

    Steps:
      1. Download the page content with ``ContentFetcher``.
      2. Look up the "zhejiang:招标文件公示" entry in ``REGION_CONFIGS`` and
         read its ``ai_fields`` list.
      3. Ask ``DeepSeekProcessor.extract_fields`` to extract those fields.
      4. Log every extracted field, then report separately on the
         "资质要求" and "业绩要求" fields.

    Returns:
        None. Every failure (no content, missing configuration entry, empty
        extraction result) is reported through the module logger and ends
        the run early.
    """
    logger.info(f"开始测试: {TEST_URL}")

    # Download the page content.
    fetcher = ContentFetcher(temp_dir="temp_files")
    content = fetcher.get_full_content(TEST_URL)

    if not content:
        logger.error("无法获取内容")
        return

    logger.info(f"获取到内容长度: {len(content)} 字符")

    processor = DeepSeekProcessor()

    # The import is done here, at call time, so it is resolved through
    # sys.modules after the module-level code has registered the replacement
    # configuration under the name ``config``.
    config_key = "zhejiang:招标文件公示"
    from config import REGION_CONFIGS

    if config_key not in REGION_CONFIGS:
        logger.error(f"未找到配置: {config_key}")
        return

    ai_fields = REGION_CONFIGS[config_key]["ai_fields"]
    logger.info(f"需要提取的字段: {ai_fields}")

    # Run the extraction.
    extracted = processor.extract_fields(content, ai_fields, "浙江")

    # Guard against a None/empty result instead of failing on ``.items()``.
    if not extracted:
        logger.error("提取结果为空")
        return

    # Dump everything that came back.
    logger.info("\n提取结果:")
    for field, value in extracted.items():
        logger.info(f" {field}: {value}")

    # Focus report on the two requirement fields. A field that is missing
    # from the result, empty, or equal to the "文档未提及" placeholder is
    # reported as not extracted.
    for field in ("资质要求", "业绩要求"):
        value = extracted.get(field)
        logger.info(f"\n{field}提取结果: {value}")

        if value and value != "文档未提及":
            logger.info(f"✓ {field}提取成功!")
        else:
            logger.warning(f"✗ {field}未提取到")
|
||||
|
||||
if __name__ == "__main__":
    try:
        main()
    finally:
        # Undo the sys.modules override made at import time: put the saved
        # module back, or, when there was none to save, remove the
        # replacement entry so it does not stay registered as ``config``.
        if original_config is not None:
            sys.modules['config'] = original_config
        else:
            sys.modules.pop('config', None)
|
||||
Reference in New Issue
Block a user