# -*- coding: utf-8 -*-
"""Exercise the real extraction flow against the repaired configuration.

The script registers ``config_fixed`` in ``sys.modules`` under the name
``config`` before the processor modules are imported, so every later
``import config`` resolves to ``config_fixed``. It then fetches ``TEST_URL``,
runs field extraction on the fetched content and logs the extracted values.
"""
import logging
import sys
import os

# Not used by the code below; kept because it was imported originally.
import importlib

# Make the modules that live next to this script importable.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Take any already-registered ``config`` module out of the module cache and
# remember it. It has to be captured *before* it is removed, otherwise the
# restore step at the end of the run has nothing to put back.
original_config = sys.modules.pop('config', None)

import config_fixed

# If no ``config`` was registered beforehand, fall back to whatever importing
# ``config_fixed`` may have registered under that name (None if nothing was).
if original_config is None:
    original_config = sys.modules.get('config')

# From here on, ``import config`` yields ``config_fixed``.
sys.modules['config'] = config_fixed

# These imports must come after the swap above.
# NOTE(review): presumably the processors import ``config`` themselves - confirm.
from processors.content_fetcher import ContentFetcher
from processors.deepseek import DeepSeekProcessor

# Logging setup.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Page used for the test run.
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/9a7966d8-80f4-475b-897e-f7631bc64d0c.html"


def _restore_config():
    """Undo the ``sys.modules['config']`` swap performed at import time."""
    if original_config is not None:
        sys.modules['config'] = original_config
    elif sys.modules.get('config') is config_fixed:
        # Nothing to restore: just drop the temporary alias.
        del sys.modules['config']


def main():
    """Fetch ``TEST_URL``, extract the configured fields and log the outcome.

    Returns:
        None. The function returns early, after logging an error, when no
        content could be fetched or when the region configuration key is
        missing from ``REGION_CONFIGS``.
    """
    logger.info(f"开始测试: {TEST_URL}")

    # Fetch the page content.
    fetcher = ContentFetcher(temp_dir="temp_files")
    content = fetcher.get_full_content(TEST_URL)

    if not content:
        logger.error("无法获取内容")
        return

    logger.info(f"获取到内容长度: {len(content)} 字符")

    processor = DeepSeekProcessor()

    # Look up the field list for this region/notice-type key.
    config_key = "zhejiang:招标文件公示"
    # Imported here, after the module swap, so it resolves to ``config_fixed``.
    from config import REGION_CONFIGS

    if config_key not in REGION_CONFIGS:
        logger.error(f"未找到配置: {config_key}")
        return

    ai_fields = REGION_CONFIGS[config_key]["ai_fields"]
    logger.info(f"需要提取的字段: {ai_fields}")

    # Run the extraction.
    extracted = processor.extract_fields(content, ai_fields, "浙江")

    # Log every extracted field.
    logger.info("\n提取结果:")
    for field, value in extracted.items():
        logger.info(f"  {field}: {value}")

    # Report separately on the two fields this test is about; the literal
    # "文档未提及" is treated as "nothing was extracted".
    for field in ["资质要求", "业绩要求"]:
        if field in extracted:
            value = extracted[field]
            logger.info(f"\n{field}提取结果: {value}")
            if value != "文档未提及":
                logger.info(f"✓ {field}提取成功!")
            else:
                logger.warning(f"✗ {field}未提取到")


if __name__ == "__main__":
    try:
        main()
    finally:
        # Put the module cache back the way it was found.
        _restore_config()