# -*- coding: utf-8 -*-
"""Test the field-extraction feature using the original config.py.

The script fetches the content at ``TEST_URL``, looks up the ``ai_fields``
list registered under the ``zhejiang:招标文件公示`` key of
``REGION_CONFIGS``, runs ``DeepSeekProcessor.extract_fields`` on the
content, and logs every extracted field. It then reports separately on
the two fields of special interest (``资质要求`` and ``业绩要求``).
"""
import logging
import sys
import os

# Put this script's own directory first on the module search path so the
# `config` and `processors` imports below resolve regardless of the
# working directory the script is launched from.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Import the original configuration and the processing components.
from config import REGION_CONFIGS
from processors.content_fetcher import ContentFetcher
from processors.deepseek import DeepSeekProcessor

# Configure logging.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# URL of the page used for the test run.
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/d2f95295-6cb0-40c9-8023-cdbbf7e660ae.html"


def main():
    """Run one extraction pass against ``TEST_URL`` and log the outcome.

    Steps, in order: fetch the page content, build the processor, look up
    the configured field list, run the extraction, log every extracted
    field, then log a success/failure line for each focus field.

    Returns:
        None. Every failure (no content, missing configuration entry,
        empty extraction result) is reported through the logger and ends
        the run early.
    """
    logger.info(f"开始测试: {TEST_URL}")

    # Fetch the content.
    fetcher = ContentFetcher(temp_dir="temp_files")
    content = fetcher.get_full_content(TEST_URL)

    if not content:
        logger.error("无法获取内容")
        return

    logger.info(f"获取到内容长度: {len(content)} 字符")

    # Build the extractor.
    processor = DeepSeekProcessor()

    # Look up the configuration entry for this region/announcement type.
    config_key = "zhejiang:招标文件公示"
    if config_key not in REGION_CONFIGS:
        logger.error(f"未找到配置: {config_key}")
        return

    ai_fields = REGION_CONFIGS[config_key]["ai_fields"]
    logger.info(f"需要提取的字段: {ai_fields}")

    # Run the extraction.
    extracted = processor.extract_fields(content, ai_fields, "浙江")

    # Guard against an empty or None result so the reporting code below
    # cannot fail with an AttributeError on `.items()`.
    # NOTE(review): assumes extract_fields returns a dict on success —
    # confirm against DeepSeekProcessor.
    if not extracted:
        logger.error("提取结果为空")
        return

    # Report everything that came back.
    logger.info("\n提取结果:")
    for field, value in extracted.items():
        logger.info(f"  {field}: {value}")

    # Pay special attention to the qualification and track-record fields.
    for field in ("资质要求", "业绩要求"):
        if field not in extracted:
            # A field absent from the result counts as "not extracted";
            # report it instead of skipping it silently.
            logger.warning(f"✗ {field}未提取到")
            continue

        value = extracted[field]
        logger.info(f"\n{field}提取结果: {value}")
        # "文档未提及" is the value this script treats as "nothing found".
        if value != "文档未提及":
            logger.info(f"✓ {field}提取成功!")
        else:
            logger.warning(f"✗ {field}未提取到")


if __name__ == "__main__":
    main()