Initial commit: 招标信息爬虫与分析系统

This commit is contained in:
ztb-system
2026-02-13 18:15:20 +08:00
commit d2fa06801f
38 changed files with 5415 additions and 0 deletions

73
test_original_config.py Normal file
View File

@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
"""
使用原始config.py测试提取功能
"""
import logging
import sys
import os
# 添加当前目录到模块搜索路径
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# 导入原始配置
from config import REGION_CONFIGS
from processors.content_fetcher import ContentFetcher
from processors.deepseek import DeepSeekProcessor
# Logging setup: INFO level with timestamp, level name and message.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Page fetched and fed to the extractor by this test script.
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/d2f95295-6cb0-40c9-8023-cdbbf7e660ae.html"
def main():
    """Run one field-extraction test against TEST_URL and log the outcome.

    Steps:
        1. Fetch the page content for TEST_URL via ContentFetcher.
        2. Look up the "zhejiang:招标文件公示" entry in REGION_CONFIGS and
           read its "ai_fields" list.
        3. Call DeepSeekProcessor.extract_fields with the content and fields.
        4. Log every extracted field, then report separately on the two
           fields of special interest ("资质要求" and "业绩要求").

    Every failure (no content, missing config, missing "ai_fields", empty
    extraction result) is logged and ends the run early instead of raising.

    Returns:
        None.
    """
    logger.info(f"开始测试: {TEST_URL}")

    # Fetch the page content.
    fetcher = ContentFetcher(temp_dir="temp_files")
    content = fetcher.get_full_content(TEST_URL)
    if not content:
        logger.error("无法获取内容")
        return
    logger.info(f"获取到内容长度: {len(content)} 字符")

    processor = DeepSeekProcessor()

    # Resolve the field list from the configuration entry.
    config_key = "zhejiang:招标文件公示"
    if config_key not in REGION_CONFIGS:
        logger.error(f"未找到配置: {config_key}")
        return
    ai_fields = REGION_CONFIGS[config_key].get("ai_fields")
    if not ai_fields:
        # An entry without "ai_fields" used to raise KeyError here.
        logger.error(f"配置缺少 ai_fields: {config_key}")
        return
    logger.info(f"需要提取的字段: {ai_fields}")

    # Run the extraction.
    extracted = processor.extract_fields(content, ai_fields, "浙江")
    if not extracted:
        # Guard against None / empty results so .items() below cannot crash.
        logger.error("提取结果为空")
        return

    # Dump everything that was extracted.
    logger.info("\n提取结果:")
    for field, value in extracted.items():
        logger.info(f"  {field}: {value}")

    # Report on the qualification and track-record requirement fields.
    for field in ("资质要求", "业绩要求"):
        if field not in extracted:
            # A field absent from the result counts as "not extracted" too,
            # rather than being skipped silently.
            logger.warning(f"{field}未提取到")
            continue
        value = extracted[field]
        logger.info(f"\n{field}提取结果: {value}")
        # "文档未提及" is treated as the "no value" marker
        # (presumably what the processor emits for absent fields; confirm).
        if value != "文档未提及":
            logger.info(f"{field}提取成功!")
        else:
            logger.warning(f"{field}未提取到")
# Run the extraction test only when this file is executed as a script.
if __name__ == "__main__":
    main()