Initial commit: 招标信息爬虫与分析系统

This commit is contained in:
ztb-system
2026-02-13 18:15:20 +08:00
commit d2fa06801f
38 changed files with 5415 additions and 0 deletions

View File

@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
"""
Test the attachment download and parsing functionality.
"""
import logging
from processors.content_fetcher import ContentFetcher
# Logging setup: INFO level; every record is rendered as
# "<timestamp> - <level name> - <message>".
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

# URL of the page this script fetches and inspects.
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/9a7966d8-80f4-475b-897e-f7631bc64d0c.html"
def _split_attachments(content, marker="=== 附件:"):
    """Split fetched text into ``(name, body)`` pairs, one per attachment.

    Each attachment section is recognised by ``marker``; the text between the
    marker and the next ``===`` is taken as the attachment name, and
    everything after that delimiter (up to the next marker) as its body.

    Args:
        content: Full text to scan.
        marker: Literal prefix that opens an attachment header.

    Returns:
        A list of ``(name, body)`` tuples with surrounding whitespace
        stripped. ``body`` is ``""`` when a header has no closing ``===``.
    """
    attachments = []
    # Element 0 is whatever precedes the first marker, so it is skipped.
    for section in content.split(marker)[1:]:
        # Cut at the FIRST "===" only: a further "===" inside the attachment
        # text must stay part of the body instead of truncating it.
        name, _, body = section.partition("===")
        attachments.append((name.strip(), body.strip()))
    return attachments


def main():
    """Fetch ``TEST_URL``, report on the attachments in it, and dump the text.

    The content is obtained through ``ContentFetcher.get_full_content``. If
    nothing comes back, an error is logged and the function returns early.
    Otherwise each attachment section is logged with its name, its length and
    whether it mentions a few expected keywords, and the complete text is
    written to ``full_content.txt`` (UTF-8) in the current directory.
    """
    logger.info("开始测试附件处理: %s", TEST_URL)

    # Fetch the content.
    fetcher = ContentFetcher(temp_dir="temp_files")
    content = fetcher.get_full_content(TEST_URL)
    if not content:
        logger.error("无法获取内容")
        return
    logger.info("获取到总内容长度: %s 字符", len(content))

    # Section titles that are looked for inside every attachment.
    keywords = ("资质要求", "业绩要求", "投标人须知前附表")

    # Check whether the content carries any attachment sections.
    if "=== 附件:" in content:
        logger.info("内容中包含附件")
        attachments = _split_attachments(content)
        for index, (attachment_name, attachment_content) in enumerate(attachments, 1):
            logger.info("\n附件 %s: %s", index, attachment_name)
            logger.info("附件内容长度: %s 字符", len(attachment_content))
            for keyword in keywords:
                if keyword in attachment_content:
                    logger.info("✓ 附件中包含%s", keyword)
    else:
        logger.warning("内容中不包含附件")

    # Save the complete content to a file so it can be inspected afterwards.
    with open("full_content.txt", "w", encoding="utf-8") as f:
        f.write(content)
    logger.info("\n完整内容已保存到 full_content.txt")
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()