# -*- coding: utf-8 -*-
"""Manual check of attachment download and parsing.

Retrieves the content for ``TEST_URL`` through
``ContentFetcher.get_full_content``, reports each attachment section found
in the returned text, and writes the whole text to ``full_content.txt``.
"""
import logging

from processors.content_fetcher import ContentFetcher

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Page used for the test
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/9a7966d8-80f4-475b-897e-f7631bc64d0c.html"


def main():
    """Fetch ``TEST_URL``, log a summary of its attachments and save the text.

    Steps:
      1. Ask ``ContentFetcher`` (working directory ``temp_files``) for the
         full content of ``TEST_URL``; log an error and stop if the result
         is empty/falsy.
      2. Look for sections introduced by the marker ``"=== 附件:"``. For each
         one, log its name, the length of its text and whether that text
         mentions a few keywords of interest.
      3. Write the complete content to ``full_content.txt`` (UTF-8).

    Returns:
        None.
    """
    logger.info(f"开始测试附件处理: {TEST_URL}")

    # Retrieve the content
    content = ContentFetcher(temp_dir="temp_files").get_full_content(TEST_URL)
    if not content:
        logger.error("无法获取内容")
        return

    logger.info(f"获取到总内容长度: {len(content)} 字符")

    # Check whether the content carries any attachment section
    marker = "=== 附件:"
    if marker not in content:
        logger.warning("内容中不包含附件")
    else:
        logger.info("内容中包含附件")

        # Keywords to look for inside an attachment, with the message logged
        # when each one is present (checked in this order).
        keyword_messages = (
            ("资质要求", "✓ 附件中包含资质要求"),
            ("业绩要求", "✓ 附件中包含业绩要求"),
            ("投标人须知前附表", "✓ 附件中包含投标人须知前附表"),
        )

        # Everything before the first marker is not an attachment, so skip it.
        sections = content.split(marker)[1:]
        for index, section in enumerate(sections, start=1):
            # The parsing treats the text up to the first "===" as the
            # attachment name and the text up to the following "===" (or the
            # end of the section) as its content.
            # NOTE(review): an attachment whose text itself contains "===" is
            # cut off at that point — confirm against ContentFetcher's output
            # format whether that is intended.
            fields = section.split("===")
            name = fields[0].strip()
            body = fields[1].strip() if len(fields) > 1 else ""

            logger.info(f"\n附件 {index}: {name}")
            logger.info(f"附件内容长度: {len(body)} 字符")

            # Report which of the keywords appear in this attachment
            for keyword, message in keyword_messages:
                if keyword in body:
                    logger.info(message)

    # Save the complete content to a file so it can be analysed afterwards
    with open("full_content.txt", "w", encoding="utf-8") as out_file:
        out_file.write(content)
    logger.info("\n完整内容已保存到 full_content.txt")


if __name__ == "__main__":
    main()