# -*- coding: utf-8 -*-
"""
Analyze the content format of the bidder-instructions data sheet
(the section titled "投标人须知前附表") so that prompts can be tuned.

The script fetches one tender page, cuts out the data-sheet section, saves
it to ``preamble_content.txt`` and logs which qualification / performance
keywords occur in it.
"""
import logging
from typing import Optional

from processors.content_fetcher import ContentFetcher

# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Test URL
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/9a7966d8-80f4-475b-897e-f7631bc64d0c.html"

# Title that marks the start of the data sheet.
_PREAMBLE_TITLE = "投标人须知前附表"

# Markers treated as the start of the next major section (end of the sheet).
# NOTE(review): "投标人须知" is a prefix of the title itself, so any later
# mention of the title or of the instructions ends the extracted section
# early -- confirm this is intended before tightening the marker.
_END_MARKERS = ("1. 总则", "投标人须知", "第一章", "第二章")

# End markers are searched only this many characters past the title start,
# so the title itself is never taken for an end marker.
_END_MARKER_SKIP = 100

# The line break that closes a keyword's context is searched from this many
# characters past the keyword start, so a break right after a short heading
# does not produce a context that is only the heading.
_CONTEXT_MIN_CHARS = 10

# Requirement headings whose surrounding context is logged.
_REQUIREMENT_KEYWORDS = ("资质要求", "业绩要求")

# Additional keywords whose mere presence is logged.
_EXTRA_KEYWORDS = ("资格要求", "企业资质", "施工总承包", "类似工程业绩")


def _extract_preamble(content: str) -> Optional[str]:
    """Return the data-sheet section of *content*, or None if it is absent.

    The section runs from the first occurrence of the title to the earliest
    end marker found at least ``_END_MARKER_SKIP`` characters after the
    title start, or to the end of *content* when no marker is found.
    """
    start_idx = content.find(_PREAMBLE_TITLE)
    if start_idx == -1:
        return None

    search_from = start_idx + _END_MARKER_SKIP
    marker_hits = [
        idx
        for idx in (content.find(marker, search_from) for marker in _END_MARKERS)
        if idx != -1
    ]
    end_idx = min(marker_hits, default=len(content))
    return content[start_idx:end_idx]


def _keyword_context(text: str, keyword: str) -> Optional[str]:
    """Return *text* from the first *keyword* up to the next line break.

    Returns None when *keyword* does not occur. When no line break follows
    (the keyword is on the final line), the context runs to the end of
    *text* instead of being dropped.
    """
    start = text.find(keyword)
    if start == -1:
        return None

    end = text.find("\n", start + _CONTEXT_MIN_CHARS)
    if end == -1:
        # str.find signals "not found" with -1; use the remainder of the text.
        end = len(text)
    return text[start:end]


def _report_requirement(preamble: str, keyword: str) -> None:
    """Log whether *keyword* occurs in *preamble* and, if so, its context."""
    context = _keyword_context(preamble, keyword)
    if context is None:
        logger.warning("前附表中未找到%s", keyword)
        return

    logger.info("前附表中包含%s", keyword)
    logger.info("%s上下文: %s", keyword, context)


def main():
    """Fetch TEST_URL, extract the data sheet, save it and log key findings.

    Writes the extracted section to ``preamble_content.txt`` in the current
    working directory. Returns None in every case; failures are only logged.
    """
    logger.info("开始分析: %s", TEST_URL)

    # Fetch the page content; a falsy result is treated as a failed fetch.
    fetcher = ContentFetcher(temp_dir="temp_files")
    content = fetcher.get_full_content(TEST_URL)
    if not content:
        logger.error("无法获取网页内容")
        return

    # Locate the data sheet.
    preamble_content = _extract_preamble(content)
    if preamble_content is None:
        logger.warning("未找到投标人须知前附表")
        return

    logger.info("找到投标人须知前附表")
    logger.info("前附表内容长度: %d 字符", len(preamble_content))

    # Save the extracted section for manual inspection.
    with open("preamble_content.txt", "w", encoding="utf-8") as f:
        f.write(preamble_content)
    logger.info("前附表内容已保存到 preamble_content.txt")

    # Analyze qualification and performance requirements.
    logger.info("\n分析前附表中的关键信息:")
    for requirement in _REQUIREMENT_KEYWORDS:
        _report_requirement(preamble_content, requirement)

    # Report other keywords that may be relevant.
    for keyword in _EXTRA_KEYWORDS:
        if keyword in preamble_content:
            logger.info("前附表中包含: %s", keyword)


if __name__ == "__main__":
    main()