Files
ztb/analyze_preamble.py
2026-02-13 18:15:20 +08:00

88 lines
3.2 KiB
Python

# -*- coding: utf-8 -*-
"""
分析投标人须知前附表的内容格式,以便优化提示词
"""
import logging
from processors.content_fetcher import ContentFetcher
# Page that main() fetches and analyses.
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/9a7966d8-80f4-475b-897e-f7631bc64d0c.html"

# Logging setup: INFO and above, each record rendered as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def _extract_preamble(content: str) -> "str | None":
    """Return the preamble-table section of *content*.

    The section starts at the first occurrence of the table title
    "投标人须知前附表" and runs up to the earliest end marker found at least
    100 characters after that start, or to the end of *content* when no
    marker is found.

    Args:
        content: Full text of the fetched page.

    Returns:
        The section text, or ``None`` when the title does not occur.
    """
    start_idx = content.find("投标人须知前附表")
    if start_idx == -1:
        return None

    # The marker search begins 100 characters past the title, presumably so
    # that the title itself (which contains "投标人须知") is not matched.
    search_from = start_idx + 100
    # NOTE(review): "投标人须知" is a prefix of the table title, so a repeated
    # title or a cross-reference inside the table ends the section early --
    # confirm this marker is intended.
    end_markers = ["1. 总则", "投标人须知", "第一章", "第二章"]
    hits = [
        idx
        for idx in (content.find(marker, search_from) for marker in end_markers)
        if idx != -1
    ]
    end_idx = min(hits, default=len(content))
    return content[start_idx:end_idx]


def _keyword_context(
    text: str, keyword: str, *, fallback_chars: int = 200
) -> "str | None":
    """Return the text from the first *keyword* occurrence to the end of its line.

    Args:
        text: Text to search.
        keyword: Keyword whose surrounding context is wanted.
        fallback_chars: Maximum context length used when no newline follows
            the keyword.

    Returns:
        The context string, or ``None`` when *keyword* does not occur.
    """
    start = text.find(keyword)
    if start == -1:
        return None

    # The newline search begins 10 characters past the keyword start, so a
    # line break directly after the keyword does not end the context.
    line_end = text.find("\n", start + 10)
    if line_end == -1:
        # str.find returns -1 when no newline follows (keyword on the last
        # line, or text without line breaks). Use a bounded window instead of
        # dropping the context.
        line_end = min(len(text), start + fallback_chars)
    return text[start:line_end]


def main() -> None:
    """Fetch TEST_URL and report on its bidder-instructions preamble table.

    Steps:
      1. Fetch the page content through ``ContentFetcher``.
      2. Cut out the "投标人须知前附表" section and save it to
         ``preamble_content.txt`` (UTF-8) in the current directory.
      3. Log whether the section mentions qualification and performance
         requirements, with the surrounding line of each, and which of a few
         further keywords occur.

    Returns early, after logging, when the page cannot be fetched or the
    table title is not found.
    """
    logger.info("开始分析: %s", TEST_URL)

    fetcher = ContentFetcher(temp_dir="temp_files")
    content = fetcher.get_full_content(TEST_URL)
    if not content:
        logger.error("无法获取网页内容")
        return

    preamble_content = _extract_preamble(content)
    if preamble_content is None:
        logger.warning("未找到投标人须知前附表")
        return

    logger.info("找到投标人须知前附表")
    logger.info("前附表内容长度: %d 字符", len(preamble_content))

    # Keep a copy on disk so the raw section can be inspected by hand.
    with open("preamble_content.txt", "w", encoding="utf-8") as f:
        f.write(preamble_content)
    logger.info("前附表内容已保存到 preamble_content.txt")

    logger.info("\n分析前附表中的关键信息:")

    # Qualification ("资质要求") and performance ("业绩要求") requirements are
    # reported the same way: presence, then the line they appear on.
    for label in ("资质要求", "业绩要求"):
        context = _keyword_context(preamble_content, label)
        if context is None:
            logger.warning("前附表中未找到%s", label)
            continue
        logger.info("前附表中包含%s", label)
        logger.info("%s上下文: %s", label, context)

    # Further keywords: presence only.
    for keyword in ("资格要求", "企业资质", "施工总承包", "类似工程业绩"):
        if keyword in preamble_content:
            logger.info("前附表中包含: %s", keyword)
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()