88 lines
3.2 KiB
Python
88 lines
3.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
分析投标人须知前附表的内容格式,以便优化提示词
|
|
"""
|
|
import logging
|
|
from processors.content_fetcher import ContentFetcher
|
|
|
|
# Configure logging: INFO threshold, "<time> - <level> - <message>" line layout.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger shared by everything in this script.
logger = logging.getLogger(__name__)

# Test URL; main() fetches this page and analyses its content.
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/9a7966d8-80f4-475b-897e-f7631bc64d0c.html"
|
|
|
|
# Heading that opens the bidder-instructions data sheet.
_PREAMBLE_HEADER = "投标人须知前附表"

# Headings that usually open the next major section after the data sheet.
_END_MARKERS = ("1. 总则", "投标人须知", "第一章", "第二章")

# Characters skipped after the heading before an end marker is looked for,
# so that text on the heading line itself is never taken as the end.
_END_SEARCH_OFFSET = 100

# Characters skipped past the start of a keyword before the end of its
# context line is looked for (a newline right after the keyword is ignored).
_CONTEXT_SKIP = 10


def _find_end_marker(content, marker, search_from):
    """Return the index of the first usable *marker* at or after *search_from*.

    A hit that is really the beginning of the data-sheet heading itself
    ("投标人须知" is a prefix of "投标人须知前附表") is not a section boundary,
    so such hits are skipped. Returns -1 when no usable occurrence exists.
    """
    idx = content.find(marker, search_from)
    while idx != -1 and content.startswith(_PREAMBLE_HEADER, idx):
        idx = content.find(marker, idx + len(_PREAMBLE_HEADER))
    return idx


def _extract_preamble(content):
    """Return the data-sheet portion of *content*, or None if it has no heading.

    The slice starts at the first occurrence of the heading and stops at the
    earliest end marker found after it; with no end marker it runs to the end
    of *content*.
    """
    start = content.find(_PREAMBLE_HEADER)
    if start == -1:
        return None

    search_from = start + _END_SEARCH_OFFSET
    hits = [_find_end_marker(content, marker, search_from) for marker in _END_MARKERS]
    end = min((hit for hit in hits if hit != -1), default=len(content))
    return content[start:end]


def _log_requirement(text, keyword):
    """Log whether *keyword* occurs in *text* and, if so, its surrounding line."""
    pos = text.find(keyword)
    if pos == -1:
        logger.warning(f"前附表中未找到{keyword}")
        return

    logger.info(f"前附表中包含{keyword}")
    line_end = text.find("\n", pos + _CONTEXT_SKIP)
    if line_end == -1:
        # Keyword sits on the last line without a trailing newline: use the
        # remainder of the text instead of silently logging nothing.
        line_end = len(text)
    logger.info(f"{keyword}上下文: {text[pos:line_end]}")


def main(url=None):
    """Fetch a tender page and report on its bidder-instructions data sheet.

    Steps: fetch the page content, cut out the data-sheet section, write that
    section to ``preamble_content.txt`` (relative to the current working
    directory, UTF-8), then log which qualification / track-record keywords
    the section contains.

    Args:
        url: Page to analyse. Defaults to the module-level ``TEST_URL`` when
            omitted or None.

    Returns:
        None. Results are reported through the module logger and the output
        file; the function returns early (after logging) when the page cannot
        be fetched or contains no data-sheet heading.
    """
    if url is None:
        url = TEST_URL
    logger.info(f"开始分析: {url}")

    # NOTE(review): get_full_content is assumed to return the page text as a
    # str (falsy on failure) - confirm against ContentFetcher.
    fetcher = ContentFetcher(temp_dir="temp_files")
    content = fetcher.get_full_content(url)
    if not content:
        logger.error("无法获取网页内容")
        return

    preamble_content = _extract_preamble(content)
    if preamble_content is None:
        logger.warning("未找到投标人须知前附表")
        return

    logger.info("找到投标人须知前附表")
    logger.info(f"前附表内容长度: {len(preamble_content)} 字符")

    # Keep a copy on disk so the raw section can be inspected by hand.
    with open("preamble_content.txt", "w", encoding="utf-8") as f:
        f.write(preamble_content)
    logger.info("前附表内容已保存到 preamble_content.txt")

    logger.info("\n分析前附表中的关键信息:")

    # Qualification and track-record requirements: presence plus context line.
    for keyword in ("资质要求", "业绩要求"):
        _log_requirement(preamble_content, keyword)

    # Other keywords of interest: presence only.
    for keyword in ("资格要求", "企业资质", "施工总承包", "类似工程业绩"):
        if keyword in preamble_content:
            logger.info(f"前附表中包含: {keyword}")
|
|
|
|
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|