# -*- coding: utf-8 -*-
"""Test the optimized prompts."""
import logging
import sys
import os
import requests
from bs4 import BeautifulSoup
import re

# Add this script's directory to the module search path so that the
# local config_fixed module below can be imported.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

# Import the fixed configuration
from config_fixed import DEEPSEEK_PROMPTS

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Test URL (picked as a page likely to contain qualification and
# past-performance requirements)
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/d2f95295-6cb0-40c9-8023-cdbbf7e660ae.html"

# Matches attachment links by extension. Case-insensitive, and tolerant of a
# trailing query string or fragment (e.g. "file.PDF" or "file.docx?id=1").
ATTACHMENT_HREF_RE = re.compile(r'\.(pdf|docx?)(?:[?#].*)?$', re.IGNORECASE)


def get_content(url):
    """Download a page and return its title, body paragraphs and attachments.

    Args:
        url: Address of the page to fetch.

    Returns:
        The extracted pieces joined with newlines (an empty string when
        nothing on the page matched), or None when the HTTP request fails
        or the server answers with an error status.
    """
    try:
        response = requests.get(url, timeout=30)
        # Without this an HTTP 4xx/5xx error page would be parsed and
        # returned as if it were the real announcement.
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error("获取内容失败: %s", e)
        return None

    # The page is decoded as UTF-8 regardless of what the server declares.
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    # Collected text fragments, in document order
    content = []

    # Title
    title = soup.find('h1')
    if title:
        content.append(title.get_text(strip=True))

    # Body paragraphs; paragraphs that are empty after stripping are skipped
    content_div = soup.find('div', class_='ewb-article')
    if content_div:
        for p in content_div.find_all('p'):
            text = p.get_text(strip=True)
            if text:
                content.append(text)

    # Attachments: every matched tag has an href, because the filter is
    # applied to that attribute.
    attachments = soup.find_all('a', href=ATTACHMENT_HREF_RE)
    if attachments:
        content.append("\n附件:")
        for attachment in attachments:
            content.append(f"- {attachment.get_text(strip=True)}: {attachment['href']}")

    return "\n".join(content)


def test_prompts():
    """Test the optimized prompts."""
    logger.info(f"开始测试提示词优化: {TEST_URL}")

    # Fetch the page content
    content = get_content(TEST_URL)
    if not content:
        logger.error("无法获取内容")
        return

    logger.info(f"获取到内容长度: {len(content)} 字符")

    # Prompts of the key fields under test
    test_fields = ["资质要求", "业绩要求"]

    for field in test_fields:
        logger.info(f"\n=== 测试 {field} 提示词 ===")

        if field in DEEPSEEK_PROMPTS:
            prompt = DEEPSEEK_PROMPTS[field]
            logger.info(f"提示词长度: {len(prompt)} 字符")
            logger.info(f"提示词内容预览: {prompt[:500]}...")
            # NOTE(review): the reviewed source ends here in the middle of a
            # comment (roughly "check the content for ..."); the remainder of
            # this function was not visible and is intentionally not guessed.