# File: ztb/test_prompt_optimization.py (86 lines, 2.5 KiB, Python)

# -*- coding: utf-8 -*-
"""
测试优化后的提示词
"""
import logging
import sys
import os
import requests
from bs4 import BeautifulSoup
import re
# 添加当前目录到模块搜索路径
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# 导入修复后的配置
from config_fixed import DEEPSEEK_PROMPTS
# Logging setup: INFO level, each record rendered as "time - level - message".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Page used for the test run (picked because it is likely to contain
# qualification and past-performance requirements).
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/d2f95295-6cb0-40c9-8023-cdbbf7e660ae.html"
def get_content(url):
    """Download a page and flatten its relevant parts into plain text.

    The page is fetched with ``requests`` (30 s timeout), decoded as UTF-8
    and parsed with BeautifulSoup's ``html.parser``. The result is assembled
    from, in order:

    1. the text of the first ``<h1>`` element, if there is one;
    2. the non-empty text of every ``<p>`` inside the first
       ``<div class="ewb-article">``, if that container exists;
    3. an "附件:" section with one ``- <link text>: <href>`` line for every
       ``<a>`` whose ``href`` ends in ``.pdf``, ``.doc`` or ``.docx``
       (matched case-insensitively).

    Args:
        url: Address of the page to download.

    Returns:
        The collected pieces joined with newlines (an empty string when
        nothing on the page matched), or ``None`` when the request, the
        HTTP status check or the parsing failed.
    """
    try:
        response = requests.get(url, timeout=30)
        # Reject 4xx/5xx responses; otherwise the server's error page would
        # be parsed and returned as if it were the announcement.
        response.raise_for_status()
        # Force UTF-8 instead of relying on the encoding requests guesses.
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html.parser')

        content = []

        # Title
        title = soup.find('h1')
        if title:
            content.append(title.get_text(strip=True))

        # Body paragraphs; paragraphs that are empty after stripping are skipped.
        content_div = soup.find('div', class_='ewb-article')
        if content_div:
            paragraphs = (
                p.get_text(strip=True) for p in content_div.find_all('p')
            )
            content.extend(text for text in paragraphs if text)

        # Attachments: extension match ignores case so ".PDF" links count too.
        attachments = soup.find_all(
            'a', href=re.compile(r'\.(pdf|doc|docx)$', re.IGNORECASE)
        )
        if attachments:
            # The leading "\n" leaves a blank line before the section header
            # once everything is joined.
            content.append("\n附件:")
            for attachment in attachments:
                content.append(
                    f"- {attachment.get_text(strip=True)}: {attachment['href']}"
                )

        return "\n".join(content)
    except Exception as e:
        # Deliberate best-effort boundary: any failure is reported through
        # the module logger and signalled to the caller as None.
        logger.error(f"获取内容失败: {e}")
        return None
def test_prompts():
"""测试优化后的提示词"""
logger.info(f"开始测试提示词优化: {TEST_URL}")
# 获取内容
content = get_content(TEST_URL)
if not content:
logger.error("无法获取内容")
return
logger.info(f"获取到内容长度: {len(content)} 字符")
# 测试关键字段的提示词
test_fields = ["资质要求", "业绩要求"]
for field in test_fields:
logger.info(f"\n=== 测试 {field} 提示词 ===")
if field in DEEPSEEK_PROMPTS:
prompt = DEEPSEEK_PROMPTS[field]
logger.info(f"提示词长度: {len(prompt)} 字符")
logger.info(f"提示词内容预览: {prompt[:500]}...")
# 检查内容中