86 lines
2.5 KiB
Python
86 lines
2.5 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
测试优化后的提示词
|
|
"""
|
|
import logging
|
|
import sys
|
|
import os
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
import re
|
|
|
|
# 添加当前目录到模块搜索路径
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
# 导入修复后的配置
|
|
from config_fixed import DEEPSEEK_PROMPTS
|
|
|
|
# Logging setup: INFO level, each record rendered as
# "timestamp - level - message".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Page used for the test run (picked as one that may contain qualification
# and past-performance requirements).
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/d2f95295-6cb0-40c9-8023-cdbbf7e660ae.html"
|
|
|
|
def get_content(url):
    """Fetch a web page and return its main text content.

    Downloads ``url``, decodes the body as UTF-8 and collects, in order:

    * the text of the first ``<h1>`` element, if any;
    * every non-empty ``<p>`` inside the first ``<div class="ewb-article">``;
    * an attachment list (one ``- text: href`` line per link) for anchors
      whose ``href`` ends in ``.pdf``, ``.doc`` or ``.docx`` (any letter case).

    Args:
        url: Address of the page to download.

    Returns:
        The collected pieces joined with newlines (an empty string when
        nothing matched), or ``None`` when the request fails, the server
        answers with an HTTP error status, or any other exception is raised
        while processing the page.
    """
    try:
        response = requests.get(url, timeout=30)
        # Without this check a 4xx/5xx error page would be parsed and
        # returned as if it were the real announcement.
        response.raise_for_status()
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html.parser')

        content = []

        # Title
        title = soup.find('h1')
        if title:
            content.append(title.get_text(strip=True))

        # Body paragraphs, skipping those that are empty after stripping
        content_div = soup.find('div', class_='ewb-article')
        if content_div:
            paragraphs = (
                p.get_text(strip=True) for p in content_div.find_all('p')
            )
            content.extend(text for text in paragraphs if text)

        # Attachments: match the extension case-insensitively so links such
        # as "file.PDF" are not silently dropped.
        attachments = soup.find_all(
            'a', href=re.compile(r'\.(pdf|doc|docx)$', re.IGNORECASE)
        )
        if attachments:
            content.append("\n附件:")
            for attachment in attachments:
                content.append(
                    f"- {attachment.get_text(strip=True)}: {attachment['href']}"
                )

        return "\n".join(content)
    except Exception as e:
        # Deliberately broad: callers treat None as "could not fetch", so any
        # failure is logged and reported that way. Use the module-level
        # logger, consistent with the rest of this file.
        logger.error(f"获取内容失败: {e}")
        return None
|
|
|
|
def test_prompts():
|
|
"""测试优化后的提示词"""
|
|
logger.info(f"开始测试提示词优化: {TEST_URL}")
|
|
|
|
# 获取内容
|
|
content = get_content(TEST_URL)
|
|
|
|
if not content:
|
|
logger.error("无法获取内容")
|
|
return
|
|
|
|
logger.info(f"获取到内容长度: {len(content)} 字符")
|
|
|
|
# 测试关键字段的提示词
|
|
test_fields = ["资质要求", "业绩要求"]
|
|
|
|
for field in test_fields:
|
|
logger.info(f"\n=== 测试 {field} 提示词 ===")
|
|
if field in DEEPSEEK_PROMPTS:
|
|
prompt = DEEPSEEK_PROMPTS[field]
|
|
logger.info(f"提示词长度: {len(prompt)} 字符")
|
|
logger.info(f"提示词内容预览: {prompt[:500]}...")
|
|
|
|
# 检查内容中 |