Initial commit: 招标信息爬虫与分析系统
This commit is contained in:
86
test_prompt_optimization.py
Normal file
86
test_prompt_optimization.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
测试优化后的提示词
|
||||
"""
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
# Put the directory containing this script at the front of the module search
# path (presumably so the local config_fixed module imported below resolves
# regardless of the working directory — confirm).
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
# 导入修复后的配置
|
||||
from config_fixed import DEEPSEEK_PROMPTS
|
||||
|
||||
# Configure logging: INFO level with a "timestamp - level - message" line
# format, then create the module-level logger.
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Test URL (chosen as a page that probably contains qualification and
# past-performance requirements).
|
||||
TEST_URL = "https://ggzy.zj.gov.cn/jyxxgk/002001/002001011/20260212/d2f95295-6cb0-40c9-8023-cdbbf7e660ae.html"
|
||||
|
||||
def get_content(url):
    """Download a web page and flatten its main content to plain text.

    The result is assembled, in order, from:

    1. the text of the first ``<h1>`` element, if any;
    2. every non-empty ``<p>`` inside ``<div class="ewb-article">``, if that
       container exists;
    3. an attachment section (header line plus one ``- text: href`` line per
       link) for every ``<a>`` whose ``href`` ends in ``.pdf``, ``.doc`` or
       ``.docx``, matched case-insensitively.

    Args:
        url: Address of the page to fetch.

    Returns:
        The collected pieces joined with newlines (an empty string when none
        of the expected elements is present), or ``None`` when the request
        fails, the server answers with a 4xx/5xx status, or parsing raises.
    """
    try:
        response = requests.get(url, timeout=30)
        # Fail on 4xx/5xx so an error page is never mistaken for real content.
        response.raise_for_status()
        # Force UTF-8 decoding instead of relying on the declared charset.
        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html.parser')

        content = []

        # Title
        title = soup.find('h1')
        if title:
            content.append(title.get_text(strip=True))

        # Body paragraphs; whitespace-only paragraphs are dropped.
        content_div = soup.find('div', class_='ewb-article')
        if content_div:
            paragraphs = (
                p.get_text(strip=True) for p in content_div.find_all('p')
            )
            content.extend(text for text in paragraphs if text)

        # Attachments; IGNORECASE so links such as "FILE.PDF" are also listed.
        attachments = soup.find_all(
            'a', href=re.compile(r'\.(pdf|doc|docx)$', re.IGNORECASE)
        )
        if attachments:
            content.append("\n附件:")
            content.extend(
                f"- {link.get_text(strip=True)}: {link['href']}"
                for link in attachments
            )

        return "\n".join(content)
    except Exception as e:
        # Best-effort helper: report the failure through the module logger and
        # signal it with None rather than propagating.
        logger.error(f"获取内容失败: {e}")
        return None
|
||||
|
||||
def test_prompts():
|
||||
"""测试优化后的提示词"""
|
||||
logger.info(f"开始测试提示词优化: {TEST_URL}")
|
||||
|
||||
# 获取内容
|
||||
content = get_content(TEST_URL)
|
||||
|
||||
if not content:
|
||||
logger.error("无法获取内容")
|
||||
return
|
||||
|
||||
logger.info(f"获取到内容长度: {len(content)} 字符")
|
||||
|
||||
# 测试关键字段的提示词
|
||||
test_fields = ["资质要求", "业绩要求"]
|
||||
|
||||
for field in test_fields:
|
||||
logger.info(f"\n=== 测试 {field} 提示词 ===")
|
||||
if field in DEEPSEEK_PROMPTS:
|
||||
prompt = DEEPSEEK_PROMPTS[field]
|
||||
logger.info(f"提示词长度: {len(prompt)} 字符")
|
||||
logger.info(f"提示词内容预览: {prompt[:500]}...")
|
||||
|
||||
# 检查内容中
|
||||
Reference in New Issue
Block a user