初始化医疗报告生成项目,添加核心代码文件

This commit is contained in:
2026-02-13 18:32:52 +08:00
commit faaf2158d4
69 changed files with 29836 additions and 0 deletions

View File

View File

@@ -0,0 +1,801 @@
import os
import tempfile
from pathlib import Path
from typing import List, Dict, Any
from datetime import datetime
# 导入 DeepSeek 健康内容生成服务
from services.deepseek_health_service import DeepSeekHealthService
class BatchReportService:
"""批量报告处理服务"""
def __init__(self, ocr_service, llm_service, pdf_service, template_service):
    """Store the collaborating services and prepare the scratch directory.

    Args:
        ocr_service: Used by other methods through ``extract_text(path)``.
        llm_service: Used by other methods through ``llm_type`` and its
            ``analyze_*`` methods.
        pdf_service: Used through ``generate_comprehensive_report(...)``.
        template_service: Stored only; not used by the methods visible here.
    """
    self.ocr_service, self.llm_service = ocr_service, llm_service
    self.pdf_service, self.template_service = pdf_service, template_service
    # DeepSeek-backed generator for health assessment / advice content.
    self.deepseek_health_service = DeepSeekHealthService()
    # Scratch directory for temporary files, under the system temp dir.
    scratch_dir = Path(tempfile.gettempdir()) / "medical_reports_temp"
    scratch_dir.mkdir(exist_ok=True)
    self.temp_dir = scratch_dir
def process_multiple_reports(
self,
file_paths: List[str],
patient_name: str = "患者"
) -> Dict[str, Any]:
"""
处理多个报告文件并生成综合健康报告
新流程:直接将文件传给 Coze 工作流处理
Args:
file_paths: 临时上传的多个PDF文件路径列表
patient_name: 患者姓名
Returns:
包含分析结果和生成的PDF路径的字典
"""
try:
print(f"正在处理 {len(file_paths)} 份报告...")
# 准备文件信息列表
file_infos = []
for idx, file_path in enumerate(file_paths, 1):
filename = Path(file_path).name
print(f" [{idx}/{len(file_paths)}] 准备文件: {filename}")
file_infos.append({
"filename": filename,
"filepath": file_path
})
# 调用分析(会根据 LLM 类型选择不同的处理方式)
print("正在进行综合分析...")
combined_analysis = self._analyze_with_files(file_infos)
# 使用 DeepSeek 生成健康评估和建议内容
if self.deepseek_health_service.is_available():
health_content = self.deepseek_health_service.generate_health_content(combined_analysis)
if health_content:
combined_analysis["health_assessment"] = health_content.get("health_assessment", {})
combined_analysis["health_advice"] = health_content.get("health_advice", {})
# 更新异常项DeepSeek 可能提供更详细的信息)
if health_content.get("abnormal_items"):
combined_analysis["abnormal_items_detailed"] = health_content["abnormal_items"]
else:
print("\n ⚠️ DeepSeek API Key 未配置,跳过健康评估和建议生成")
# 生成综合报告 PDF
print("\n正在生成健康报告...")
pdf_path = self._generate_comprehensive_report(
patient_name=patient_name,
reports=file_infos,
analysis=combined_analysis
)
# 清理临时文件
print("正在清理临时文件...")
self._cleanup_temp_files(file_paths)
return {
"success": True,
"patient_name": patient_name,
"report_count": len(file_paths),
"analysis": combined_analysis,
"pdf_path": pdf_path,
"generated_at": datetime.now().isoformat()
}
except Exception as e:
# 即使出错也要清理临时文件
self._cleanup_temp_files(file_paths)
raise Exception(f"批量处理失败: {str(e)}")
def _analyze_with_files(self, file_infos: List[Dict[str, str]]) -> Dict[str, Any]:
"""
综合分析流程(两阶段处理):
1. OCR 提取所有文件的文本
2. Coze 分析文本 → 返回 JSON
3. Ollama 处理 Coze JSON → 生成 Be.U 风格报告
"""
# 第1步OCR 提取文本
print(" [步骤1] OCR 提取文本...")
extracted_texts = []
for idx, file_info in enumerate(file_infos, 1):
print(f" [{idx}/{len(file_infos)}] 识别: {file_info['filename']}")
text = self.ocr_service.extract_text(file_info["filepath"])
extracted_texts.append({
"filename": file_info["filename"],
"text": text
})
# 第2-3步LLM 分析Coze → Ollama 或 纯 Ollama
print(" [步骤2-3] 综合分析与报告生成...")
return self._analyze_combined_reports(extracted_texts)
def _analyze_with_coze_files(self, file_infos: List[Dict[str, str]]) -> Dict[str, Any]:
"""
使用 Coze 文件上传 + 工作流处理
1. 上传文件到 Coze 获取 file_id
2. 分批调用工作流(每批最多 3 个文件)
3. 合并结果
"""
import requests
import json
import time
api_key = os.getenv("COZE_API_KEY")
workflow_id = os.getenv("COZE_WORKFLOW_ID")
if not api_key or not workflow_id:
raise ValueError("未配置 Coze API 所需的 COZE_API_KEY 或 COZE_WORKFLOW_ID")
# 第1步上传所有文件获取 file_id
file_ids = []
for idx, file_info in enumerate(file_infos, 1):
print(f" [{idx}/{len(file_infos)}] 上传: {file_info['filename']}")
try:
file_id = self._upload_file_to_coze(
file_path=file_info['filepath'],
api_key=api_key
)
file_ids.append({
"filename": file_info['filename'],
"file_id": file_id
})
print(f" ✓ File ID: {file_id}")
except Exception as e:
print(f" ✗ 上传失败: {e}")
raise Exception(f"文件上传失败: {file_info['filename']}, {e}")
# 第2步一次性调用工作流处理所有文件
print(f"\n [步骤2] 调用 Coze 工作流分析 {len(file_ids)} 个文件...")
# 构造请求参数input 是字符串数组,每个元素是 JSON 字符串
input_params = []
for file_data in file_ids:
# 每个元素是 JSON 字符串格式:"{\"file_id\":\"xxx\"}"
json_str = json.dumps({"file_id": file_data["file_id"]}, ensure_ascii=False)
input_params.append(json_str)
print(f" - {file_data['filename']}: {file_data['file_id']}")
# 调用工作流
try:
final_result = self._call_coze_workflow(
workflow_id=workflow_id,
api_key=api_key,
input_params=input_params
)
print(f" ✓ 工作流处理完成")
except Exception as e:
print(f" ✗ 工作流调用失败: {e}")
raise
# 保存结果缓存
try:
cache_file = Path("coze_result_cache.json")
cache_data = {
"timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
"report_count": len(file_ids),
"coze_result": final_result,
"file_ids": file_ids
}
cache_file.write_text(json.dumps(cache_data, ensure_ascii=False, indent=2), encoding='utf-8')
print(f" → Coze 结果已缓存到: {cache_file.absolute()}")
except Exception as e:
print(f" ⚠️ 缓存保存失败: {e}")
return final_result
def _upload_file_to_coze(self, file_path: str, api_key: str) -> str:
    """Upload one file to the Coze file endpoint and return its file id.

    Args:
        file_path: Path of the file to upload.
        api_key: Bearer token sent in the ``Authorization`` header.

    Returns:
        The ``id`` (or, failing that, ``file_id``) field of the response data.

    Raises:
        Exception: Non-200 HTTP status, or no file id in the response body.
    """
    import requests
    with open(file_path, 'rb') as handle:
        response = requests.post(
            "https://api.coze.cn/v1/files/upload",
            headers={"Authorization": f"Bearer {api_key}"},
            files={'file': (Path(file_path).name, handle, 'application/octet-stream')},
            timeout=60
        )
    if response.status_code != 200:
        raise Exception(f"上传失败 (HTTP {response.status_code}): {response.text}")
    data = response.json()
    # The id is only trusted when the API reports code 0 with a data payload.
    payload = data.get("data") if data.get("code") == 0 else None
    if payload:
        file_id = payload.get("id") or payload.get("file_id")
        if file_id:
            return file_id
    raise Exception(f"未能获取 file_id: {data}")
def _call_coze_workflow(self, workflow_id: str, api_key: str, input_params: List[str]) -> Dict[str, Any]:
    """Run a Coze workflow through the streaming API and parse its result.

    Args:
        workflow_id: Id of the workflow to run.
        api_key: Token used to authenticate the Coze client.
        input_params: List of JSON strings, passed as the ``input`` parameter.

    Returns:
        The decoded ``output`` field when the result is a dict containing one,
        otherwise the decoded result itself.

    Raises:
        Exception: The stream reported an error, ended without a finished
            "End" node message, or the content was not valid JSON.
    """
    import json
    import time as time_module
    from cozepy import Coze, TokenAuth, COZE_CN_BASE_URL, WorkflowEventType
    client = Coze(auth=TokenAuth(token=api_key), base_url=COZE_CN_BASE_URL)
    print(f" → 调用工作流 (file_id 数量: {len(input_params)})...")
    started_at = time_module.time()
    stream = client.workflows.stream_run(
        workflow_id=workflow_id,
        parameters={"input": input_params}
    )
    content_result = None
    for event in stream:
        if event.event == WorkflowEventType.ERROR:
            error_msg = str(event.error) if hasattr(event, 'error') else "Unknown error"
            raise Exception(f"工作流执行错误: {error_msg}")
        if event.event != WorkflowEventType.MESSAGE or not hasattr(event, 'message'):
            continue
        msg = event.message
        node_title = getattr(msg, 'node_title', None)
        node_is_finish = getattr(msg, 'node_is_finish', None)
        content = getattr(msg, 'content', None)
        # Only the finished "End" node carries the workflow's final output.
        if node_title == "End" and node_is_finish and content:
            content_result = content
            break
    elapsed = time_module.time() - started_at
    if not content_result:
        raise Exception("未获取到工作流执行结果")
    print(f" ✓ 工作流完成 (耗时: {elapsed:.1f}秒)")
    try:
        result_data = json.loads(content_result) if isinstance(content_result, str) else content_result
        if not (isinstance(result_data, dict) and "output" in result_data):
            return result_data
        # "output" may itself be a JSON-encoded string; decode it when so.
        output = result_data["output"]
        return json.loads(output) if isinstance(output, str) else output
    except json.JSONDecodeError as e:
        raise Exception(f"解析工作流结果失败: {e}")
def _call_coze_with_files(self, file_infos: List[Dict[str, str]]) -> Dict[str, Any]:
"""
直接将文件传给 Coze 工作流处理
"""
try:
import requests
import json
# 读取 Coze 配置
api_url = os.getenv("COZE_API_URL", "https://api.coze.cn/v1/workflow/run")
api_key = os.getenv("COZE_API_KEY")
workflow_id = os.getenv("COZE_WORKFLOW_ID")
max_retries = int(os.getenv("COZE_MAX_RETRIES", "3"))
if not api_key or not workflow_id:
raise ValueError("未配置 Coze API 所需的 COZE_API_KEY 或 COZE_WORKFLOW_ID")
# 准备文件数据(根据 Coze API 要求构造)
# 如果 Coze 需要文件内容,读取并转换为 base64
files_data = []
for file_info in file_infos:
filepath = file_info["filepath"]
filename = file_info["filename"]
# 读取文件内容并转换为 base64
with open(filepath, 'rb') as f:
import base64
file_content = base64.b64encode(f.read()).decode('utf-8')
files_data.append({
"filename": filename,
"content": file_content,
"type": "application/pdf" if filename.endswith('.pdf') else "image/jpeg"
})
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
# 构造请求体
payload = {
"workflow_id": workflow_id,
"parameters": {
"input": files_data # 传递文件数组
}
}
print(f" 正在调用 Coze 工作流处理 {len(files_data)} 个文件...")
last_error = None
for attempt in range(max_retries):
try:
response = requests.post(api_url, headers=headers, json=payload, timeout=300)
response.raise_for_status()
data = response.json()
# 解析 Coze 返回结果
if isinstance(data, dict) and data.get("code") == 0:
raw_data = data.get("data", {})
if isinstance(raw_data, str):
try:
raw_data = json.loads(raw_data)
except json.JSONDecodeError:
pass
output = raw_data.get("output", raw_data)
# 使用 Ollama 增强 Coze 结果
print(" [阶段2] 使用 Ollama 优化报告内容...")
final_result = self._enhance_with_ollama(output, f"处理了 {len(files_data)} 个文件")
return final_result
last_error = f"Coze API 返回非0 code: {data}"
except Exception as e:
last_error = str(e)
if attempt < max_retries - 1:
wait_time = (attempt + 1) * 3
print(f" 重试 {attempt + 1}/{max_retries}...")
import time
time.sleep(wait_time)
else:
break
raise Exception(last_error or "Coze API 调用失败")
except Exception as e:
print(f" ⚠ Coze 处理失败: {e}")
# 降级到 OCR + Ollama 方式
print(" 降级到 OCR + Ollama 处理...")
extracted_texts = []
for file_info in file_infos:
text = self.ocr_service.extract_text(file_info["filepath"])
extracted_texts.append({
"filename": file_info["filename"],
"text": text
})
return self._analyze_combined_reports(extracted_texts)
def _analyze_combined_reports(self, reports: List[Dict[str, str]]) -> Dict[str, Any]:
"""
综合分析流程(两阶段处理):
- 如果使用 Coze
阶段1: Coze 工作流处理数据 → 返回结构化 JSON
阶段2: Ollama 分析 JSON → 生成适配 Be.U 模板的专业报告
- 如果使用其他LLM
直接使用该 LLM 生成报告
"""
if self.llm_service.llm_type == "coze":
# === 两阶段处理Coze + Ollama ===
# 【阶段1】Coze 工作流分析
print(" [阶段1/2] Coze 工作流分析中...")
print(f" - 处理 {len(reports)} 份报告")
# Coze 工作流有执行超时限制,超过阈值时分批处理
BATCH_SIZE = 3 # 每批最多 3 个报告
import json
# 原始文本列表(给 Ollama 使用)
original_texts: List[str] = []
# 所有批次的 Coze 结果
all_coze_results: List[Dict[str, Any]] = []
# 分批处理
total_batches = (len(reports) + BATCH_SIZE - 1) // BATCH_SIZE
if total_batches > 1:
print(f" - 报告数量较多,将分 {total_batches} 批处理(每批 {BATCH_SIZE} 个)")
for batch_idx in range(total_batches):
start_idx = batch_idx * BATCH_SIZE
end_idx = min(start_idx + BATCH_SIZE, len(reports))
batch_reports = reports[start_idx:end_idx]
if total_batches > 1:
print(f"\n [批次 {batch_idx + 1}/{total_batches}] 处理报告 {start_idx + 1}-{end_idx}")
# 准备当前批次的数据
coze_inputs: List[Dict[str, str]] = []
for report in batch_reports:
filename = report["filename"]
text = report["text"]
# 保留一份可读的原始文本
original_text = f"【文件名】{filename}\n【内容】\n{text}"
original_texts.append(original_text)
# 构造传给 Coze 的 JSON 对象
coze_obj = {
"filename": filename,
"text": text,
}
coze_inputs.append(coze_obj)
print(f" - {filename}: {len(text)} 字符")
print(f" - 本批次元素个数: {len(coze_inputs)}")
# 保存当前批次的调试信息
if total_batches > 1:
debug_file = Path(f"debug_batch_{batch_idx + 1}.json")
else:
debug_file = Path("debug_ocr_texts.json")
try:
final_payload = {
"workflow_id": os.getenv("COZE_WORKFLOW_ID", ""),
"parameters": {
"input": coze_inputs
}
}
debug_file.write_text(json.dumps(final_payload, ensure_ascii=False, indent=2), encoding='utf-8')
print(f" → Payload 已保存: {debug_file.name}")
except Exception as e:
print(f" ⚠️ 保存调试文件失败: {e}")
# 调用 Coze 处理当前批次
print(f" → 调用 Coze 工作流...")
batch_result = self.llm_service.analyze_multiple_reports(coze_inputs)
# 检查当前批次是否成功
if batch_result.get("error"):
error_msg = batch_result.get('error')
print(f" ✗ 批次 {batch_idx + 1} 失败: {error_msg}")
raise Exception(f"Coze 工作流调用失败: {error_msg}")
print(f" ✓ 批次 {batch_idx + 1} 完成")
all_coze_results.append(batch_result)
# 合并所有批次的结果
print(f"\n ✓ 所有批次处理完成,合并结果...")
coze_result = self._merge_batch_results(all_coze_results)
# 保存 Coze 返回结果用于后续测试
try:
import time
cache_file = Path("coze_result_cache.json")
cache_data = {
"timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
"report_count": len(reports),
"coze_result": coze_result,
"original_texts": original_texts
}
cache_file.write_text(json.dumps(cache_data, ensure_ascii=False, indent=2), encoding='utf-8')
print(f" → Coze 结果已缓存到: {cache_file.absolute()}")
except Exception as e:
print(f" ⚠️ 缓存保存失败: {e}")
# 【阶段2】Ollama 优化生成
print(" [阶段2/2] Ollama 生成 Be.U 风格报告...")
print(" - 将 Coze JSON 转换为专业报告内容")
print(" - 适配 Be.U Wellness Center 模板")
# 合并原始文本供 Ollama 参考(仍然使用人类可读的文本,而不是 JSON 字符串)
combined_text = "\n\n".join(original_texts)
final_analysis = self._enhance_with_ollama(coze_result, combined_text)
print(" ✓ 综合报告生成完成")
return final_analysis
else:
# === 单阶段:直接使用当前 LLM ===
print(f" 使用 {self.llm_service.llm_type} 直接生成报告...")
print(f" - 处理 {len(reports)} 份报告")
# 合并所有报告文本
combined_text = "\n\n=== 报告分隔 ===\n\n".join([
f"【文件名】{report['filename']}\n【内容】\n{report['text']}"
for report in reports
])
analysis = self.llm_service.analyze_single_report(combined_text)
print(" ✓ 报告生成完成")
return analysis
def _merge_batch_results(self, batch_results: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
合并多个批次的 Coze 结果
"""
if len(batch_results) == 1:
return batch_results[0]
print(f" - 合并 {len(batch_results)} 个批次的结果...")
# 合并结果
merged = {
"summary": "",
"key_findings": [],
"abnormal_items": [],
"risk_assessment": "",
"recommendations": []
}
# 收集所有字段
summaries = []
risk_assessments = []
for idx, result in enumerate(batch_results, 1):
# 摘要
if result.get("summary"):
summaries.append(f"批次{idx}: {result['summary']}")
# 关键发现
if result.get("key_findings"):
merged["key_findings"].extend(result["key_findings"])
# 异常指标
if result.get("abnormal_items"):
merged["abnormal_items"].extend(result["abnormal_items"])
# 风险评估
if result.get("risk_assessment"):
risk_assessments.append(f"批次{idx}: {result['risk_assessment']}")
# 建议
if result.get("recommendations"):
merged["recommendations"].extend(result["recommendations"])
# 合并摘要和风险评估
merged["summary"] = "\n\n".join(summaries) if summaries else "未提供摘要"
merged["risk_assessment"] = "\n\n".join(risk_assessments) if risk_assessments else "未提供风险评估"
print(f" - 合并后: 关键发现 {len(merged['key_findings'])} 项, "
f"异常指标 {len(merged['abnormal_items'])} 项, "
f"建议 {len(merged['recommendations'])}")
return merged
def _enhance_with_ollama(self, coze_result: Dict[str, Any], original_text: str) -> Dict[str, Any]:
"""
使用 Ollama 分析 Coze 返回的 JSON生成适配 Be.U 模板的最终报告内容
"""
try:
import requests
import json
# 构建给 Ollama 的提示词
prompt = f"""你是一位专业的医疗报告撰写专家。现在需要基于 Coze 工作流返回的结构化数据,生成一份适合 Be.U Wellness Center 风格的功能医学健康报告。
Coze 工作流返回的数据:
{json.dumps(coze_result, ensure_ascii=False, indent=2)}
原始检测报告文本:
{original_text}
请基于以上信息生成一份专业的综合健康报告包含以下部分JSON格式
1. summary: 综合健康摘要(整体评估,语言专业且易懂)
2. key_findings: 关键发现列表(提取最重要的检测结果)
3. abnormal_items: 异常指标详情(包含 name, value, reference, level
4. risk_assessment: 健康风险评估(基于所有指标的综合分析)
5. recommendations: 个性化健康建议(具体可执行的建议)
要求:
- 语言专业但易于理解
- 突出重点和异常项
- 提供可操作的健康建议
- 使用 Be.U Wellness Center 的专业风格
- 必须返回完整的 JSON 格式
请直接返回 JSON不要有其他文字"""
# 调用 Ollama
ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434")
ollama_model = os.getenv("OLLAMA_MODEL", "qwen2.5:7b")
response = requests.post(
f"{ollama_host}/api/generate",
json={
"model": ollama_model,
"prompt": prompt,
"stream": False
},
timeout=300
)
if response.status_code == 200:
content = response.json().get("response", "")
# 解析 Ollama 返回的 JSON
return self._parse_ollama_response(content)
else:
print(f" ⚠ Ollama 调用失败,使用 Coze 原始结果")
return coze_result
except Exception as e:
print(f" ⚠ Ollama 增强失败: {e},使用 Coze 原始结果")
return coze_result
def _parse_ollama_response(self, response: str) -> Dict[str, Any]:
"""解析 Ollama 返回的 JSON"""
try:
import re
import json
# 尝试提取 JSON
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response, re.DOTALL)
if json_match:
json_str = json_match.group(1)
else:
json_match = re.search(r'\{.*\}', response, re.DOTALL)
if json_match:
json_str = json_match.group(0)
else:
json_str = response
result = json.loads(json_str)
# 确保必需字段存在
required_fields = ["summary", "key_findings", "abnormal_items", "risk_assessment", "recommendations"]
for field in required_fields:
if field not in result:
result[field] = [] if field in ["key_findings", "abnormal_items", "recommendations"] else "未提供"
return result
except Exception as e:
print(f" ⚠ JSON 解析失败: {e}")
return {
"summary": "解析失败",
"key_findings": [],
"abnormal_items": [],
"risk_assessment": "无法生成",
"recommendations": []
}
def _direct_ollama_analysis(self, combined_text: str) -> Dict[str, Any]:
"""
Coze 失败后的降级方案:直接使用 Ollama 生成完整报告
"""
try:
import requests
import json
print(" → 使用 Ollama 模型生成完整报告...")
# 构建 Ollama 提示词
prompt = f"""你是一位专业的医疗报告分析助手。请分析以下医疗报告,提供专业的综合健康评估。
医疗报告内容:
{combined_text}
请按以下 JSON 格式返回分析结果:
{{
"summary": "综合健康摘要2-3句话",
"key_findings": ["关键发现1", "关键发现2", "..."],
"abnormal_items": [
{{
"name": "指标名称",
"result": "测量值",
"reference": "参考范围",
"level": "high/low/normal"
}}
],
"risk_assessment": "健康风险评估(综合说明)",
"recommendations": ["建议1", "建议2", "..."]
}}
请严格按照 JSON 格式返回,不要添加其他说明文字。"""
# 调用 Ollama
response = requests.post(
"http://localhost:11434/api/generate",
json={
"model": "qwen2.5:7b",
"prompt": prompt,
"stream": False
},
timeout=180
)
if response.status_code == 200:
ollama_response = response.json().get("response", "")
print(f" ✓ Ollama 响应完成")
# 解析 JSON
import re
json_match = re.search(r'\{.*\}', ollama_response, re.DOTALL)
if json_match:
result = json.loads(json_match.group(0))
# 确保必需字段存在
required_fields = ["summary", "key_findings", "abnormal_items", "risk_assessment", "recommendations"]
for field in required_fields:
if field not in result:
result[field] = [] if field in ["key_findings", "abnormal_items", "recommendations"] else "未提供"
return result
else:
raise ValueError("无法解析 Ollama 返回的 JSON")
else:
raise Exception(f"Ollama API 返回错误: {response.status_code}")
except Exception as e:
print(f" ⚠️ Ollama 降级方案也失败: {e}")
return {
"summary": "由于系统问题,暂时无法生成完整分析",
"key_findings": ["OCR 文本提取完成", "分析服务暂时不可用"],
"abnormal_items": [],
"risk_assessment": "建议稍后重试或使用其他方式分析",
"recommendations": ["联系技术支持", "检查系统配置"]
}
def _generate_comprehensive_report(
self,
patient_name: str,
reports: List[Dict[str, str]],
analysis: Dict[str, Any]
) -> str:
"""生成综合健康报告 PDF"""
# 准备扩展的模板数据
template_data = {
"patient_name": patient_name,
"report_count": len(reports),
"report_list": [r["filename"] for r in reports],
"analysis": analysis,
"generation_date": datetime.now().strftime("%Y年%m月%d")
}
# 生成 PDF使用增强的模板
pdf_path = self.pdf_service.generate_comprehensive_report(
patient_name=patient_name,
template_data=template_data
)
return pdf_path
def _cleanup_temp_files(self, file_paths: List[str]):
"""清理临时文件"""
for file_path in file_paths:
try:
if os.path.exists(file_path):
os.remove(file_path)
print(f" ✓ 已删除临时文件: {Path(file_path).name}")
except Exception as e:
print(f" ⚠ 删除临时文件失败 {Path(file_path).name}: {e}")

View File

@@ -0,0 +1,135 @@
import json
import os
from pathlib import Path
from typing import Dict, Any, Optional
from datetime import datetime
import threading
class DataStore:
    """Persist report records to a JSON file, mirrored by an in-memory dict."""

    def __init__(self, storage_dir: str = "data"):
        """Create the storage directory if needed and load existing data.

        Args:
            storage_dir: Directory holding ``reports_data.json``. Missing
                parent directories are created as well.
        """
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        self.data_file = self.storage_dir / "reports_data.json"
        # In-memory copy of the file content: record id -> record.
        self._cache: Dict[str, Any] = {}
        # Re-entrant because mutators hold it while calling _save_data,
        # which acquires it again.
        self._lock = threading.RLock()
        self._load_data()

    def _load_data(self):
        """Load the cache from the data file; fall back to empty on any error."""
        if not self.data_file.exists():
            print("✓ 数据文件不存在,将创建新文件")
            self._cache = {}
            return
        try:
            with open(self.data_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
            # Every other method treats the cache as a dict.
            if not isinstance(loaded, dict):
                raise ValueError("数据文件顶层不是 JSON 对象")
            self._cache = loaded
            print(f"✓ 成功加载 {len(self._cache)} 份报告数据")
        except Exception as e:
            print(f"⚠ 加载数据失败: {e},将使用空数据")
            self._cache = {}

    def _save_data(self):
        """Write the cache to disk; failures are printed, not raised."""
        try:
            with self._lock:
                # Write to a temp file first so an interrupted write cannot
                # corrupt the existing data file, then swap it in.
                temp_file = self.data_file.with_suffix('.json.tmp')
                with open(temp_file, 'w', encoding='utf-8') as f:
                    json.dump(self._cache, f, ensure_ascii=False, indent=2)
                temp_file.replace(self.data_file)
        except Exception as e:
            print(f"⚠ 保存数据失败: {e}")

    def get_all(self) -> Dict[str, Any]:
        """Return a shallow copy of all records (the records are shared)."""
        with self._lock:
            return self._cache.copy()

    def get(self, file_id: str) -> Optional[Dict[str, Any]]:
        """Return the record for ``file_id``, or ``None`` when absent."""
        return self._cache.get(file_id)

    def set(self, file_id: str, data: Dict[str, Any]) -> None:
        """Create or replace the record for ``file_id`` and persist."""
        with self._lock:
            self._cache[file_id] = data
            self._save_data()

    def update(self, file_id: str, updates: Dict[str, Any]) -> None:
        """Merge ``updates`` into an existing record and persist.

        Raises:
            KeyError: No record exists for ``file_id``.
        """
        with self._lock:
            if file_id not in self._cache:
                raise KeyError(f"报告 {file_id} 不存在")
            self._cache[file_id].update(updates)
            self._save_data()

    def delete(self, file_id: str) -> None:
        """Remove the record for ``file_id`` if present and persist."""
        with self._lock:
            if file_id in self._cache:
                del self._cache[file_id]
                self._save_data()

    def exists(self, file_id: str) -> bool:
        """Return whether a record exists for ``file_id``."""
        return file_id in self._cache

    def count(self) -> int:
        """Return the number of stored records."""
        return len(self._cache)

    def cleanup_orphaned_files(self, upload_dir: Path) -> int:
        """Drop records whose ``filepath`` no longer exists on disk.

        Args:
            upload_dir: Not used by this method; kept so existing callers
                keep working.

        Returns:
            Number of records removed.
        """
        with self._lock:
            orphaned_ids = [
                file_id
                for file_id, report in self._cache.items()
                if report.get('filepath') and not os.path.exists(report['filepath'])
            ]
            for file_id in orphaned_ids:
                del self._cache[file_id]
            cleaned = len(orphaned_ids)
            if cleaned > 0:
                # One write for the whole sweep instead of one per record.
                self._save_data()
                print(f"✓ 清理了 {cleaned} 条孤立记录")
        return cleaned

    def export_backup(self, backup_path: Optional[str] = None) -> str:
        """Write all records to a backup JSON file.

        Args:
            backup_path: Target file; defaults to a timestamped
                ``backup_*.json`` inside the storage directory.

        Returns:
            The backup file path as a string.
        """
        if backup_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_path = self.storage_dir / f"backup_{timestamp}.json"
        else:
            backup_path = Path(backup_path)
        with self._lock:
            with open(backup_path, 'w', encoding='utf-8') as f:
                json.dump(self._cache, f, ensure_ascii=False, indent=2)
        print(f"✓ 数据已备份到: {backup_path}")
        return str(backup_path)

    def import_backup(self, backup_path: str) -> None:
        """Merge the records of a backup file into the store and persist.

        Raises:
            FileNotFoundError: The backup file does not exist.
            ValueError: The backup's top-level JSON value is not an object.
        """
        backup_path = Path(backup_path)
        if not backup_path.exists():
            raise FileNotFoundError(f"备份文件不存在: {backup_path}")
        with open(backup_path, 'r', encoding='utf-8') as f:
            imported_data = json.load(f)
        if not isinstance(imported_data, dict):
            raise ValueError(f"备份文件格式无效: {backup_path}")
        with self._lock:
            self._cache.update(imported_data)
            self._save_data()
        print(f"✓ 已从备份恢复 {len(imported_data)} 份报告")

View File

@@ -0,0 +1,480 @@
"""
DeepSeek 健康评估与建议生成服务
用于生成"整体健康状况"和"功能性健康建议"内容
优化:优先使用模板中已有的项目解释,只有模板中没有的项目才调用 DeepSeek 生成
"""
import os
import json
import requests
from pathlib import Path
from typing import List, Dict, Any
class DeepSeekHealthService:
"""DeepSeek 健康内容生成服务"""
def __init__(self, api_key: str = None):
self.api_key = api_key or os.getenv("DEEPSEEK_API_KEY", "")
self.api_url = "https://api.deepseek.com/v1/chat/completions"
# 加载模板中的解释
self.template_explanations = self._load_template_explanations()
def _load_template_explanations(self) -> Dict[str, Dict[str, str]]:
"""加载模板中已有的项目解释"""
explanations_file = Path(__file__).parent.parent / "template_explanations.json"
if explanations_file.exists():
try:
with open(explanations_file, 'r', encoding='utf-8') as f:
explanations = json.load(f)
print(f" ✓ 已加载 {len(explanations)} 个模板解释")
return explanations
except Exception as e:
print(f" ⚠️ 加载模板解释失败: {e}")
return {}
def get_template_explanation(self, abb: str) -> Dict[str, str]:
"""
获取模板中的项目解释
Args:
abb: 项目缩写
Returns:
{"clinical_en": "...", "clinical_cn": "..."} 或空字典
"""
# 尝试多种匹配方式
abb_upper = abb.upper().strip()
# 直接匹配
if abb_upper in self.template_explanations:
return self.template_explanations[abb_upper]
# 去除特殊字符后匹配
abb_clean = ''.join(c for c in abb_upper if c.isalnum())
for key, value in self.template_explanations.items():
key_clean = ''.join(c for c in key if c.isalnum())
if abb_clean == key_clean:
return value
return {}
def is_available(self) -> bool:
"""检查服务是否可用"""
return bool(self.api_key)
def call_deepseek(self, prompt: str) -> str:
"""调用 DeepSeek API"""
if not self.api_key:
raise ValueError("未配置 DEEPSEEK_API_KEY")
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
}
data = {
"model": "deepseek-chat",
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.1,
"max_tokens": 8000
}
response = requests.post(
self.api_url,
headers=headers,
json=data,
timeout=120
)
response.raise_for_status()
return response.json()["choices"][0]["message"]["content"]
def collect_abnormal_items(self, analysis: Dict[str, Any]) -> List[Dict[str, str]]:
    """Normalise the ``abnormal_items`` of an analysis result.

    Dict entries are mapped onto a fixed set of keys. Plain strings become
    entries that carry only a name. Anything else is skipped.

    Args:
        analysis: LLM analysis result; ``abnormal_items`` may be missing or
            ``None`` (both are treated as "no items").

    Returns:
        List of dicts with ``name``, ``abb``, ``result``, ``reference``,
        ``unit``, ``level`` and ``point``.
    """
    abnormal_items = []
    # `or []` covers an explicit null from the LLM's JSON as well as a
    # missing key.
    raw_items = analysis.get("abnormal_items") or []
    for item in raw_items:
        if isinstance(item, dict):
            abnormal_items.append({
                "name": item.get("name", ""),
                # Fall back to the name when no abbreviation is given.
                "abb": item.get("abb", item.get("name", "")),
                # Some producers use "value" instead of "result".
                "result": str(item.get("result", item.get("value", ""))),
                "reference": item.get("reference", ""),
                "unit": item.get("unit", ""),
                "level": item.get("level", ""),
                # NOTE(review): all three branches yield "" here; presumably
                # direction markers were intended — confirm against the
                # original file.
                "point": "" if item.get("level") == "high" else ("" if item.get("level") == "low" else "")
            })
        elif isinstance(item, str):
            abnormal_items.append({
                "name": item,
                "abb": "",
                "result": "",
                "reference": "",
                "unit": "",
                "level": "",
                "point": ""
            })
    return abnormal_items
def get_item_explanations(self, abnormal_items: List[Dict[str, str]]) -> Dict[str, Dict[str, str]]:
"""
为异常项获取解释(优先使用模板中的解释,缺失的才调用 DeepSeek 生成)
Args:
abnormal_items: 异常项列表
Returns:
{
"ABB": {"clinical_en": "...", "clinical_cn": "..."},
...
}
"""
explanations = {}
items_need_generation = []
print("\n 📋 检查模板中的项目解释...")
for item in abnormal_items:
abb = item.get("abb", "").upper().strip()
name = item.get("name", "")
if not abb:
continue
# 尝试从模板获取解释
template_exp = self.get_template_explanation(abb)
if template_exp and template_exp.get("clinical_en") and template_exp.get("clinical_cn"):
explanations[abb] = template_exp
print(f"{abb}: 使用模板解释")
else:
items_need_generation.append(item)
print(f"{abb}: 需要生成解释")
# 如果有需要生成的项目,调用 DeepSeek
if items_need_generation and self.api_key:
print(f"\n 🤖 调用 DeepSeek 为 {len(items_need_generation)} 个项目生成解释...")
generated = self._generate_missing_explanations(items_need_generation)
explanations.update(generated)
return explanations
def _generate_missing_explanations(self, items: List[Dict[str, str]]) -> Dict[str, Dict[str, str]]:
"""
调用 DeepSeek 为缺失解释的项目生成临床意义
Args:
items: 需要生成解释的项目列表
Returns:
生成的解释字典
"""
if not items:
return {}
# 构建项目描述
items_desc = []
for item in items:
desc = f"- {item['abb']}: {item['name']}"
if item.get('result'):
desc += f", 结果: {item['result']}"
if item.get('unit'):
desc += f" {item['unit']}"
if item.get('reference'):
desc += f", 参考范围: {item['reference']}"
items_desc.append(desc)
prompt = f"""你是一位医学检验专家,请为以下医疗检测项目生成临床意义解释。
## 需要解释的项目:
{chr(10).join(items_desc)}
## 要求:
1. 为每个项目提供英文和中文的临床意义解释
2. 解释应包含:该指标的作用、正常范围的意义、异常时可能的原因
3. 语言专业但易于理解
4. 每个解释约50-100字
## 输出格式JSON
```json
{{
"ABB1": {{
"clinical_en": "English clinical significance...",
"clinical_cn": "中文临床意义..."
}},
"ABB2": {{
"clinical_en": "...",
"clinical_cn": "..."
}}
}}
```
只返回JSON不要其他说明。"""
try:
response = self.call_deepseek(prompt)
# 解析 JSON
if "```json" in response:
response = response.split("```json")[1].split("```")[0]
elif "```" in response:
response = response.split("```")[1].split("```")[0]
result = json.loads(response.strip())
print(f" ✓ 成功生成 {len(result)} 个项目的解释")
return result
except Exception as e:
print(f" ✗ 生成解释失败: {e}")
return {}
def generate_health_assessment(self, abnormal_items: List[Dict[str, str]]) -> Dict[str, Any]:
"""
生成"整体健康状况"评估内容
Args:
abnormal_items: 异常项列表
Returns:
包含多个小节的健康评估内容
"""
if not self.api_key or not abnormal_items:
return {"sections": []}
# 构建异常项描述
abnormal_desc = []
for item in abnormal_items:
direction = "偏高" if item.get("point") in ["", "H", ""] or item.get("level") == "high" else "偏低"
desc = f"- {item['name']}"
if item.get('abb'):
desc += f" ({item['abb']})"
desc += f": {item['result']}"
if item.get('unit'):
desc += f" {item['unit']}"
desc += f" ({direction}"
if item.get('reference'):
desc += f", 参考范围: {item['reference']}"
desc += ")"
abnormal_desc.append(desc)
prompt = f"""你是一位功能医学专家,请根据以下所有异常检测指标,撰写"整体健康状况评估"的内容。
## 异常指标:
{chr(10).join(abnormal_desc)}
## 要求:
1. 根据异常指标的类型,自动分成合适的小节(如血液学、内分泌、免疫、代谢等,根据实际异常项决定)
2. 每个小节包含英文和中文两个版本
3. 从功能医学和整体健康角度分析
4. 解释可能的原因和健康影响
5. 语言专业但易于理解
6. 每个小节的每个语言版本约150-250字
## 输出格式JSON
```json
{{
"sections": [
{{
"title_en": "(I) Section Title in English",
"title_cn": "(一)中文小节标题",
"content_en": "English analysis content...",
"content_cn": "中文分析内容..."
}}
]
}}
```
只返回JSON不要其他说明。根据实际异常项情况决定分几个小节不要硬套固定模板。"""
try:
response = self.call_deepseek(prompt)
# 解析 JSON
if "```json" in response:
response = response.split("```json")[1].split("```")[0]
elif "```" in response:
response = response.split("```")[1].split("```")[0]
result = json.loads(response.strip())
print(f" ✓ 生成健康评估内容,共 {len(result.get('sections', []))} 个小节")
return result
except Exception as e:
print(f" ✗ 生成健康评估内容失败: {e}")
return {"sections": []}
def generate_health_advice(self, abnormal_items: List[Dict[str, str]]) -> Dict[str, Any]:
"""
生成"功能性健康建议"内容
Args:
abnormal_items: 异常项列表
Returns:
包含5个固定小节的健康建议内容
"""
if not self.api_key or not abnormal_items:
return {"sections": []}
# 异常项描述
abnormal_desc = []
for item in abnormal_items:
direction = "偏高" if item.get("point") in ["", "H", ""] or item.get("level") == "high" else "偏低"
desc = f"- {item['name']}"
if item.get('abb'):
desc += f" ({item['abb']})"
desc += f": {item['result']}"
if item.get('unit'):
desc += f" {item['unit']}"
desc += f" ({direction})"
abnormal_desc.append(desc)
prompt = f"""你是一位功能医学专家,请根据以下异常检测指标,撰写"功能医学健康建议"的内容。
## 异常指标:
{chr(10).join(abnormal_desc)}
## 要求:
1. 必须包含以下5个固定小节按顺序
- Nutrition Intervention 营养干预
- Exercise Intervention 运动干预
- Sleep & Stress Management 睡眠与压力管理
- Lifestyle Adjustment 生活方式调整
- Long-term Follow-up Plan 长期随访计划
2. 每个小节针对这些异常指标提供具体、可执行的建议
3. 从功能医学角度出发,强调预防和整体调理
4. 每个小节包含3-5条具体建议措施
5. 语言专业但易于理解
6. 分别提供英文和中文版本
7. 每个小节的每个语言版本约200-300字
## 输出格式JSON
```json
{{
"sections": [
{{
"title_en": "Nutrition Intervention",
"title_cn": "营养干预",
"content_en": "English nutrition advice...",
"content_cn": "中文营养建议..."
}},
{{
"title_en": "Exercise Intervention",
"title_cn": "运动干预",
"content_en": "...",
"content_cn": "..."
}},
{{
"title_en": "Sleep & Stress Management",
"title_cn": "睡眠与压力管理",
"content_en": "...",
"content_cn": "..."
}},
{{
"title_en": "Lifestyle Adjustment",
"title_cn": "生活方式调整",
"content_en": "...",
"content_cn": "..."
}},
{{
"title_en": "Long-term Follow-up Plan",
"title_cn": "长期随访计划",
"content_en": "...",
"content_cn": "..."
}}
]
}}
```
只返回JSON不要其他说明。"""
try:
response = self.call_deepseek(prompt)
# 解析 JSON
if "```json" in response:
response = response.split("```json")[1].split("```")[0]
elif "```" in response:
response = response.split("```")[1].split("```")[0]
result = json.loads(response.strip())
print(f" ✓ 生成健康建议内容,共 {len(result.get('sections', []))} 个小节")
return result
except Exception as e:
print(f" ✗ 生成健康建议内容失败: {e}")
return {"sections": []}
def generate_health_content(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
    """
    Produce the full health-assessment / health-advice payload for an analysis.

    Abnormal items are collected from ``analysis``; their explanations come
    from ``get_item_explanations``. The template-vs-generated statistic that
    is printed is derived by probing ``get_template_explanation`` for every
    key of the returned explanations.

    Args:
        analysis: LLM analysis result to scan for abnormal items.

    Returns:
        ``{}`` when the service is unavailable or no abnormal item is found;
        otherwise a dict with the keys ``health_assessment``,
        ``health_advice``, ``abnormal_items`` and ``item_explanations``.
    """
    if not self.is_available():
        print(" ⚠️ DeepSeek API Key 未配置,跳过健康内容生成")
        return {}

    print("\n============================================================")
    print("DeepSeek 健康内容生成")
    print("============================================================")

    # Step 1: collect the abnormal items; nothing to generate without them.
    print("\n 📝 正在收集异常项...")
    abnormal_items = self.collect_abnormal_items(analysis)
    if not abnormal_items:
        print(" 没有检测到异常项目,跳过内容生成")
        return {}

    total = len(abnormal_items)
    print(f" 发现 {total} 个异常项目:")
    # Only the first ten items are listed to keep the console output short.
    for entry in abnormal_items[:10]:
        flagged_high = entry.get("level") == "high" or entry.get("point") == ""
        # NOTE(review): both branches yield an empty string as written;
        # direction markers may have been intended here - confirm.
        if flagged_high:
            direction = ""
        else:
            direction = ""
        print(f" - {entry['name']}: {entry['result']} {direction}")
    if total > 10:
        print(f" ... 等共 {total}")

    # Step 2: explanations for each abnormal item.
    item_explanations = self.get_item_explanations(abnormal_items)

    # Step 3: report how many explanation keys have a template entry.
    from_template = [
        key for key in item_explanations if self.get_template_explanation(key)
    ]
    template_count = len(from_template)
    generated_count = len(item_explanations) - template_count
    print(f"\n 📊 解释来源统计: 模板 {template_count} 个, DeepSeek生成 {generated_count}")

    # Step 4: overall health assessment.
    print("\n 🤖 正在调用 DeepSeek 生成整体健康状况...")
    health_assessment = self.generate_health_assessment(abnormal_items)

    # Step 5: functional health advice.
    print("\n 🤖 正在调用 DeepSeek 生成功能性健康建议...")
    health_advice = self.generate_health_advice(abnormal_items)

    print("\n ✓ 健康内容生成完成")
    payload = {
        "health_assessment": health_assessment,
        "health_advice": health_advice,
        "abnormal_items": abnormal_items,
        "item_explanations": item_explanations,  # explanation per item
    }
    return payload

View File

@@ -0,0 +1,506 @@
import os
import json
from typing import Dict, Any, List
class LLMService:
    """Facade over several LLM backends used to analyse medical report text.

    The backend (``deepseek``, ``openai``, ``coze``, ``ollama`` or ``mock``)
    is selected once in ``__init__`` from environment variables. When a
    backend cannot be initialised the service falls back to ``mock`` instead
    of raising. All ``_call_*`` methods return a dict that always carries the
    keys listed in ``_REQUIRED_FIELDS``; failures are reported through an
    additional ``error`` key rather than an exception.
    """

    # Keys every analysis result carries, in the order they are filled in.
    _REQUIRED_FIELDS = (
        "summary",
        "key_findings",
        "abnormal_items",
        "risk_assessment",
        "recommendations",
    )
    # Subset of the required keys whose default value is an empty list.
    _LIST_FIELDS = ("key_findings", "abnormal_items", "recommendations")

    def __init__(self):
        self.llm_type = self._detect_llm_type()
        self._initialize_llm()

    # ------------------------------------------------------------------
    # Backend selection / initialisation
    # ------------------------------------------------------------------
    def _detect_llm_type(self) -> str:
        """Return the name of the first configured backend.

        Priority: DeepSeek (unless ``USE_DEEPSEEK_LLM`` is not ``true``),
        OpenAI, Coze, Ollama (env var or a reachable local server), mock.
        """
        if os.getenv("DEEPSEEK_API_KEY") and os.getenv("USE_DEEPSEEK_LLM", "true").lower() == "true":
            return "deepseek"
        if os.getenv("OPENAI_API_KEY"):
            return "openai"
        if os.getenv("COZE_API_KEY") and os.getenv("COZE_WORKFLOW_ID"):
            return "coze"
        if os.getenv("OLLAMA_HOST") or self._check_ollama_available():
            return "ollama"
        return "mock"

    def _check_ollama_available(self) -> bool:
        """Return True when a local Ollama server answers on port 11434."""
        try:
            import requests
            response = requests.get("http://localhost:11434/api/tags", timeout=2)
            return response.status_code == 200
        except Exception:
            # Missing ``requests`` or any network failure simply means "no".
            return False

    def _initialize_llm(self):
        """Read the configuration of the selected backend into attributes.

        On any initialisation failure ``self.llm_type`` is reset to ``mock``.
        """
        if self.llm_type == "deepseek":
            self.deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")
            # Strip a trailing slash so a base such as "https://host/" does
            # not produce "https://host//v1/...".
            api_base = os.getenv("DEEPSEEK_API_BASE", "https://api.deepseek.com").rstrip("/")
            self.deepseek_api_url = api_base + "/v1/chat/completions"
            self.model = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
            print(f"✓ 使用 DeepSeek API (模型: {self.model})")
        elif self.llm_type == "openai":
            try:
                from openai import OpenAI
                api_key = os.getenv("OPENAI_API_KEY")
                # Optional custom endpoint.
                api_base = os.getenv("OPENAI_API_BASE")
                if api_base:
                    self.client = OpenAI(api_key=api_key, base_url=api_base)
                else:
                    self.client = OpenAI(api_key=api_key)
                self.model = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
                print(f"✓ 使用 OpenAI API (模型: {self.model})")
            except Exception as e:
                print(f"⚠ OpenAI 初始化失败: {e}")
                self.llm_type = "mock"
        elif self.llm_type == "ollama":
            try:
                # Imported only to verify that the dependency is installed.
                import requests  # noqa: F401
                self.ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434")
                # Default model; override with the OLLAMA_MODEL variable.
                self.model = os.getenv("OLLAMA_MODEL", "qwen2.5:7b")
                print(f"✓ 使用 Ollama (模型: {self.model})")
            except Exception as e:
                print(f"⚠ Ollama 初始化失败: {e}")
                self.llm_type = "mock"
        elif self.llm_type == "coze":
            # Coze workflow settings are supplied through the environment.
            self.coze_api_url = os.getenv("COZE_API_URL", "https://api.coze.cn/v1/workflow/run")
            self.coze_api_key = os.getenv("COZE_API_KEY")
            self.coze_workflow_id = os.getenv("COZE_WORKFLOW_ID")
            if self.coze_api_key and self.coze_workflow_id:
                print("✓ 使用 Coze 工作流作为LLM")
            else:
                print("⚠ Coze 初始化失败: COZE_API_KEY 或 COZE_WORKFLOW_ID 未配置")
                self.llm_type = "mock"

        if self.llm_type == "mock":
            print("✓ 使用模拟LLM模式用于演示")

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def analyze_single_report(self, report_text: str) -> Dict[str, Any]:
        """Analyse one report text with the active backend.

        Args:
            report_text: Extracted text of a single medical report.

        Returns:
            Analysis dict (see the class docstring for the guaranteed keys).
        """
        prompt = f"""请分析以下医疗报告,提取关键信息:
{report_text}
请提供:
1. 摘要
2. 关键发现
3. 异常指标
4. 风险评估
5. 建议
以JSON格式返回结果。
"""
        if self.llm_type == "deepseek":
            return self._call_deepseek(prompt)
        if self.llm_type == "openai":
            return self._call_openai(prompt)
        if self.llm_type == "ollama":
            return self._call_ollama(prompt)
        if self.llm_type == "coze":
            # Coze receives the raw report text; the workflow itself does the
            # parsing, so the prompt above is not used on this path.
            print(f" → 准备调用 Coze 工作流...")
            coze_input = [{
                "filename": "single_report",
                "text": report_text,
            }]
            result = self._call_coze(coze_input)  # single report, still a list
            print(f" ← Coze 调用返回")
            return result
        return self._mock_analysis(report_text)

    def analyze_multiple_reports(self, report_texts: List[str]) -> Dict[str, Any]:
        """Analyse several reports at once.

        With Coze the list is passed to the workflow unchanged; every other
        backend receives the texts joined by blank lines as one report.

        Args:
            report_texts: One entry per source document.
        """
        if self.llm_type == "coze":
            print(f" → 准备调用 Coze 工作流(传入 {len(report_texts)} 个报告)...")
            result = self._call_coze(report_texts)
            print(f" ← Coze 调用返回")
            return result
        combined = "\n\n".join(report_texts)
        return self.analyze_single_report(combined)

    # ------------------------------------------------------------------
    # Shared result helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _error_result(message: str) -> Dict[str, Any]:
        """Build the uniform failure payload carrying ``message`` as ``error``."""
        return {
            "error": message,
            "summary": "分析失败",
            "key_findings": [],
            "abnormal_items": [],
            "risk_assessment": "无法评估",
            "recommendations": []
        }

    @classmethod
    def _fill_required_fields(cls, result: Dict[str, Any]) -> Dict[str, Any]:
        """Add any missing required key to ``result`` (in place) and return it."""
        for field in cls._REQUIRED_FIELDS:
            if field not in result:
                result[field] = [] if field in cls._LIST_FIELDS else "未提供"
        return result

    # ------------------------------------------------------------------
    # Backend calls
    # ------------------------------------------------------------------
    def _call_openai(self, prompt: str) -> Dict[str, Any]:
        """Send ``prompt`` to the OpenAI chat completions API."""
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "你是一位专业的医疗报告分析助手。"},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.7,
                max_tokens=2000
            )
            content = response.choices[0].message.content
            return self._parse_llm_response(content)
        except Exception as e:
            return self._error_result(f"OpenAI API 调用失败: {str(e)}")

    def _call_deepseek(self, prompt: str) -> Dict[str, Any]:
        """Send ``prompt`` to the DeepSeek chat completions endpoint."""
        try:
            import requests
            headers = {
                "Authorization": f"Bearer {self.deepseek_api_key}",
                "Content-Type": "application/json"
            }
            data = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": "你是一位专业的医疗报告分析助手。请以JSON格式返回分析结果。"},
                    {"role": "user", "content": prompt}
                ],
                "temperature": 0.3,
                "max_tokens": 4000
            }
            response = requests.post(
                self.deepseek_api_url,
                headers=headers,
                json=data,
                timeout=120
            )
            if response.status_code != 200:
                raise RuntimeError(f"DeepSeek 返回错误: {response.status_code} - {response.text}")
            content = response.json()["choices"][0]["message"]["content"]
            return self._parse_llm_response(content)
        except Exception as e:
            return self._error_result(f"DeepSeek API 调用失败: {str(e)}")

    def _call_coze(self, report_texts: List[str]) -> Dict[str, Any]:
        """Run the Coze workflow in streaming mode, retrying on failure.

        Args:
            report_texts: Workflow input. Entries are plain strings or dicts
                with a ``text`` key (both shapes are passed by callers in
                this class).

        Returns:
            The normalised analysis dict, or the uniform error payload once
            ``COZE_MAX_RETRIES`` attempts (default 3) have failed.
        """
        try:
            import time
            from cozepy import Coze, TokenAuth, COZE_CN_BASE_URL, WorkflowEventType

            api_key = getattr(self, "coze_api_key", os.getenv("COZE_API_KEY"))
            workflow_id = getattr(self, "coze_workflow_id", os.getenv("COZE_WORKFLOW_ID"))
            max_retries = int(os.getenv("COZE_MAX_RETRIES", "3"))
            if not api_key or not workflow_id:
                raise ValueError("未配置 Coze API 所需的 COZE_API_KEY 或 COZE_WORKFLOW_ID")

            print(f" → 调用 Coze 工作流(流式模式)...")
            print(f" → Workflow ID: {workflow_id}")
            print(f" → 数组元素个数: {len(report_texts)}")
            total_chars = sum(self._coze_item_length(item) for item in report_texts)
            print(f" → 总文本长度: {total_chars} 字符")
            print(f" → 请求发送时间: {time.strftime('%H:%M:%S')}")

            coze = Coze(auth=TokenAuth(token=api_key), base_url=COZE_CN_BASE_URL)
            # Measured from before the first attempt, so it includes retries.
            start = time.time()
            last_error = None
            for attempt in range(max_retries):
                try:
                    if attempt > 0:
                        print(f" → 重试 {attempt}/{max_retries - 1}...")
                    content_result = self._stream_coze_result(
                        coze, workflow_id, report_texts, WorkflowEventType
                    )
                    elapsed = time.time() - start
                    print(f" → 收到完整结果 (耗时: {elapsed:.1f}秒)")
                    print(f" → 结果数据: {content_result[:200]}...")
                    return self._interpret_coze_content(content_result)
                except Exception as e:  # any request failure, timeouts included
                    last_error = str(e)
                    print(f" ✗ Coze API 调用失败: {last_error}")
                    if attempt < max_retries - 1:
                        # Simple linear back-off: 3s, 6s, ...
                        wait_time = (attempt + 1) * 3
                        print(f" → 等待 {wait_time} 秒后重试...")
                        time.sleep(wait_time)
                    else:
                        print(f" ✗✗ 已达最大重试次数,放弃调用")
                        break
            print(f" ✗✗ Coze 工作流调用最终失败: {last_error}")
            raise Exception(last_error or "Coze API 调用失败")
        except Exception as e:
            return self._error_result(f"Coze API 调用失败: {str(e)}")

    @staticmethod
    def _coze_item_length(item) -> int:
        """Return the text length of one workflow input entry (0 if unknown)."""
        if isinstance(item, str):
            return len(item)
        if isinstance(item, dict):
            text_value = item.get("text")
            if isinstance(text_value, str):
                return len(text_value)
        return 0

    def _stream_coze_result(self, coze, workflow_id, report_texts, event_types):
        """Consume one workflow event stream and return the End node content.

        Raises:
            Exception: on an ERROR or INTERRUPT event, or when the stream
                finishes without a finished "End" node carrying content.
        """
        stream = coze.workflows.runs.stream(
            workflow_id=workflow_id,
            parameters={"input": report_texts}
        )
        print(f" ✓ 已连接到流式接口,等待执行...")
        content_result = None
        for event_count, event in enumerate(stream, 1):
            print(f" [事件 {event_count}] 类型: {event.event}")
            if event.event == event_types.MESSAGE:
                if hasattr(event, 'message') and event.message:
                    msg = event.message
                    node_type = getattr(msg, 'node_type', None)
                    node_title = getattr(msg, 'node_title', None)
                    node_is_finish = getattr(msg, 'node_is_finish', None)
                    content = getattr(msg, 'content', None)
                    print(f" 节点标题: {node_title}")
                    print(f" 节点类型: {node_type}")
                    print(f" 是否完成: {node_is_finish}")
                    print(f" 内容长度: {len(content) if content else 0}")
                    if node_title:
                        print(f" ⏳ 执行节点: {node_title} (类型: {node_type})")
                    # The end node is recognised by its title.
                    if node_title == "End" and node_is_finish and content:
                        print(f" ✓ 工作流执行完成,获取到结果")
                        content_result = content
                        break
            elif event.event == event_types.ERROR:
                error_msg = str(event.error) if hasattr(event, 'error') else "Unknown error"
                print(f" ✗ 错误事件: {error_msg}")
                raise Exception(f"工作流执行错误: {error_msg}")
            elif event.event == event_types.INTERRUPT:
                print(f" ⚠️ 工作流需要交互,暂不支持")
                raise Exception("工作流需要人工交互,当前不支持")
        if not content_result:
            raise Exception("未获取到工作流执行结果")
        return content_result

    def _interpret_coze_content(self, content_result) -> Dict[str, Any]:
        """Turn the End node content into an analysis dict.

        The content is usually a JSON object such as
        ``{"output": "```json\\n{...}\\n```"}``; the ``output`` value may be a
        fenced JSON string, a bare JSON string or already structured data.
        A JSON payload that is not an object is treated like plain text
        instead of raising.
        """
        import re

        output = content_result
        if isinstance(content_result, str):
            try:
                content_json = json.loads(content_result)
            except json.JSONDecodeError:
                content_json = None
            if isinstance(content_json, dict):
                output = content_json.get("output", content_result)
            # Unwrap a markdown-fenced JSON block if present.
            if isinstance(output, str) and "```json" in output:
                json_match = re.search(r'```json\s*\n(.*?)\n```', output, re.DOTALL)
                if json_match:
                    output = json_match.group(1)

        if isinstance(output, str):
            try:
                output = json.loads(output)
                print(f" ✓ Coze 返回的 output 需要二次解析")
            except json.JSONDecodeError:
                print(f" ⚠️ output 为字符串但无法解析为JSON尝试文本解析")
                return self._parse_llm_response(output)

        if isinstance(output, dict):
            # Already structured: just make sure the required keys exist.
            print(f" ✓ Coze 返回结构化数据")
            print(f" → 包含字段: {list(output.keys())}")
            result = self._fill_required_fields(output)
            print(f" ✓✓ Coze 工作流调用成功!")
            return result
        if isinstance(output, str):
            return self._parse_llm_response(output)
        # Lists, numbers, ...: serialise and reuse the text parser.
        return self._parse_llm_response(json.dumps(output, ensure_ascii=False))

    def _call_ollama(self, prompt: str) -> Dict[str, Any]:
        """Send ``prompt`` to the Ollama ``/api/generate`` endpoint."""
        try:
            import requests
            response = requests.post(
                f"{self.ollama_host}/api/generate",
                json={
                    "model": self.model,
                    "prompt": prompt,
                    "stream": False
                },
                timeout=300
            )
            if response.status_code != 200:
                raise RuntimeError(f"Ollama 返回错误: {response.status_code}")
            content = response.json().get("response", "")
            return self._parse_llm_response(content)
        except Exception as e:
            return self._error_result(f"Ollama API 调用失败: {str(e)}")

    # ------------------------------------------------------------------
    # Response parsing / mock
    # ------------------------------------------------------------------
    def _parse_llm_response(self, response: str) -> Dict[str, Any]:
        """Extract a JSON object from free-form LLM text.

        Looks for a fenced code block first, then for the outermost braces,
        and finally tries the whole text. This method never raises: anything
        that does not yield a JSON object produces a fallback dict that
        carries the original text under ``raw_response``.
        """
        import re
        try:
            fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response, re.DOTALL)
            if fenced:
                json_str = fenced.group(1)
            else:
                bare = re.search(r'\{.*\}', response, re.DOTALL)
                json_str = bare.group(0) if bare else response
            result = json.loads(json_str)
            if not isinstance(result, dict):
                raise TypeError("LLM response is not a JSON object")
            return self._fill_required_fields(result)
        except Exception:
            # Parsing failed: hand the raw text back to the caller.
            return {
                "summary": "无法解析LLM响应",
                "raw_response": response,
                "key_findings": [],
                "abnormal_items": [],
                "risk_assessment": "解析失败",
                "recommendations": []
            }

    def _mock_analysis(self, report_text: str) -> Dict[str, Any]:
        """Return a canned analysis; ``report_text`` is ignored."""
        return {
            "summary": "这是一份血常规检查报告。根据报告内容,各项指标均在正常参考范围内,未发现明显异常。",
            "key_findings": [
                "白细胞计数: 6.5×10^9/L正常范围",
                "红细胞计数: 4.8×10^12/L正常范围",
                "血红蛋白: 145 g/L正常范围",
                "血小板计数: 220×10^9/L正常范围"
            ],
            "abnormal_items": [],
            "risk_assessment": "低风险。所有检测指标均在正常范围内,未发现需要关注的异常项。建议定期体检,保持健康生活方式。",
            "recommendations": [
                "继续保持良好的生活习惯",
                "定期进行健康体检建议每年一次",
                "保持均衡饮食和适量运动",
                "如有不适症状,及时就医"
            ],
            "note": "这是一个模拟的分析结果。实际使用时请配置 OpenAI API 或本地 Ollama 模型。"
        }

View File

@@ -0,0 +1,222 @@
import os
import sys
from pathlib import Path
from typing import Union
import tempfile
import shutil
class OCRService:
    """Text extraction service backed by Baidu Cloud OCR, MinerU or PaddleOCR.

    The engine is chosen once in ``__init__``; construction raises
    ``RuntimeError`` when no engine is available or the chosen one cannot be
    initialised. The ``_extract_*`` methods do not raise on recognition
    failures: they return a human-readable error string in place of the text,
    so callers receive a ``str`` either way.
    """

    # Default MinerU checkout location; override with the MINERU_PATH
    # environment variable.
    _DEFAULT_MINERU_PATH = r"c:\Users\UI\Desktop\MinerU-master"

    def __init__(self):
        self.ocr_type = self._detect_ocr_type()
        self._initialize_ocr()

    @classmethod
    def _mineru_root(cls) -> Path:
        """Return the directory expected to contain the MinerU sources."""
        return Path(os.getenv("MINERU_PATH") or cls._DEFAULT_MINERU_PATH)

    def _detect_ocr_type(self) -> str:
        """Pick the OCR engine: Baidu Cloud, then MinerU, then PaddleOCR.

        Raises:
            RuntimeError: when none of the engines is available.
        """
        baidu_configured = (
            os.getenv("BAIDU_OCR_APP_ID")
            and os.getenv("BAIDU_OCR_API_KEY")
            and os.getenv("BAIDU_OCR_SECRET_KEY")
        )
        if baidu_configured:
            return "baidu_cloud"
        if self._check_mineru():
            return "mineru"
        if self._check_paddleocr():
            return "paddleocr"
        raise RuntimeError(
            "❌ 没有可用的OCR引擎请至少配置以下一种\n"
            "1. MinerU - 将 MinerU-master 文件夹放在桌面\n"
            "2. 百度OCR - 配置环境变量 BAIDU_OCR_*\n"
            "3. PaddleOCR - 运行 pip install paddleocr paddlepaddle"
        )

    def _initialize_ocr(self):
        """Create the client/handle for the selected engine.

        Raises:
            RuntimeError: when the engine's dependencies cannot be loaded.
        """
        if self.ocr_type == "mineru":
            try:
                # Make the MinerU checkout importable.
                mineru_path = self._mineru_root()
                if mineru_path.exists() and str(mineru_path) not in sys.path:
                    sys.path.insert(0, str(mineru_path))
                from demo.demo import parse_doc
                self.mineru_parse = parse_doc
                try:
                    # Best effort: register the YOLO model class with torch's
                    # safe-globals list; failures here are not fatal.
                    from torch.serialization import add_safe_globals
                    from doclayout_yolo.nn.tasks import YOLOv10DetectionModel
                    add_safe_globals([YOLOv10DetectionModel])
                except Exception:
                    pass
                print("✓ 使用 MinerU 引擎高精度文档解析")
            except Exception as e:
                raise RuntimeError(f"❌ MinerU 初始化失败: {e}\n请安装完整依赖或使用其他OCR引擎") from e
        elif self.ocr_type == "baidu_cloud":
            try:
                from aip import AipOcr
                app_id = os.getenv("BAIDU_OCR_APP_ID")
                api_key = os.getenv("BAIDU_OCR_API_KEY")
                secret_key = os.getenv("BAIDU_OCR_SECRET_KEY")
                self.baidu_client = AipOcr(app_id, api_key, secret_key)
                print("✓ 使用百度云OCR API高精度")
            except Exception as e:
                raise RuntimeError(f"❌ 百度云OCR初始化失败: {e}\n请检查环境变量配置") from e
        elif self.ocr_type == "paddleocr":
            try:
                from paddleocr import PaddleOCR
                self.paddle_ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
                print("✓ 使用 PaddleOCR 引擎本地离线")
            except Exception as e:
                raise RuntimeError(f"❌ PaddleOCR 初始化失败: {e}\n请运行: pip install paddleocr paddlepaddle") from e

    def _check_mineru(self) -> bool:
        """Return True when the MinerU checkout with ``demo/demo.py`` exists."""
        try:
            mineru_path = self._mineru_root()
            return mineru_path.exists() and (mineru_path / "demo" / "demo.py").exists()
        except OSError:
            return False

    def _check_paddleocr(self) -> bool:
        """Return True when the ``paddleocr`` package can be imported."""
        try:
            import paddleocr  # noqa: F401
            return True
        except ImportError:
            return False

    def extract_text(self, file_path: Union[str, Path]) -> str:
        """Extract text from an image or PDF, dispatching on the file suffix."""
        file_path = str(file_path)
        if Path(file_path).suffix.lower() == '.pdf':
            return self._extract_from_pdf(file_path)
        return self._extract_from_image(file_path)

    def _extract_from_image(self, image_path: str) -> str:
        """Run the configured engine on an image file.

        Raises:
            RuntimeError: when ``self.ocr_type`` is not a known engine.
        """
        handlers = {
            "mineru": self._extract_with_mineru,
            "baidu_cloud": self._extract_with_baidu_cloud,
            "paddleocr": self._extract_with_paddleocr,
        }
        handler = handlers.get(self.ocr_type)
        if handler is None:
            raise RuntimeError("OCR引擎未正确初始化")
        return handler(image_path)

    def _extract_with_mineru(self, file_path: str) -> str:
        """Parse a PDF or image with MinerU and return its markdown output."""
        try:
            temp_dir = tempfile.mkdtemp(prefix="mineru_")
            try:
                self.mineru_parse(
                    path_list=[Path(file_path)],
                    output_dir=temp_dir,
                    lang="ch",           # Chinese
                    backend="pipeline",  # pipeline mode
                    method="auto"        # auto-detect
                )
                md_files = list(Path(temp_dir).rglob("*.md"))
                if not md_files:
                    return "未识别到文本内容"
                # Prefer real content over layout / span / origin helper files.
                content_files = [
                    f for f in md_files
                    if not any(x in f.stem for x in ['layout', 'span', 'origin'])
                ]
                target_files = content_files or md_files
                with open(target_files[0], 'r', encoding='utf-8') as f:
                    content = f.read()
                return content if content.strip() else "未识别到文本内容"
            finally:
                # Best-effort cleanup; a leftover temp dir must not mask the result.
                shutil.rmtree(temp_dir, ignore_errors=True)
        except Exception as e:
            return f"MinerU识别出错: {str(e)}"

    def _extract_with_baidu_cloud(self, image_path: str) -> str:
        """Recognise text with the Baidu Cloud OCR ``accurateBasic`` call."""
        try:
            with open(image_path, 'rb') as f:
                image_data = f.read()
            result = self.baidu_client.accurateBasic(image_data)
            if 'error_code' in result:
                return f"百度OCR错误 ({result['error_code']}): {result.get('error_msg', '未知错误')}"
            if 'words_result' in result:
                text_lines = [item['words'] for item in result['words_result']]
                return "\n".join(text_lines) if text_lines else "未识别到文本内容"
            return "未识别到文本内容"
        except Exception as e:
            return f"百度云OCR识别出错: {str(e)}"

    def _extract_with_paddleocr(self, image_path: str) -> str:
        """Recognise text with the local PaddleOCR engine."""
        try:
            result = self.paddle_ocr.ocr(image_path, cls=True)
            if not result or not result[0]:
                return "未识别到文本内容"
            # Each usable entry holds the recognised text at line[1][0].
            text_lines = [
                line[1][0] for line in result[0] if line and len(line) >= 2
            ]
            return "\n".join(text_lines) if text_lines else "未识别到文本内容"
        except Exception as e:
            return f"PaddleOCR识别出错: {str(e)}"

    def _extract_from_pdf(self, pdf_path: str) -> str:
        """Extract text from a PDF.

        MinerU handles the PDF itself when it is the active engine; for the
        other engines the embedded text layer is read with ``pdfplumber``
        (no OCR is applied to the pages on that path).
        """
        if self.ocr_type == "mineru":
            return self._extract_with_mineru(pdf_path)
        try:
            import pdfplumber
            text_content = []
            with pdfplumber.open(pdf_path) as pdf:
                for page in pdf.pages:
                    text = page.extract_text()
                    if text:
                        text_content.append(text)
            return "\n\n".join(text_content) if text_content else "未提取到文本内容"
        except ImportError:
            return "PDF处理需要安装 pdfplumber 库\n可以运行: pip install pdfplumber"
        except Exception as e:
            return f"PDF处理出错: {str(e)}"

View File

@@ -0,0 +1,267 @@
import os
from pathlib import Path
from datetime import datetime
from typing import Dict, Any
from jinja2 import Environment, FileSystemLoader
class PDFService:
    """Render analysis results to HTML with Jinja2 and convert them to PDF.

    Templates are read from ``<package root>/templates`` and PDFs are written
    to ``<package root>/generated_reports``; both directories are created on
    construction. PDF conversion prefers WeasyPrint and falls back to
    xhtml2pdf when WeasyPrint cannot be imported.
    """

    # Characters replaced by "_" when user data becomes part of a file name.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'
    # (key in the risk dict, label used in the rendered text)
    _RISK_LEVELS = (
        ("high_risk", "【高风险】"),
        ("medium_risk", "【中风险】"),
        ("low_risk", "【低风险】"),
    )

    def __init__(self):
        base_dir = Path(__file__).parent.parent
        self.template_dir = base_dir / "templates"
        self.template_dir.mkdir(exist_ok=True)
        self.output_dir = base_dir / "generated_reports"
        self.output_dir.mkdir(exist_ok=True)
        # NOTE(review): autoescape is left at the Jinja2 default here, so text
        # coming from the LLM is inserted into the HTML as-is - confirm the
        # templates escape it where needed.
        self.jinja_env = Environment(loader=FileSystemLoader(str(self.template_dir)))

    # ------------------------------------------------------------------
    # Single report
    # ------------------------------------------------------------------
    def generate_report(
        self,
        filename: str,
        analysis: Dict[str, Any],
        llm_type: str = "Coze Workflow"
    ) -> str:
        """Generate a PDF for one analysed report.

        Args:
            filename: Name of the original uploaded file.
            analysis: Analysis result dict.
            llm_type: Label of the LLM backend shown in the report.

        Returns:
            Path of the generated PDF.

        Raises:
            Exception: wrapping any failure during rendering or conversion.
        """
        try:
            template_data = self._prepare_template_data(filename, analysis, llm_type)
            html_content = self._render_html(template_data)
            return self._generate_pdf(html_content, filename)
        except Exception as e:
            raise Exception(f"PDF生成失败: {str(e)}") from e

    def _prepare_template_data(
        self,
        filename: str,
        analysis: Dict[str, Any],
        llm_type: str
    ) -> Dict[str, Any]:
        """Build the template context for the single-report template."""
        return {
            "filename": filename,
            "analysis_date": datetime.now().strftime("%Y年%m月%d"),
            "llm_type": llm_type,
            **self._normalize_analysis(analysis),
            "generation_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

    def _render_html(self, template_data: Dict[str, Any]) -> str:
        """Render ``report_template.html`` with ``template_data``."""
        template = self.jinja_env.get_template("report_template.html")
        return template.render(**template_data)

    def _generate_pdf(self, html_content: str, original_filename: str) -> str:
        """Convert ``html_content`` to a timestamped PDF in the output directory.

        Raises:
            Exception: wrapping any conversion failure.
        """
        try:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            base_name = Path(original_filename).stem
            pdf_path = self.output_dir / f"{base_name}_分析报告_{timestamp}.pdf"
            self._write_pdf(html_content, pdf_path, announce_fallback=True)
            return str(pdf_path)
        except Exception as e:
            raise Exception(f"PDF转换失败: {str(e)}") from e

    def get_pdf_file(self, pdf_path: str) -> bytes:
        """Return the bytes of the PDF at ``pdf_path``.

        Raises:
            FileNotFoundError: when the file does not exist.
        """
        if not os.path.exists(pdf_path):
            raise FileNotFoundError("PDF文件不存在")
        with open(pdf_path, "rb") as f:
            return f.read()

    # ------------------------------------------------------------------
    # Comprehensive (multi-report) report
    # ------------------------------------------------------------------
    def generate_comprehensive_report(
        self,
        patient_name: str,
        template_data: Dict[str, Any]
    ) -> str:
        """Generate one PDF that consolidates several reports.

        Args:
            patient_name: Patient name; shown in the report and used (with
                path-unsafe characters replaced) in the file name.
            template_data: Dict holding the ``analysis`` plus report metadata.

        Returns:
            Path of the generated PDF.

        Raises:
            Exception: wrapping any failure during rendering or conversion.
        """
        try:
            comprehensive_data = self._prepare_comprehensive_data(patient_name, template_data)
            html_content = self._render_comprehensive_html(comprehensive_data)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            safe_name = self._safe_filename_part(patient_name)
            pdf_path = self.output_dir / f"{safe_name}_综合健康报告_{timestamp}.pdf"
            self._write_pdf(html_content, pdf_path)
            return str(pdf_path)
        except Exception as e:
            raise Exception(f"综合报告生成失败: {str(e)}") from e

    def _prepare_comprehensive_data(
        self,
        patient_name: str,
        template_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Build the template context for the comprehensive template."""
        analysis = template_data.get("analysis", {})
        return {
            "patient_name": patient_name,
            "report_count": template_data.get("report_count", 0),
            "report_list": template_data.get("report_list", []),
            "generation_date": template_data.get("generation_date", datetime.now().strftime("%Y年%m月%d")),
            **self._normalize_analysis(analysis),
            "generation_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

    def _render_comprehensive_html(self, template_data: Dict[str, Any]) -> str:
        """Render ``comprehensive_report_template.html`` with ``template_data``."""
        template = self.jinja_env.get_template("comprehensive_report_template.html")
        return template.render(**template_data)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    @classmethod
    def _safe_filename_part(cls, name) -> str:
        """Replace characters that are unsafe in file names with ``_``."""
        table = str.maketrans({ch: "_" for ch in cls._UNSAFE_FILENAME_CHARS})
        return str(name).translate(table)

    @staticmethod
    def _as_text_list(items, primary_key: str):
        """Flatten a list of dicts/values to texts; falsy input is returned as-is."""
        if not items:
            return items
        return [
            item.get(primary_key, item.get("text", str(item)))
            if isinstance(item, dict) else str(item)
            for item in items
        ]

    @classmethod
    def _format_risk_assessment(cls, risk_assessment):
        """Turn a per-level risk dict into text; other values pass through."""
        if not isinstance(risk_assessment, dict):
            return risk_assessment
        parts = []
        for key, label in cls._RISK_LEVELS:
            entries = risk_assessment.get(key)
            if not entries:
                continue
            # A bare value (e.g. one string) counts as a single entry, and
            # entries are stringified so mixed types cannot break the join.
            if not isinstance(entries, (list, tuple)):
                entries = [entries]
            parts.append(f"{label}{'; '.join(str(entry) for entry in entries)}")
        return "\n".join(parts) if parts else "未检测到明确风险"

    def _normalize_analysis(self, analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Return the analysis fields in the shape the templates expect."""
        abnormal_items = analysis.get("abnormal_items", [])
        if abnormal_items:
            # Non-dict entries are wrapped so templates can always read "name".
            abnormal_items = [
                item if isinstance(item, dict) else {"name": str(item)}
                for item in abnormal_items
            ]
        return {
            "summary": analysis.get("summary", "暂无摘要"),
            "key_findings": self._as_text_list(analysis.get("key_findings", []), "finding"),
            "abnormal_items": abnormal_items,
            "risk_assessment": self._format_risk_assessment(
                analysis.get("risk_assessment", "未提供")
            ),
            "recommendations": self._as_text_list(
                analysis.get("recommendations", []), "recommendation"
            ),
        }

    def _write_pdf(self, html_content: str, pdf_path: Path, announce_fallback: bool = False) -> None:
        """Write ``html_content`` as a PDF to ``pdf_path``.

        WeasyPrint is preferred. xhtml2pdf is used when WeasyPrint cannot be
        imported, either because it is not installed (ImportError) or because
        its native libraries fail to load (OSError).

        Raises:
            Exception: when the xhtml2pdf fallback reports an error.
        """
        try:
            from weasyprint import HTML, CSS
        except (ImportError, OSError):
            if announce_fallback:
                print(" WeasyPrint 未安装,使用 xhtml2pdf 生成PDF...")
            from xhtml2pdf import pisa
            with open(pdf_path, "wb") as pdf_file:
                pisa_status = pisa.CreatePDF(html_content, dest=pdf_file)
            if pisa_status.err:
                raise Exception("xhtml2pdf 生成失败")
            return
        HTML(string=html_content).write_pdf(
            str(pdf_path),
            stylesheets=[CSS(string='@page { size: A4; margin: 1cm; }')]
        )

View File

@@ -0,0 +1,106 @@
from typing import List, Dict, Any
import json
class ReportIntegrator:
    """Combine the analyses of several medical reports into one assessment.

    Each report is a dict with a ``filename`` and an ``analysis`` dict. The
    LLM call is made through the given ``llm_service`` for the ``openai`` and
    ``ollama`` backends; every other backend type gets the mock integration.
    """

    def __init__(self, llm_service):
        self.llm_service = llm_service

    def integrate_reports(self, reports: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Integrate ``reports`` into one result dict.

        Args:
            reports: Analysed reports (``filename`` + ``analysis``).

        Returns:
            Integration result including a ``reports_included`` list. An
            empty input yields an error result without calling the LLM.
        """
        if not reports:
            result = self._create_error_result("未提供任何报告")
            result["reports_included"] = []
            return result
        if len(reports) == 1:
            return self._single_report_summary(reports[0])

        prompt = self._build_integration_prompt(reports)
        llm_type = self.llm_service.llm_type
        # NOTE(review): only openai and ollama are routed to a real model
        # here; other backend types fall through to the mock result -
        # confirm this is intended.
        if llm_type == "openai":
            result = self._call_openai_integration(prompt)
        elif llm_type == "ollama":
            result = self._call_ollama_integration(prompt)
        else:
            result = self._mock_integration(reports)

        result["reports_included"] = [
            {"filename": report["filename"], "summary": report["analysis"].get("summary", "无摘要")}
            for report in reports
        ]
        return result

    def _build_integration_prompt(self, reports: List[Dict[str, Any]]) -> str:
        """Build the prompt listing every report's file name and summary."""
        report_details = []
        for i, report in enumerate(reports, 1):
            analysis = report["analysis"]
            report_details.append(f"【报告{i}: {report['filename']}\n摘要: {analysis.get('summary', '')}")
        prompt = f"""你是专业医疗分析专家。整合以下{len(reports)}份报告,提供综合评估。
{chr(10).join(report_details)}
请以JSON格式返回
{{"overall_summary": "整体摘要", "health_trends": ["趋势"], "priority_concerns": [{{"concern": "关注点", "severity": "低/中/高", "description": "描述"}}], "comprehensive_assessment": "综合评估", "integrated_recommendations": ["建议"], "follow_up_suggestions": ["后续建议"]}}"""
        return prompt

    def _call_openai_integration(self, prompt: str) -> Dict[str, Any]:
        """Run the integration prompt through the service's OpenAI client."""
        try:
            response = self.llm_service.client.chat.completions.create(
                model=self.llm_service.model,
                messages=[{"role": "system", "content": "你是医疗分析专家。"}, {"role": "user", "content": prompt}],
                temperature=0.7, max_tokens=3000
            )
            content = response.choices[0].message.content
            return self.llm_service._parse_llm_response(content)
        except Exception as e:
            return self._create_error_result(f"OpenAI分析失败: {str(e)}")

    def _call_ollama_integration(self, prompt: str) -> Dict[str, Any]:
        """Run the integration prompt through the service's Ollama host."""
        try:
            import requests
            response = requests.post(
                f"{self.llm_service.ollama_host}/api/generate",
                json={"model": self.llm_service.model, "prompt": prompt, "stream": False},
                timeout=90,
            )
            if response.status_code != 200:
                raise RuntimeError(f"Ollama错误: {response.status_code}")
            return self.llm_service._parse_llm_response(response.json().get("response", ""))
        except Exception as e:
            return self._create_error_result(f"Ollama分析失败: {str(e)}")

    def _mock_integration(self, reports: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Return a canned integration result based on the abnormal item count."""
        # ``or []`` tolerates an explicit ``abnormal_items: None``.
        total_abnormal = sum(
            len(report["analysis"].get("abnormal_items") or []) for report in reports
        )
        if total_abnormal == 0:
            priority_concerns = [{"concern": "定期体检", "severity": "", "description": "建议保持定期体检"}]
        else:
            priority_concerns = [{"concern": "异常指标", "severity": "", "description": f"发现{total_abnormal}项异常"}]
        return {
            "overall_summary": f"综合分析了{len(reports)}份报告,发现{total_abnormal}项异常指标。整体健康状况良好。",
            "health_trends": ["各项指标整体稳定", "未发现明显恶化趋势", "建议持续监测"],
            "priority_concerns": priority_concerns,
            "comprehensive_assessment": "整体健康状况可控,建议关注生活方式、定期复查。",
            "integrated_recommendations": ["保持均衡饮食", "坚持适量运动", "保证充足睡眠", "定期体检"],
            "follow_up_suggestions": ["3-6个月后复查关键指标", "如有不适及时就医", "保持健康记录"],
            "note": "这是模拟结果。实际使用请配置OpenAI或Ollama。"
        }

    def _single_report_summary(self, report: Dict[str, Any]) -> Dict[str, Any]:
        """Map one report's analysis onto the integration result shape."""
        analysis = report["analysis"]
        # Only the first three abnormal items become priority concerns.
        top_abnormal = (analysis.get("abnormal_items") or [])[:3]
        return {
            "overall_summary": f"单份报告分析:{analysis.get('summary', '无摘要')}",
            "reports_included": [{"filename": report["filename"], "summary": analysis.get("summary", "")}],
            "health_trends": analysis.get("key_findings", []),
            "priority_concerns": [{"concern": item, "severity": "", "description": "需关注"} for item in top_abnormal],
            "comprehensive_assessment": analysis.get("risk_assessment", "请查看详细分析"),
            "integrated_recommendations": analysis.get("recommendations", []),
            "follow_up_suggestions": ["定期复查", "咨询医生"]
        }

    def _create_error_result(self, error_msg: str) -> Dict[str, Any]:
        """Build the uniform failure payload carrying ``error_msg``."""
        return {
            "error": error_msg, "overall_summary": "分析失败", "health_trends": [],
            "priority_concerns": [], "comprehensive_assessment": "无法完成分析",
            "integrated_recommendations": [], "follow_up_suggestions": []
        }

View File

@@ -0,0 +1,67 @@
import os
import shutil
from pathlib import Path
from typing import Optional
class TemplateService:
    """Manage the PDF template files stored in one directory.

    The directory (default ``templates/pdf``, relative to the working
    directory) is created on construction. Every method that takes a
    ``template_name`` falls back to ``self.default_template`` when it is
    ``None``.
    """

    def __init__(self, template_dir: str = "templates/pdf"):
        self.template_dir = Path(template_dir)
        self.template_dir.mkdir(parents=True, exist_ok=True)
        # Name used whenever a caller does not specify a template.
        self.default_template = "be_u_template.pdf"

    def _resolve(self, template_name: Optional[str]) -> Path:
        """Return the path inside the template directory for ``template_name``."""
        if template_name is None:
            template_name = self.default_template
        return self.template_dir / template_name

    def get_template_path(self, template_name: Optional[str] = None) -> Path:
        """Return the path of an existing template.

        Raises:
            FileNotFoundError: when the template file does not exist.
        """
        template_path = self._resolve(template_name)
        if not template_path.exists():
            raise FileNotFoundError(f"模板文件不存在: {template_path}")
        return template_path

    def save_template(self, source_path: str, template_name: Optional[str] = None) -> str:
        """Copy a file into the template directory.

        Args:
            source_path: Path of the file to store.
            template_name: Target template name (optional).

        Returns:
            Path of the stored template.

        Raises:
            FileNotFoundError: when ``source_path`` does not exist.
        """
        source_path = Path(source_path)
        if not source_path.exists():
            raise FileNotFoundError(f"源文件不存在: {source_path}")
        dest_path = self._resolve(template_name)
        # Re-saving a template onto itself is a no-op; shutil.copy2 would
        # raise SameFileError for it.
        if source_path.resolve() != dest_path.resolve():
            shutil.copy2(source_path, dest_path)
        print(f"✓ 模板已保存: {dest_path}")
        return str(dest_path)

    def template_exists(self, template_name: Optional[str] = None) -> bool:
        """Return True when the template file exists."""
        return self._resolve(template_name).exists()

    def list_templates(self) -> list:
        """Return the names of all ``*.pdf`` files in the directory, sorted."""
        if not self.template_dir.exists():
            return []
        return sorted(f.name for f in self.template_dir.glob("*.pdf") if f.is_file())