初始化医疗报告生成项目,添加核心代码文件
This commit is contained in:
962
backend/health_assessment_v2.py
Normal file
962
backend/health_assessment_v2.py
Normal file
@@ -0,0 +1,962 @@
|
||||
"""
|
||||
整体健康情况分析模块 V2
|
||||
按照功能医学整体观重新设计的报告分析生成器
|
||||
|
||||
核心原则:
|
||||
1. 功能医学整体观视角,聚焦"系统功能平衡""机能趋势预判"
|
||||
2. 专业、客观、严谨的语言风格
|
||||
3. 只呈现"指标状态→功能提示→关注方向",不包含干预方案
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from typing import Dict, List, Any, Tuple
|
||||
from docx.oxml import OxmlElement
|
||||
from docx.oxml.ns import qn
|
||||
|
||||
|
||||
# System classification: maps each of the four physiological systems to the
# report modules that belong to it. Insertion order is the reporting order.
SYSTEM_MAPPING = dict(
    # (I) Hematology and inflammatory status
    Hematology=[
        'Complete Blood Count',
        'Blood Coagulation',
        'Inflammatory Reaction',
    ],
    # (II) Hormonal and endocrine regulation
    Endocrine=[
        'Thyroid Function',
        'Female Hormone',
        'Male Hormone',
        'Bone Metabolism',
    ],
    # (III) Immunology and infection risk
    Immunology=[
        'Four Infectious Diseases',   # four-item infectious disease panel
        'Lymphocyte Subpopulation',
        'Humoral Immunity',
        'Autoantibody',
    ],
    # (IV) Nutrition and metabolic profile
    Metabolism=[
        'Blood Sugar',
        'Lipid Profile',
        'Liver Function',
        'Kidney Function',
        'Serum Electrolytes',
        'Microelement',               # trace elements
        'Urine Test',
        'Myocardial Enzyme',          # myocardial enzyme panel
        'Thromboembolism',            # cardio-/cerebrovascular risk factors
        'Tumor Markers',
    ],
)
|
||||
|
||||
# English and Chinese display titles for each system, keyed like SYSTEM_MAPPING.
# Insertion order is the order in which systems are listed in the prompt.
SYSTEM_NAMES = {
    system_key: {'en': english_title, 'cn': chinese_title}
    for system_key, english_title, chinese_title in (
        ('Hematology', '(I) Hematology and Inflammatory Status', '(一)血液学与炎症状态'),
        ('Endocrine', '(II) Hormonal and Endocrine Regulation', '(二)荷尔蒙与内分泌调节'),
        ('Immunology', '(III) Immunology and Infection Risk', '(三)免疫学与感染风险'),
        ('Metabolism', '(IV) Nutrition and Metabolic Profile', '(四)营养与代谢状况'),
    )
}
|
||||
|
||||
|
||||
def get_system_for_module(module_name: str) -> str:
    """Return the key of the system whose module list contains ``module_name``.

    Modules not listed in ``SYSTEM_MAPPING`` fall back to ``'Metabolism'``.
    """
    return next(
        (key for key, members in SYSTEM_MAPPING.items() if module_name in members),
        'Metabolism',
    )
|
||||
|
||||
|
||||
def classify_items_by_system(matched_data: dict, config: dict = None) -> Dict[str, Dict[str, List]]:
    """Group every test item by system and by normal/abnormal/borderline status.

    Args:
        matched_data: Mapping of item abbreviation to its record; the keys
            ``point``, ``result``, ``reference``, ``unit``, ``module``,
            ``project_cn`` and ``project`` are read from each record.
        config: Abbreviation config; loaded via ``load_abb_config()`` when
            omitted.

    Returns:
        ``{system_key: {'normal': [...], 'abnormal': [...], 'borderline': [...]}}``
        for the four systems. Flagged items go to ``borderline`` when
        ``_is_borderline_value`` accepts them and to ``abnormal`` otherwise;
        unflagged items are kept under ``normal`` only if they have a result.
    """
    from config import load_abb_config, normalize_abb

    if config is None:
        config = load_abb_config()
    info_by_abb = config.get('abb_to_info', {})

    flagged_markers = ['↑', '↓', 'H', 'L', '高', '低']
    buckets = {
        system_key: {'normal': [], 'abnormal': [], 'borderline': []}
        for system_key in ('Hematology', 'Endocrine', 'Immunology', 'Metabolism')
    }

    for abb, record in matched_data.items():
        flag = record.get('point', '').strip()
        value = record.get('result', '').strip()
        ref_range = record.get('reference', '').strip()
        unit_text = record.get('unit', '').strip()

        # Look up module metadata by normalized abbreviation first, raw one second.
        meta = info_by_abb.get(normalize_abb(abb, config).upper(), {})
        if not meta:
            meta = info_by_abb.get(abb.upper(), {})

        module_name = meta.get('module', record.get('module', ''))
        system_key = get_system_for_module(module_name)

        # Prefer the Chinese name, then the generic project name, then the abbreviation.
        display_name = (
            meta.get('project_cn')
            or record.get('project_cn')
            or meta.get('project')
            or record.get('project', abb)
        )

        entry = {
            'abb': abb,
            'name': display_name,
            'result': value,
            'unit': unit_text,
            'reference': ref_range,
            'point': flag,
            'module': module_name,
            'system': system_key,
        }

        if flag not in flagged_markers:
            # Unflagged items only count as normal when a result is present.
            if value:
                buckets[system_key]['normal'].append(entry)
            continue

        category = 'borderline' if _is_borderline_value(value, ref_range, flag) else 'abnormal'
        buckets[system_key][category].append(entry)

    return buckets
|
||||
|
||||
|
||||
def _is_borderline_value(result: str, reference: str, point: str) -> bool:
    """Tell whether a flagged result lies within 10% beyond its reference bound.

    Args:
        result: Measured value as text; every character other than digits and
            ``.`` is discarded before conversion.
        reference: Reference range as text, expected to contain ``low-high``
            (``-``, ``~``, ``~``, ``–`` or ``—`` as separator).
        point: Abnormality flag; ``↑``/``H``/``高`` compare against the upper
            bound, ``↓``/``L``/``低`` against the lower bound.

    Returns:
        ``True`` only when the value is beyond the relevant (positive) bound
        by more than 0% and at most 10%. Unparseable input, a missing range,
        a non-positive bound or an unknown flag all yield ``False``.
    """
    try:
        value = float(re.sub(r'[^\d.]', '', result))
        bounds = re.search(r'([\d.]+)\s*[-~~–—]\s*([\d.]+)', reference)
        if bounds is None:
            return False
        low, high = float(bounds.group(1)), float(bounds.group(2))
    except (TypeError, ValueError):
        # TypeError: non-string input (e.g. None); ValueError: text that is
        # not a number. Anything else is a real bug and must propagate.
        return False

    if point in ('↑', 'H', '高'):
        return high > 0 and 0 < (value - high) / high <= 0.1
    if point in ('↓', 'L', '低'):
        return low > 0 and 0 < (low - value) / low <= 0.1
    return False
|
||||
|
||||
|
||||
def collect_all_items_for_assessment(matched_data: dict, api_key: str = None) -> Tuple[List, List, Dict]:
    """Collect every item that has a result, split into normal and abnormal.

    Args:
        matched_data: Mapping of item abbreviation to its record.
        api_key: Accepted for interface compatibility; not read here.

    Returns:
        ``(normal_items, abnormal_items, system_classified_data)`` where the
        third element is the output of ``classify_items_by_system``.
    """
    from config import load_abb_config, normalize_abb

    config = load_abb_config()
    info_by_abb = config.get('abb_to_info', {})
    flagged_markers = ['↑', '↓', 'H', 'L', '高', '低']

    normal_items: List = []
    abnormal_items: List = []

    for abb, record in matched_data.items():
        flag = record.get('point', '').strip()
        value = record.get('result', '').strip()
        ref_range = record.get('reference', '').strip()
        unit_text = record.get('unit', '').strip()

        # Items without a result carry nothing to assess.
        if not value:
            continue

        meta = info_by_abb.get(normalize_abb(abb, config).upper(), {})
        if not meta:
            meta = info_by_abb.get(abb.upper(), {})

        module_name = meta.get('module', record.get('module', ''))
        display_name = (
            meta.get('project_cn')
            or record.get('project_cn')
            or meta.get('project')
            or record.get('project', abb)
        )

        entry = {
            'abb': abb,
            'name': display_name,
            'result': value,
            'unit': unit_text,
            'reference': ref_range,
            'point': flag,
            'module': module_name,
            'system': get_system_for_module(module_name),
        }

        destination = abnormal_items if flag in flagged_markers else normal_items
        destination.append(entry)

    # Per-system classification reuses the config loaded above.
    return normal_items, abnormal_items, classify_items_by_system(matched_data, config)
|
||||
|
||||
|
||||
def build_assessment_prompt(normal_items: List, abnormal_items: List, system_data: Dict) -> str:
    """Build the LLM prompt for the overall health assessment.

    Args:
        normal_items: Item dicts with in-range results; only the first 30 are
            listed in the prompt.
        abnormal_items: Flagged item dicts; each must carry ``point``,
            ``result``, ``name`` and ``abb``.
        system_data: Per-system classification; accepted for interface
            compatibility and not read here.

    Returns:
        The complete prompt text (Chinese instructions plus a JSON template).
    """

    def render(entry, annotation=''):
        # One bullet: "name (abb): result[ unit][ annotation][ [参考: ref]]".
        pieces = [f" - {entry['name']} ({entry['abb']}): {entry['result']}"]
        if entry.get('unit'):
            pieces.append(f" {entry['unit']}")
        pieces.append(annotation)
        if entry.get('reference'):
            pieces.append(f" [参考: {entry['reference']}]")
        return ''.join(pieces)

    normal_lines = [render(entry) for entry in normal_items[:30]]

    # Group abnormal bullets by system, tagging direction and severity.
    lines_by_system = {}
    for entry in abnormal_items:
        bucket = lines_by_system.setdefault(entry.get('system', 'Metabolism'), [])
        direction = '偏高' if entry['point'] in ['↑', 'H', '高'] else '偏低'
        if _is_borderline_value(entry['result'], entry.get('reference', ''), entry['point']):
            level = '临界'
        else:
            level = '异常'
        bucket.append(render(entry, f" ({direction}, {level})"))

    # Emit systems in the fixed SYSTEM_NAMES order, skipping empty ones.
    abnormal_lines = []
    for system_key, titles in SYSTEM_NAMES.items():
        system_lines = lines_by_system.get(system_key, [])
        if system_lines:
            abnormal_lines.append(f"\n【{titles['cn']}】")
            abnormal_lines.extend(system_lines)

    normal_block = '\n'.join(normal_lines) if normal_lines else ' 暂无数据'
    abnormal_block = '\n'.join(abnormal_lines) if abnormal_lines else ' 暂无异常指标'

    prompt = f"""# 角色设定
你是Be.U Med功能医学团队的资深医学顾问,在功能医学、整体健康、抗衰老医学领域具有丰富的临床经验。

# 任务
根据体检者的血液检查报告,撰写"整体健康情况分析"报告。

# 检测数据

## 正常指标(部分)
{normal_block}

## 异常/临界指标(按系统分类)
{abnormal_block}

# 核心原则(必须严格遵守)

## 1. 段落格式(极其重要!)
- **每个段落必须先写英文,再写对应的中文**
- **第一段:英文80-120词,中文80-120字**
- **第二段:英文80-100词,中文约120字(严格控制在110-130字之间)**
- 不要英中混排,必须分开

## 2. 语言风格
- 专业、客观、严谨,体现功能医学视角
- 使用"提示""可能""建议""值得关注""需要注意"等引导词
- 禁用"必须""一定""保证""治愈"等绝对化表述
- 不做临床疾病诊断,聚焦功能状态分析

## 3. 核心指标判定
- **核心指标**:从医学角度判定各生理学系统的关键指标
- **异常项**:超出参考范围的指标 + 逼近临界值的指标
- 指标必须精准,标注具体数值及单位

# 文章结构(必须严格遵循)

## 总述概述(2段)
**第一段**:前半部分列重点正常项及数值,后半部分列重点异常项及数值
**第二段**:说明这些异常指标对整体健康的综合影响

## 四大系统分析(固定顺序,每个系统2段)

### (I) Hematology and Inflammatory Status / (一)血液学与炎症状态
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响

### (II) Hormonal and Endocrine Regulation / (二)荷尔蒙与内分泌调节
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响

### (III) Immunology and Infection Risk / (三)免疫学与感染风险
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响

### (IV) Nutrition and Metabolic Profile / (四)营养与代谢状况
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响

## 结尾总结(2段)
**第一段 - 功能医学健康管理重点**:概括本次检测发现的核心健康管理重点
**第二段 - 个性化管理方向**:说明往哪个方向开展个性化健康管理

# 输出格式(JSON)

```json
{{
"overview": {{
"paragraph1": {{
"en": "英文(80-120词):前半部分列重点正常项及数值,后半部分列重点异常项及数值...",
"cn": "中文(80-120字):对应翻译..."
}},
"paragraph2": {{
"en": "英文(80-100词):说明异常指标对整体健康的综合影响...",
"cn": "中文(约120字):对应翻译..."
}}
}},
"systems": [
{{
"key": "Hematology",
"title_en": "(I) Hematology and Inflammatory Status",
"title_cn": "(一)血液学与炎症状态",
"paragraph1": {{
"en": "英文(80-120词):前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值...",
"cn": "中文(80-120字):对应翻译..."
}},
"paragraph2": {{
"en": "英文(80-100词):说明该系统核心异常指标对其他生理系统的影响...",
"cn": "中文(约120字):对应翻译..."
}}
}},
{{
"key": "Endocrine",
"title_en": "(II) Hormonal and Endocrine Regulation",
"title_cn": "(二)荷尔蒙与内分泌调节",
"paragraph1": {{}},
"paragraph2": {{}}
}},
{{
"key": "Immunology",
"title_en": "(III) Immunology and Infection Risk",
"title_cn": "(三)免疫学与感染风险",
"paragraph1": {{}},
"paragraph2": {{}}
}},
{{
"key": "Metabolism",
"title_en": "(IV) Nutrition and Metabolic Profile",
"title_cn": "(四)营养与代谢状况",
"paragraph1": {{}},
"paragraph2": {{}}
}}
],
"conclusion": {{
"management_focus": {{
"en": "英文(80-120词):功能医学健康管理重点概括...",
"cn": "中文(80-120字):对应翻译..."
}},
"personalized_direction": {{
"en": "英文(80-120词):个性化管理方向说明...",
"cn": "中文(80-120字):对应翻译..."
}}
}}
}}
```

# 重要提示
1. **每个段落必须先英文后中文,不要混排**
2. **第一段:英文80-120词,中文80-120字**
3. **第二段:英文80-100词,中文约120字(严格控制在110-130字)**
4. **第一段结构:前半部分正常项+数值,后半部分异常项+数值**
5. **第二段结构:异常指标对其他系统的影响**
6. **结尾两段:功能医学管理重点 + 个性化管理方向**
7. **逼近临界值的指标也算作异常项**
8. **只返回JSON,不要其他内容**"""

    return prompt
|
||||
|
||||
|
||||
|
||||
|
||||
def generate_health_assessment_v2(matched_data: dict, api_key: str, call_deepseek_api) -> dict:
    """Generate the overall health assessment content (V2).

    Args:
        matched_data: Matched test data, keyed by item abbreviation.
        api_key: DeepSeek API key; when empty, generation is skipped.
        call_deepseek_api: Callable invoked as
            ``call_deepseek_api(prompt, api_key, max_tokens=6000, timeout=180)``;
            a ``None`` return is treated as a failed request.

    Returns:
        The parsed JSON object from the model (new ``overview``/``systems``
        layout or legacy ``overall_analysis``/``system_analysis`` layout), or
        ``{}`` when generation is skipped or all three attempts fail.
    """
    import time

    if not api_key:
        print(" ⚠️ 未提供API Key,跳过健康评估生成")
        return {}

    normal_items, abnormal_items, system_data = collect_all_items_for_assessment(matched_data)

    if not normal_items and not abnormal_items:
        print(" ⚠️ 没有检测数据,跳过健康评估生成")
        return {}

    print(f" 📊 数据统计: 正常指标 {len(normal_items)} 个, 异常指标 {len(abnormal_items)} 个")

    prompt = build_assessment_prompt(normal_items, abnormal_items, system_data)

    def close_truncated_json(text):
        """Append whatever is needed to close a JSON text that was cut off."""
        pending = []          # closers still owed, innermost last
        in_string = False
        escaped = False
        for ch in text:
            if in_string:
                if escaped:
                    escaped = False
                elif ch == '\\':
                    escaped = True
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == '{':
                pending.append('}')
            elif ch == '[':
                pending.append(']')
            elif ch in '}]' and pending and pending[-1] == ch:
                pending.pop()
        if in_string:
            if escaped:
                # A dangling backslash would escape the quote we add.
                text = text[:-1]
            text += '"'
        # Close innermost-first so nested arrays/objects stay well-formed.
        return text + ''.join(reversed(pending))

    def parse_json_response(response_text):
        """Parse the model reply as JSON; return ``None`` if it cannot be parsed."""
        # Keep only the content of the first fenced block, if any.
        if '```json' in response_text:
            response_text = response_text.split('```json')[1].split('```')[0]
        elif '```' in response_text:
            response_text = response_text.split('```')[1].split('```')[0]

        response_text = response_text.strip()

        try:
            return json.loads(response_text)
        except json.JSONDecodeError:
            pass

        # Second chance: the reply may simply have been truncated.
        try:
            return json.loads(close_truncated_json(response_text))
        except json.JSONDecodeError:
            return None

    accepted_keys = ('overview', 'systems', 'overall_analysis', 'system_analysis')
    max_attempts = 3

    for attempt in range(max_attempts):
        is_last = attempt == max_attempts - 1
        try:
            print(f" 🤖 调用DeepSeek生成整体健康分析... (第{attempt+1}次)")
            response = call_deepseek_api(prompt, api_key, max_tokens=6000, timeout=180)

            if response is None:
                if not is_last:
                    print(f" ⚠️ API请求失败,重试中...")
                    time.sleep(3)
                continue

            result = parse_json_response(response)

            # Accept either the new layout or the legacy one; a JSON array or
            # scalar is not a usable answer.
            if isinstance(result, dict) and any(result.get(key) for key in accepted_keys):
                print(f" ✓ 成功生成整体健康分析")
                return result

            if not is_last:
                print(f" ⚠️ 响应格式不完整,重试中...")

        except Exception as e:
            # Best-effort boundary: report every failure, including the last
            # one, instead of swallowing it silently.
            if not is_last:
                print(f" ⚠️ 生成失败: {e},重试中...")
            else:
                print(f" ⚠️ 生成失败: {e}")

    print(f" ✗ 生成整体健康分析失败")
    return {}
|
||||
|
||||
|
||||
def _bilingual_paragraph(block):
    """Return ``{'en': ..., 'cn': ...}`` for a non-empty bilingual dict, else ``None``."""
    if not isinstance(block, dict):
        return None
    if not (block.get('en') or block.get('cn')):
        return None
    return {'en': block.get('en', ''), 'cn': block.get('cn', '')}


def _collect_paragraphs(container, keys):
    """Collect the non-empty bilingual paragraphs stored under ``keys``, in order."""
    candidates = (_bilingual_paragraph(container.get(key)) for key in keys)
    return [paragraph for paragraph in candidates if paragraph is not None]


def convert_v2_to_sections_format(v2_result: dict) -> dict:
    """Convert a V2 assessment result into the legacy ``sections`` layout.

    Supported input layouts:
      * new: ``overview{paragraph1, paragraph2}``, ``systems[]`` (each with
        ``paragraph1``/``paragraph2`` or a ``paragraphs`` list) and
        ``conclusion{management_focus, personalized_direction}`` (or a flat
        ``conclusion{en, cn}``);
      * legacy: ``overall_analysis`` and ``system_analysis[]``, consulted only
        when the new layout produced no section.

    The input comes from a language model, so any value that is missing,
    ``null`` or of the wrong type is skipped instead of raising.

    Returns:
        ``{'sections': [...]}`` where each section has ``title_en``,
        ``title_cn`` and ``paragraphs``; the overview and conclusion sections
        additionally carry ``is_overview`` / ``is_conclusion`` set to ``True``.
    """

    def as_dict(value):
        return value if isinstance(value, dict) else {}

    def as_dict_list(value):
        if not isinstance(value, (list, tuple)):
            return []
        return [entry for entry in value if isinstance(entry, dict)]

    data = as_dict(v2_result)
    sections = []

    # 1. Overview: two untitled paragraphs.
    overview = as_dict(data.get('overview'))
    paragraphs = _collect_paragraphs(overview, ('paragraph1', 'paragraph2'))
    if paragraphs:
        sections.append({
            'title_en': '',
            'title_cn': '',
            'paragraphs': paragraphs,
            'is_overview': True,
        })

    # 2. The four systems, in the order the model returned them.
    for system in as_dict_list(data.get('systems')):
        paragraphs = _collect_paragraphs(system, ('paragraph1', 'paragraph2'))
        if not paragraphs:
            # Older replies carried a plain ``paragraphs`` array instead.
            legacy = (_bilingual_paragraph(p) for p in as_dict_list(system.get('paragraphs')))
            paragraphs = [p for p in legacy if p is not None]
        if paragraphs:
            sections.append({
                'title_en': system.get('title_en', ''),
                'title_cn': system.get('title_cn', ''),
                'paragraphs': paragraphs,
            })

    # 3. Conclusion: two untitled paragraphs, or one flat en/cn pair.
    conclusion = as_dict(data.get('conclusion'))
    paragraphs = _collect_paragraphs(conclusion, ('management_focus', 'personalized_direction'))
    if not paragraphs:
        flat = _bilingual_paragraph(conclusion)
        if flat is not None:
            paragraphs = [flat]
    if paragraphs:
        sections.append({
            'title_en': '',
            'title_cn': '',
            'paragraphs': paragraphs,
            'is_conclusion': True,
        })

    if sections:
        return {'sections': sections}

    # 4. Legacy layout, only when nothing was produced above.
    legacy_keys = ('summary', 'strength_indicators', 'abnormal_indicators', 'focus_direction')
    legacy_blocks = [as_dict(data.get('overall_analysis'))]
    legacy_blocks.extend(as_dict_list(data.get('system_analysis')))
    for block in legacy_blocks:
        paragraphs = _collect_paragraphs(block, legacy_keys)
        if paragraphs:
            sections.append({
                'title_en': block.get('title_en', ''),
                'title_cn': block.get('title_cn', ''),
                'paragraphs': paragraphs,
            })

    return {'sections': sections}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 文档填充函数
|
||||
# ============================================================
|
||||
|
||||
def clean_markdown_formatting(text: str) -> str:
    """Strip inline Markdown markers (bold, italic, inline code) from ``text``.

    Handles ``**bold**``, ``__bold__``, ``*italic*``, ``_italic_`` and
    `` `code` ``, keeping the enclosed text. Falsy input (``None`` or ``''``)
    is returned unchanged.

    Single underscores are treated as italic markers only when they are not
    glued to an ASCII letter, digit or underscore on the outside, so
    identifiers such as ``eGFR_CKD_EPI`` keep their underscores.
    """
    if not text:
        return text

    substitutions = (
        r'\*\*([^*]+)\*\*',                               # **bold**
        r'__([^_]+)__',                                   # __bold__
        r'(?<!\*)\*([^*]+)\*(?!\*)',                      # *italic*
        r'(?<![A-Za-z0-9_])_([^_]+)_(?![A-Za-z0-9_])',    # _italic_, not intraword
        r'`([^`]+)`',                                     # `code`
    )
    for pattern in substitutions:
        text = re.sub(pattern, r'\1', text)

    return text
|
||||
|
||||
|
||||
def create_formatted_paragraph_v2(text: str, is_title: bool = False, is_chinese: bool = False):
    """Build a ``w:p`` element holding ``text`` with zero indentation.

    Args:
        text: Paragraph text; Markdown markers are stripped first.
        is_title: When true, bold is set on both the paragraph mark and the run.
        is_chinese: Selects paragraph style ``'0'`` and black text; otherwise
            style ``'00'`` and the grey theme color are used.

    Returns:
        The new, unattached ``w:p`` element.
    """

    def element(tag, *attributes):
        # Create an element and set its (qualified) attributes in the given order.
        node = OxmlElement(tag)
        for attr_name, attr_value in attributes:
            node.set(qn(attr_name), attr_value)
        return node

    def run_properties(bold):
        props = element('w:rPr')
        if bold:
            props.append(element('w:b'))
            props.append(element('w:bCs'))
        return props

    cleaned = clean_markdown_formatting(text)

    # Paragraph properties: style, then explicit zero indents to override the
    # style's default indentation, then (titles only) bold on the paragraph mark.
    paragraph_props = element('w:pPr')
    paragraph_props.append(element('w:pStyle', ('w:val', '0' if is_chinese else '00')))
    paragraph_props.append(
        element('w:ind', ('w:left', '0'), ('w:right', '0'), ('w:firstLine', '0'))
    )
    if is_title:
        paragraph_props.append(run_properties(bold=True))

    # Run properties: optional bold followed by the text color.
    run_props = run_properties(bold=is_title)
    if is_chinese:
        run_props.append(element('w:color', ('w:val', '000000')))
    else:
        run_props.append(
            element(
                'w:color',
                ('w:val', '767171'),
                ('w:themeColor', 'background2'),
                ('w:themeShade', '80'),
            )
        )

    text_node = OxmlElement('w:t')
    text_node.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
    text_node.text = cleaned

    run = OxmlElement('w:r')
    run.append(run_props)
    run.append(text_node)

    paragraph = OxmlElement('w:p')
    paragraph.append(paragraph_props)
    paragraph.append(run)
    return paragraph
|
||||
|
||||
|
||||
def create_empty_paragraph_v2():
    """Build an empty ``w:p`` element with style ``'00'`` and zero indentation."""
    style = OxmlElement('w:pStyle')
    style.set(qn('w:val'), '00')

    # Explicit zero indents on all three indent attributes.
    indent = OxmlElement('w:ind')
    for attr_name in ('w:left', 'w:right', 'w:firstLine'):
        indent.set(qn(attr_name), '0')

    properties = OxmlElement('w:pPr')
    properties.append(style)
    properties.append(indent)

    paragraph = OxmlElement('w:p')
    paragraph.append(properties)
    return paragraph
|
||||
|
||||
|
||||
def create_section_title_two_lines_v2(title_en: str, title_cn: str):
    """Build the bold title paragraphs of a section, English first.

    A title that is empty or whitespace-only produces no paragraph, so the
    returned list holds zero, one or two elements.
    """
    return [
        create_formatted_paragraph_v2(title, is_title=True, is_chinese=chinese)
        for title, chinese in ((title_en, False), (title_cn, True))
        if title and title.strip()
    ]
|
||||
|
||||
|
||||
def fill_health_assessment_v2(doc, assessment_result: dict):
    """Write the V2 health assessment into the Word document.

    The body element containing "Overall Health ... Assessment" is located,
    the template content between it and the next major area is removed
    (section properties and elements holding a page break are kept), and the
    generated sections are inserted right after the heading.

    Args:
        doc: Word document object exposing ``doc.element.body``.
        assessment_result: Result returned by ``generate_health_assessment_v2``.

    Returns:
        ``None``. The function prints a notice and leaves the document
        untouched when there is nothing to insert or the heading is missing.
    """
    if not assessment_result:
        print(" 健康评估内容为空,跳过填充")
        return

    sections = convert_v2_to_sections_format(assessment_result).get('sections', [])
    if not sections:
        print(" 转换后的sections为空,跳过填充")
        return

    w_ns = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    end_keywords = ['medical intervention', '医学干预',
                    'functional medical health advice', '功能医学健康建议']

    def flat_text(elem):
        return ''.join(elem.itertext()).strip().lower()

    body = doc.element.body
    children = list(body)

    # Locate the "Overall Health Assessment" heading.
    overall_start = -1
    for i, elem in enumerate(children):
        text = flat_text(elem)
        if 'overall health' in text and 'assessment' in text:
            overall_start = i
            print(f" 找到Overall Health Assessment位置: {i}")
            break

    if overall_start < 0:
        print(" 未找到Overall Health Assessment位置")
        return

    # Locate the start of the next major area (defaults to the end of the body).
    next_section_pos = len(children)
    for i in range(overall_start + 1, len(children)):
        text = flat_text(children[i])
        if any(kw in text for kw in end_keywords):
            next_section_pos = i
            print(f" 找到下一区域位置: {i}")
            break

    # Remove the template content in between, preserving layout carriers.
    removed = 0
    for elem in children[overall_start + 1:next_section_pos]:
        if elem.tag.endswith('}sectPr'):
            continue
        # Inspect every break, not just the first one, so a page break that
        # follows a line break is still preserved.
        if any(br.get(w_ns + 'type') == 'page' for br in elem.iter(w_ns + 'br')):
            continue
        try:
            body.remove(elem)
        except ValueError:
            # lxml raises ValueError when the element is no longer a child.
            continue
        removed += 1

    if removed:
        print(f" 已删除 {removed} 个模板占位内容")

    # Only elements after the heading were removed, so its index is unchanged.
    insert_pos = overall_start + 1

    for idx, section in enumerate(sections):
        # The model may return null titles; treat them as empty.
        title_en = (section.get('title_en') or '').strip()
        title_cn = (section.get('title_cn') or '').strip()

        new_elements = []
        if title_en or title_cn:
            # A blank line separates titled sections from what precedes them;
            # the untitled overview and conclusion get none.
            if idx > 0:
                new_elements.append(create_empty_paragraph_v2())
            new_elements.extend(create_section_title_two_lines_v2(title_en, title_cn))

        # Each paragraph is emitted as English text followed by Chinese text.
        for para in section.get('paragraphs', []):
            for lang_key, chinese in (('en', False), ('cn', True)):
                content = para.get(lang_key, '')
                if content:
                    new_elements.append(
                        create_formatted_paragraph_v2(content, is_chinese=chinese)
                    )

        for new_elem in new_elements:
            body.insert(insert_pos, new_elem)
            insert_pos += 1

    print(f" ✓ 已插入 {len(sections)} 个健康评估小节")
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 主入口函数
|
||||
# ============================================================
|
||||
|
||||
def generate_and_fill_health_assessment_v2(doc, matched_data: dict, api_key: str, call_deepseek_api):
    """Generate the overall health assessment (V2) and write it into ``doc``.

    Main entry point, replacing the former ``generate_health_assessment_content``
    + ``fill_health_assessment_section`` pair.

    Returns:
        ``None`` when no API key is given; otherwise whatever
        ``generate_health_assessment_v2`` returned (empty on failure).
    """
    if not api_key:
        print(" ⚠️ 未提供DeepSeek API Key,跳过健康评估生成")
        return None

    banner = "=" * 60
    print("\n" + banner)
    print("整体健康情况分析 V2")
    print(banner)

    assessment_result = generate_health_assessment_v2(matched_data, api_key, call_deepseek_api)

    if not assessment_result:
        print(" ✗ 健康评估生成失败")
        return assessment_result

    # Content was generated: write it into the document.
    print("\n 📝 正在填充健康评估内容...")
    fill_health_assessment_v2(doc, assessment_result)
    print(" ✓ 整体健康情况分析完成")
    return assessment_result
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 测试函数
|
||||
# ============================================================
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: build a prompt from a small hand-written sample and preview it.
    test_normal = [
        {'abb': 'WBC', 'name': '白细胞计数', 'result': '6.5', 'unit': '10^9/L',
         'reference': '4.0-10.0', 'system': 'Hematology'},
        {'abb': 'RBC', 'name': '红细胞计数', 'result': '4.8', 'unit': '10^12/L',
         'reference': '4.0-5.5', 'system': 'Hematology'},
    ]

    test_abnormal = [
        {'abb': 'TSH', 'name': '促甲状腺激素', 'result': '16.879', 'unit': 'μIU/mL',
         'reference': '0.35-4.94', 'point': '↑', 'system': 'Endocrine'},
        {'abb': 'AMH', 'name': '抗缪勒管激素', 'result': '0.17', 'unit': 'ng/mL',
         'reference': '1.0-10.0', 'point': '↓', 'system': 'Endocrine'},
    ]

    # Start from four empty systems, then attach the samples.
    test_system_data = {
        system_key: {'normal': [], 'abnormal': [], 'borderline': []}
        for system_key in ('Hematology', 'Endocrine', 'Immunology', 'Metabolism')
    }
    test_system_data['Hematology']['normal'] = test_normal
    test_system_data['Endocrine']['abnormal'] = test_abnormal

    prompt = build_assessment_prompt(test_normal, test_abnormal, test_system_data)

    separator = "=" * 60
    print(separator)
    print("生成的Prompt预览(前2000字符):")
    print(separator)
    print(prompt[:2000])
    print("...")
|
||||
Reference in New Issue
Block a user