Files
yiliao/backend/health_assessment_v2.py

963 lines
34 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
整体健康情况分析模块 V2
按照功能医学整体观重新设计的报告分析生成器
核心原则:
1. 功能医学整体观视角,聚焦"系统功能平衡""机能趋势预判"
2. 专业、客观、严谨的语言风格
3. 只呈现"指标状态→功能提示→关注方向",不包含干预方案
"""
import json
import re
from typing import Dict, List, Any, Tuple
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
# System classification map: assigns each report module to one of the four
# physiological systems. get_system_for_module() scans these lists.
SYSTEM_MAPPING = {
    # (I) Hematology and inflammatory status
    'Hematology': [
        'Complete Blood Count',  # routine blood count
        'Blood Coagulation',  # coagulation function
        'Inflammatory Reaction',  # inflammatory response
    ],
    # (II) Hormonal and endocrine regulation
    'Endocrine': [
        'Thyroid Function',  # thyroid function
        'Female Hormone',  # female hormones
        'Male Hormone',  # male hormones
        'Bone Metabolism',  # bone metabolism
    ],
    # (III) Immunology and infection risk
    'Immunology': [
        'Four Infectious Diseases',  # four-item infectious disease panel
        'Lymphocyte Subpopulation',  # lymphocyte subsets
        'Humoral Immunity',  # humoral immunity
        'Autoantibody',  # autoantibodies
    ],
    # (IV) Nutrition and metabolic profile
    'Metabolism': [
        'Blood Sugar',  # blood glucose
        'Lipid Profile',  # blood lipids
        'Liver Function',  # liver function
        'Kidney Function',  # kidney function
        'Serum Electrolytes',  # serum electrolytes
        'Microelement',  # trace elements
        'Urine Test',  # urinalysis
        'Myocardial Enzyme',  # myocardial enzyme panel
        'Thromboembolism',  # cardio-cerebrovascular risk factors
        'Tumor Markers',  # tumor markers
    ],
}
# English ('en') and Chinese ('cn') display titles for each system key.
# The keys mirror those of SYSTEM_MAPPING; insertion order (I)-(IV) is the
# order in which build_assessment_prompt() lists the systems.
SYSTEM_NAMES = {
    'Hematology': {
        'en': '(I) Hematology and Inflammatory Status',
        'cn': '(一)血液学与炎症状态'
    },
    'Endocrine': {
        'en': '(II) Hormonal and Endocrine Regulation',
        'cn': '(二)荷尔蒙与内分泌调节'
    },
    'Immunology': {
        'en': '(III) Immunology and Infection Risk',
        'cn': '(三)免疫学与感染风险'
    },
    'Metabolism': {
        'en': '(IV) Nutrition and Metabolic Profile',
        'cn': '(四)营养与代谢状况'
    },
}
def get_system_for_module(module_name: str) -> str:
    """Return the key of the system whose module list contains *module_name*.

    The first system in ``SYSTEM_MAPPING`` that lists the module wins. A
    module that no system lists is filed under ``'Metabolism'``.
    """
    owner = next(
        (key for key, members in SYSTEM_MAPPING.items() if module_name in members),
        None,
    )
    # Unmapped modules default to the metabolic system.
    return 'Metabolism' if owner is None else owner
def classify_items_by_system(matched_data: dict, config: dict = None) -> Dict[str, Dict[str, List]]:
    """Group every test item under one of the four systems and a status bucket.

    Args:
        matched_data: Mapping of abbreviation -> raw item dict. The keys read
            here are ``point``, ``result``, ``reference``, ``unit`` and, as
            fallbacks, ``module``, ``project_cn`` and ``project``.
        config: Abbreviation config containing ``abb_to_info``; loaded with
            ``load_abb_config()`` when omitted.

    Returns:
        ``{system: {'normal': [...], 'abnormal': [...], 'borderline': [...]}}``
        for the keys ``Hematology``, ``Endocrine``, ``Immunology`` and
        ``Metabolism``. Each list entry is a dict with the keys ``abb``,
        ``name``, ``result``, ``unit``, ``reference``, ``point``, ``module``
        and ``system``.
    """
    from config import load_abb_config, normalize_abb

    # Flags that mark a result as outside its reference range.
    # NOTE(review): the previous list contained empty strings (non-ASCII
    # markers appear to have been lost), which made every *unflagged* item
    # count as abnormal. The arrows are restored as '↑'/'↓' -- confirm the
    # exact glyphs the upstream parser emits.
    abnormal_flags = ('↑', '↓', 'H', 'L')

    def _text(value) -> str:
        # Tolerate None / numeric field values instead of crashing on .strip().
        return '' if value is None else str(value).strip()

    if config is None:
        config = load_abb_config()
    abb_to_info = config.get('abb_to_info', {})

    result = {
        system: {'normal': [], 'abnormal': [], 'borderline': []}
        for system in ('Hematology', 'Endocrine', 'Immunology', 'Metabolism')
    }

    for abb, data in matched_data.items():
        point = _text(data.get('point'))
        result_val = _text(data.get('result'))
        reference = _text(data.get('reference'))
        unit = _text(data.get('unit'))

        # Look the item up by its normalized abbreviation first, then by the
        # raw abbreviation.
        normalized_abb = normalize_abb(abb, config)
        info = abb_to_info.get(normalized_abb.upper(), {})
        if not info:
            info = abb_to_info.get(abb.upper(), {})

        module = info.get('module', data.get('module', ''))
        system = get_system_for_module(module)

        # Prefer the Chinese project name, then the generic one, then the
        # abbreviation itself.
        name = (
            info.get('project_cn')
            or data.get('project_cn')
            or info.get('project')
            or data.get('project', abb)
        )

        item_info = {
            'abb': abb,
            'name': name,
            'result': result_val,
            'unit': unit,
            'reference': reference,
            'point': point,
            'module': module,
            'system': system
        }

        if point in abnormal_flags:
            # Flagged items that sit close to the reference boundary are
            # reported as borderline rather than abnormal.
            if _is_borderline_value(result_val, reference, point):
                result[system]['borderline'].append(item_info)
            else:
                result[system]['abnormal'].append(item_info)
        elif result_val:
            # Unflagged items count as normal only when they carry a result.
            result[system]['normal'].append(item_info)

    return result
def _is_borderline_value(result: str, reference: str, point: str) -> bool:
    """Tell whether a flagged result lies within 10% of the reference bound.

    Args:
        result: Measured value as text; every character other than digits
            and ``.`` is discarded before conversion.
        reference: Reference range text such as ``"3.5-5.0"`` or ``"3.5~5.0"``.
        point: Abnormality flag of the item (high or low).

    Returns:
        ``True`` when the value exceeds the upper bound (high flag) or falls
        short of the lower bound (low flag) by more than 0 and at most 10%
        of that bound. ``False`` for any other flag, for unparsable input,
        or when the relevant bound is not positive.
    """
    # NOTE(review): the previous flag lists contained empty strings (non-ASCII
    # markers appear to have been lost), so an unflagged item was treated as
    # "high". Arrows restored as '↑'/'↓' -- confirm against the data source.
    high_flags = ('↑', 'H')
    low_flags = ('↓', 'L')
    if point not in high_flags and point not in low_flags:
        return False

    try:
        result_num = float(re.sub(r'[^\d.]', '', result))
        # Accept ASCII as well as full-width / dash variants of the separator.
        ref_match = re.search(r'([\d.]+)\s*[-~～–—－]\s*([\d.]+)', reference)
        if ref_match is None:
            return False
        ref_low = float(ref_match.group(1))
        ref_high = float(ref_match.group(2))
    except (TypeError, ValueError):
        # Non-text input or text that does not contain a usable number.
        return False

    if point in high_flags:
        bound, excess = ref_high, result_num - ref_high
    else:
        bound, excess = ref_low, ref_low - result_num

    # A non-positive bound cannot serve as the base of a relative deviation.
    if bound <= 0:
        return False
    return 0 < excess / bound <= 0.1
def collect_all_items_for_assessment(matched_data: dict, api_key: str = None) -> Tuple[List, List, Dict]:
    """Collect every item that has a result, split into normal and abnormal.

    Args:
        matched_data: Mapping of abbreviation -> raw item dict (keys read:
            ``point``, ``result``, ``reference``, ``unit``, and the fallbacks
            ``module``, ``project_cn``, ``project``).
        api_key: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        ``(normal_items, abnormal_items, system_classified_data)`` where the
        third element is the output of ``classify_items_by_system``.
    """
    from config import load_abb_config, normalize_abb

    # NOTE(review): the previous flag list contained empty strings (non-ASCII
    # markers appear to have been lost), which sent every unflagged item to
    # the abnormal list. Arrows restored as '↑'/'↓' -- confirm the glyphs.
    abnormal_flags = ('↑', '↓', 'H', 'L')

    def _text(value) -> str:
        # Tolerate None / numeric field values instead of crashing on .strip().
        return '' if value is None else str(value).strip()

    config = load_abb_config()
    abb_to_info = config.get('abb_to_info', {})

    normal_items = []
    abnormal_items = []

    for abb, data in matched_data.items():
        result_val = _text(data.get('result'))
        if not result_val:
            # Items without a result carry nothing to report.
            continue

        point = _text(data.get('point'))
        reference = _text(data.get('reference'))
        unit = _text(data.get('unit'))

        # Look the item up by its normalized abbreviation first, then by the
        # raw abbreviation.
        normalized_abb = normalize_abb(abb, config)
        info = abb_to_info.get(normalized_abb.upper(), {})
        if not info:
            info = abb_to_info.get(abb.upper(), {})

        module = info.get('module', data.get('module', ''))
        name = (
            info.get('project_cn')
            or data.get('project_cn')
            or info.get('project')
            or data.get('project', abb)
        )

        item_info = {
            'abb': abb,
            'name': name,
            'result': result_val,
            'unit': unit,
            'reference': reference,
            'point': point,
            'module': module,
            'system': get_system_for_module(module)
        }

        if point in abnormal_flags:
            abnormal_items.append(item_info)
        else:
            normal_items.append(item_info)

    # Per-system view of the same data, reusing the config loaded above.
    system_data = classify_items_by_system(matched_data, config)
    return normal_items, abnormal_items, system_data
def build_assessment_prompt(normal_items: List, abnormal_items: List, system_data: Dict) -> str:
    """Build the LLM prompt for the overall health assessment.

    Args:
        normal_items: Item dicts within range; only the first 30 are listed.
            Keys read: ``name``, ``abb``, ``result``, ``unit``, ``reference``.
        abnormal_items: Flagged item dicts; additionally reads ``point`` and
            ``system`` (missing ``system`` defaults to ``'Metabolism'``).
        system_data: Not used when building the text; kept for interface
            compatibility.

    Returns:
        The complete prompt string. Abnormal items are grouped under the
        Chinese system titles in ``SYSTEM_NAMES`` order; items whose system
        is not in ``SYSTEM_NAMES`` are not listed.
    """
    # NOTE(review): the previous "high" list contained empty strings
    # (non-ASCII markers appear to have been lost), so unflagged items were
    # labelled 偏高. Arrow restored as '↑' -- confirm the glyph in use.
    high_flags = ('↑', 'H')

    def _describe(item, note: str = '') -> str:
        # One bullet line: name (abb): result [unit][ note][ reference].
        line = f" - {item['name']} ({item['abb']}): {item['result']}"
        if item.get('unit'):
            line += f" {item['unit']}"
        line += note
        if item.get('reference'):
            line += f" [参考: {item['reference']}]"
        return line

    # Normal indicators, capped at 30 entries.
    normal_desc = [_describe(item) for item in normal_items[:30]]

    # Abnormal indicators grouped by system key.
    abnormal_by_system = {}
    for item in abnormal_items:
        # A missing 'point' used to raise KeyError; treat it as unflagged.
        point = item.get('point', '')
        direction = '偏高' if point in high_flags else '偏低'
        is_borderline = _is_borderline_value(item['result'], item.get('reference', ''), point)
        level = '临界' if is_borderline else '异常'
        system = item.get('system', 'Metabolism')
        abnormal_by_system.setdefault(system, []).append(
            _describe(item, f" ({direction}, {level})")
        )

    # Emit the groups in the fixed (I)-(IV) order, skipping empty systems.
    system_abnormal_desc = []
    for system_key, system_info in SYSTEM_NAMES.items():
        items = abnormal_by_system.get(system_key, [])
        if items:
            system_abnormal_desc.append(f"\n{system_info['cn']}")
            system_abnormal_desc.extend(items)

    normal_block = '\n'.join(normal_desc) if normal_desc else ' 暂无数据'
    abnormal_block = '\n'.join(system_abnormal_desc) if system_abnormal_desc else ' 暂无异常指标'

    prompt = f"""# 角色设定
你是Be.U Med功能医学团队的资深医学顾问在功能医学、整体健康、抗衰老医学领域具有丰富的临床经验。
# 任务
根据体检者的血液检查报告,撰写"整体健康情况分析"报告。
# 检测数据
## 正常指标(部分)
{normal_block}
## 异常/临界指标(按系统分类)
{abnormal_block}
# 核心原则(必须严格遵守)
## 1. 段落格式(极其重要!)
- **每个段落必须先写英文,再写对应的中文**
- **第一段英文80-120词中文80-120字**
- **第二段英文80-100词中文约120字严格控制在110-130字之间**
- 不要英中混排,必须分开
## 2. 语言风格
- 专业、客观、严谨,体现功能医学视角
- 使用"提示""可能""建议""值得关注""需要注意"等引导词
- 禁用"必须""一定""保证""治愈"等绝对化表述
- 不做临床疾病诊断,聚焦功能状态分析
## 3. 核心指标判定
- **核心指标**:从医学角度判定各生理学系统的关键指标
- **异常项**:超出参考范围的指标 + 逼近临界值的指标
- 指标必须精准,标注具体数值及单位
# 文章结构(必须严格遵循)
## 总述概述2段
**第一段**:前半部分列重点正常项及数值,后半部分列重点异常项及数值
**第二段**:说明这些异常指标对整体健康的综合影响
## 四大系统分析固定顺序每个系统2段
### (I) Hematology and Inflammatory Status / (一)血液学与炎症状态
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响
### (II) Hormonal and Endocrine Regulation / (二)荷尔蒙与内分泌调节
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响
### (III) Immunology and Infection Risk / (三)免疫学与感染风险
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响
### (IV) Nutrition and Metabolic Profile / (四)营养与代谢状况
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响
## 结尾总结2段
**第一段 - 功能医学健康管理重点**:概括本次检测发现的核心健康管理重点
**第二段 - 个性化管理方向**:说明往哪个方向开展个性化健康管理
# 输出格式JSON
```json
{{
"overview": {{
"paragraph1": {{
"en": "英文80-120词前半部分列重点正常项及数值后半部分列重点异常项及数值...",
"cn": "中文80-120字对应翻译..."
}},
"paragraph2": {{
"en": "英文80-100词说明异常指标对整体健康的综合影响...",
"cn": "中文约120字对应翻译..."
}}
}},
"systems": [
{{
"key": "Hematology",
"title_en": "(I) Hematology and Inflammatory Status",
"title_cn": "(一)血液学与炎症状态",
"paragraph1": {{
"en": "英文80-120词前半部分列该系统重点正常项及数值后半部分列重点异常项及数值...",
"cn": "中文80-120字对应翻译..."
}},
"paragraph2": {{
"en": "英文80-100词说明该系统核心异常指标对其他生理系统的影响...",
"cn": "中文约120字对应翻译..."
}}
}},
{{
"key": "Endocrine",
"title_en": "(II) Hormonal and Endocrine Regulation",
"title_cn": "(二)荷尔蒙与内分泌调节",
"paragraph1": {{}},
"paragraph2": {{}}
}},
{{
"key": "Immunology",
"title_en": "(III) Immunology and Infection Risk",
"title_cn": "(三)免疫学与感染风险",
"paragraph1": {{}},
"paragraph2": {{}}
}},
{{
"key": "Metabolism",
"title_en": "(IV) Nutrition and Metabolic Profile",
"title_cn": "(四)营养与代谢状况",
"paragraph1": {{}},
"paragraph2": {{}}
}}
],
"conclusion": {{
"management_focus": {{
"en": "英文80-120词功能医学健康管理重点概括...",
"cn": "中文80-120字对应翻译..."
}},
"personalized_direction": {{
"en": "英文80-120词个性化管理方向说明...",
"cn": "中文80-120字对应翻译..."
}}
}}
}}
```
# 重要提示
1. **每个段落必须先英文后中文,不要混排**
2. **第一段英文80-120词中文80-120字**
3. **第二段英文80-100词中文约120字严格控制在110-130字**
4. **第一段结构:前半部分正常项+数值,后半部分异常项+数值**
5. **第二段结构:异常指标对其他系统的影响**
6. **结尾两段:功能医学管理重点 + 个性化管理方向**
7. **逼近临界值的指标也算作异常项**
8. **只返回JSON不要其他内容**"""
    return prompt
def generate_health_assessment_v2(matched_data: dict, api_key: str, call_deepseek_api) -> dict:
    """Generate the overall health assessment content (V2).

    Args:
        matched_data: Matched test data, abbreviation -> item dict.
        api_key: DeepSeek API key; a falsy value skips generation.
        call_deepseek_api: Callable invoked as
            ``call_deepseek_api(prompt, api_key, max_tokens=6000, timeout=180)``;
            a ``None`` return is treated as a failed request.

    Returns:
        The parsed JSON dict once a response contains at least one of
        ``overview``, ``systems``, ``overall_analysis`` or
        ``system_analysis``; ``{}`` when skipped or after three failed tries.
    """
    import time

    if not api_key:
        print(" ⚠️ 未提供API Key跳过健康评估生成")
        return {}

    # Gather every indicator before deciding whether there is anything to say.
    normal_items, abnormal_items, system_data = collect_all_items_for_assessment(matched_data)
    if not (normal_items or abnormal_items):
        print(" ⚠️ 没有检测数据,跳过健康评估生成")
        return {}
    print(f" 📊 数据统计: 正常指标 {len(normal_items)} 个, 异常指标 {len(abnormal_items)}")

    prompt = build_assessment_prompt(normal_items, abnormal_items, system_data)

    def parse_json_response(response_text):
        """Parse the model reply as JSON, attempting one naive repair."""
        # Keep only the content of the first fenced block, if any.
        if '```json' in response_text:
            response_text = response_text.partition('```json')[2].partition('```')[0]
        elif '```' in response_text:
            response_text = response_text.partition('```')[2].partition('```')[0]
        payload = response_text.strip()

        try:
            return json.loads(payload)
        except json.JSONDecodeError:
            pass

        # Repair attempt for truncated output: close a dangling quote, then
        # any unclosed braces, then any unclosed brackets.
        if payload.count('"') % 2:
            payload += '"'
        missing_braces = payload.count('{') - payload.count('}')
        missing_brackets = payload.count('[') - payload.count(']')
        # A non-positive repeat count yields an empty string.
        payload += '}' * missing_braces + ']' * missing_brackets

        try:
            return json.loads(payload)
        except json.JSONDecodeError:
            return None

    accepted_keys = ('overview', 'systems', 'overall_analysis', 'system_analysis')

    # At most three attempts.
    for attempt_no in range(1, 4):
        will_retry = attempt_no < 3
        try:
            print(f" 🤖 调用DeepSeek生成整体健康分析... (第{attempt_no}次)")
            raw = call_deepseek_api(prompt, api_key, max_tokens=6000, timeout=180)
            if raw is not None:
                parsed = parse_json_response(raw)
                # Accept the new layout as well as the legacy one.
                if parsed and any(parsed.get(key) for key in accepted_keys):
                    print(" ✓ 成功生成整体健康分析")
                    return parsed
                if will_retry:
                    print(" ⚠️ 响应格式不完整,重试中...")
            elif will_retry:
                print(" ⚠️ API请求失败重试中...")
                time.sleep(3)
        except Exception as e:
            if will_retry:
                print(f" ⚠️ 生成失败: {e},重试中...")

    print(" ✗ 生成整体健康分析失败")
    return {}
def _bilingual_pair(entry):
    """Return ``{'en', 'cn'}`` for a non-empty bilingual dict, else ``None``."""
    if not isinstance(entry, dict):
        # The model may emit null or a bare string where a dict is expected.
        return None
    if not (entry.get('en') or entry.get('cn')):
        return None
    return {'en': entry.get('en', ''), 'cn': entry.get('cn', '')}


def _pairs_from_keys(container, keys):
    """Collect the non-empty bilingual pairs stored under *keys*, in order."""
    found = (_bilingual_pair(container.get(key)) for key in keys)
    return [pair for pair in found if pair is not None]


def convert_v2_to_sections_format(v2_result: dict) -> dict:
    """Convert a V2 assessment result into the ``sections`` layout.

    Supported input layouts:
        * new: ``overview{paragraph1, paragraph2}``, ``systems[]`` (each with
          ``paragraph1``/``paragraph2`` or a ``paragraphs`` list) and
          ``conclusion{management_focus, personalized_direction}`` (or a
          direct ``en``/``cn`` pair);
        * legacy: ``overall_analysis`` and ``system_analysis[]``, consulted
          only when the new layout produced no section.

    Values of an unexpected type (``None``, strings, ...) are skipped
    instead of raising.

    Returns:
        ``{'sections': [...]}``; every section has ``title_en``, ``title_cn``
        and ``paragraphs`` (list of ``{'en', 'cn'}``). The overview section
        additionally has ``is_overview: True`` and the conclusion section
        ``is_conclusion: True``; both carry empty titles.
    """
    sections = []
    if not isinstance(v2_result, dict):
        return {'sections': sections}

    paragraph_keys = ('paragraph1', 'paragraph2')
    legacy_keys = ('summary', 'strength_indicators', 'abnormal_indicators', 'focus_direction')

    def _as_dicts(value):
        # Only dict entries of a real list are usable system records.
        if not isinstance(value, list):
            return []
        return [entry for entry in value if isinstance(entry, dict)]

    # 1. Overview: two untitled paragraphs.
    overview = v2_result.get('overview')
    if isinstance(overview, dict):
        paragraphs = _pairs_from_keys(overview, paragraph_keys)
        if paragraphs:
            sections.append({
                'title_en': '',
                'title_cn': '',
                'paragraphs': paragraphs,
                'is_overview': True
            })

    # 2. Per-system analysis.
    for system in _as_dicts(v2_result.get('systems')):
        paragraphs = _pairs_from_keys(system, paragraph_keys)
        if not paragraphs:
            # Older responses carry a plain list of paragraphs instead.
            legacy_list = system.get('paragraphs')
            if isinstance(legacy_list, list):
                candidates = (_bilingual_pair(entry) for entry in legacy_list)
                paragraphs = [pair for pair in candidates if pair is not None]
        if paragraphs:
            sections.append({
                'title_en': system.get('title_en', ''),
                'title_cn': system.get('title_cn', ''),
                'paragraphs': paragraphs
            })

    # 3. Conclusion: two untitled paragraphs.
    conclusion = v2_result.get('conclusion')
    if isinstance(conclusion, dict):
        paragraphs = _pairs_from_keys(conclusion, ('management_focus', 'personalized_direction'))
        if not paragraphs:
            # Older responses put en/cn directly on the conclusion object.
            direct = _bilingual_pair(conclusion)
            if direct is not None:
                paragraphs.append(direct)
        if paragraphs:
            sections.append({
                'title_en': '',
                'title_cn': '',
                'paragraphs': paragraphs,
                'is_conclusion': True
            })

    # 4. Legacy layout, used only when nothing above produced a section.
    if not sections:
        overall = v2_result.get('overall_analysis')
        if isinstance(overall, dict):
            paragraphs = _pairs_from_keys(overall, legacy_keys)
            if paragraphs:
                sections.append({
                    'title_en': overall.get('title_en', ''),
                    'title_cn': overall.get('title_cn', ''),
                    'paragraphs': paragraphs
                })
        for system in _as_dicts(v2_result.get('system_analysis')):
            paragraphs = _pairs_from_keys(system, legacy_keys)
            if paragraphs:
                sections.append({
                    'title_en': system.get('title_en', ''),
                    'title_cn': system.get('title_cn', ''),
                    'paragraphs': paragraphs
                })

    return {'sections': sections}
# ============================================================
# 文档填充函数
# ============================================================
def clean_markdown_formatting(text: str) -> str:
    """Strip Markdown emphasis and inline-code markers from *text*.

    Removes ``**bold**``, ``__bold__``, ``*italic*``, ``_italic_`` and
    backtick code markers while keeping the enclosed text. Falsy input
    (``None`` or ``''``) is returned unchanged.

    Single underscores are only treated as emphasis when they are not glued
    to ASCII letters or digits, so identifiers such as ``snake_case_name``
    keep their underscores.
    """
    if not text:
        return text
    text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)
    text = re.sub(r'__([^_]+)__', r'\1', text)
    text = re.sub(r'(?<!\*)\*([^*]+)\*(?!\*)', r'\1', text)
    # Intraword underscores are not emphasis; only ASCII word characters are
    # checked so emphasis embedded in Chinese text is still stripped.
    text = re.sub(r'(?<![A-Za-z0-9_])_([^_]+)_(?![A-Za-z0-9_])', r'\1', text)
    text = re.sub(r'`([^`]+)`', r'\1', text)
    return text
def create_formatted_paragraph_v2(text: str, is_title: bool = False, is_chinese: bool = False):
    """Build a ``w:p`` element holding *text* with no indentation.

    Markdown markers are stripped from *text* first. The paragraph uses
    style id ``'0'`` (Chinese body style) when ``is_chinese`` is true and
    ``'00'`` (English body style) otherwise. Titles are rendered bold.
    Chinese text is black; English text is grey (``767171``) tied to the
    ``background2`` theme colour.

    Returns:
        The new ``w:p`` element (not yet attached to a document).
    """
    def _add_bold(props):
        # Bold for both regular and complex-script text.
        for tag in ('w:b', 'w:bCs'):
            props.append(OxmlElement(tag))

    plain = clean_markdown_formatting(text)

    # Paragraph properties: style, then explicit zero indents that override
    # whatever indentation the style defines.
    style = OxmlElement('w:pStyle')
    style.set(qn('w:val'), '0' if is_chinese else '00')

    indent = OxmlElement('w:ind')
    for attr in ('w:left', 'w:right', 'w:firstLine'):
        indent.set(qn(attr), '0')

    para_props = OxmlElement('w:pPr')
    para_props.append(style)
    para_props.append(indent)
    if is_title:
        mark_props = OxmlElement('w:rPr')
        _add_bold(mark_props)
        para_props.append(mark_props)

    # Run properties: optional bold, then the text colour.
    run_props = OxmlElement('w:rPr')
    if is_title:
        _add_bold(run_props)
    colour = OxmlElement('w:color')
    if is_chinese:
        colour.set(qn('w:val'), '000000')
    else:
        colour.set(qn('w:val'), '767171')
        colour.set(qn('w:themeColor'), 'background2')
        colour.set(qn('w:themeShade'), '80')
    run_props.append(colour)

    text_node = OxmlElement('w:t')
    text_node.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
    text_node.text = plain

    run = OxmlElement('w:r')
    run.append(run_props)
    run.append(text_node)

    paragraph = OxmlElement('w:p')
    paragraph.append(para_props)
    paragraph.append(run)
    return paragraph
def create_empty_paragraph_v2():
    """Build an empty ``w:p`` element (style id ``'00'``, no indentation)."""
    style = OxmlElement('w:pStyle')
    style.set(qn('w:val'), '00')

    # Explicit zero indents on every side.
    indent = OxmlElement('w:ind')
    for attr in ('w:left', 'w:right', 'w:firstLine'):
        indent.set(qn(attr), '0')

    para_props = OxmlElement('w:pPr')
    para_props.append(style)
    para_props.append(indent)

    paragraph = OxmlElement('w:p')
    paragraph.append(para_props)
    return paragraph
def create_section_title_two_lines_v2(title_en: str, title_cn: str):
    """Build the title paragraphs of a section, English first.

    A title that is empty or whitespace-only produces no paragraph, so the
    returned list holds zero, one or two elements.
    """
    candidates = ((title_en, False), (title_cn, True))
    return [
        create_formatted_paragraph_v2(title, is_title=True, is_chinese=chinese)
        for title, chinese in candidates
        if title and title.strip()
    ]
def fill_health_assessment_v2(doc, assessment_result: dict):
    """Write the V2 health assessment into the Word document.

    Steps:
        1. Convert *assessment_result* with ``convert_v2_to_sections_format``.
        2. Find the first body element whose text contains both
           "overall health" and "assessment" (case-insensitive).
        3. Remove the template content between that heading and the next
           region (medical intervention / functional medical health advice),
           keeping ``sectPr`` elements and anything holding a page break.
        4. Insert the generated titles and paragraphs right after the heading.

    Args:
        doc: Word document object; ``doc.element.body`` is modified in place.
        assessment_result: Result of ``generate_health_assessment_v2``.

    Returns:
        ``None``. Nothing is changed when the result is empty, converts to
        no sections, or the heading cannot be found.
    """
    if not assessment_result:
        print(" 健康评估内容为空,跳过填充")
        return

    sections = convert_v2_to_sections_format(assessment_result).get('sections', [])
    if not sections:
        print(" 转换后的sections为空跳过填充")
        return

    w_ns = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    end_keywords = ['medical intervention', '医学干预',
                    'functional medical health advice', '功能医学健康建议']

    def _lower_text(elem) -> str:
        return ''.join(elem.itertext()).strip().lower()

    def _find_heading(elements) -> int:
        # Index of the "Overall Health Assessment" heading, or -1.
        for i, elem in enumerate(elements):
            text = _lower_text(elem)
            if 'overall health' in text and 'assessment' in text:
                return i
        return -1

    body = doc.element.body
    children = list(body)

    overall_start = _find_heading(children)
    if overall_start < 0:
        print(" 未找到Overall Health Assessment位置")
        return
    print(f" 找到Overall Health Assessment位置: {overall_start}")

    # Locate the start of the next major region; default to end of body.
    next_section_pos = len(children)
    for i in range(overall_start + 1, len(children)):
        text = _lower_text(children[i])
        if any(kw in text for kw in end_keywords):
            next_section_pos = i
            print(f" 找到下一区域位置: {i}")
            break

    # Collect the template content between the heading and the next region.
    elements_to_remove = []
    for elem in children[overall_start + 1:next_section_pos]:
        if elem.tag.endswith('}sectPr'):
            continue
        # Keep any element that carries a page break, wherever the break
        # sits inside it, so the document pagination survives.
        if any(br.get(f'{w_ns}type') == 'page' for br in elem.iter(f'{w_ns}br')):
            continue
        elements_to_remove.append(elem)

    for elem in elements_to_remove:
        try:
            body.remove(elem)
        except ValueError:
            # Element is no longer a child of the body; nothing to remove.
            continue
    if elements_to_remove:
        print(f" 已删除 {len(elements_to_remove)} 个模板占位内容")

    # Indices shifted after the removal, so locate the heading again.
    heading_pos = _find_heading(list(body))
    if heading_pos < 0:
        print(" 无法确定插入位置")
        return
    insert_pos = heading_pos + 1

    def _insert(elem):
        nonlocal insert_pos
        body.insert(insert_pos, elem)
        insert_pos += 1

    for idx, section in enumerate(sections):
        # Titles may be null in model output; treat that as "no title".
        title_en = (section.get('title_en') or '').strip()
        title_cn = (section.get('title_cn') or '').strip()
        has_title = bool(title_en or title_cn)

        # A blank separator goes before titled sections only; the overview
        # and the conclusion have no title and get no separator.
        if idx > 0 and has_title:
            _insert(create_empty_paragraph_v2())

        if has_title:
            for title_p in create_section_title_two_lines_v2(title_en, title_cn):
                _insert(title_p)

        # Each paragraph is written as English first, then Chinese.
        for para in section.get('paragraphs', []):
            en_text = para.get('en', '')
            if en_text:
                _insert(create_formatted_paragraph_v2(en_text, is_chinese=False))
            cn_text = para.get('cn', '')
            if cn_text:
                _insert(create_formatted_paragraph_v2(cn_text, is_chinese=True))

    print(f" ✓ 已插入 {len(sections)} 个健康评估小节")
# ============================================================
# 主入口函数
# ============================================================
def generate_and_fill_health_assessment_v2(doc, matched_data: dict, api_key: str, call_deepseek_api):
    """Generate the V2 overall health assessment and write it into *doc*.

    Main entry point replacing ``generate_health_assessment_content`` plus
    ``fill_health_assessment_section``.

    Returns:
        ``None`` when no API key is given; otherwise whatever
        ``generate_health_assessment_v2`` returned (empty on failure).
    """
    if not api_key:
        print(" ⚠️ 未提供DeepSeek API Key跳过健康评估生成")
        return None

    rule = "=" * 60
    print("\n" + rule)
    print("整体健康情况分析 V2")
    print(rule)

    assessment_result = generate_health_assessment_v2(matched_data, api_key, call_deepseek_api)
    if not assessment_result:
        print(" ✗ 健康评估生成失败")
        return assessment_result

    # Content is available: write it into the document.
    print("\n 📝 正在填充健康评估内容...")
    fill_health_assessment_v2(doc, assessment_result)
    print(" ✓ 整体健康情况分析完成")
    return assessment_result
# ============================================================
# 测试函数
# ============================================================
if __name__ == '__main__':
    # Manual smoke test: build a prompt from sample data and preview it.
    test_normal = [
        {'abb': 'WBC', 'name': '白细胞计数', 'result': '6.5', 'unit': '10^9/L', 'reference': '4.0-10.0', 'system': 'Hematology'},
        {'abb': 'RBC', 'name': '红细胞计数', 'result': '4.8', 'unit': '10^12/L', 'reference': '4.0-5.5', 'system': 'Hematology'},
    ]
    # The samples need an explicit flag: with an empty 'point' the prompt
    # labelled both as high although AMH 0.17 lies below its 1.0-10.0 range.
    test_abnormal = [
        {'abb': 'TSH', 'name': '促甲状腺激素', 'result': '16.879', 'unit': 'μIU/mL', 'reference': '0.35-4.94', 'point': 'H', 'system': 'Endocrine'},
        {'abb': 'AMH', 'name': '抗缪勒管激素', 'result': '0.17', 'unit': 'ng/mL', 'reference': '1.0-10.0', 'point': 'L', 'system': 'Endocrine'},
    ]
    test_system_data = {
        'Hematology': {'normal': test_normal, 'abnormal': [], 'borderline': []},
        'Endocrine': {'normal': [], 'abnormal': test_abnormal, 'borderline': []},
        'Immunology': {'normal': [], 'abnormal': [], 'borderline': []},
        'Metabolism': {'normal': [], 'abnormal': [], 'borderline': []},
    }
    prompt = build_assessment_prompt(test_normal, test_abnormal, test_system_data)
    print("=" * 60)
    print("生成的Prompt预览前2000字符:")
    print("=" * 60)
    print(prompt[:2000])
    print("...")