Files
yiliao/backend/health_content_generator.py

1770 lines
70 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
健康内容生成模块
包含生成健康评估和建议的函数
"""
import json
import re
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
# Common medical abbreviations, used to recognise special English tokens.
MEDICAL_ABBS = {
    'HIV', 'WBC', 'RBC', 'HCT', 'MCV', 'MCH', 'MCHC', 'RDW', 'PLT', 'MPV',
    'NEUT', 'LYMPH', 'MONO', 'EOS', 'BAS', 'ESR', 'CRP', 'hs-CRP',
    'TC', 'TG', 'HDL', 'LDL', 'VLDL', 'HbA1C', 'FBS', 'EAG',
    'ALT', 'AST', 'ALP', 'GGT', 'LDH', 'CK', 'CK-MB',
    'BUN', 'Scr', 'UA', 'eGFR', 'Cr',
    'T3', 'T4', 'FT3', 'FT4', 'TSH', 'TgAb', 'TPOAb',
    'IgG', 'IgA', 'IgM', 'IgE', 'C3', 'C4', 'RF', 'ANA', 'ASO',
    'AFP', 'CEA', 'CA125', 'CA19-9', 'CA15-3', 'PSA', 'NSE', 'CYFRA21-1',
    'PT', 'APTT', 'TT', 'INR', 'FIB', 'D-Dimer',
    'Na', 'K', 'Cl', 'Ca', 'Mg', 'P', 'Fe', 'Zn', 'Cu', 'Pb', 'Hg', 'Cd',
    'FSH', 'LH', 'E2', 'PROG', 'PRL', 'DHEAS', 'COR', 'IGF-1', 'AMH',
    'PTH', 'VitD', '25-OH-VD', 'OST', 'TPINP', 'β-CTX',
    'HBsAg', 'HBsAb', 'HBeAg', 'HBeAb', 'HBcAb', 'HCV', 'TRUST', 'TPPA',
    'pH', 'SG', 'PRO', 'GLU', 'KET', 'BIL', 'URO', 'NIT', 'LEU', 'ERY',
    'Hb', 'Fer', 'Hcy', 'ApoB', 'Lp(a)', 'TCO2', 'AG',
}


def is_medical_abb(word):
    """Return True when ``word`` looks like a medical abbreviation.

    Checks, in order:
      1. the token as written (only surrounding whitespace and trailing
         sentence punctuation removed) against ``MEDICAL_ABBS``;
      2. the token with the characters ``,.:;!?()`` removed against
         ``MEDICAL_ABBS``;
      3. a known abbreviation followed by ``%``;
      4. any all-uppercase token of 2 to 6 characters;
      5. letters followed by digits/hyphens (e.g. ``CA19-9``, ``HbA1C``).

    Args:
        word: A single token taken from running text.

    Returns:
        bool: True if any rule matches, False otherwise (including for
        empty or None input).
    """
    if not word:
        return False

    # Look the token up BEFORE deleting parentheses: entries such as 'Lp(a)'
    # contain them and could never match once they are stripped.
    as_written = word.strip()
    if as_written in MEDICAL_ABBS or as_written.strip(',.:;!?') in MEDICAL_ABBS:
        return True

    # Drop punctuation that clings to a token in running text.
    clean_word = re.sub(r'[,.:;!?()]', '', word).strip()
    if not clean_word:
        return False

    # Exact match.
    if clean_word in MEDICAL_ABBS:
        return True

    # Known abbreviation carrying a percent sign, e.g. 'NEUT%'.
    if clean_word.endswith('%') and clean_word[:-1] in MEDICAL_ABBS:
        return True

    # Heuristic: short all-caps tokens are treated as abbreviations.
    if clean_word.isupper() and 2 <= len(clean_word) <= 6:
        return True

    # Heuristic: letters followed by digits/hyphens such as CA19-9 or HbA1C.
    # NOTE(review): this also accepts hyphenated words without any digit
    # (e.g. 'self-made'); left unchanged because callers may rely on it.
    if re.match(r'^[A-Za-z]+[\d-]+[A-Za-z]*\d*$', clean_word):
        return True

    return False
def clean_markdown_formatting(text: str) -> str:
    """Strip inline Markdown emphasis and code markers from ``text``.

    Removed markers (the wrapped text is kept):
      - ``**text**`` and ``__text__`` (bold)
      - ``*text*`` and ``_text_`` (italic)
      - `` `text` `` (inline code)

    Single-marker italics are only stripped when they look like emphasis:
    the wrapped text must not start or end with whitespace, and an
    underscore must not sit between ASCII letters/digits. This keeps
    expressions such as ``2 * 3 * 4`` and identifiers such as
    ``snake_case_name`` intact.

    Args:
        text: Text that may contain Markdown markers. Falsy values
            (``None``, ``""``) are returned unchanged.

    Returns:
        The text with the markers removed.
    """
    if not text:
        return text

    # Bold first, so the single-marker patterns below never see '**' / '__'.
    text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)
    text = re.sub(r'__([^_]+)__', r'\1', text)

    # *italic*: the opening '*' must be followed by, and the closing '*'
    # preceded by, a non-space character, so lone asterisks survive.
    text = re.sub(r'(?<!\*)\*(?!\s)([^*]+?)(?<!\s)\*(?!\*)', r'\1', text)

    # _italic_: same flanking rule, plus no ASCII word character directly
    # outside the markers, so intraword underscores are left alone. The class
    # is ASCII-only on purpose: CJK text is written without spaces, so
    # emphasis embedded in Chinese text is still stripped.
    text = re.sub(
        r'(?<![A-Za-z0-9_])_(?!\s)([^_]+?)(?<!\s)_(?![A-Za-z0-9_])',
        r'\1',
        text,
    )

    # `code`
    text = re.sub(r'`([^`]+)`', r'\1', text)
    return text
def create_formatted_paragraph(text, is_title=False, is_chinese=False):
    """Build a ``w:p`` element holding one run of text, with no indentation.

    The text is first passed through ``clean_markdown_formatting``.

    Formatting applied:
      - paragraph style id ``'0'`` for Chinese text, ``'00'`` otherwise
        (per the original author's notes these are the template's
        Chinese-body and English-body styles);
      - left, right and first-line indents explicitly set to 0;
      - bold (``w:b`` + ``w:bCs``) on both the paragraph mark and the run
        when ``is_title`` is true;
      - run colour ``000000`` for Chinese text, otherwise ``767171`` with
        theme colour ``background2`` / shade ``80``.

    Args:
        text: Paragraph text.
        is_title: Render the paragraph in bold.
        is_chinese: Select the Chinese style and colour instead of the
            English ones.

    Returns:
        The assembled ``w:p`` element.
    """
    def _run_props(bold):
        # A w:rPr that optionally carries the two bold flags.
        props = OxmlElement('w:rPr')
        if bold:
            props.append(OxmlElement('w:b'))
            props.append(OxmlElement('w:bCs'))
        return props

    content = clean_markdown_formatting(text)

    # --- paragraph properties -------------------------------------------
    para_props = OxmlElement('w:pPr')

    style_ref = OxmlElement('w:pStyle')
    style_ref.set(qn('w:val'), '0' if is_chinese else '00')
    para_props.append(style_ref)

    # Zero every indent explicitly so the style's default indent is overridden.
    indent = OxmlElement('w:ind')
    for attr in ('w:left', 'w:right', 'w:firstLine'):
        indent.set(qn(attr), '0')
    para_props.append(indent)

    if is_title:
        para_props.append(_run_props(True))

    paragraph = OxmlElement('w:p')
    paragraph.append(para_props)

    # --- the single text run --------------------------------------------
    run_props = _run_props(is_title)

    font_color = OxmlElement('w:color')
    if is_chinese:
        font_color.set(qn('w:val'), '000000')
    else:
        font_color.set(qn('w:val'), '767171')
        font_color.set(qn('w:themeColor'), 'background2')
        font_color.set(qn('w:themeShade'), '80')
    run_props.append(font_color)

    text_node = OxmlElement('w:t')
    text_node.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
    text_node.text = content

    run = OxmlElement('w:r')
    run.append(run_props)
    run.append(text_node)
    paragraph.append(run)
    return paragraph
def create_empty_paragraph():
    """Return an empty ``w:p`` element used as vertical spacing.

    The paragraph uses style id ``'00'`` and has its left, right and
    first-line indents set to 0; it contains no runs.
    """
    spacer_props = OxmlElement('w:pPr')

    # Same style id as the English body paragraphs.
    style_ref = OxmlElement('w:pStyle')
    style_ref.set(qn('w:val'), '00')
    spacer_props.append(style_ref)

    # Explicitly clear all indentation.
    indent = OxmlElement('w:ind')
    for attr in ('w:left', 'w:right', 'w:firstLine'):
        indent.set(qn(attr), '0')
    spacer_props.append(indent)

    spacer = OxmlElement('w:p')
    spacer.append(spacer_props)
    return spacer
def create_section_title_two_lines(title_en: str, title_cn: str):
    """Build a two-paragraph section heading (English line, then Chinese line).

    Both paragraphs are produced by ``create_formatted_paragraph`` with
    ``is_title=True``; the first uses the English formatting and the second
    the Chinese formatting.

    Args:
        title_en: English heading text.
        title_cn: Chinese heading text.

    Returns:
        list: ``[english_paragraph, chinese_paragraph]``.
    """
    headings = []
    for heading_text, chinese in ((title_en, False), (title_cn, True)):
        headings.append(
            create_formatted_paragraph(heading_text, is_title=True, is_chinese=chinese)
        )
    return headings
def create_section_title_one_line(title_en: str, title_cn: str):
    """Build a single-paragraph heading reading ``"<English> <Chinese>"``.

    The paragraph references style id ``'4-'`` (note: ``'4-'``, not ``'4'``)
    and has its left, right and first-line indents set to 0. The text is
    placed in one run without any run-level formatting.

    Args:
        title_en: English heading text.
        title_cn: Chinese heading text.

    Returns:
        The assembled ``w:p`` element.
    """
    heading_props = OxmlElement('w:pPr')

    style_ref = OxmlElement('w:pStyle')
    style_ref.set(qn('w:val'), '4-')
    heading_props.append(style_ref)

    # Explicitly clear all indentation.
    indent = OxmlElement('w:ind')
    for attr in ('w:left', 'w:right', 'w:firstLine'):
        indent.set(qn(attr), '0')
    heading_props.append(indent)

    heading = OxmlElement('w:p')
    heading.append(heading_props)

    text_node = OxmlElement('w:t')
    text_node.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
    text_node.text = f"{title_en} {title_cn}"

    run = OxmlElement('w:r')
    run.append(text_node)
    heading.append(run)
    return heading
def is_qualitative_result_normal(result: str, reference: str) -> bool:
    """Tell whether a qualitative result agrees with its reference value.

    Both strings are lower-cased, trimmed, and have hyphens and spaces
    removed before comparison. They agree when the normalised forms are
    equal, or when both belong to the same synonym group
    (nonreactive / negative+neg / positive+pos).

    Args:
        result: Reported result text.
        reference: Reference text.

    Returns:
        bool: True when the two agree; False otherwise, and always False
        when either argument is empty.
    """
    if not (result and reference):
        return False

    def _normalise(value):
        return value.lower().strip().replace('-', '').replace(' ', '')

    observed = _normalise(result)
    expected = _normalise(reference)
    if observed == expected:
        return True

    synonym_groups = (
        ('nonreactive', 'non reactive'),
        ('negative', 'neg'),
        ('positive', 'pos'),
    )
    for group in synonym_groups:
        forms = {_normalise(variant) for variant in group}
        if observed in forms and expected in forms:
            return True
    return False
def collect_all_abnormal_items(matched_data: dict, api_key: str = None) -> list:
    """Collect every entry of ``matched_data`` that is flagged as abnormal.

    An entry is kept when its ``point`` field holds a high/low marker and
    ``is_qualitative_result_normal(result, reference)`` is false.

    The display name is resolved in this order: ``project_cn`` from the
    abbreviation config, ``project_cn`` from the entry, a translation
    obtained through ``translate_project_name_to_chinese`` (only when
    ``api_key`` is given), and finally the English project name or the
    abbreviation itself.

    Args:
        matched_data: Mapping of key -> dict with (optional) string fields
            ``point``, ``result``, ``reference``, ``abb``, ``project``,
            ``project_cn``, ``unit`` and ``module``.
        api_key: Key forwarded to the translation helper; when omitted no
            translation is attempted.

    Returns:
        list[dict]: One dict per abnormal entry with the keys ``abb``,
        ``name``, ``result``, ``point``, ``reference``, ``unit``, ``module``.
    """
    # Project-local import kept at call time, as in the original code.
    from config import load_abb_config, normalize_abb

    abb_config = load_abb_config()
    abb_to_info = abb_config.get('abb_to_info', {})

    # NOTE(review): in the reviewed copy of this file the non-ASCII markers
    # had been stripped to '' (which made every unflagged entry count as
    # abnormal). The arrow / full-width variants below are a reconstruction;
    # confirm them against the values the extractor actually emits.
    abnormal_markers = ('↑', '↓', 'H', 'L', '⬆', '⬇', 'H', 'L')

    abnormal_items = []
    for key, data in matched_data.items():
        # `or ''` also covers fields that are present but None.
        point = (data.get('point') or '').strip()
        result = (data.get('result') or '').strip()
        reference = (data.get('reference') or '').strip()

        if point not in abnormal_markers:
            continue
        if is_qualitative_result_normal(result, reference):
            continue

        # Prefer the entry's own abbreviation, fall back to the dict key.
        abb = data.get('abb') or key

        # Try progressively looser keys into the abbreviation config.
        normalized_abb = normalize_abb(abb, abb_config)
        info = {}
        for candidate in (normalized_abb, abb, normalized_abb.upper(), abb.upper()):
            found = abb_to_info.get(candidate)
            if found:
                info = found
                break

        # Chinese name from the config wins over the one in the entry.
        name = info.get('project_cn') or data.get('project_cn')
        if not name:
            english_name = info.get('project') or data.get('project', abb)
            if api_key:
                from extract_and_fill_report import translate_project_name_to_chinese
                # Fall back to the English name if the translation is empty.
                name = (
                    translate_project_name_to_chinese(abb, english_name, api_key)
                    or english_name
                )
            else:
                name = english_name

        abnormal_items.append({
            'abb': abb,
            'name': name,
            'result': result,
            'point': point,
            'reference': reference,
            'unit': data.get('unit', ''),
            'module': data.get('module', ''),
        })
    return abnormal_items
def clean_reference_range(reference: str) -> str:
    """Normalise a reference-range string for display.

    Steps:
      1. Trim whitespace and remove one pair of enclosing brackets
         (``()``, full-width ``()`` or ``[]``), then any remaining lone
         leading/trailing round bracket.
      2. Rewrite an upper-bound-only range (``<X``, ``≤X`` or ``<=X``) as
         ``0-X``.

    Args:
        reference: Raw reference text. Falsy values are returned unchanged.

    Returns:
        The cleaned reference text.
    """
    if not reference:
        return reference

    ref = reference.strip()

    # One matched pair of enclosing brackets. The full-width pair must be
    # spelled out: testing startswith('') is always true and would chop the
    # first and last character off every value.
    for opener, closer in (('(', ')'), ('(', ')'), ('[', ']')):
        if ref.startswith(opener) and ref.endswith(closer):
            ref = ref[1:-1]
            break

    # Unbalanced leftovers: a lone opening or closing round bracket.
    if ref.startswith('('):
        ref = ref[1:]
    if ref.endswith(')'):
        ref = ref[:-1]
    if ref.startswith('('):
        ref = ref[1:]
    if ref.endswith(')'):
        ref = ref[:-1]
    ref = ref.strip()

    # "<X", "≤X" or "<=X"  ->  "0-X"
    upper_only = re.match(r'^(?:<=|[<≤])\s*([\d\.]+)\s*$', ref)
    if upper_only:
        ref = f"0-{upper_only.group(1)}"

    return ref.strip()
def create_abnormal_item_table_xml(item: dict, idx: int, include_header: bool = False, clinical_en: str = None, clinical_cn: str = None):
    """Build the ``w:tbl`` element for one abnormal item.

    Layout (six columns: Abb | Project | Result | Point | Refer | Unit):
      - optional bilingual header row when ``include_header`` is true;
      - one data row with the item's values; the Point cell shows an up or
        down arrow.

    No clinical-significance row is produced: the summary table only shows
    the item name and its data.

    Args:
        item: Abnormal item with the keys ``abb``, ``name``, ``result``,
            ``point``, ``reference`` and ``unit``.
        idx: Position of the item in the list. Currently unused; kept for
            interface compatibility.
        include_header: Emit the header row before the data row.
        clinical_en: Currently unused; kept for interface compatibility.
        clinical_cn: Currently unused; kept for interface compatibility.

    Returns:
        The assembled ``w:tbl`` element.
    """
    # Column widths in twips (dxa); they add up to the table width 9638.
    col_widths = [1417, 2268, 1417, 1417, 1701, 1418]

    tbl = OxmlElement('w:tbl')

    # --- table properties -----------------------------------------------
    tblPr = OxmlElement('w:tblPr')
    tblW = OxmlElement('w:tblW')
    tblW.set(qn('w:w'), '9638')
    tblW.set(qn('w:type'), 'dxa')
    tblPr.append(tblW)

    # Centre the table on the page.
    tblJc = OxmlElement('w:jc')
    tblJc.set(qn('w:val'), 'center')
    tblPr.append(tblJc)

    # Borders: solid black on top, dashed grey everywhere else.
    tblBorders = OxmlElement('w:tblBorders')
    for border_name in ['top', 'left', 'bottom', 'right', 'insideH', 'insideV']:
        border = OxmlElement(f'w:{border_name}')
        if border_name == 'top':
            border.set(qn('w:val'), 'single')
            border.set(qn('w:sz'), '4')
            border.set(qn('w:color'), '000000')
        else:
            border.set(qn('w:val'), 'dashed')
            border.set(qn('w:sz'), '4')
            border.set(qn('w:color'), 'AAAAAA')
        tblBorders.append(border)
    tblPr.append(tblBorders)
    tbl.append(tblPr)

    # --- column grid ----------------------------------------------------
    tblGrid = OxmlElement('w:tblGrid')
    for width in col_widths:
        gridCol = OxmlElement('w:gridCol')
        gridCol.set(qn('w:w'), str(width))
        tblGrid.append(gridCol)
    tbl.append(tblGrid)

    def _text_run(text, bold):
        """Create one run: Times New Roman / 宋体, size 21 half-points (10.5pt)."""
        r = OxmlElement('w:r')
        rPr = OxmlElement('w:rPr')
        if bold:
            rPr.append(OxmlElement('w:b'))
        rFonts = OxmlElement('w:rFonts')
        rFonts.set(qn('w:ascii'), 'Times New Roman')
        rFonts.set(qn('w:eastAsia'), '宋体')
        rPr.append(rFonts)
        sz = OxmlElement('w:sz')
        sz.set(qn('w:val'), '21')
        rPr.append(sz)
        r.append(rPr)
        t = OxmlElement('w:t')
        t.text = text
        r.append(t)
        return r

    def create_cell_with_lines(lines, bold=False, merge_cols=0, col_width=None):
        """Create a centred cell; a list value becomes several lines."""
        tc = OxmlElement('w:tc')
        tcPr = OxmlElement('w:tcPr')
        if col_width:
            tcW = OxmlElement('w:tcW')
            tcW.set(qn('w:w'), str(col_width))
            tcW.set(qn('w:type'), 'dxa')
            tcPr.append(tcW)
        if merge_cols > 0:
            gridSpan = OxmlElement('w:gridSpan')
            gridSpan.set(qn('w:val'), str(merge_cols))
            tcPr.append(gridSpan)
        # Vertical centring.
        vAlign = OxmlElement('w:vAlign')
        vAlign.set(qn('w:val'), 'center')
        tcPr.append(vAlign)
        tc.append(tcPr)

        # Horizontally centred paragraph.
        p = OxmlElement('w:p')
        pPr = OxmlElement('w:pPr')
        jc = OxmlElement('w:jc')
        jc.set(qn('w:val'), 'center')
        pPr.append(jc)
        p.append(pPr)

        if isinstance(lines, list):
            last = len(lines) - 1
            for i, line in enumerate(lines):
                p.append(_text_run(str(line), bold))
                # Line break between lines, none after the last one.
                if i < last:
                    r_br = OxmlElement('w:r')
                    r_br.append(OxmlElement('w:br'))
                    p.append(r_br)
        else:
            p.append(_text_run(str(lines) if lines else '', bold))

        tc.append(p)
        return tc

    def create_row(cells_data):
        """Create a row; each cell is plain text or a (text, bold, merge) tuple."""
        tr = OxmlElement('w:tr')
        for i, cell_data in enumerate(cells_data):
            col_w = col_widths[i] if i < len(col_widths) else None
            if isinstance(cell_data, tuple):
                text, bold, merge = cell_data
                tr.append(create_cell_with_lines(text, bold, merge, col_w))
            else:
                tr.append(create_cell_with_lines(cell_data, col_width=col_w))
        return tr

    # Optional header row; each cell has an English and a Chinese line.
    if include_header:
        tbl.append(create_row([
            (['Abb', '简称'], True, 0),
            (['Project', '项目'], True, 0),
            (['Result', '结果'], True, 0),
            (['Point', '提示'], True, 0),
            (['Refer', '参考'], True, 0),
            (['Unit', '单位'], True, 0),
        ]))

    # NOTE(review): in the reviewed copy of this file the arrow literals had
    # been stripped to '', leaving the Point column blank for every item.
    # The markers and arrows below are a reconstruction; confirm them against
    # the report template.
    high_markers = ('↑', 'H', '⬆', 'H')
    status = '↑' if item.get('point', '') in high_markers else '↓'

    # Tidy the reference range (brackets removed, "<X" shown as "0-X").
    reference = clean_reference_range(item.get('reference', ''))

    tbl.append(create_row([
        (item.get('abb', ''), True, 0),
        (item.get('name', ''), True, 0),
        (item.get('result', ''), False, 0),
        (status, False, 0),
        (reference, False, 0),
        (item.get('unit', ''), False, 0),
    ]))
    return tbl
def fill_abnormal_index_summary(doc, abnormal_items: list, item_explanations: dict = None):
    """Replace the template's "Abnormal Index" placeholder with real tables.

    Steps:
      1. Locate the first body element whose text contains
         "abnormal index" / "异常指标汇总" (the section title).
      2. Remove every table and paragraph between that title and the
         "Overall Health Assessment" element.
      3. Insert one table per abnormal item (only the first one carries the
         header row), each followed by an empty paragraph, and finish with
         a page-break paragraph.

    Nothing is changed when ``abnormal_items`` is empty or the title cannot
    be found.

    Args:
        doc: python-docx document object (``doc.element.body`` is edited
            in place).
        abnormal_items: Items as produced by ``collect_all_abnormal_items``.
        item_explanations: Optional mapping
            ``{ABB: {'clinical_en': ..., 'clinical_cn': ...}}`` forwarded to
            the table builder.
    """
    if not abnormal_items:
        print(" 没有异常项目,跳过异常指标汇总")
        return
    if item_explanations is None:
        item_explanations = {}

    body = doc.element.body
    children = list(body)

    def _element_text(elem):
        return ''.join(elem.itertext()).strip().lower()

    def _is_overall_title(elem):
        text = _element_text(elem)
        return 'overall health' in text and 'assessment' in text

    # --- 1. section title -------------------------------------------------
    abnormal_index_pos = -1
    for i, elem in enumerate(children):
        text = _element_text(elem)
        if 'abnormal index' in text or '异常指标汇总' in text:
            abnormal_index_pos = i
            print(f" 找到异常指标汇总标题位置: {i}")
            break
    if abnormal_index_pos < 0:
        print(" 未找到Abnormal Index异常指标汇总位置")
        return

    # --- 2. end boundary --------------------------------------------------
    # Prefer the first match AFTER the title, so an earlier mention (e.g. a
    # table of contents) cannot produce an empty range. If the only match is
    # before the title the range stays empty, as before.
    overall_health_pos = -1
    for i in range(abnormal_index_pos + 1, len(children)):
        if _is_overall_title(children[i]):
            overall_health_pos = i
            break
    if overall_health_pos < 0:
        for i in range(abnormal_index_pos):
            if _is_overall_title(children[i]):
                overall_health_pos = i
                break
    if overall_health_pos < 0:
        # No boundary anywhere: fall back to a fixed window after the title.
        overall_health_pos = abnormal_index_pos + 50

    # --- 3. remove placeholder tables and paragraphs ------------------------
    stop = min(overall_health_pos, len(children))
    elements_to_remove = [
        elem for elem in children[abnormal_index_pos + 1:stop]
        if elem.tag.endswith('}tbl') or elem.tag.endswith('}p')
    ]
    for elem in reversed(elements_to_remove):
        try:
            body.remove(elem)
        except ValueError:
            # Element is no longer a direct child of the body; nothing to do.
            continue
    if elements_to_remove:
        print(f" 已删除 {len(elements_to_remove)} 个占位符表格")

    # Only elements after the title were removed, so its index is unchanged.
    insert_pos = abnormal_index_pos + 1

    # --- 4. insert one table per item ---------------------------------------
    for idx, item in enumerate(abnormal_items):
        explanation = item_explanations.get((item.get('abb') or '').upper(), {})
        tbl = create_abnormal_item_table_xml(
            item,
            idx,
            idx == 0,  # only the first table carries the header row
            explanation.get('clinical_en', ''),
            explanation.get('clinical_cn', ''),
        )
        body.insert(insert_pos, tbl)
        insert_pos += 1
        # Empty paragraph separating consecutive tables.
        body.insert(insert_pos, OxmlElement('w:p'))
        insert_pos += 1

    # Page break so that the following section starts on a new page.
    page_break_p = OxmlElement('w:p')
    page_break_r = OxmlElement('w:r')
    page_break_br = OxmlElement('w:br')
    page_break_br.set(qn('w:type'), 'page')
    page_break_r.append(page_break_br)
    page_break_p.append(page_break_r)
    body.insert(insert_pos, page_break_p)

    print(f" 已插入异常指标汇总表格,共 {len(abnormal_items)} 个异常项")
def generate_health_assessment_content(abnormal_items: list, api_key: str, call_deepseek_api) -> dict:
    """Ask the LLM for the "overall health assessment" text.

    The abnormal items are grouped by module and embedded in a prompt that
    requests a JSON document of the form
    ``{"sections": [{"title_en", "title_cn", "paragraphs": [{"en", "cn"}]}]}``.
    The request is attempted up to three times.

    Args:
        abnormal_items: Dicts with the keys ``name``, ``abb``, ``result``,
            ``unit``, ``point``, ``reference`` and ``module``.
        api_key: API key passed through to ``call_deepseek_api``.
        call_deepseek_api: Callable invoked as
            ``call_deepseek_api(prompt, api_key, max_tokens=..., timeout=...)``;
            it returns the response text, or None on failure.

    Returns:
        dict: The parsed response (with a non-empty ``sections`` list), or
        ``{}`` when there is nothing to do or every attempt failed.
    """
    import time

    if not api_key or not abnormal_items:
        return {}

    # NOTE(review): in the reviewed copy of this file the non-ASCII "high"
    # markers had been stripped to ''. The arrow / full-width variants are a
    # reconstruction; confirm against the values the extractor emits.
    high_markers = ('↑', 'H', '⬆', 'H')

    # Group the abnormal items by module. `or` (not a .get default) is needed
    # because the items carry an empty-string module when none is known.
    module_items = {}
    for item in abnormal_items:
        module = item.get('module') or '其他'
        direction = '偏高/High' if item.get('point', '') in high_markers else '偏低/Low'
        ref_info = f",参考范围: {item['reference']}" if item.get('reference') else ""
        module_items.setdefault(module, []).append(
            f" - {item.get('name', '')} ({item.get('abb', '')}): {item.get('result', '')} {item.get('unit', '')} ({direction}){ref_info}"
        )

    # Flatten into "module header + its items" lines for the prompt.
    abnormal_desc = []
    for module, items in module_items.items():
        abnormal_desc.append(f"{module}")
        abnormal_desc.extend(items)

    # The prompt is runtime text: keep it byte-for-byte, flush left.
    prompt = f"""# 角色设定
你是一位经验丰富的医疗专家,在大健康、抗衰老行业已经深耕多年,是全世界知名的功能医学专家、全科专家、荷尔蒙专家,在梅奥诊所等多家欧美私立医疗机构任职医疗院长。
# 任务
根据以下患者的血液检查报告异常指标,撰写"整体健康情况"分析报告。
# 异常指标汇总
{chr(10).join(abnormal_desc)}
# 写作要求
## 1. 整体结构
报告分为两部分:
- **第一部分**整体健康情况概述约150字中英文各150字
- **第二部分**:四个专项板块的详细分析
## 2. 整体健康情况概述第一个section
- 标题Overall Health Overview / 整体健康概述
- 内容:从功能医学角度,综合评估患者的整体健康状态
- 字数英文约150词中文约150字
- 要点:概括主要发现、整体健康水平、需要关注的重点领域
## 3. 四个专项板块
必须严格按照以下顺序:
- (I) Hematology and Inflammatory Status / (一)血液学与炎症状态
- (II) Hormonal and Endocrine Regulation / (二)荷尔蒙与内分泌调节
- (III) Immunology and Infection Risk / (三)免疫学与感染风险
- (IV) Nutrition and Metabolic Profile / (四)营养与代谢状况
## 4. 每个专项板块的内容结构(总分结构)
每个板块包含2-3个段落
- **第1段总述**该系统的整体健康状态评估约100字
- **第2-3段分述**:具体异常指标的详细分析,必须包含:
- 指标名称和具体数值
- 参考范围对比
- 临床意义解读
- 可能的原因分析
- 每段约100字
## 5. 写作风格
- 从功能医学与整体健康角度进行专业分析
- 不要遗漏任何一个异常指标
- 语言专业但易于理解
- 体现功能医学"未病先防"的理念
- 每段英文和中文内容要对应
## 6. 重要提示
- 如果某个板块没有相关异常指标,仍需撰写该板块,说明该系统指标正常
- 必须在分析中引用具体的检测数值和参考范围
# 输出格式JSON
```json
{{
"sections": [
{{
"title_en": "Overall Health Overview",
"title_cn": "整体健康概述",
"paragraphs": [
{{"en": "约150词的整体健康概述...", "cn": "约150字的整体健康概述..."}}
]
}},
{{
"title_en": "(I) Hematology and Inflammatory Status",
"title_cn": "(一)血液学与炎症状态",
"paragraphs": [
{{"en": "总述该系统整体状态约100词...", "cn": "总述该系统整体状态约100字..."}},
{{"en": "分述具体指标分析包含数值和参考范围约100词...", "cn": "分述具体指标分析包含数值和参考范围约100字..."}}
]
}},
{{
"title_en": "(II) Hormonal and Endocrine Regulation",
"title_cn": "(二)荷尔蒙与内分泌调节",
"paragraphs": [
{{"en": "总述...", "cn": "总述..."}},
{{"en": "分述...", "cn": "分述..."}}
]
}},
{{
"title_en": "(III) Immunology and Infection Risk",
"title_cn": "(三)免疫学与感染风险",
"paragraphs": [...]
}},
{{
"title_en": "(IV) Nutrition and Metabolic Profile",
"title_cn": "(四)营养与代谢状况",
"paragraphs": [...]
}}
]
}}
```
只返回JSON不要其他内容。"""

    def close_truncated_json(text):
        """Close a cut-off JSON text: finish an open string, then append the
        missing '}' / ']' in the reverse order of their openers."""
        closers = []
        in_string = False
        escaped = False
        for ch in text:
            if in_string:
                if escaped:
                    escaped = False
                elif ch == '\\':
                    escaped = True
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == '{':
                closers.append('}')
            elif ch == '[':
                closers.append(']')
            elif ch in '}]' and closers:
                closers.pop()
        if in_string:
            if escaped:
                # Drop a dangling backslash so it cannot escape the new quote.
                text = text[:-1]
            text += '"'
        return text + ''.join(reversed(closers))

    def parse_json_response(response_text):
        """Parse the model reply, tolerating code fences and truncation."""
        # Keep only the fenced JSON part when fences are present.
        if '```json' in response_text:
            response_text = response_text.split('```json')[1].split('```')[0]
        elif '```' in response_text:
            response_text = response_text.split('```')[1].split('```')[0]
        response_text = response_text.strip()

        # 1. Parse as-is.
        try:
            return json.loads(response_text)
        except json.JSONDecodeError:
            pass

        # 2. Repair a truncated document and try again.
        repaired = close_truncated_json(response_text)
        try:
            return json.loads(repaired)
        except json.JSONDecodeError:
            pass

        # 3. Last resort: pull out every section whose first paragraph is intact.
        section_pattern = r'\{\s*"title_en"\s*:\s*"([^"]+)"\s*,\s*"title_cn"\s*:\s*"([^"]+)"\s*,\s*"paragraphs"\s*:\s*\[\s*\{\s*"en"\s*:\s*"([^"]*(?:[^"\\]|\\.)*?)"\s*,\s*"cn"\s*:\s*"([^"]*(?:[^"\\]|\\.)*?)"\s*\}'
        sections = [
            {
                "title_en": match[0],
                "title_cn": match[1],
                "paragraphs": [{"en": match[2], "cn": match[3]}],
            }
            for match in re.findall(section_pattern, repaired, re.DOTALL)
        ]
        if sections:
            return {"sections": sections}
        return None

    # Up to three attempts; progress messages are only printed when a retry follows.
    max_attempts = 3
    for attempt in range(max_attempts):
        will_retry = attempt < max_attempts - 1
        try:
            # Long text: generous token budget and timeout.
            response = call_deepseek_api(prompt, api_key, max_tokens=4000, timeout=180)
            if response is None:
                # The API helper signals a timeout/error with None.
                if will_retry:
                    print(f" ⚠️ 第{attempt+1}次API请求失败超时或错误重试中...")
                    time.sleep(3)
                continue
            result = parse_json_response(response)
            if isinstance(result, dict) and result.get('sections'):
                print(f" ✓ 成功生成 {len(result['sections'])} 个健康评估板块")
                return result
            if will_retry:
                print(f" ⚠️ 第{attempt+1}次生成格式不完整,重试中...")
        except Exception as e:
            # Best-effort boundary around the injected API callable.
            if will_retry:
                print(f" ⚠️ 第{attempt+1}次生成失败: {e},重试中...")

    print(f" 生成健康评估失败: 多次尝试后仍无法获取有效响应")
    return {}
def generate_functional_health_advice(abnormal_items: list, api_key: str, call_deepseek_api) -> dict:
    """Ask the LLM for the "functional medicine health advice" text.

    The abnormal items are grouped by module and embedded in a prompt that
    requests five sections, each with ``overview``, ``analysis``,
    ``recommendations`` and ``summary``. The request is attempted up to
    three times.

    Args:
        abnormal_items: Dicts with the keys ``name``, ``abb``, ``result``,
            ``unit``, ``point``, ``reference`` and ``module``.
        api_key: API key passed through to ``call_deepseek_api``.
        call_deepseek_api: Callable invoked as
            ``call_deepseek_api(prompt, api_key, max_tokens=..., timeout=...)``;
            it returns the response text, or None on failure.

    Returns:
        dict: The parsed response (with a non-empty ``sections`` list), or
        ``{}`` when there is nothing to do or every attempt failed.
    """
    import time

    if not api_key or not abnormal_items:
        return {}

    # NOTE(review): in the reviewed copy of this file the non-ASCII "high"
    # markers had been stripped to ''. The arrow / full-width variants are a
    # reconstruction; confirm against the values the extractor emits.
    high_markers = ('↑', 'H', '⬆', 'H')

    # Group the abnormal items by module. `or` (not a .get default) is needed
    # because the items carry an empty-string module when none is known.
    module_items = {}
    for item in abnormal_items:
        module = item.get('module') or '其他'
        direction = '偏高/High' if item.get('point', '') in high_markers else '偏低/Low'
        ref_info = f",参考范围: {item['reference']}" if item.get('reference') else ""
        module_items.setdefault(module, []).append(
            f" - {item.get('name', '')} ({item.get('abb', '')}): {item.get('result', '')} {item.get('unit', '')} ({direction}){ref_info}"
        )

    # Flatten into "module header + its items" lines for the prompt.
    abnormal_desc = []
    for module, items in module_items.items():
        abnormal_desc.append(f"{module}")
        abnormal_desc.extend(items)

    # The prompt is runtime text: keep it byte-for-byte, flush left.
    prompt = f"""# 角色设定
你是Be.U Med功能医学团队的资深健康管理顾问在功能医学、营养医学、运动医学、睡眠医学及生活方式干预领域具有丰富的临床经验。
# 任务
根据以下患者的血液检查报告异常指标,撰写"功能医学健康建议"方案。该方案位于「医学干预」建议方案之后,侧重于日常可执行的健康管理策略。
# 异常指标汇总
{chr(10).join(abnormal_desc)}
# 核心原则
- 全篇禁止出现任何检验指标的具体数值、参考区间、单位、百分号或数字0-9
- 可以提及指标名称(如"黄体酮偏低""ESR升高"),但不要写具体数值
- 每个段落必须先写英文,再写对应的中文,不要混排
- 严禁使用不确定表述may, might, could, 可能, 也许, 似乎等)
- 使用肯定表述:建议、支持、助力、优化、需要、应当
# 五个模块(必须严格按顺序)
1. Nutrition Intervention 营养干预
2. Exercise Intervention 运动干预
3. Sleep & Stress Management 睡眠与压力管理
4. Lifestyle Adjustment 生活方式调整
5. Long-term Follow-up Plan 长期随访计划
# 每个模块的内容结构
- overview: 领域概述英文约100词中文约120字—— 该领域在功能医学中的重要性
- analysis: 检测关联分析英文约80词中文约100字—— 结合异常指标说明干预必要性(不写数值)
- recommendations: 3-5条具体建议每条包含英文30-50词和中文50-80字要具体可执行
- summary: 总结意义英文约80词中文约100字—— 该干预的整体价值和协同作用
# 输出格式JSON
```json
{{
"sections": [
{{
"title_en": "Nutrition Intervention",
"title_cn": "营养干预",
"overview": {{
"en": "英文领域概述约100词...",
"cn": "中文领域概述约120字..."
}},
"analysis": {{
"en": "英文检测关联分析约80词不写数值...",
"cn": "中文检测关联分析约100字..."
}},
"recommendations": [
{{"en": "Supplementation of B vitamins, folate, and iron to support hematopoiesis and cellular energy production.", "cn": "补充维生素B族、叶酸和铁以支持造血功能和细胞能量产生"}},
{{"en": "Adequate protein and healthy fats with cofactors to support hormonal balance.", "cn": "摄入足够的优质蛋白和健康脂肪(并配合锌、硒、镁等辅因子),以维持荷尔蒙平衡;"}},
{{"en": "Anti-inflammatory nutrients (omega-3, vitamins C/E, polyphenols) to reduce inflammation.", "cn": "用抗炎营养素(如ω-3、维生素C/E、多酚类降低炎症保护肠道健康。"}}
],
"summary": {{
"en": "英文总结意义约80词...",
"cn": "中文总结意义约100字..."
}}
}},
{{
"title_en": "Exercise Intervention",
"title_cn": "运动干预",
"overview": {{"en": "...", "cn": "..."}},
"analysis": {{"en": "...", "cn": "..."}},
"recommendations": [...],
"summary": {{"en": "...", "cn": "..."}}
}},
{{
"title_en": "Sleep & Stress Management",
"title_cn": "睡眠与压力管理",
"overview": {{"en": "...", "cn": "..."}},
"analysis": {{"en": "...", "cn": "..."}},
"recommendations": [...],
"summary": {{"en": "...", "cn": "..."}}
}},
{{
"title_en": "Lifestyle Adjustment",
"title_cn": "生活方式调整",
"overview": {{"en": "...", "cn": "..."}},
"analysis": {{"en": "...", "cn": "..."}},
"recommendations": [...],
"summary": {{"en": "...", "cn": "..."}}
}},
{{
"title_en": "Long-term Follow-up Plan",
"title_cn": "长期随访计划",
"overview": {{"en": "...", "cn": "..."}},
"analysis": {{"en": "...", "cn": "..."}},
"recommendations": [...],
"summary": {{"en": "...", "cn": "..."}}
}}
]
}}
```
只返回JSON不要其他内容。请确保每个板块都有完整的内容结构。"""

    def close_truncated_json(text):
        """Close a cut-off JSON text: finish an open string, then append the
        missing '}' / ']' in the reverse order of their openers."""
        closers = []
        in_string = False
        escaped = False
        for ch in text:
            if in_string:
                if escaped:
                    escaped = False
                elif ch == '\\':
                    escaped = True
                elif ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == '{':
                closers.append('}')
            elif ch == '[':
                closers.append(']')
            elif ch in '}]' and closers:
                closers.pop()
        if in_string:
            if escaped:
                # Drop a dangling backslash so it cannot escape the new quote.
                text = text[:-1]
            text += '"'
        return text + ''.join(reversed(closers))

    def parse_json_response(response_text):
        """Parse the model reply, tolerating code fences and truncation."""
        # Keep only the fenced JSON part when fences are present.
        if '```json' in response_text:
            response_text = response_text.split('```json')[1].split('```')[0]
        elif '```' in response_text:
            response_text = response_text.split('```')[1].split('```')[0]
        response_text = response_text.strip()

        # 1. Parse as-is.
        try:
            return json.loads(response_text)
        except json.JSONDecodeError:
            pass

        # 2. Repair a truncated document and try again.
        repaired = close_truncated_json(response_text)
        try:
            return json.loads(repaired)
        except json.JSONDecodeError:
            pass

        # 3. Last resort: pull out sections written in the older
        #    "paragraphs" layout whose first paragraph is intact.
        section_pattern = r'\{\s*"title_en"\s*:\s*"([^"]+)"\s*,\s*"title_cn"\s*:\s*"([^"]+)"\s*,\s*"paragraphs"\s*:\s*\[\s*\{\s*"en"\s*:\s*"([^"]*(?:[^"\\]|\\.)*?)"\s*,\s*"cn"\s*:\s*"([^"]*(?:[^"\\]|\\.)*?)"\s*\}'
        sections = [
            {
                "title_en": match[0],
                "title_cn": match[1],
                "paragraphs": [{"en": match[2], "cn": match[3]}],
            }
            for match in re.findall(section_pattern, repaired, re.DOTALL)
        ]
        if sections:
            return {"sections": sections}
        return None

    # Up to three attempts; progress messages are only printed when a retry follows.
    max_attempts = 3
    for attempt in range(max_attempts):
        will_retry = attempt < max_attempts - 1
        try:
            # The reply is long, hence the 8000-token budget and 300 s timeout.
            response = call_deepseek_api(prompt, api_key, max_tokens=8000, timeout=300)
            if response is None:
                # The API helper signals a timeout/error with None.
                if will_retry:
                    print(f" ⚠️ 第{attempt+1}次API请求失败超时或错误重试中...")
                    time.sleep(3)
                continue
            result = parse_json_response(response)
            if isinstance(result, dict) and result.get('sections'):
                # Count content items; handles both the new ("recommendations")
                # and the old ("paragraphs") section layout.
                total_items = sum(
                    len(s.get('recommendations', [])) or len(s.get('paragraphs', []))
                    for s in result['sections']
                )
                print(f" ✓ 成功生成 {len(result['sections'])} 个功能医学建议板块,共 {total_items} 个内容项")
                return result
            if will_retry:
                print(f" ⚠️ 第{attempt+1}次生成格式不完整,重试中...")
        except Exception as e:
            # Best-effort boundary around the injected API callable.
            if will_retry:
                print(f" ⚠️ 第{attempt+1}次生成失败: {e},重试中...")

    print(f" 生成功能医学健康建议失败: 多次尝试后仍无法获取有效响应")
    return {}
def fill_health_assessment_section(doc, assessment_content: dict):
    """Write the generated assessment into the "Overall Health Assessment" area.

    Steps:
      1. Locate the first body element whose text contains both
         "overall health" and "assessment" (the area title).
      2. Remove every body element (except ``sectPr``) between that title
         and the next major area ("Medical Intervention" /
         "Functional Medical Health Advice"), or up to the end of the body
         when no such area exists. Page breaks in that range are removed
         as well.
      3. Insert, right after the title, each generated section: a two-line
         heading followed by its English/Chinese paragraph pairs. Every
         section except the first is preceded by an empty paragraph.

    Nothing is changed when the content has no sections or the title cannot
    be found.

    Args:
        doc: python-docx document object (``doc.element.body`` is edited
            in place).
        assessment_content: Dict with a ``sections`` list; each section has
            ``title_en``, ``title_cn`` and ``paragraphs``
            (a list of ``{'en': ..., 'cn': ...}``).
    """
    sections = (assessment_content or {}).get('sections', [])
    if not sections:
        print(" 健康评估内容为空,跳过填充")
        return

    body = doc.element.body
    children = list(body)

    def _element_text(elem):
        return ''.join(elem.itertext()).strip().lower()

    # --- 1. area title ------------------------------------------------------
    overall_start = -1
    for i, elem in enumerate(children):
        text = _element_text(elem)
        if 'overall health' in text and 'assessment' in text:
            overall_start = i
            print(f" 找到Overall Health Assessment位置: {i}")
            break
    if overall_start < 0:
        print(" 未找到Overall Health Assessment位置")
        return

    # --- 2. end boundary: the next major area ---------------------------------
    end_keywords = ['medical intervention', '医学干预',
                    'functional medical health advice', '功能医学健康建议']
    next_section_pos = len(children)
    for i in range(overall_start + 1, len(children)):
        text = _element_text(children[i])
        if any(kw in text for kw in end_keywords):
            next_section_pos = i
            print(f" 找到下一区域位置: {i}")
            break

    # --- 3. drop the template content in between ------------------------------
    elements_to_remove = [
        elem for elem in children[overall_start + 1:next_section_pos]
        if not elem.tag.endswith('}sectPr')
    ]
    for elem in elements_to_remove:
        try:
            body.remove(elem)
        except ValueError:
            # Element is no longer a direct child of the body; nothing to do.
            continue
    if elements_to_remove:
        print(f" 已删除 {len(elements_to_remove)} 个模板占位内容")

    # Only elements after the title were removed, so its index is unchanged.
    insert_pos = overall_start + 1

    # --- 4. insert the generated sections -------------------------------------
    for idx, section in enumerate(sections):
        title_en = section.get('title_en', '')
        title_cn = section.get('title_cn', '')
        paragraphs = section.get('paragraphs', [])

        # Spacer before every section except the first, which sits directly
        # under the area title.
        if idx > 0:
            body.insert(insert_pos, create_empty_paragraph())
            insert_pos += 1

        # Two-line heading: English line, then Chinese line.
        for title_p in create_section_title_two_lines(title_en, title_cn):
            body.insert(insert_pos, title_p)
            insert_pos += 1

        # Paragraph pairs: English first, Chinese second; empty texts skipped.
        for para in paragraphs:
            en_text = para.get('en', '')
            if en_text:
                body.insert(insert_pos, create_formatted_paragraph(en_text, is_chinese=False))
                insert_pos += 1
            cn_text = para.get('cn', '')
            if cn_text:
                body.insert(insert_pos, create_formatted_paragraph(cn_text, is_chinese=True))
                insert_pos += 1

    print(f" 已插入 {len(sections)} 个健康评估小节")
def create_page_break_paragraph():
    """Return a ``w:p`` element whose only run holds a page break.

    The break is a ``w:br`` element with ``w:type="page"``.
    """
    page_break = OxmlElement('w:br')
    page_break.set(qn('w:type'), 'page')

    run = OxmlElement('w:r')
    run.append(page_break)

    paragraph = OxmlElement('w:p')
    paragraph.append(run)
    return paragraph
def fill_functional_health_advice_section(doc, advice_content: dict):
"""将功能医学健康建议内容填充到文档
策略:
1. 找到 "Functional Medical Health Advice" 标题位置
2. 在标题前插入分页符
3. 如果找不到,在 "Medical Intervention" 区域结束后创建该区域
4. 删除标题之后、下一个主要区域之前的所有模板内容(不保护介绍段落)
5. 在标题之后直接插入DeepSeek生成的内容包含五个模块表头和内容
"""
sections = advice_content.get('sections', [])
if not sections:
print(" 功能医学健康建议内容为空,跳过填充")
return
body = doc.element.body
children = list(body)
# 查找 "Functional Medical Health Advice" 位置
advice_start = -1
for i, elem in enumerate(children):
text = ''.join(elem.itertext()).strip()
text_lower = text.lower()
if ('functional medical health advice' in text_lower or
'功能医学健康建议' in text or
('functional' in text_lower and 'medical' in text_lower and 'health' in text_lower and 'advice' in text_lower)):
advice_start = i
print(f" 找到Functional Medical Health Advice位置: {i}")
break
if advice_start < 0:
# 尝试更宽松的匹配
for i, elem in enumerate(children):
text = ''.join(elem.itertext()).strip()
text_lower = text.lower()
if 'functional' in text_lower and 'advice' in text_lower:
advice_start = i
print(f" 找到功能医学建议位置(宽松匹配): {i}")
break
# 在FHA标题前确保有分页符
if advice_start >= 0:
already_has_break = False
if advice_start > 0:
prev_elem = children[advice_start - 1]
br_elem = prev_elem.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}br')
if br_elem is not None:
break_type = br_elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type')
if break_type == 'page':
already_has_break = True
if not already_has_break:
page_break = create_page_break_paragraph()
body.insert(advice_start, page_break)
advice_start += 1 # 标题位置偏移
print(f" 已在FHA标题前插入分页符")
# 重新获取children
children = list(body)
# 重建 FHA 标题:单行格式 + 4二级-标题样式(与整体健康情况标题格式一致:华文楷体、四号、加粗)
if advice_start >= 0:
old_title = children[advice_start]
# 创建新的单行标题段落
new_title = OxmlElement('w:p')
new_pPr = OxmlElement('w:pPr')
new_pStyle = OxmlElement('w:pStyle')
new_pStyle.set(qn('w:val'), '4-')
new_pPr.append(new_pStyle)
new_title.append(new_pPr)
new_r = OxmlElement('w:r')
new_t = OxmlElement('w:t')
new_t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
new_t.text = 'Functional Medical Health Advice 功能医学健康建议'
new_r.append(new_t)
new_title.append(new_r)
# 替换模板原标题
old_title.addprevious(new_title)
body.remove(old_title)
children = list(body)
# 更新 advice_start 位置
for i, elem in enumerate(children):
if elem is new_title:
advice_start = i
break
print(f" 已重建FHA标题为单行格式 + 4二级-标题样式")
if advice_start < 0:
# 如果找不到,需要创建该区域
print(" 未找到Functional Medical Health Advice位置尝试创建...")
insert_after_pos = -1
# 首先找到 "Medical Intervention" 的位置
medical_intervention_pos = -1
for i, elem in enumerate(children):
text = ''.join(elem.itertext()).strip().lower()
if 'medical intervention' in text or '医学干预' in text:
medical_intervention_pos = i
print(f" 找到Medical Intervention位置: {i}")
break
if medical_intervention_pos >= 0:
# 从 Medical Intervention 之后找到该区域的结束位置
end_keywords = ['功能医学检测档案', 'functional medical examination',
'客户功能医学检测档案', '尿液检测', 'urine detection', 'urine test']
for i in range(medical_intervention_pos + 1, len(children)):
text = ''.join(children[i].itertext()).strip().lower()
if any(kw in text for kw in end_keywords):
insert_after_pos = i
print(f" 找到Medical Intervention结束位置: {i}")
break
# 如果没找到 Medical Intervention尝试找 Overall Health Assessment 之后的位置
if insert_after_pos < 0:
overall_health_pos = -1
for i, elem in enumerate(children):
text = ''.join(elem.itertext()).strip().lower()
if 'overall health' in text and 'assessment' in text:
overall_health_pos = i
break
if overall_health_pos >= 0:
end_keywords = ['功能医学检测档案', 'functional medical examination',
'客户功能医学检测档案', '尿液检测', 'urine detection', 'urine test']
for i in range(overall_health_pos + 1, len(children)):
text = ''.join(children[i].itertext()).strip().lower()
if any(kw in text for kw in end_keywords):
insert_after_pos = i
print(f" 找到插入位置Overall Health之后: {i}")
break
if insert_after_pos < 0:
print(" 无法确定插入位置,跳过功能医学健康建议")
return
# 在FHA区域前插入分页符
page_break = create_page_break_paragraph()
body.insert(insert_after_pos, page_break)
insert_after_pos += 1
print(f" 已在FHA区域前插入分页符")
# 创建标题(使用 4二级-标题 样式,与整体健康情况标题格式一致:华文楷体、四号、加粗)
print(f" 在位置 {insert_after_pos} 创建Functional Medical Health Advice区域")
title_text = "Functional Medical Health Advice 功能医学健康建议"
title_p = OxmlElement('w:p')
title_pPr = OxmlElement('w:pPr')
title_pStyle = OxmlElement('w:pStyle')
title_pStyle.set(qn('w:val'), '4-')
title_pPr.append(title_pStyle)
title_p.append(title_pPr)
title_r = OxmlElement('w:r')
title_t = OxmlElement('w:t')
title_t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
title_t.text = title_text
title_r.append(title_t)
title_p.append(title_r)
body.insert(insert_after_pos, title_p)
insert_pos = insert_after_pos + 1
# 标题后插入1个空段落参考模板格式
empty_p = create_empty_paragraph()
body.insert(insert_pos, empty_p)
insert_pos += 1
# 直接插入DeepSeek生成的内容五个模块表头 + 内容)
for idx, section in enumerate(sections):
title_en = section.get('title_en', '')
title_cn = section.get('title_cn', '')
# 在每个模块标题前插入1个空段落参考模板格式
empty_p = create_empty_paragraph()
body.insert(insert_pos, empty_p)
insert_pos += 1
# 插入小节标题(单行格式:英文 + 中文)- 参考模板 Functional Medical Health Advice 格式
section_title_p = create_section_title_one_line(title_en, title_cn)
body.insert(insert_pos, section_title_p)
insert_pos += 1
# 支持新结构化格式overview/analysis/recommendations/summary和旧格式paragraphs
if section.get('overview'):
# 新结构化格式
overview = section['overview']
if overview.get('en'):
body.insert(insert_pos, create_formatted_paragraph(overview['en'], is_chinese=False))
insert_pos += 1
if overview.get('cn'):
body.insert(insert_pos, create_formatted_paragraph(overview['cn'], is_chinese=True))
insert_pos += 1
analysis = section.get('analysis', {})
if analysis.get('en'):
body.insert(insert_pos, create_formatted_paragraph(analysis['en'], is_chinese=False))
insert_pos += 1
if analysis.get('cn'):
body.insert(insert_pos, create_formatted_paragraph(analysis['cn'], is_chinese=True))
insert_pos += 1
body.insert(insert_pos, create_formatted_paragraph("Recommended strategies include:", is_chinese=False))
insert_pos += 1
body.insert(insert_pos, create_formatted_paragraph("建议措施包括:", is_chinese=True))
insert_pos += 1
for rec in section.get('recommendations', []):
if rec.get('en'):
body.insert(insert_pos, create_formatted_paragraph(rec['en'], is_chinese=False))
insert_pos += 1
if rec.get('cn'):
body.insert(insert_pos, create_formatted_paragraph(rec['cn'], is_chinese=True))
insert_pos += 1
summary = section.get('summary', {})
if summary.get('en'):
body.insert(insert_pos, create_formatted_paragraph(summary['en'], is_chinese=False))
insert_pos += 1
if summary.get('cn'):
body.insert(insert_pos, create_formatted_paragraph(summary['cn'], is_chinese=True))
insert_pos += 1
else:
# 旧格式paragraphs数组
for para in section.get('paragraphs', []):
en_text = para.get('en', '')
if en_text:
p_en = create_formatted_paragraph(en_text, is_chinese=False)
body.insert(insert_pos, p_en)
insert_pos += 1
cn_text = para.get('cn', '')
if cn_text:
p_cn = create_formatted_paragraph(cn_text, is_chinese=True)
body.insert(insert_pos, p_cn)
insert_pos += 1
print(f" 已创建并插入 {len(sections)} 个功能医学健康建议小节")
return
# 找到下一个主要区域的位置(作为删除的结束边界)
children = list(body) # 重新获取
next_section_pos = len(children)
end_keywords = ['功能医学检测档案', 'functional medical examination file',
'尿液检测', 'urine detection',
'客户功能医学检测档案', 'client functional medical']
for i in range(advice_start + 1, len(children)):
text = ''.join(children[i].itertext()).strip().lower()
if any(kw in text for kw in end_keywords):
next_section_pos = i
print(f" 找到下一区域位置: {i}")
break
# 删除标题之后、下一区域之前的所有模板内容(不再保护介绍段落)
children = list(body) # 重新获取
elements_to_remove = []
for i in range(advice_start + 1, min(next_section_pos, len(children))):
elem = children[i]
# 跳过 sectPr 元素
if not elem.tag.endswith('}sectPr'):
elements_to_remove.append(elem)
for elem in elements_to_remove:
try:
body.remove(elem)
except:
pass
if elements_to_remove:
print(f" 已删除 {len(elements_to_remove)} 个模板占位内容")
# 重新获取位置(因为删除了元素)
children = list(body)
insert_pos = -1
for i, elem in enumerate(children):
text = ''.join(elem.itertext()).strip().lower()
if 'functional medical health advice' in text or '功能医学健康建议' in text:
insert_pos = i + 1
break
if insert_pos < 0:
print(" 无法确定插入位置")
return
# 插入固定总述引导段落
intro_paragraphs = [
{
"en": "Functional medicine goes beyond the diagnosis and medical treatment of diseases, placing greater emphasis on comprehensive health management for each individual. Beyond the aforementioned \"medical intervention\", the core of functional medicine lies in helping individuals improve their lifestyle from the root, optimize bodily functions, and enhance overall health. Through a comprehensive assessment of metabolism, immunity, hormones, nutrition, emotions, and daily habits, a personalized health optimization pathway can be tailored for each client.",
"cn": "功能医学不仅仅停留在疾病的诊断与医学治疗,更强调对个体的全方位健康管理。在上述「医学干预」之外,功能医学的核心在于帮助人们从源头改善生活方式、优化身体功能与提升整体健康状态。通过对代谢、免疫、荷尔蒙、营养、情绪及生活习惯等多个维度的综合评估,可以为每一位客户量身定制个性化的健康提升路径。"
},
{
"en": "Based on your test results and individual health status, the Be.U Med Functional Medicine Team provides you with scientific and actionable recommendations in the areas of nutrition adjustment, exercise prescription, sleep and stress management, and lifestyle optimization, aiming to help you achieve long-term health, chronic disease prevention, and overall well-being.",
"cn": "基于您的检测结果与个人健康状况Be.U Med功能医学团队从营养调节、运动处方、睡眠与压力管理、生活方式优化等方面为您提出科学、可执行的健康建议旨在帮助您实现真正的长期健康、慢病预防与身心平衡。"
}
]
for intro_para in intro_paragraphs:
en_text = intro_para.get('en', '')
if en_text:
p_en = create_formatted_paragraph(en_text, is_chinese=False)
body.insert(insert_pos, p_en)
insert_pos += 1
cn_text = intro_para.get('cn', '')
if cn_text:
p_cn = create_formatted_paragraph(cn_text, is_chinese=True)
body.insert(insert_pos, p_cn)
insert_pos += 1
# 插入五个模块内容
for idx, section in enumerate(sections):
title_en = section.get('title_en', '')
title_cn = section.get('title_cn', '')
# 在每个模块标题前插入1个空段落
empty_p = create_empty_paragraph()
body.insert(insert_pos, empty_p)
insert_pos += 1
# 插入小节标题(单行格式:英文 + 中文)
title_p = create_section_title_one_line(title_en, title_cn)
body.insert(insert_pos, title_p)
insert_pos += 1
# 支持新结构化格式overview/analysis/recommendations/summary和旧格式paragraphs
if section.get('overview'):
# 新结构化格式
# 领域概述
overview = section['overview']
if overview.get('en'):
body.insert(insert_pos, create_formatted_paragraph(overview['en'], is_chinese=False))
insert_pos += 1
if overview.get('cn'):
body.insert(insert_pos, create_formatted_paragraph(overview['cn'], is_chinese=True))
insert_pos += 1
# 检测关联分析
analysis = section.get('analysis', {})
if analysis.get('en'):
body.insert(insert_pos, create_formatted_paragraph(analysis['en'], is_chinese=False))
insert_pos += 1
if analysis.get('cn'):
body.insert(insert_pos, create_formatted_paragraph(analysis['cn'], is_chinese=True))
insert_pos += 1
# 建议引导语
body.insert(insert_pos, create_formatted_paragraph("Recommended strategies include:", is_chinese=False))
insert_pos += 1
body.insert(insert_pos, create_formatted_paragraph("建议措施包括:", is_chinese=True))
insert_pos += 1
# 具体建议
for rec in section.get('recommendations', []):
if rec.get('en'):
body.insert(insert_pos, create_formatted_paragraph(rec['en'], is_chinese=False))
insert_pos += 1
if rec.get('cn'):
body.insert(insert_pos, create_formatted_paragraph(rec['cn'], is_chinese=True))
insert_pos += 1
# 总结意义
summary = section.get('summary', {})
if summary.get('en'):
body.insert(insert_pos, create_formatted_paragraph(summary['en'], is_chinese=False))
insert_pos += 1
if summary.get('cn'):
body.insert(insert_pos, create_formatted_paragraph(summary['cn'], is_chinese=True))
insert_pos += 1
else:
# 旧格式paragraphs数组
for para in section.get('paragraphs', []):
en_text = para.get('en', '')
if en_text:
body.insert(insert_pos, create_formatted_paragraph(en_text, is_chinese=False))
insert_pos += 1
cn_text = para.get('cn', '')
if cn_text:
body.insert(insert_pos, create_formatted_paragraph(cn_text, is_chinese=True))
insert_pos += 1
print(f" 已插入 {len(sections)} 个功能医学健康建议小节")
# 验证并确保"Functional Medical Health Advice"前有分页符
children = list(body)
advice_pos = -1
for i, elem in enumerate(children):
text = ''.join(elem.itertext()).strip()
if 'Functional Medical Health Advice' in text or '功能医学健康建议' in text:
advice_pos = i
break
# 不再验证和插入Functional Medical Health Advice前的分页符
# 医学干预建议方案前不需要分页符
# 在"客户功能医学检测档案"前插入分页符(如果还没有的话)
children = list(body)
for i, elem in enumerate(children):
text = ''.join(elem.itertext()).strip()
if '功能医学检测档案' in text or 'Functional Medical Examination File' in text:
# 检查前一个元素是否已经是分页符
already_has_break = False
if i > 0:
prev_elem = children[i - 1]
br_elem = prev_elem.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}br')
if br_elem is not None:
break_type = br_elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type')
if break_type == 'page':
already_has_break = True
if not already_has_break:
# 在该元素前插入分页符
page_break = create_page_break_paragraph()
body.insert(i, page_break)
print(f" 已在'客户功能医学检测档案'前插入分页符")
else:
print(f" '客户功能医学检测档案'前已有分页符,跳过插入")
break
def generate_item_explanations(abnormal_items: list, api_key: str, call_deepseek_api) -> dict:
    """
    Collect a clinical-significance explanation for every abnormal item.

    Explanations found in ``template_explanations.json`` (next to this module)
    take precedence; DeepSeek is only asked about items the template file does
    not cover with both an English and a Chinese text.

    Args:
        abnormal_items: List of abnormal item dicts. An item is only considered
            when it has a non-empty ``abb``; ``name``, ``result``, ``point``,
            ``unit`` and ``reference`` are used to describe it in the prompt.
        api_key: DeepSeek API key. When empty, nothing is generated.
        call_deepseek_api: Callable invoked as ``call_deepseek_api(prompt, api_key)``
            that returns the model's text reply.

    Returns:
        {ABB: {clinical_en: ..., clinical_cn: ...}, ...}
        Template hits are keyed by the upper-cased, stripped ``abb``; generated
        entries keep whatever keys the model returned.
    """
    from pathlib import Path

    def _alnum(name):
        # Comparison form of an abbreviation: letters and digits only.
        return ''.join(ch for ch in name if ch.isalnum())

    def _complete(entry):
        # A template entry is usable only when both languages are present.
        return (isinstance(entry, dict)
                and bool(entry.get('clinical_en'))
                and bool(entry.get('clinical_cn')))

    result = {}
    items_need_generation = []

    # 1. Load the template explanations, if the file is there.
    template_explanations_file = Path(__file__).parent / "template_explanations.json"
    template_explanations = {}
    if template_explanations_file.exists():
        try:
            with open(template_explanations_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
            if not isinstance(loaded, dict):
                raise ValueError("top-level JSON value must be an object")
            template_explanations = loaded
            print(f" ✓ 已加载 {len(template_explanations)} 个模板解释")
        except Exception as e:
            print(f" ⚠️ 加载模板解释失败: {e}")

    # Normalise the template keys once instead of once per abnormal item.
    # The first complete entry wins when several keys normalise identically.
    normalized_templates = {}
    for key, entry in template_explanations.items():
        if _complete(entry):
            normalized_templates.setdefault(_alnum(key.upper()), entry)

    # 2. Split the items into template hits and items that need generation.
    for item in abnormal_items:
        abb = str(item.get('abb') or '').upper().strip()
        if not abb:
            continue
        entry = template_explanations.get(abb)
        if not _complete(entry):
            # Fall back to a match that ignores punctuation and other symbols.
            entry = normalized_templates.get(_alnum(abb))
        if entry is not None:
            result[abb] = entry
        else:
            items_need_generation.append(item)

    template_count = len(result)
    print(f" ✓ 模板解释: {template_count} 个项目")

    # 3. Ask DeepSeek about whatever the template did not cover.
    if items_need_generation and api_key:
        print(f" ⏳ 需要DeepSeek生成: {len(items_need_generation)} 个项目")
        items_desc = []
        for item in items_need_generation:
            # NOTE(review): the two empty-string entries look like direction markers
            # lost in transit (e.g. an up-arrow) - confirm against the original file.
            # A missing ``point`` takes the "low" branch, like any unrecognised marker.
            direction = '偏高' if item.get('point') in ['', 'H', ''] else '偏低'
            desc = f"- {item['abb']}: {item.get('name', '')}, 结果: {item.get('result', '')}"
            if item.get('unit'):
                desc += f" {item['unit']}"
            desc += f" ({direction})"
            if item.get('reference'):
                desc += f", 参考范围: {item['reference']}"
            items_desc.append(desc)

        prompt = f"""你是一位医学检验专家,请为以下异常检测项目生成临床意义解释。
## 异常项目:
{chr(10).join(items_desc)}
## 要求:
1. 为每个项目提供英文和中文的临床意义解释
2. 解释应包含:该指标的作用、异常时可能的原因和健康影响
3. 语言专业但易于理解
4. 每个解释约30-60字
## 输出格式JSON
```json
{{
"ABB1": {{
"clinical_en": "English clinical significance explanation...",
"clinical_cn": "中文临床意义解释..."
}},
"ABB2": {{
"clinical_en": "...",
"clinical_cn": "..."
}}
}}
```
只返回JSON不要其他说明。使用项目的ABB缩写作为key。"""
        try:
            response = call_deepseek_api(prompt, api_key)
            # Strip a Markdown code fence around the JSON, if the model added one.
            if '```json' in response:
                response = response.split('```json')[1].split('```')[0]
            elif '```' in response:
                response = response.split('```')[1].split('```')[0]
            generated = json.loads(response.strip())
            if not isinstance(generated, dict):
                raise ValueError("expected a JSON object keyed by ABB")
            result.update(generated)
            print(f" ✓ DeepSeek生成: {len(generated)} 个项目")
        except Exception as e:
            # Best effort: a failed generation leaves the template hits intact.
            print(f" ✗ DeepSeek生成失败: {e}")
    elif items_need_generation:
        print(f" ⚠️ {len(items_need_generation)} 个项目无模板解释且无API Key")

    print(f" 📊 解释来源统计: 模板 {template_count} 个, DeepSeek {len(result) - template_count}")
    return result
def generate_and_fill_health_content(doc, matched_data: dict, api_key: str, call_deepseek_api):
    """Generate the AI-written health sections and write them into ``doc``.

    The function collects the abnormal items, generates (in this order) the
    clinical explanations, the V2 overall health assessment, the V2 medical
    intervention advice and the functional-medicine health advice, and then
    fills the document from the last region to the first.

    Args:
        doc: Document passed through to the individual fill functions.
        matched_data: Mapping of module name to module data; the ``items`` list
            of every dict-valued module is gathered and handed to the medical
            intervention generator.
        api_key: DeepSeek API key. When empty, nothing is generated or filled.
        call_deepseek_api: API call function forwarded to the generators.

    Returns:
        None.
    """
    if not api_key:
        print(" 未提供DeepSeek API Key跳过健康内容生成")
        return

    print(" 正在收集异常项...")
    abnormal = collect_all_abnormal_items(matched_data, api_key)
    if not abnormal:
        print(" 没有检测到异常项目,跳过内容生成")
        return
    print(f" 发现 {len(abnormal)} 个异常项目")

    # Step 0: clinical-significance explanations.
    print(" 正在调用DeepSeek生成临床意义解释...")
    explanations = generate_item_explanations(abnormal, api_key, call_deepseek_api)

    # Step 1: overall health assessment (V2).
    print(" 正在调用DeepSeek生成整体健康情况分析V2...")
    from health_assessment_v2 import generate_health_assessment_v2, fill_health_assessment_v2
    assessment = generate_health_assessment_v2(matched_data, api_key, call_deepseek_api)

    # Step 2: medical intervention advice (V2).
    print(" 正在调用DeepSeek生成医学干预建议V2...")
    from medical_intervention_v2 import generate_medical_intervention_v2, fill_medical_intervention_v2
    # Every tested item of every module; the original note says the generator
    # uses these for gender detection.
    every_item = [
        entry
        for module in matched_data.values()
        if isinstance(module, dict) and 'items' in module
        for entry in module['items']
    ]
    intervention = generate_medical_intervention_v2(abnormal, api_key, call_deepseek_api, every_item)

    # Step 3: functional-medicine health advice.
    print(" 正在调用DeepSeek生成功能医学健康建议...")
    advice = generate_functional_health_advice(abnormal, api_key, call_deepseek_api)

    # Step 4: fill back-to-front so earlier insertions do not shift later anchors.
    fill_steps = (
        (advice and advice.get('sections'),
         " 正在填充功能医学健康建议...", fill_functional_health_advice_section, advice),
        (intervention,
         " 正在填充医学干预建议V2...", fill_medical_intervention_v2, intervention),
        (assessment,
         " 正在填充整体健康情况分析V2...", fill_health_assessment_v2, assessment),
    )
    for ready, message, fill, payload in fill_steps:
        if ready:
            print(message)
            fill(doc, payload)

    # The abnormal-index summary comes last (it sits first in the document).
    print(" 正在填充异常指标汇总...")
    fill_abnormal_index_summary(doc, abnormal, explanations)
    print(" 健康内容生成完成")