""" 整体健康情况分析模块 V2 按照功能医学整体观重新设计的报告分析生成器 核心原则: 1. 功能医学整体观视角,聚焦"系统功能平衡""机能趋势预判" 2. 专业、客观、严谨的语言风格 3. 只呈现"指标状态→功能提示→关注方向",不包含干预方案 """ import json import re from typing import Dict, List, Any, Tuple from docx.oxml import OxmlElement from docx.oxml.ns import qn # 系统分类映射:将模块映射到四大系统 SYSTEM_MAPPING = { # (I) 血液学与炎症状态 'Hematology': [ 'Complete Blood Count', # 血常规 'Blood Coagulation', # 凝血功能 'Inflammatory Reaction', # 炎症反应 ], # (II) 荷尔蒙与内分泌调节 'Endocrine': [ 'Thyroid Function', # 甲状腺功能 'Female Hormone', # 女性荷尔蒙 'Male Hormone', # 男性荷尔蒙 'Bone Metabolism', # 骨代谢 ], # (III) 免疫学与感染风险 'Immunology': [ 'Four Infectious Diseases', # 传染病四项 'Lymphocyte Subpopulation', # 淋巴细胞亚群 'Humoral Immunity', # 体液免疫 'Autoantibody', # 自身抗体 ], # (IV) 营养与代谢状况 'Metabolism': [ 'Blood Sugar', # 血糖 'Lipid Profile', # 血脂 'Liver Function', # 肝功能 'Kidney Function', # 肾功能 'Serum Electrolytes', # 血电解质 'Microelement', # 微量元素 'Urine Test', # 尿液检测 'Myocardial Enzyme', # 心肌酶谱 'Thromboembolism', # 心脑血管风险因子 'Tumor Markers', # 肿瘤标记物 ], } # 系统中英文名称 SYSTEM_NAMES = { 'Hematology': { 'en': '(I) Hematology and Inflammatory Status', 'cn': '(一)血液学与炎症状态' }, 'Endocrine': { 'en': '(II) Hormonal and Endocrine Regulation', 'cn': '(二)荷尔蒙与内分泌调节' }, 'Immunology': { 'en': '(III) Immunology and Infection Risk', 'cn': '(三)免疫学与感染风险' }, 'Metabolism': { 'en': '(IV) Nutrition and Metabolic Profile', 'cn': '(四)营养与代谢状况' }, } def get_system_for_module(module_name: str) -> str: """根据模块名称获取所属系统""" for system, modules in SYSTEM_MAPPING.items(): if module_name in modules: return system # 默认归入代谢系统 return 'Metabolism' def classify_items_by_system(matched_data: dict, config: dict = None) -> Dict[str, Dict[str, List]]: """ 将所有检测项目按四大系统分类 Returns: { 'Hematology': { 'normal': [...], # 正常指标 'abnormal': [...], # 异常指标 'borderline': [...] # 临界指标 }, ... 
# Values of the ``point`` field that flag a result as above / below its range.
_HIGH_FLAGS = ('↑', 'H', '高')
_LOW_FLAGS = ('↓', 'L', '低')

# A flagged result counts as borderline when it deviates from the violated
# limit by at most this fraction of that limit.
_BORDERLINE_FRACTION = 0.1
# Absorbs binary rounding so that an exact 10 % deviation (e.g. 1.1 against an
# upper limit of 1.0, which evaluates to 0.10000000000000009) still qualifies.
_BORDERLINE_EPSILON = 1e-9


def _clean_field(value) -> str:
    """Return *value* as stripped text; ``None`` becomes an empty string."""
    return '' if value is None else str(value).strip()


def classify_items_by_system(matched_data: dict, config: dict = None) -> Dict[str, Dict[str, List]]:
    """Group every test item by physiological system and by status.

    Args:
        matched_data: Mapping of abbreviation -> item dict. The fields
            ``point``, ``result``, ``reference``, ``unit`` and optionally
            ``module``, ``project_cn``, ``project`` are read from each item.
            Missing, ``None`` or non-string field values are tolerated.
        config: Abbreviation config; loaded with ``load_abb_config()`` when
            ``None``. Its ``abb_to_info`` table supplies module and names.

    Returns:
        ``{system: {'normal': [...], 'abnormal': [...], 'borderline': [...]}}``
        for the four systems. Flagged items go to ``borderline`` when
        ``_is_borderline_value`` accepts them and to ``abnormal`` otherwise;
        unflagged items are kept under ``normal`` only when they carry a
        result.
    """
    # Project-local module; imported lazily exactly as the original did.
    from config import load_abb_config, normalize_abb

    if config is None:
        config = load_abb_config()
    abb_to_info = config.get('abb_to_info', {})

    result = {
        system: {'normal': [], 'abnormal': [], 'borderline': []}
        for system in ('Hematology', 'Endocrine', 'Immunology', 'Metabolism')
    }

    for abb, data in matched_data.items():
        # dict.get's default only covers *missing* keys, so a present-but-None
        # value has to be normalised explicitly before it can be stripped.
        point = _clean_field(data.get('point'))
        result_val = _clean_field(data.get('result'))
        reference = _clean_field(data.get('reference'))
        unit = _clean_field(data.get('unit'))

        # Look the item up by its normalised abbreviation first, then raw.
        normalized_abb = normalize_abb(abb, config)
        info = abb_to_info.get(normalized_abb.upper(), {})
        if not info:
            info = abb_to_info.get(abb.upper(), {})

        module = info.get('module', data.get('module', ''))
        system = get_system_for_module(module)

        # Prefer the Chinese display name, falling back to the abbreviation.
        name = (
            info.get('project_cn')
            or data.get('project_cn')
            or info.get('project')
            or data.get('project', abb)
        )

        item_info = {
            'abb': abb,
            'name': name,
            'result': result_val,
            'unit': unit,
            'reference': reference,
            'point': point,
            'module': module,
            'system': system,
        }

        if point in _HIGH_FLAGS or point in _LOW_FLAGS:
            is_borderline = _is_borderline_value(result_val, reference, point)
            bucket = 'borderline' if is_borderline else 'abnormal'
            result[system][bucket].append(item_info)
        elif result_val:
            # Unflagged items are only reported when they have a result.
            result[system]['normal'].append(item_info)

    return result


def _is_borderline_value(result: str, reference: str, point: str) -> bool:
    """Tell whether a flagged result lies within 10 % of the violated limit.

    Args:
        result: Result text; every character except digits and dots is
            dropped before it is parsed as a float.
        reference: Reference range such as ``"3.5-5.0"`` or ``"3.5~5.0"``
            (full-width tilde and en/em dashes are accepted as separators).
        point: Direction flag, one of ``↑ H 高`` or ``↓ L 低``.

    Returns:
        ``True`` only when both values parse, the relevant limit is positive
        and the result is beyond it by more than 0 and at most 10 % of that
        limit. Unparseable input yields ``False`` instead of raising.
    """
    try:
        result_num = float(re.sub(r'[^\d.]', '', result))
    except (TypeError, ValueError):
        # None / non-text input, or nothing numeric left after cleaning.
        return False

    ref_match = re.search(r'([\d.]+)\s*[-~~–—]\s*([\d.]+)', reference or '')
    if ref_match is None:
        return False
    try:
        ref_low = float(ref_match.group(1))
        ref_high = float(ref_match.group(2))
    except ValueError:
        # e.g. a stray "." captured as one of the bounds.
        return False

    if point in _HIGH_FLAGS:
        limit, overshoot = ref_high, result_num - ref_high
    elif point in _LOW_FLAGS:
        limit, overshoot = ref_low, ref_low - result_num
    else:
        return False

    if limit <= 0:
        return False
    deviation = overshoot / limit
    return 0 < deviation <= _BORDERLINE_FRACTION + _BORDERLINE_EPSILON
def collect_all_items_for_assessment(matched_data: dict, api_key: str = None) -> Tuple[List, List, Dict]:
    """Collect every item that has a result, split into normal and abnormal.

    Args:
        matched_data: Mapping of abbreviation -> item dict (same fields as
            read by ``classify_items_by_system``). Missing, ``None`` or
            non-string field values are tolerated.
        api_key: Accepted for call compatibility; not used by this function.

    Returns:
        ``(normal_items, abnormal_items, system_classified_data)``. The first
        two are flat lists in input order; an item is abnormal when its
        ``point`` flag is one of ``↑ ↓ H L 高 低``. The third is the result of
        ``classify_items_by_system`` on the same data.
    """
    # Project-local module; imported lazily exactly as the original did.
    from config import load_abb_config, normalize_abb

    def as_text(value) -> str:
        # dict.get's default only covers missing keys; a key that is present
        # with None (or a number) would otherwise break the .strip() call.
        return '' if value is None else str(value).strip()

    abnormal_flags = ('↑', '↓', 'H', 'L', '高', '低')

    config = load_abb_config()
    abb_to_info = config.get('abb_to_info', {})

    normal_items = []
    abnormal_items = []

    for abb, data in matched_data.items():
        result_val = as_text(data.get('result'))
        if not result_val:
            # Items without a result are left out of both flat lists.
            continue

        point = as_text(data.get('point'))
        reference = as_text(data.get('reference'))
        unit = as_text(data.get('unit'))

        # Look the item up by its normalised abbreviation first, then raw.
        normalized_abb = normalize_abb(abb, config)
        info = abb_to_info.get(normalized_abb.upper(), {})
        if not info:
            info = abb_to_info.get(abb.upper(), {})

        module = info.get('module', data.get('module', ''))
        name = (
            info.get('project_cn')
            or data.get('project_cn')
            or info.get('project')
            or data.get('project', abb)
        )

        item_info = {
            'abb': abb,
            'name': name,
            'result': result_val,
            'unit': unit,
            'reference': reference,
            'point': point,
            'module': module,
            'system': get_system_for_module(module),
        }

        target = abnormal_items if point in abnormal_flags else normal_items
        target.append(item_info)

    # Per-system view of the same data, reusing the already loaded config.
    system_data = classify_items_by_system(matched_data, config)

    return normal_items, abnormal_items, system_data
def build_assessment_prompt(normal_items: List, abnormal_items: List, system_data: Dict) -> str:
    """Assemble the LLM prompt for the overall health assessment.

    Args:
        normal_items: Item dicts for in-range results; only the first 30 are
            listed in the prompt.
        abnormal_items: Item dicts for flagged results. Each is listed under
            its ``system`` (default ``'Metabolism'``) with a direction label
            and a borderline/abnormal label from ``_is_borderline_value``.
        system_data: Accepted for call compatibility; not read here.

    Returns:
        The complete prompt text, ending with the required JSON output schema.
    """
    def describe(item, annotation=''):
        # One bullet per indicator: name, abbreviation, value, optional unit,
        # optional status annotation, optional reference range.
        line = f" - {item['name']} ({item['abb']}): {item['result']}"
        if item.get('unit'):
            line += f" {item['unit']}"
        line += annotation
        if item.get('reference'):
            line += f" [参考: {item['reference']}]"
        return line

    # In-range indicators, capped at 30 entries.
    normal_lines = [describe(entry) for entry in normal_items[:30]]

    # Flagged indicators, bucketed by system in input order.
    grouped = {}
    for entry in abnormal_items:
        bucket = grouped.setdefault(entry.get('system', 'Metabolism'), [])
        if entry['point'] in ['↑', 'H', '高']:
            direction = '偏高'
        else:
            direction = '偏低'
        near_limit = _is_borderline_value(
            entry['result'], entry.get('reference', ''), entry['point']
        )
        level = '临界' if near_limit else '异常'
        bucket.append(describe(entry, f" ({direction}, {level})"))

    # Emit the groups in the fixed system order, each under its Chinese title.
    abnormal_lines = []
    for key, titles in SYSTEM_NAMES.items():
        members = grouped.get(key, [])
        if not members:
            continue
        abnormal_lines.append(f"\n【{titles['cn']}】")
        abnormal_lines.extend(members)

    normal_block = "\n".join(normal_lines) if normal_lines else ' 暂无数据'
    abnormal_block = "\n".join(abnormal_lines) if abnormal_lines else ' 暂无异常指标'

    prompt = f"""# 角色设定
你是Be.U Med功能医学团队的资深医学顾问,在功能医学、整体健康、抗衰老医学领域具有丰富的临床经验。

# 任务
根据体检者的血液检查报告,撰写"整体健康情况分析"报告。

# 检测数据

## 正常指标(部分)
{normal_block}

## 异常/临界指标(按系统分类)
{abnormal_block}

# 核心原则(必须严格遵守)

## 1. 段落格式(极其重要!)
- **每个段落必须先写英文,再写对应的中文**
- **第一段:英文80-120词,中文80-120字**
- **第二段:英文80-100词,中文约120字(严格控制在110-130字之间)**
- 不要英中混排,必须分开

## 2. 语言风格
- 专业、客观、严谨,体现功能医学视角
- 使用"提示""可能""建议""值得关注""需要注意"等引导词
- 禁用"必须""一定""保证""治愈"等绝对化表述
- 不做临床疾病诊断,聚焦功能状态分析

## 3. 核心指标判定
- **核心指标**:从医学角度判定各生理学系统的关键指标
- **异常项**:超出参考范围的指标 + 逼近临界值的指标
- 指标必须精准,标注具体数值及单位

# 文章结构(必须严格遵循)

## 总述概述(2段)
**第一段**:前半部分列重点正常项及数值,后半部分列重点异常项及数值
**第二段**:说明这些异常指标对整体健康的综合影响

## 四大系统分析(固定顺序,每个系统2段)

### (I) Hematology and Inflammatory Status / (一)血液学与炎症状态
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响

### (II) Hormonal and Endocrine Regulation / (二)荷尔蒙与内分泌调节
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响

### (III) Immunology and Infection Risk / (三)免疫学与感染风险
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响

### (IV) Nutrition and Metabolic Profile / (四)营养与代谢状况
**第一段**:前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值(含临界值)
**第二段**:说明该系统核心异常指标对其他生理系统的影响

## 结尾总结(2段)
**第一段 - 功能医学健康管理重点**:概括本次检测发现的核心健康管理重点
**第二段 - 个性化管理方向**:说明往哪个方向开展个性化健康管理

# 输出格式(JSON)
```json
{{
  "overview": {{
    "paragraph1": {{
      "en": "英文(80-120词):前半部分列重点正常项及数值,后半部分列重点异常项及数值...",
      "cn": "中文(80-120字):对应翻译..."
    }},
    "paragraph2": {{
      "en": "英文(80-100词):说明异常指标对整体健康的综合影响...",
      "cn": "中文(约120字):对应翻译..."
    }}
  }},
  "systems": [
    {{
      "key": "Hematology",
      "title_en": "(I) Hematology and Inflammatory Status",
      "title_cn": "(一)血液学与炎症状态",
      "paragraph1": {{
        "en": "英文(80-120词):前半部分列该系统重点正常项及数值,后半部分列重点异常项及数值...",
        "cn": "中文(80-120字):对应翻译..."
      }},
      "paragraph2": {{
        "en": "英文(80-100词):说明该系统核心异常指标对其他生理系统的影响...",
        "cn": "中文(约120字):对应翻译..."
      }}
    }},
    {{
      "key": "Endocrine",
      "title_en": "(II) Hormonal and Endocrine Regulation",
      "title_cn": "(二)荷尔蒙与内分泌调节",
      "paragraph1": {{}},
      "paragraph2": {{}}
    }},
    {{
      "key": "Immunology",
      "title_en": "(III) Immunology and Infection Risk",
      "title_cn": "(三)免疫学与感染风险",
      "paragraph1": {{}},
      "paragraph2": {{}}
    }},
    {{
      "key": "Metabolism",
      "title_en": "(IV) Nutrition and Metabolic Profile",
      "title_cn": "(四)营养与代谢状况",
      "paragraph1": {{}},
      "paragraph2": {{}}
    }}
  ],
  "conclusion": {{
    "management_focus": {{
      "en": "英文(80-120词):功能医学健康管理重点概括...",
      "cn": "中文(80-120字):对应翻译..."
    }},
    "personalized_direction": {{
      "en": "英文(80-120词):个性化管理方向说明...",
      "cn": "中文(80-120字):对应翻译..."
    }}
  }}
}}
```

# 重要提示
1. **每个段落必须先英文后中文,不要混排**
2. **第一段:英文80-120词,中文80-120字**
3. **第二段:英文80-100词,中文约120字(严格控制在110-130字)**
4. **第一段结构:前半部分正常项+数值,后半部分异常项+数值**
5. **第二段结构:异常指标对其他系统的影响**
6. **结尾两段:功能医学管理重点 + 个性化管理方向**
7. **逼近临界值的指标也算作异常项**
8. **只返回JSON,不要其他内容**"""

    return prompt
def _close_truncated_json(text: str) -> str:
    """Append whatever closers a cut-off JSON document is still missing.

    The text is scanned once while tracking string state (escaped quotes do
    not end a string) and a stack of open ``{`` / ``[``. An unterminated
    string is closed first, a dangling trailing comma is dropped, and the
    pending closers are then appended innermost-first so nesting stays valid.
    """
    pending = []          # closers still owed, outermost first
    in_string = False
    escaped = False

    for ch in text:
        if in_string:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == '{':
            pending.append('}')
        elif ch == '[':
            pending.append(']')
        elif ch in '}]' and pending and pending[-1] == ch:
            pending.pop()

    repaired = text
    if in_string:
        if escaped:
            # A lone trailing backslash would escape the quote we add.
            repaired = repaired[:-1]
        repaired += '"'
    else:
        repaired = repaired.rstrip().rstrip(',')
    return repaired + ''.join(reversed(pending))


def _parse_json_response(response_text: str):
    """Extract and parse the JSON payload of an LLM reply.

    A fenced block (```json ... ``` or a plain ``` fence) is unwrapped first.
    If the payload does not parse as-is, one repair pass for truncated output
    is attempted.

    Returns:
        The decoded JSON value, or ``None`` when it cannot be parsed.
    """
    if '```json' in response_text:
        response_text = response_text.split('```json')[1].split('```')[0]
    elif '```' in response_text:
        response_text = response_text.split('```')[1]
    response_text = response_text.strip()

    try:
        return json.loads(response_text)
    except json.JSONDecodeError:
        pass

    try:
        return json.loads(_close_truncated_json(response_text))
    except json.JSONDecodeError:
        return None


def generate_health_assessment_v2(matched_data: dict, api_key: str, call_deepseek_api) -> dict:
    """Generate the overall health assessment content (V2).

    Args:
        matched_data: Matched test data, passed on to
            ``collect_all_items_for_assessment``.
        api_key: DeepSeek API key; an empty value skips generation.
        call_deepseek_api: Callable invoked as
            ``call_deepseek_api(prompt, api_key, max_tokens=6000, timeout=180)``;
            a ``None`` return is treated as a failed request.

    Returns:
        The parsed JSON dict when it carries at least one of ``overview``,
        ``systems``, ``overall_analysis`` or ``system_analysis``; otherwise
        ``{}`` (no key, no data, or three failed attempts).
    """
    import time  # only needed for the retry back-off below

    if not api_key:
        print(" ⚠️ 未提供API Key,跳过健康评估生成")
        return {}

    normal_items, abnormal_items, system_data = collect_all_items_for_assessment(matched_data)

    if not normal_items and not abnormal_items:
        print(" ⚠️ 没有检测数据,跳过健康评估生成")
        return {}

    print(f" 📊 数据统计: 正常指标 {len(normal_items)} 个, 异常指标 {len(abnormal_items)} 个")

    prompt = build_assessment_prompt(normal_items, abnormal_items, system_data)

    max_attempts = 3
    # New format (overview/systems) or legacy format (overall_analysis/...).
    expected_keys = ('overview', 'systems', 'overall_analysis', 'system_analysis')

    for attempt in range(max_attempts):
        can_retry = attempt < max_attempts - 1
        try:
            print(f" 🤖 调用DeepSeek生成整体健康分析... (第{attempt+1}次)")
            response = call_deepseek_api(prompt, api_key, max_tokens=6000, timeout=180)

            if response is None:
                if can_retry:
                    print(f" ⚠️ API请求失败,重试中...")
                    time.sleep(3)
                continue

            result = _parse_json_response(response)

            # Valid JSON that is not an object (e.g. a list) is incomplete too.
            if isinstance(result, dict) and any(result.get(key) for key in expected_keys):
                print(f" ✓ 成功生成整体健康分析")
                return result

            if can_retry:
                print(f" ⚠️ 响应格式不完整,重试中...")

        except Exception as e:
            # Best-effort boundary around the external API call: never raise,
            # but always surface the error, including on the last attempt.
            if can_retry:
                print(f" ⚠️ 生成失败: {e},重试中...")
            else:
                print(f" ⚠️ 生成失败: {e}")

    print(f" ✗ 生成整体健康分析失败")
    return {}
def _as_bilingual_paragraph(entry):
    """Return ``{'en': ..., 'cn': ...}`` for a usable entry, else ``None``.

    An entry is usable when it is a dict with a non-empty ``en`` or ``cn``.
    Anything else (``None``, a bare string, an empty dict) is rejected
    instead of raising, because the input is LLM-produced JSON.
    """
    if not isinstance(entry, dict):
        return None
    en_text = entry.get('en')
    cn_text = entry.get('cn')
    if not (en_text or cn_text):
        return None
    return {'en': en_text or '', 'cn': cn_text or ''}


def _pick_paragraphs(container, keys):
    """Collect the usable bilingual paragraphs stored under *keys*, in order."""
    if not isinstance(container, dict):
        return []
    candidates = (_as_bilingual_paragraph(container.get(key)) for key in keys)
    return [paragraph for paragraph in candidates if paragraph is not None]


def convert_v2_to_sections_format(v2_result: dict) -> dict:
    """Convert a V2 assessment result into the ``sections`` layout.

    Input (new format): ``overview{paragraph1, paragraph2}``, ``systems[]``
    and ``conclusion{management_focus, personalized_direction}``. Two older
    shapes are still accepted: a ``paragraphs`` list on a system, a
    conclusion carrying ``en``/``cn`` directly, and, only when nothing was
    produced from the new keys, ``overall_analysis`` / ``system_analysis[]``.

    Returns:
        ``{'sections': [...]}`` where each section has ``title_en``,
        ``title_cn`` and ``paragraphs`` (list of ``{'en', 'cn'}``). The
        overview section is tagged ``is_overview`` and the conclusion
        ``is_conclusion``; both have empty titles. Parts without any usable
        paragraph are omitted, and malformed parts (``null``, strings) are
        skipped rather than raising.
    """
    v2_keys = ('paragraph1', 'paragraph2')
    legacy_keys = ('summary', 'strength_indicators', 'abnormal_indicators', 'focus_direction')

    def titled(source, paragraphs):
        return {
            'title_en': source.get('title_en', ''),
            'title_cn': source.get('title_cn', ''),
            'paragraphs': paragraphs,
        }

    sections = []

    # 1. Overview: two untitled paragraphs.
    paragraphs = _pick_paragraphs(v2_result.get('overview'), v2_keys)
    if paragraphs:
        sections.append({
            'title_en': '',
            'title_cn': '',
            'paragraphs': paragraphs,
            'is_overview': True,
        })

    # 2. One titled section per system.
    for system in v2_result.get('systems') or []:
        if not isinstance(system, dict):
            continue
        paragraphs = _pick_paragraphs(system, v2_keys)
        if not paragraphs:
            # Older payloads carry a plain ``paragraphs`` list instead.
            legacy_list = system.get('paragraphs')
            if isinstance(legacy_list, list):
                converted = (_as_bilingual_paragraph(item) for item in legacy_list)
                paragraphs = [item for item in converted if item is not None]
        if paragraphs:
            sections.append(titled(system, paragraphs))

    # 3. Conclusion: two untitled paragraphs.
    conclusion = v2_result.get('conclusion')
    paragraphs = _pick_paragraphs(
        conclusion, ('management_focus', 'personalized_direction')
    )
    if not paragraphs:
        # Older payloads put en/cn directly on the conclusion object.
        direct = _as_bilingual_paragraph(conclusion)
        if direct is not None:
            paragraphs = [direct]
    if paragraphs:
        sections.append({
            'title_en': '',
            'title_cn': '',
            'paragraphs': paragraphs,
            'is_conclusion': True,
        })

    # Legacy format, consulted only when the new keys produced nothing.
    if not sections:
        overall = v2_result.get('overall_analysis')
        paragraphs = _pick_paragraphs(overall, legacy_keys)
        if paragraphs:
            sections.append(titled(overall, paragraphs))

        for system in v2_result.get('system_analysis') or []:
            paragraphs = _pick_paragraphs(system, legacy_keys)
            if paragraphs:
                sections.append(titled(system, paragraphs))

    return {'sections': sections}
def generate_and_fill_health_assessment_v2(doc, matched_data: dict, api_key: str, call_deepseek_api):
    """Generate the overall health assessment (V2) and write it into *doc*.

    Main entry point that stands in for the older pair
    ``generate_health_assessment_content`` + ``fill_health_assessment_section``.

    Args:
        doc: Document object handed to ``fill_health_assessment_v2``.
        matched_data: Matched test data used to build the assessment.
        api_key: DeepSeek API key; a falsy value skips everything.
        call_deepseek_api: API-call function forwarded to the generator.

    Returns:
        ``None`` when no API key is given; otherwise whatever
        ``generate_health_assessment_v2`` returned. The document is filled
        only when that result is truthy.
    """
    if not api_key:
        print(" ⚠️ 未提供DeepSeek API Key,跳过健康评估生成")
        return None

    rule = "=" * 60
    print("\n" + rule)
    print("整体健康情况分析 V2")
    print(rule)

    # Step 1: generate the content.
    assessment_result = generate_health_assessment_v2(matched_data, api_key, call_deepseek_api)
    if not assessment_result:
        print(" ✗ 健康评估生成失败")
        return assessment_result

    # Step 2: write it into the document.
    print("\n 📝 正在填充健康评估内容...")
    fill_health_assessment_v2(doc, assessment_result)
    print(" ✓ 整体健康情况分析完成")
    return assessment_result
if __name__ == '__main__':
    # Manual smoke test: build a prompt from hand-written samples and show
    # the first 2000 characters of it.
    sample_normal = [
        {'abb': 'WBC', 'name': '白细胞计数', 'result': '6.5', 'unit': '10^9/L',
         'reference': '4.0-10.0', 'system': 'Hematology'},
        {'abb': 'RBC', 'name': '红细胞计数', 'result': '4.8', 'unit': '10^12/L',
         'reference': '4.0-5.5', 'system': 'Hematology'},
    ]

    sample_abnormal = [
        {'abb': 'TSH', 'name': '促甲状腺激素', 'result': '16.879', 'unit': 'μIU/mL',
         'reference': '0.35-4.94', 'point': '↑', 'system': 'Endocrine'},
        {'abb': 'AMH', 'name': '抗缪勒管激素', 'result': '0.17', 'unit': 'ng/mL',
         'reference': '1.0-10.0', 'point': '↓', 'system': 'Endocrine'},
    ]

    # Start from four empty systems, then place the samples where they belong.
    sample_system_data = {
        system: {'normal': [], 'abnormal': [], 'borderline': []}
        for system in ('Hematology', 'Endocrine', 'Immunology', 'Metabolism')
    }
    sample_system_data['Hematology']['normal'] = sample_normal
    sample_system_data['Endocrine']['abnormal'] = sample_abnormal

    prompt = build_assessment_prompt(sample_normal, sample_abnormal, sample_system_data)

    divider = "=" * 60
    print(divider)
    print("生成的Prompt预览(前2000字符):")
    print(divider)
    print(prompt[:2000])
    print("...")