""" 从模板文件重新提取所有检测项目的临床意义 生成正确的 template_explanations.json """ from docx import Document import json def main(): doc = Document('template_complete.docx') explanations = {} # 遍历所有表格 for table in doc.tables: rows = list(table.rows) for i, row in enumerate(rows): cells = row.cells if not cells: continue first_cell = cells[0].text.strip() # 跳过空行、表头、模块标题 if not first_cell: continue if 'Abb' in first_cell or '简称' in first_cell: continue if 'Clinical' in first_cell: continue # 检查是否是ABB行(短文本,不含占位符,不含中文模块名) if len(first_cell) > 40 or '{{' in first_cell: continue # 跳过模块标题(包含换行符和中文) if '\n' in first_cell and any('\u4e00' <= c <= '\u9fff' for c in first_cell): continue # 这是一个ABB,查找下一行的临床意义 abb = first_cell # 在当前行或下一行查找临床意义 clinical_text = None # 先检查当前行的其他单元格 for cell in cells: text = cell.text.strip() if 'Clinical Significance:' in text and '临床意义:' in text: clinical_text = text break # 如果当前行没有,检查下一行 if not clinical_text and i + 1 < len(rows): next_row = rows[i + 1] for cell in next_row.cells: text = cell.text.strip() if 'Clinical Significance:' in text and '临床意义:' in text: clinical_text = text break if clinical_text: # 提取英文和中文 parts = clinical_text.split('临床意义:') if len(parts) == 2: en = parts[0].replace('Clinical Significance:', '').strip() cn = parts[1].strip() if en and cn: # 标准化ABB名称 abb_key = abb.upper().strip() abb_key = abb_key.replace(' - ', '-').replace('(', '(').replace(')', ')') if abb_key not in explanations: explanations[abb_key] = { 'clinical_en': en, 'clinical_cn': cn } print(f'从模板提取了 {len(explanations)} 个项目的临床意义') # 保存 with open('template_explanations.json', 'w', encoding='utf-8') as f: json.dump(explanations, f, ensure_ascii=False, indent=2) print(f'已保存到 template_explanations.json') # 验证 Color if 'COLOR' in explanations: print(f'\nCOLOR 验证:') print(f'EN: {explanations["COLOR"]["clinical_en"][:80]}...') print(f'CN: {explanations["COLOR"]["clinical_cn"][:80]}...') if __name__ == '__main__': main()