"""Extract the clinical significance of every test item from the template file.

The script reads the tables of a .docx template, collects the English and
Chinese "clinical significance" text for each abbreviation (ABB), merges the
result into a JSON file, and finally reports which abbreviations listed in
the mapping config still have no explanation.
"""

import json
import re

from docx import Document

# Default file locations, resolved relative to the current working directory.
_TEMPLATE_PATH = 'template_complete.docx'
_EXPLANATIONS_PATH = 'template_explanations.json'
_CONFIG_PATH = 'abb_mapping_config.json'

# Markers that introduce the English / Chinese explanation inside one cell.
_EN_MARKER = 'Clinical Significance:'
_CN_MARKER = '临床意义:'

# A first-column text of this length or longer is never taken as an ABB.
_MAX_ABB_LENGTH = 40


def _normalize_abb(raw):
    """Return the canonical dictionary key for an abbreviation.

    The text is upper-cased and stripped, ' - ' is collapsed to '-', and
    full-width parentheses are mapped to their ASCII counterparts.
    """
    key = raw.upper().strip()
    # The full-width parentheses are written as escapes so that they cannot
    # be silently folded into ASCII (which would turn these into no-ops).
    return (
        key.replace(' - ', '-')
        .replace('\uff08', '(')
        .replace('\uff09', ')')
    )


def _split_clinical_text(text):
    """Split a cell text into an ``(english, chinese)`` pair.

    Returns ``None`` unless the text contains both markers, the Chinese
    marker occurs exactly once, and both resulting parts are non-empty.
    """
    if _EN_MARKER not in text or _CN_MARKER not in text:
        return None

    parts = text.split(_CN_MARKER)
    if len(parts) != 2:
        return None

    english = parts[0].replace(_EN_MARKER, '').strip()
    chinese = parts[1].strip()
    if not english or not chinese:
        return None
    return english, chinese


def _find_missing(config_abbs, existing):
    """Return, sorted, the abbreviations that have no entry in *existing*.

    Besides the exact key, a few spelling variants are accepted: '-' and
    ' ' swapped for each other, '%' removed, and a ' COUNT' or ' TYPE'
    suffix appended.
    """
    missing = []
    for abb in config_abbs:
        candidates = (
            abb,
            abb.replace('-', ' '),
            abb.replace(' ', '-'),
            abb.replace('%', ''),
            abb + ' COUNT',
            abb + ' TYPE',
        )
        if not any(candidate in existing for candidate in candidates):
            missing.append(abb)
    return sorted(missing)


def extract_all_explanations(template_path=_TEMPLATE_PATH):
    """Collect the clinical-significance text for every ABB in the template.

    Every table with at least two rows is scanned. A row whose first cell
    holds a short text (fewer than 40 characters) that does not start with
    'Clinical' or '临床' sets the current ABB. Any cell containing both
    'Clinical Significance:' and '临床意义:' is then split into its English
    and Chinese parts and stored under the normalized current ABB.

    Args:
        template_path: Path of the .docx template to read.

    Returns:
        A dict mapping the normalized ABB to a dict with the keys
        'clinical_en' and 'clinical_cn'. The first explanation found for an
        ABB wins; later ones for the same key are ignored.
    """
    document = Document(template_path)
    explanations = {}

    for table in document.tables:
        rows = table.rows
        if len(rows) < 2:
            continue

        # The ABB is tracked per table and carried over from row to row,
        # so an explanation may sit on a later row than its ABB.
        current_abb = None
        for row in rows:
            cells = row.cells
            if not cells:
                continue

            first_text = cells[0].text.strip()

            # Header rows are skipped entirely.
            if 'Abb' in first_text or '简称' in first_text:
                continue

            if (
                first_text
                and len(first_text) < _MAX_ABB_LENGTH
                and not first_text.startswith(('Clinical', '临床'))
            ):
                current_abb = first_text

            # Without a known ABB there is nothing to attach the text to.
            if not current_abb:
                continue

            for cell in cells:
                pair = _split_clinical_text(cell.text.strip())
                if pair is None:
                    continue

                abb_key = _normalize_abb(current_abb)
                if abb_key in explanations:
                    continue

                english, chinese = pair
                explanations[abb_key] = {
                    'clinical_en': english,
                    'clinical_cn': chinese,
                }
                print(f'提取: {abb_key}')

    return explanations


def main():
    """Refresh the explanations JSON file and report uncovered config items.

    Steps: extract the explanations from the template, merge them into the
    existing JSON file (template content overrides existing entries), write
    the file back, then list every abbreviation from the mapping config
    that has no explanation under its key or an accepted variant.
    """
    print('从模板提取临床意义...')
    print('=' * 60)

    template_explanations = extract_all_explanations()
    print(f'\n从模板提取了 {len(template_explanations)} 个项目')

    # A missing, unreadable or malformed file means starting from scratch.
    # OSError covers file-system failures; ValueError covers both
    # json.JSONDecodeError and UnicodeDecodeError. Unlike a bare ``except``
    # this lets KeyboardInterrupt and SystemExit propagate.
    try:
        with open(_EXPLANATIONS_PATH, 'r', encoding='utf-8') as f:
            existing = json.load(f)
    except (OSError, ValueError):
        existing = {}
        print('创建新文件')
    else:
        print(f'现有文件中有 {len(existing)} 个项目')

    # Template content takes precedence over what is already stored.
    updated_count = 0
    for abb, explanation in template_explanations.items():
        if existing.get(abb) != explanation:
            existing[abb] = explanation
            updated_count += 1

    with open(_EXPLANATIONS_PATH, 'w', encoding='utf-8') as f:
        json.dump(existing, f, ensure_ascii=False, indent=2)

    print(f'\n更新了 {updated_count} 个项目')
    print(f'最终文件包含 {len(existing)} 个项目')

    # Check that every item in the config has a clinical significance.
    print('\n' + '=' * 60)
    print('检查配置文件中的项目覆盖情况...')

    with open(_CONFIG_PATH, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # Config abbreviations get the same normalization as the template keys.
    config_abbs = set()
    for module_data in config.get('modules', {}).values():
        for item in module_data.get('items', []):
            config_abbs.add(_normalize_abb(item.get('abb', '')))

    missing = _find_missing(config_abbs, existing)
    if missing:
        print(f'\n缺失临床意义的项目 ({len(missing)}):')
        for abb in missing:
            print(f'  {abb}')
    else:
        print('\n所有配置项目都有临床意义!')


if __name__ == '__main__':
    main()