"""Extract the test items that actually exist in the report template.

The stated purpose of this script is to regenerate ``abb_mapping_config.json``
from the template file so that it only contains items present in the template.

NOTE(review): the code in this file only extracts the items and prints a
summary; nothing here writes ``abb_mapping_config.json`` (``json`` and ``re``
are imported but unused). Confirm whether the write step is missing or lives
elsewhere.
"""
import json
import re
from typing import Dict, List, Optional

from docx import Document

# Template read when the caller does not pass an explicit path.
DEFAULT_TEMPLATE_PATH = 'template_complete.docx'

# Ordered (module name, keywords, combiner) rules; the first matching rule wins.
# ``any`` means one keyword is enough, ``all`` means every keyword must appear.
# Order matters: e.g. 'Male Hormone' is a substring of 'Female Hormone', so the
# female rule has to be tested first.
_MODULE_RULES = (
    ('Urine Test', ('Urine Detection', '尿液检测'), any),
    ('Complete Blood Count', ('Complete Blood Count', '血常规'), any),
    ('Blood Sugar', ('Blood Sugar', '血糖'), any),
    ('Lipid Profile', ('Lipid Profile', '血脂'), any),
    # NOTE(review): this is the only rule that requires BOTH keywords; every
    # other rule accepts either one. Kept as-is -- confirm it is intentional.
    ('Blood Type', ('Blood Type', '血型'), all),
    ('Blood Coagulation', ('Blood Coagulation', '凝血'), any),
    ('Four Infectious Diseases', ('Four Infectious', '传染病'), any),
    ('Serum Electrolytes', ('Serum Electrolytes', '电解质'), any),
    ('Liver Function', ('Liver Function', '肝功能'), any),
    ('Kidney Function', ('Kidney Function', '肾功能'), any),
    ('Myocardial Enzyme', ('Myocardial Enzyme', '心肌酶'), any),
    ('Thyroid Function', ('Thyroid Function', '甲状腺'), any),
    ('Thromboembolism', ('Thromboembolism', '血栓'), any),
    ('Bone Metabolism', ('Bone Metabolism', '骨代谢'), any),
    ('Microelement', ('Microelement', '微量元素'), any),
    ('Humoral Immunity', ('Humoral Immunity', '体液免疫'), any),
    ('Inflammatory Reaction', ('Inflammatory', '炎症'), any),
    ('Autoantibody', ('Autoantibody', '自身抗体'), any),
    ('Female Hormone', ('Female Hormone', '女性荷尔蒙'), any),
    ('Male Hormone', ('Male Hormone', '男性荷尔蒙'), any),
    ('Tumor Markers', ('Tumor Markers', '肿瘤标志物'), any),
    ('Lymphocyte Subpopulation', ('Lymphocyte', '淋巴细胞亚群'), any),
)


def _detect_module(row_text: str) -> Optional[str]:
    """Return the module whose keywords occur in *row_text*, or None."""
    for module, keywords, combine in _MODULE_RULES:
        if combine(keyword in row_text for keyword in keywords):
            return module
    return None


def _is_item_abbreviation(text: str) -> bool:
    """Return True if first-column *text* can be an item abbreviation."""
    # Abbreviations are short, non-empty strings.
    if not text or len(text) >= 30:
        return False
    # Skip column-header cells ("Abb" / "简称").
    if 'Abb' in text or '简称' in text:
        return False
    # Skip cells that begin with the clinical-significance label.
    return not text.startswith(('Clinical', '临床'))


def extract_items_from_template(
    template_path: str = DEFAULT_TEMPLATE_PATH,
) -> Dict[str, List[Dict[str, str]]]:
    """Extract all test items from the template document.

    Every row of every table is scanned. A row whose joined cell text matches
    a module rule switches the current module; a row is recorded as an item
    when its first cell passes the abbreviation checks, some cell of the row
    contains 'Clinical Significance', and a module has already been seen.

    Args:
        template_path: Path of the .docx template to read. Defaults to
            'template_complete.docx', the path this function always used
            before the parameter existed.

    Returns:
        A dict mapping module name to a list of ``{'abb': ..., 'project': ...}``
        dicts in order of first appearance, with each abbreviation listed at
        most once per module.
    """
    doc = Document(template_path)

    module_items: Dict[str, List[Dict[str, str]]] = {}
    # Abbreviations already recorded per module, for O(1) duplicate checks.
    seen_abbs: Dict[str, set] = {}
    current_module: Optional[str] = None

    for table in doc.tables:
        for row in table.rows:
            # Read the cells and their text once per row.
            cell_texts = [cell.text for cell in row.cells]
            row_text = ' '.join(text.strip() for text in cell_texts)

            # NOTE(review): detection runs on every row, so a data row that
            # happens to contain a module keyword also switches the module.
            detected = _detect_module(row_text)
            if detected is not None:
                current_module = detected

            # Abbreviation candidate: the first column.
            first_cell = cell_texts[0].strip() if cell_texts else ''
            if not _is_item_abbreviation(first_cell):
                continue

            # Only rows carrying a clinical-significance text are data rows.
            has_clinical = any(
                'Clinical Significance' in text for text in cell_texts
            )
            if not has_clinical or current_module is None:
                continue

            # Avoid duplicates within the same module.
            module_seen = seen_abbs.setdefault(current_module, set())
            if first_cell in module_seen:
                continue
            module_seen.add(first_cell)

            # Project name is the second column, or the abbreviation itself
            # when the row has only one cell.
            if len(cell_texts) > 1:
                project_name = cell_texts[1].strip()
            else:
                project_name = first_cell

            module_items.setdefault(current_module, []).append({
                'abb': first_cell,
                'project': project_name,
            })

    return module_items


def main():
    """Extract the items from the default template and print a summary."""
    print('从模板提取检测项目...')
    module_items = extract_items_from_template()

    total = sum(len(items) for items in module_items.values())
    print(f'共提取 {len(module_items)} 个模块, {total} 个项目')

    for module, items in module_items.items():
        print(f' {module}: {len(items)} 项')
        # Show at most the first three abbreviations of each module.
        for item in items[:3]:
            print(f' - {item["abb"]}')
        if len(items) > 3:
            print(f' ... 还有 {len(items)-3} 项')


if __name__ == '__main__':
    main()