Files
yiliao/backend/rebuild_config_from_template.py

108 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
从模板文件重新生成 abb_mapping_config.json
只包含模板中实际存在的项目
"""
from docx import Document
import json
import re
# Ordered module-detection rules: (module name, marker substrings, combiner).
# The first rule whose markers satisfy its combiner wins, so the order matters.
# NOTE(review): 'Blood Type' is the only rule that requires BOTH markers (all);
# every other rule accepts EITHER (any). That asymmetry is preserved from the
# original code -- confirm whether it was intended.
_MODULE_RULES = (
    ('Urine Test', ('Urine Detection', '尿液检测'), any),
    ('Complete Blood Count', ('Complete Blood Count', '血常规'), any),
    ('Blood Sugar', ('Blood Sugar', '血糖'), any),
    ('Lipid Profile', ('Lipid Profile', '血脂'), any),
    ('Blood Type', ('Blood Type', '血型'), all),
    ('Blood Coagulation', ('Blood Coagulation', '凝血'), any),
    ('Four Infectious Diseases', ('Four Infectious', '传染病'), any),
    ('Serum Electrolytes', ('Serum Electrolytes', '电解质'), any),
    ('Liver Function', ('Liver Function', '肝功能'), any),
    ('Kidney Function', ('Kidney Function', '肾功能'), any),
    ('Myocardial Enzyme', ('Myocardial Enzyme', '心肌酶'), any),
    ('Thyroid Function', ('Thyroid Function', '甲状腺'), any),
    ('Thromboembolism', ('Thromboembolism', '血栓'), any),
    ('Bone Metabolism', ('Bone Metabolism', '骨代谢'), any),
    ('Microelement', ('Microelement', '微量元素'), any),
    ('Humoral Immunity', ('Humoral Immunity', '体液免疫'), any),
    ('Inflammatory Reaction', ('Inflammatory', '炎症'), any),
    ('Autoantibody', ('Autoantibody', '自身抗体'), any),
    ('Female Hormone', ('Female Hormone', '女性荷尔蒙'), any),
    ('Male Hormone', ('Male Hormone', '男性荷尔蒙'), any),
    ('Tumor Markers', ('Tumor Markers', '肿瘤标志物'), any),
    ('Lymphocyte Subpopulation', ('Lymphocyte', '淋巴细胞亚群'), any),
)


def _detect_module(row_text):
    """Return the module whose markers appear in *row_text*, or None if no rule matches."""
    for module, markers, combine in _MODULE_RULES:
        if combine(marker in row_text for marker in markers):
            return module
    return None


def extract_items_from_template(template_path='template_complete.docx', *, document=None):
    """Extract the test items listed in the template's tables, grouped by module.

    Every row of every table is scanned in order. A row whose joined text
    contains a module marker switches the "current module"; that switch
    happens before the same row is considered as a data row. A row is
    recorded as an item when all of the following hold:

    * its first cell is non-empty and shorter than 30 characters;
    * its first cell contains neither 'Abb' nor '简称' (header row);
    * its first cell does not start with 'Clinical' or '临床';
    * at least one cell contains 'Clinical Significance';
    * a module has already been detected.

    Args:
        template_path: Path of the .docx template; passed to ``Document``
            when *document* is not supplied.
        document: Optional already-loaded document. Only its ``tables``
            attribute is used (each table exposing ``rows``, each row
            ``cells``, each cell ``text``). When given, *template_path*
            is ignored and nothing is read from disk.

    Returns:
        dict mapping module name -> list of ``{'abb': ..., 'project': ...}``
        dicts in first-seen order. An abbreviation is recorded at most once
        per module. ``project`` is the stripped second cell, or the
        abbreviation itself when the row has a single cell.
    """
    if document is None:
        document = Document(template_path)

    module_items = {}
    seen = set()  # (module, abb) pairs already recorded -- O(1) duplicate check
    current_module = None

    for table in document.tables:
        for row in table.rows:
            # row.cells / cell.text are computed properties in python-docx,
            # so read them once per row instead of on every use.
            raw_texts = [cell.text for cell in row.cells]
            texts = [text.strip() for text in raw_texts]

            detected = _detect_module(' '.join(texts))
            if detected is not None:
                current_module = detected

            abb = texts[0] if texts else ''
            if not abb or len(abb) >= 30:
                continue
            if 'Abb' in abb or '简称' in abb:
                continue
            if abb.startswith(('Clinical', '临床')):
                continue
            # A 'Clinical Significance' cell is what marks a real data row.
            if not any('Clinical Significance' in text for text in raw_texts):
                continue
            if not current_module:
                continue
            if (current_module, abb) in seen:
                continue

            seen.add((current_module, abb))
            project_name = texts[1] if len(texts) > 1 else abb
            module_items.setdefault(current_module, []).append({
                'abb': abb,
                'project': project_name,
            })

    return module_items
def main():
    """Extract the template's test items and print a per-module summary.

    Prints the number of modules and items found, then for each module its
    item count and the abbreviations of up to its first three items.
    """
    print('从模板提取检测项目...')
    module_items = extract_items_from_template()

    item_count = 0
    for entries in module_items.values():
        item_count += len(entries)
    print(f'共提取 {len(module_items)} 个模块, {item_count} 个项目')

    preview_size = 3
    for name in module_items:
        entries = module_items[name]
        print(f' {name}: {len(entries)}')
        for entry in entries[:preview_size]:
            print(f' - {entry["abb"]}')
        # Report how many items were left out of the preview, if any.
        remaining = len(entries) - preview_size
        if remaining > 0:
            print(f' ... 还有 {remaining}')
# Run the extraction summary only when executed as a script, not on import.
if __name__ == "__main__":
    main()