Files
yiliao/backend/rebuild_template_explanations.py

96 lines
3.4 KiB
Python
Raw Permalink Normal View History

"""
从模板文件重新提取所有检测项目的临床意义
生成正确的 template_explanations.json
"""
from docx import Document
import json
# Marker strings that a cell must both contain to count as a
# clinical-significance cell (English section first, Chinese section second).
_EN_MARKER = 'Clinical Significance:'
_CN_MARKER = '临床意义:'


def _is_abb_cell(first_cell):
    """Return True when the stripped first-cell text passes every ABB-row filter."""
    # Empty rows.
    if not first_cell:
        return False
    # Table header rows.
    if 'Abb' in first_cell or '简称' in first_cell:
        return False
    # Rows whose first cell is itself a clinical-significance cell.
    if 'Clinical' in first_cell:
        return False
    # An abbreviation is short text and never a `{{...}}` template placeholder.
    if len(first_cell) > 40 or '{{' in first_cell:
        return False
    # Module titles: multi-line text that contains CJK ideographs.
    if '\n' in first_cell and any('\u4e00' <= c <= '\u9fff' for c in first_cell):
        return False
    return True


def _find_clinical_text(cells):
    """Return the stripped text of the first cell holding both markers, else None."""
    for cell in cells:
        text = cell.text.strip()
        if _EN_MARKER in text and _CN_MARKER in text:
            return text
    return None


def _split_clinical_text(clinical_text):
    """Split a clinical cell into ``(english, chinese)``; None if it cannot be split.

    The text is cut at the Chinese marker. It is rejected when the marker does
    not occur exactly once, or when either side is empty after stripping.
    """
    parts = clinical_text.split(_CN_MARKER)
    if len(parts) != 2:
        return None
    en = parts[0].replace(_EN_MARKER, '').strip()
    cn = parts[1].strip()
    if not (en and cn):
        return None
    return en, cn


def _normalize_abb_key(abb):
    """Return the dictionary key for an abbreviation.

    Upper-cases and strips the text, collapses ' - ' to '-', and converts
    full-width parentheses to their ASCII equivalents.
    """
    key = abb.upper().strip().replace(' - ', '-')
    # The full-width parentheses are written as escapes on purpose: if the raw
    # characters are stripped by an encoding-unaware tool, the call degrades to
    # replace('', '('), which inserts '(' between every character of the key.
    return key.replace('\uff08', '(').replace('\uff09', ')')


def main(template_path='template_complete.docx',
         output_path='template_explanations.json'):
    """Extract per-item clinical significance text from a .docx template to JSON.

    Every table row whose first cell looks like an abbreviation (ABB) is paired
    with a clinical-significance cell found in the same row or, failing that,
    in the next row. The result maps the normalized ABB to a dict with the keys
    ``clinical_en`` and ``clinical_cn``. When an ABB occurs more than once, the
    first occurrence is kept.

    Args:
        template_path: Path of the .docx template to read.
        output_path: Path of the JSON file to write (UTF-8, non-ASCII kept as-is).
    """
    doc = Document(template_path)
    explanations = {}

    # Walk every table in the document.
    for table in doc.tables:
        rows = list(table.rows)
        for i, row in enumerate(rows):
            cells = row.cells
            if not cells:
                continue

            abb = cells[0].text.strip()
            if not _is_abb_cell(abb):
                continue

            # Look in the current row first, then fall back to the next row.
            clinical_text = _find_clinical_text(cells)
            if clinical_text is None and i + 1 < len(rows):
                clinical_text = _find_clinical_text(rows[i + 1].cells)
            if clinical_text is None:
                continue

            split = _split_clinical_text(clinical_text)
            if split is None:
                continue
            en, cn = split

            abb_key = _normalize_abb_key(abb)
            # First occurrence wins; later duplicates are ignored.
            if abb_key not in explanations:
                explanations[abb_key] = {
                    'clinical_en': en,
                    'clinical_cn': cn
                }

    print(f'从模板提取了 {len(explanations)} 个项目的临床意义')

    # Save the result.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(explanations, f, ensure_ascii=False, indent=2)
    print(f'已保存到 {output_path}')

    # Spot-check one well-known entry so a broken extraction is visible at once.
    if 'COLOR' in explanations:
        print('\nCOLOR 验证:')
        print(f'EN: {explanations["COLOR"]["clinical_en"][:80]}...')
        print(f'CN: {explanations["COLOR"]["clinical_cn"][:80]}...')
# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()