Files
yiliao/backend/rebuild_template_explanations.py

96 lines
3.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Re-extract the clinical significance of every test item from the template file
and generate a correct template_explanations.json.
"""
from docx import Document
import json
def _is_abb_candidate(first_cell):
    """Return True when a row's first-cell text looks like an ABB entry.

    Rejects empty text, header rows, clinical-text rows, long text,
    template placeholders and multi-line module titles containing Chinese.
    """
    if not first_cell:
        return False
    # Table header rows ("Abb" / "简称" columns).
    if 'Abb' in first_cell or '简称' in first_cell:
        return False
    # Rows whose first cell already holds the clinical text itself.
    if 'Clinical' in first_cell:
        return False
    # An ABB is short and never contains a "{{...}}" template placeholder.
    if len(first_cell) > 40 or '{{' in first_cell:
        return False
    # Module titles span several lines and contain CJK characters.
    has_cjk = any('\u4e00' <= c <= '\u9fff' for c in first_cell)
    if '\n' in first_cell and has_cjk:
        return False
    return True


def _find_clinical_text(cells):
    """Return the stripped text of the first cell holding both markers, else None."""
    for cell in cells:
        text = cell.text.strip()
        if 'Clinical Significance:' in text and '临床意义:' in text:
            return text
    return None


def _split_clinical_text(clinical_text):
    """Split combined clinical text into an ``(en, cn)`` tuple.

    Returns None unless the Chinese marker occurs exactly once and both
    the English and the Chinese parts are non-empty after stripping.
    """
    parts = clinical_text.split('临床意义:')
    if len(parts) != 2:
        return None
    en = parts[0].replace('Clinical Significance:', '').strip()
    cn = parts[1].strip()
    if en and cn:
        return en, cn
    return None


def _normalize_abb_key(abb):
    """Upper-case an ABB, tighten ' - ' to '-', and map full-width parentheses to ASCII."""
    abb_key = abb.upper().strip()
    # The full-width parentheses are written as escapes on purpose: an empty
    # search string here would make str.replace insert the replacement
    # between every character and corrupt every key.
    return (
        abb_key.replace(' - ', '-')
        .replace('\uff08', '(')
        .replace('\uff09', ')')
    )


def main(template_path='template_complete.docx',
         output_path='template_explanations.json'):
    """Extract clinical-significance texts from the template and save them as JSON.

    Walks every table of the .docx template. For each row whose first cell
    looks like an ABB, the clinical text is looked up in that row first and
    then in the following row. The first occurrence of each normalised ABB
    wins.

    Args:
        template_path: Path of the .docx template to read.
        output_path: Path of the JSON file to write (UTF-8, non-ASCII kept).
    """
    doc = Document(template_path)
    explanations = {}

    # Walk every table in the document.
    for table in doc.tables:
        rows = list(table.rows)
        for i, row in enumerate(rows):
            cells = row.cells
            if not cells:
                continue

            abb = cells[0].text.strip()
            # Skip blank rows, headers, module titles and placeholders.
            if not _is_abb_candidate(abb):
                continue

            # Look in the current row first, then fall back to the next row.
            clinical_text = _find_clinical_text(cells)
            if clinical_text is None and i + 1 < len(rows):
                clinical_text = _find_clinical_text(rows[i + 1].cells)
            if clinical_text is None:
                continue

            # Separate the English and the Chinese halves.
            split = _split_clinical_text(clinical_text)
            if split is None:
                continue
            en, cn = split

            # Keep the first explanation seen for each normalised ABB.
            explanations.setdefault(
                _normalize_abb_key(abb),
                {'clinical_en': en, 'clinical_cn': cn},
            )

    print(f'从模板提取了 {len(explanations)} 个项目的临床意义')

    # Save the result.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(explanations, f, ensure_ascii=False, indent=2)
    print(f'已保存到 {output_path}')

    # Spot-check the "Color" entry.
    if 'COLOR' in explanations:
        color = explanations['COLOR']
        print('\nCOLOR 验证:')
        print(f'EN: {color["clinical_en"][:80]}...')
        print(f'CN: {color["clinical_cn"][:80]}...')
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()