96 lines
3.4 KiB
Python
96 lines
3.4 KiB
Python
"""
|
||
从模板文件重新提取所有检测项目的临床意义
|
||
生成正确的 template_explanations.json
|
||
"""
|
||
from docx import Document
|
||
import json
|
||
|
||
# Markers that introduce the English and Chinese halves of a clinical-significance
# cell. Each marker is accepted with an ASCII colon or a full-width colon (U+FF1A)
# so that either punctuation style in the template is recognised.
_EN_MARKERS = ('Clinical Significance:', 'Clinical Significance\uff1a')
_CN_MARKERS = ('临床意义:', '临床意义\uff1a')

# Full-width parentheses (U+FF08 / U+FF09) folded to their ASCII counterparts.
_PAREN_TABLE = str.maketrans({'\uff08': '(', '\uff09': ')'})


def _looks_like_abb(first_cell):
    """Return True when the first-column text should be treated as an abbreviation."""
    # Empty first cell: nothing to key on.
    if not first_cell:
        return False
    # Table header rows.
    if 'Abb' in first_cell or '简称' in first_cell:
        return False
    # Rows whose first cell is itself the clinical-significance text.
    if 'Clinical' in first_cell:
        return False
    # Long text or template placeholders are not abbreviations.
    if len(first_cell) > 40 or '{{' in first_cell:
        return False
    # Module titles: multi-line text containing CJK ideographs.
    if '\n' in first_cell and any('\u4e00' <= ch <= '\u9fff' for ch in first_cell):
        return False
    return True


def _find_clinical_text(cells):
    """Return the stripped text of the first cell holding both markers, else None."""
    for cell in cells:
        text = cell.text.strip()
        has_en = any(marker in text for marker in _EN_MARKERS)
        has_cn = any(marker in text for marker in _CN_MARKERS)
        if has_en and has_cn:
            return text
    return None


def _split_clinical_text(text):
    """Split *text* at the first Chinese marker and return ``(en, cn)`` stripped."""
    hits = [(text.find(marker), marker) for marker in _CN_MARKERS if marker in text]
    if not hits:
        return '', ''
    # Split only at the FIRST marker so that a later repetition of the marker
    # inside the Chinese text stays part of that text instead of voiding the entry.
    position, marker = min(hits)
    en = text[:position]
    for en_marker in _EN_MARKERS:
        en = en.replace(en_marker, '')
    cn = text[position + len(marker):]
    return en.strip(), cn.strip()


def _normalize_abb(abb):
    """Return the dictionary key for *abb*: upper-cased, ' - ' collapsed, ASCII parens."""
    key = abb.upper().strip()
    return key.replace(' - ', '-').translate(_PAREN_TABLE)


def main(template_path='template_complete.docx',
         output_path='template_explanations.json',
         doc=None):
    """Extract per-item clinical significance from the template and save it as JSON.

    Every table row whose first cell looks like an abbreviation is paired with
    the clinical-significance text found in the same row or, failing that, in
    the row immediately below. The text is split into its English and Chinese
    parts and stored under the normalised abbreviation; the first occurrence of
    a key wins.

    Args:
        template_path: Path passed to ``Document`` when *doc* is not supplied.
        output_path: Path of the UTF-8 JSON file to write.
        doc: Optional already-loaded document. Only ``doc.tables``,
            ``table.rows``, ``row.cells`` and ``cell.text`` are accessed.
    """
    if doc is None:
        doc = Document(template_path)

    explanations = {}

    # Walk every table in the document.
    for table in doc.tables:
        rows = list(table.rows)

        for index, row in enumerate(rows):
            cells = row.cells
            if not cells:
                continue

            abb = cells[0].text.strip()
            if not _looks_like_abb(abb):
                continue

            # Look in the current row first, then fall back to the next row.
            clinical_text = _find_clinical_text(cells)
            if not clinical_text and index + 1 < len(rows):
                clinical_text = _find_clinical_text(rows[index + 1].cells)
            if not clinical_text:
                continue

            en, cn = _split_clinical_text(clinical_text)
            if not (en and cn):
                continue

            abb_key = _normalize_abb(abb)
            if abb_key not in explanations:
                explanations[abb_key] = {
                    'clinical_en': en,
                    'clinical_cn': cn,
                }

    print(f'从模板提取了 {len(explanations)} 个项目的临床意义')

    # Save the result.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(explanations, f, ensure_ascii=False, indent=2)

    print(f'已保存到 {output_path}')

    # Spot-check the COLOR entry when it was extracted.
    if 'COLOR' in explanations:
        print(f'\nCOLOR 验证:')
        print(f'EN: {explanations["COLOR"]["clinical_en"][:80]}...')
        print(f'CN: {explanations["COLOR"]["clinical_cn"][:80]}...')
|
||
|
||
# Run the extraction only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|