96 lines
3.4 KiB
Python
96 lines
3.4 KiB
Python
|
|
"""
|
|||
|
|
Re-extract the clinical significance of every test item from the template file
and generate a corrected template_explanations.json.
|
|||
|
|
"""
|
|||
|
|
from docx import Document
|
|||
|
|
import json
|
|||
|
|
|
|||
|
|
# Labels that introduce the English and Chinese halves of a clinical-significance cell.
_EN_MARKER = 'Clinical Significance:'
_CN_MARKER = '临床意义:'

# First-column texts longer than this are not treated as abbreviations.
_MAX_ABB_LENGTH = 40


def _contains_cjk(text):
    """Return True if *text* has a character in the CJK Unified Ideographs block."""
    return any('\u4e00' <= ch <= '\u9fff' for ch in text)


def _is_abbreviation(first_cell: str) -> bool:
    """Return True when the stripped first-column text looks like an abbreviation (ABB).

    Rejects empty cells, header rows, clinical-significance rows, long or
    placeholder texts, and multi-line module titles that contain Chinese.
    """
    if not first_cell:
        return False
    # Table header rows carry the column title ('Abb' / '简称') in the first cell.
    if 'Abb' in first_cell or '简称' in first_cell:
        return False
    # Rows whose first cell is itself the clinical-significance text.
    if 'Clinical' in first_cell:
        return False
    # An ABB is short and is never a template placeholder.
    if len(first_cell) > _MAX_ABB_LENGTH or '{{' in first_cell:
        return False
    # Module titles span several lines and contain Chinese characters.
    if '\n' in first_cell and _contains_cjk(first_cell):
        return False
    return True


def _find_clinical_text(cells):
    """Return the stripped text of the first cell holding both markers, else None."""
    for cell in cells:
        text = cell.text.strip()
        if _EN_MARKER in text and _CN_MARKER in text:
            return text
    return None


def _split_clinical_text(clinical_text):
    """Split a combined cell text into ``(english, chinese)``.

    The text is cut at the first Chinese marker, so a repeated marker inside
    the Chinese part no longer causes the whole entry to be discarded.
    Returns None when either half is empty after stripping.
    """
    head, marker, tail = clinical_text.partition(_CN_MARKER)
    if not marker:
        return None
    en = head.replace(_EN_MARKER, '').strip()
    cn = tail.strip()
    if not (en and cn):
        return None
    return en, cn


def _normalize_abb(abb: str) -> str:
    """Return the dictionary key for *abb*.

    Upper-cases the text, collapses ' - ' to '-', and folds full-width
    parentheses (U+FF08 / U+FF09) to ASCII. The escapes are spelled out so the
    replacement cannot degrade into a no-op if the file is re-encoded.
    """
    key = abb.upper().strip()
    return key.replace(' - ', '-').replace('\uff08', '(').replace('\uff09', ')')


def main(template_path='template_complete.docx',
         output_path='template_explanations.json'):
    """Extract clinical-significance texts from the template and save them as JSON.

    Walks every table of the .docx template. For each row whose first cell
    looks like an abbreviation, the clinical-significance text is looked up in
    the same row and, failing that, in the following row. The English and
    Chinese halves are stored under the normalized abbreviation; the first
    occurrence of a key wins.

    Args:
        template_path: Path of the .docx template to read.
        output_path: Path of the JSON file to write (UTF-8, non-ASCII kept).

    Returns:
        None. The result is written to *output_path* and a summary is printed.
    """
    doc = Document(template_path)

    explanations = {}

    for table in doc.tables:
        rows = list(table.rows)

        for i, row in enumerate(rows):
            cells = row.cells
            if not cells:
                continue

            abb = cells[0].text.strip()
            if not _is_abbreviation(abb):
                continue

            # Prefer the current row; fall back to the row directly below.
            clinical_text = _find_clinical_text(cells)
            if clinical_text is None and i + 1 < len(rows):
                clinical_text = _find_clinical_text(rows[i + 1].cells)
            if clinical_text is None:
                continue

            halves = _split_clinical_text(clinical_text)
            if halves is None:
                continue
            en, cn = halves

            # Keep the first entry seen for a given abbreviation.
            abb_key = _normalize_abb(abb)
            if abb_key not in explanations:
                explanations[abb_key] = {
                    'clinical_en': en,
                    'clinical_cn': cn,
                }

    print(f'从模板提取了 {len(explanations)} 个项目的临床意义')

    # Save the result.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(explanations, f, ensure_ascii=False, indent=2)

    print(f'已保存到 {output_path}')

    # Spot-check one known item so a broken extraction is visible at a glance.
    color = explanations.get('COLOR')
    if color is not None:
        print('\nCOLOR 验证:')
        print(f'EN: {color["clinical_en"][:80]}...')
        print(f'CN: {color["clinical_cn"][:80]}...')
|
|||
|
|
|
|||
|
|
# Script entry point: run the extraction only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|