Files
yiliao/backend/extract_template_explanations.py

141 lines
4.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
从模板文件中提取所有检测项目的临床意义
"""
from docx import Document
import json
import re
def extract_all_explanations(doc=None):
    """Collect the bilingual clinical-significance text for each test item.

    Every table with at least two rows is scanned row by row. A row whose
    first cell is a short label (fewer than 40 characters and not starting
    with 'Clinical' / '临床') sets the current abbreviation. Any cell that
    contains both 'Clinical Significance:' and '临床意义:' is split into its
    English and Chinese halves and stored under the current abbreviation.

    Args:
        doc: Optional already-loaded document exposing
            ``tables -> rows -> cells -> text``. When omitted,
            'template_complete.docx' is opened from the working directory.

    Returns:
        dict mapping the normalized abbreviation to
        ``{'clinical_en': ..., 'clinical_cn': ...}``. The first explanation
        found for an abbreviation is kept; later ones are ignored.
    """
    if doc is None:
        doc = Document('template_complete.docx')

    explanations = {}
    for table in doc.tables:
        rows = table.rows
        if len(rows) < 2:
            continue

        # The abbreviation carries over to following rows of the same table,
        # because the explanation usually sits in a row below its label.
        current_abb = None
        for row in rows:
            cells = row.cells
            if not cells:
                continue

            first_cell_text = cells[0].text.strip()

            # Header rows are recognised by their first-column caption.
            if 'Abb' in first_cell_text or '简称' in first_cell_text:
                continue

            # A short first cell that is not itself an explanation is
            # treated as the abbreviation for the rows that follow.
            if (
                first_cell_text
                and len(first_cell_text) < 40
                and not first_cell_text.startswith(('Clinical', '临床'))
            ):
                current_abb = first_cell_text

            for cell in cells:
                text = cell.text.strip()
                if 'Clinical Significance:' not in text or '临床意义:' not in text:
                    continue

                # Exactly one Chinese marker is required so the text splits
                # cleanly into an English half and a Chinese half.
                parts = text.split('临床意义:')
                if len(parts) != 2:
                    continue

                en = parts[0].replace('Clinical Significance:', '').strip()
                cn = parts[1].strip()
                if not (current_abb and en and cn):
                    continue

                # Normalize the key: upper-case, tighten ' - ' to '-', and map
                # full-width parentheses (U+FF08 / U+FF09) to ASCII ones.
                # Escapes are used so the characters cannot be lost again.
                abb_key = (
                    current_abb.upper().strip()
                    .replace(' - ', '-')
                    .replace('\uff08', '(')
                    .replace('\uff09', ')')
                )
                if abb_key not in explanations:
                    explanations[abb_key] = {
                        'clinical_en': en,
                        'clinical_cn': cn,
                    }
                    print(f'提取: {abb_key}')
    return explanations
def main():
    """Refresh template_explanations.json and report configuration coverage.

    Steps:
      1. Extract explanations from the template via
         ``extract_all_explanations()``.
      2. Load the existing 'template_explanations.json'; start from an empty
         mapping when it is missing or not valid JSON.
      3. Overwrite or add every entry that differs from the template
         (template content takes priority) and write the file back.
      4. Read 'abb_mapping_config.json' and print every configured
         abbreviation that has no explanation, after trying a few spelling
         variants.

    Raises:
        OSError / json.JSONDecodeError: if 'abb_mapping_config.json' cannot
        be read or parsed, or the output file cannot be written.
    """
    print('从模板提取临床意义...')
    print('=' * 60)
    template_explanations = extract_all_explanations()
    print(f'\n从模板提取了 {len(template_explanations)} 个项目')

    # Load the current file. Only "no file yet" and "file is not valid JSON"
    # fall back to a fresh mapping; anything else (e.g. Ctrl-C) propagates.
    try:
        with open('template_explanations.json', 'r', encoding='utf-8') as f:
            existing = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        existing = {}
        print('创建新文件')
    else:
        print(f'现有文件中有 {len(existing)} 个项目')

    # Template content wins over whatever is already stored.
    updated_count = 0
    for abb, exp in template_explanations.items():
        if existing.get(abb) != exp:
            existing[abb] = exp
            updated_count += 1

    with open('template_explanations.json', 'w', encoding='utf-8') as f:
        json.dump(existing, f, ensure_ascii=False, indent=2)
    print(f'\n更新了 {updated_count} 个项目')
    print(f'最终文件包含 {len(existing)} 个项目')

    # Coverage check against the abbreviation mapping configuration.
    print('\n' + '=' * 60)
    print('检查配置文件中的项目覆盖情况...')
    with open('abb_mapping_config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)

    # Normalize configured abbreviations the same way extracted keys are
    # normalized: upper-case, ' - ' -> '-', full-width parentheses
    # (U+FF08 / U+FF09) -> ASCII parentheses.
    config_abbs = set()
    for module_data in config.get('modules', {}).values():
        for item in module_data.get('items', []):
            abb = item.get('abb', '').upper().strip()
            abb = (
                abb.replace(' - ', '-')
                .replace('\uff08', '(')
                .replace('\uff09', ')')
            )
            config_abbs.add(abb)

    missing = []
    for abb in config_abbs:
        if abb in existing:
            continue
        # Tolerate common spelling differences before declaring it missing.
        variants = (
            abb.replace('-', ' '),
            abb.replace(' ', '-'),
            abb.replace('%', ''),
            abb + ' COUNT',
            abb + ' TYPE',
        )
        if not any(v in existing for v in variants):
            missing.append(abb)

    if missing:
        print(f'\n缺失临床意义的项目 ({len(missing)}):')
        for abb in sorted(missing):
            print(f' {abb}')
    else:
        print('\n所有配置项目都有临床意义!')
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main()