141 lines
4.8 KiB
Python
141 lines
4.8 KiB
Python
|
|
"""
|
|||
|
|
Extract the clinical significance of every test item from the template file.
|
|||
|
|
"""
|
|||
|
|
from docx import Document
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
|
|||
|
|
def _normalize_abb(raw_abb):
    """Return the dictionary key used for an abbreviation.

    The text is upper-cased and trimmed, a spaced hyphen (``' - '``) is
    collapsed to ``'-'`` and full-width parentheses are mapped to ASCII.
    """
    key = raw_abb.upper().strip()
    # NOTE(review): replacing an ASCII parenthesis with itself does nothing,
    # so the "special characters" are assumed to be the full-width forms
    # that are common in Chinese text -- confirm against the real template.
    return key.replace(' - ', '-').replace('(', '(').replace(')', ')')


def _is_abbreviation(first_cell_text):
    """Tell whether a first-column text looks like an abbreviation.

    A candidate is non-empty, shorter than 40 characters and does not start
    with the words that introduce a clinical-significance paragraph.
    """
    if not first_cell_text or len(first_cell_text) >= 40:
        return False
    return not first_cell_text.startswith(('Clinical', '临床'))


def _split_clinical_text(text):
    """Split a cell text into its ``(english, chinese)`` explanations.

    Returns ``None`` unless the text contains both markers, the Chinese
    marker occurs exactly once, and both halves are non-empty after the
    English marker has been removed and whitespace trimmed.
    """
    if 'Clinical Significance:' not in text or '临床意义:' not in text:
        return None

    parts = text.split('临床意义:')
    if len(parts) != 2:
        return None

    english = parts[0].replace('Clinical Significance:', '').strip()
    chinese = parts[1].strip()
    if not english or not chinese:
        return None
    return english, chinese


def extract_all_explanations(template_path='template_complete.docx'):
    """Collect bilingual clinical-significance texts from a Word template.

    Every table with at least two rows is scanned row by row.  The first
    cell of a row is remembered as the current abbreviation when it looks
    like one; any cell of the row that carries both the English and the
    Chinese marker is then stored under that abbreviation.  The remembered
    abbreviation carries over to later rows of the same table, and the first
    explanation found for a key wins.

    Args:
        template_path: Path of the ``.docx`` file to read.  Defaults to the
            file name the script has always used.

    Returns:
        dict mapping the normalized abbreviation to
        ``{'clinical_en': str, 'clinical_cn': str}``.
    """
    doc = Document(template_path)
    explanations = {}

    for table in doc.tables:
        rows = table.rows
        # Tables with fewer than two rows are skipped.
        if len(rows) < 2:
            continue

        # Reset per table so an abbreviation never leaks into the next one.
        current_abb = None
        for row in rows:
            cells = row.cells
            if not cells:
                continue

            first_cell_text = cells[0].text.strip()

            # Header rows are recognised by their first-column caption.
            if 'Abb' in first_cell_text or '简称' in first_cell_text:
                continue

            if _is_abbreviation(first_cell_text):
                current_abb = first_cell_text

            # Without an abbreviation there is nothing to file a text under.
            if not current_abb:
                continue

            for cell in cells:
                parsed = _split_clinical_text(cell.text.strip())
                if parsed is None:
                    continue

                abb_key = _normalize_abb(current_abb)
                # Keep the first explanation seen for a key.
                if abb_key not in explanations:
                    english, chinese = parsed
                    explanations[abb_key] = {
                        'clinical_en': english,
                        'clinical_cn': chinese,
                    }
                    print(f'提取: {abb_key}')

    return explanations
|
|||
|
|
|
|||
|
|
def _canonical_abb(raw_abb):
    """Upper-case, trim and punctuation-normalize an abbreviation.

    A spaced hyphen (``' - '``) becomes ``'-'`` and full-width parentheses
    are mapped to their ASCII forms.
    """
    key = raw_abb.upper().strip()
    # NOTE(review): replacing an ASCII parenthesis with itself does nothing,
    # so the full-width forms are assumed to be what was intended -- confirm.
    return key.replace(' - ', '-').replace('(', '(').replace(')', ')')


def _load_existing_explanations(path):
    """Load previously saved explanations, or start empty if unusable.

    A missing or unreadable file, malformed JSON, or a top-level JSON value
    that is not an object all result in an empty dict.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            existing = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file.  ValueError: covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        existing = None

    if not isinstance(existing, dict):
        print('创建新文件')
        return {}

    print(f'现有文件中有 {len(existing)} 个项目')
    return existing


def _collect_config_abbs(config):
    """Return the normalized abbreviations of all items in all modules."""
    abbs = set()
    for module_data in config.get('modules', {}).values():
        for item in module_data.get('items', []):
            # A missing or null 'abb' must not crash or yield an empty key.
            abb = _canonical_abb(item.get('abb') or '')
            if abb:
                abbs.add(abb)
    return abbs


def _has_explanation(abb, known_keys):
    """Check whether *abb*, or a common spelling variant of it, is known."""
    if abb in known_keys:
        return True
    variants = (
        abb.replace('-', ' '),
        abb.replace(' ', '-'),
        abb.replace('%', ''),
        abb + ' COUNT',
        abb + ' TYPE',
    )
    return any(variant in known_keys for variant in variants)


def main():
    """Refresh ``template_explanations.json`` and report config coverage.

    Steps:
        1. Extract the explanations from the Word template.
        2. Merge them into the saved JSON file; template content wins.
        3. Write the merged result back to the JSON file.
        4. List every abbreviation in ``abb_mapping_config.json`` that has
           no explanation, also trying a few spelling variants.

    Raises:
        OSError: If the JSON output cannot be written or the config file
            cannot be opened.
        json.JSONDecodeError: If the config file is not valid JSON.
    """
    print('从模板提取临床意义...')
    print('=' * 60)

    template_explanations = extract_all_explanations()
    print(f'\n从模板提取了 {len(template_explanations)} 个项目')

    existing = _load_existing_explanations('template_explanations.json')

    # Template content takes priority over whatever was saved before.
    updated_count = 0
    for abb, exp in template_explanations.items():
        if abb not in existing or existing[abb] != exp:
            existing[abb] = exp
            updated_count += 1

    with open('template_explanations.json', 'w', encoding='utf-8') as f:
        json.dump(existing, f, ensure_ascii=False, indent=2)

    print(f'\n更新了 {updated_count} 个项目')
    print(f'最终文件包含 {len(existing)} 个项目')

    # Verify that every configured item has a clinical-significance entry.
    print('\n' + '=' * 60)
    print('检查配置文件中的项目覆盖情况...')

    with open('abb_mapping_config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)

    config_abbs = _collect_config_abbs(config)

    # Compare against both the stored keys and their normalized forms so
    # that keys saved with different punctuation still count as covered.
    known_keys = set(existing) | {_canonical_abb(key) for key in existing}
    missing = sorted(
        abb for abb in config_abbs if not _has_explanation(abb, known_keys)
    )

    if missing:
        print(f'\n缺失临床意义的项目 ({len(missing)}):')
        for abb in missing:
            print(f' {abb}')
    else:
        print('\n所有配置项目都有临床意义!')


# Run the workflow only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|