141 lines
4.8 KiB
Python
141 lines
4.8 KiB
Python
"""
Extract the clinical significance of every test item from the template file.
"""
|
||
from docx import Document
|
||
import json
|
||
import re
|
||
|
||
def extract_all_explanations(template='template_complete.docx'):
    """Collect bilingual clinical-significance texts from a Word template.

    Every table with at least two rows is scanned top to bottom. A short
    first-column cell (fewer than 40 characters that does not itself start
    with ``Clinical`` / ``临床``) is remembered as the current abbreviation
    (ABB). Any cell containing both ``Clinical Significance:`` and
    ``临床意义:`` is split at the first Chinese marker into an English and a
    Chinese part, and the pair is stored under the normalized current ABB.

    Args:
        template: Path of the ``.docx`` template, passed to ``Document``,
            or an already-loaded document object exposing ``.tables``.
            Defaults to ``'template_complete.docx'``.

    Returns:
        dict: Maps each normalized ABB (upper-cased, ``' - '`` collapsed to
        ``'-'``) to ``{'clinical_en': str, 'clinical_cn': str}``. When an
        ABB occurs more than once, the first occurrence wins.
    """
    en_marker = 'Clinical Significance:'
    cn_marker = '临床意义:'

    # Accept an already-opened document so callers can reuse one instance.
    doc = template if hasattr(template, 'tables') else Document(template)

    explanations = {}

    for table in doc.tables:
        rows = table.rows
        if len(rows) < 2:
            continue

        # The ABB carries over from row to row, but never across tables.
        current_abb = None
        for row in rows:
            cells = row.cells
            if not cells:
                continue

            first_cell_text = cells[0].text.strip()

            # Skip header rows (they label the ABB column).
            if 'Abb' in first_cell_text or '简称' in first_cell_text:
                continue

            # A short first cell that is not a clinical-significance
            # paragraph is treated as the ABB for the following rows.
            if (
                first_cell_text
                and len(first_cell_text) < 40
                and not first_cell_text.startswith(('Clinical', '临床'))
            ):
                current_abb = first_cell_text

            for cell in cells:
                text = cell.text.strip()
                if en_marker not in text or cn_marker not in text:
                    continue

                # Split at the FIRST Chinese marker only, so a Chinese
                # paragraph that repeats the marker is kept instead of
                # being silently discarded.
                en_part, _, cn_part = text.partition(cn_marker)
                en = en_part.replace(en_marker, '').strip()
                cn = cn_part.strip()
                if not (current_abb and en and cn):
                    continue

                # Normalize the ABB so it can be matched against config keys.
                # NOTE(review): the original also chained
                # .replace('(', '(').replace(')', ')'), which are no-ops as
                # written; presumably full-width parentheses were intended.
                # Confirm before adding, and change main() in the same way.
                abb_key = current_abb.upper().strip().replace(' - ', '-')

                if abb_key not in explanations:
                    explanations[abb_key] = {
                        'clinical_en': en,
                        'clinical_cn': cn,
                    }
                    print(f'提取: {abb_key}')

    return explanations
|
||
|
||
def main():
    """Merge template explanations into the JSON store and report coverage.

    Steps:
      1. Extract explanations with ``extract_all_explanations()``.
      2. Load ``template_explanations.json`` when it can be read and parsed;
         otherwise start from an empty mapping.
      3. Add or overwrite every entry that differs from the template (the
         template wins) and write the merged mapping back.
      4. Load ``abb_mapping_config.json`` and print every configured ABB
         that has no entry, after trying a few spelling variants.

    Raises:
        OSError, ValueError: If ``abb_mapping_config.json`` cannot be read
            or parsed; the coverage check does not handle these.
    """
    explanations_path = 'template_explanations.json'
    config_path = 'abb_mapping_config.json'

    print('从模板提取临床意义...')
    print('=' * 60)

    template_explanations = extract_all_explanations()
    print(f'\n从模板提取了 {len(template_explanations)} 个项目')

    # Load the existing store. Only I/O and decoding/parsing failures mean
    # "start fresh"; a bare ``except`` would also hide KeyboardInterrupt,
    # SystemExit and programming errors.
    try:
        with open(explanations_path, 'r', encoding='utf-8') as f:
            existing = json.load(f)
    except (OSError, ValueError):
        existing = {}
        print('创建新文件')
    else:
        print(f'现有文件中有 {len(existing)} 个项目')

    # Update from the template (template content takes precedence).
    updated_count = 0
    for abb, exp in template_explanations.items():
        if abb not in existing or existing[abb] != exp:
            existing[abb] = exp
            updated_count += 1

    # Save the merged mapping.
    with open(explanations_path, 'w', encoding='utf-8') as f:
        json.dump(existing, f, ensure_ascii=False, indent=2)

    print(f'\n更新了 {updated_count} 个项目')
    print(f'最终文件包含 {len(existing)} 个项目')

    # Check that every configured item has a clinical-significance entry.
    print('\n' + '=' * 60)
    print('检查配置文件中的项目覆盖情况...')

    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    config_abbs = set()
    for module_data in config.get('modules', {}).values():
        for item in module_data.get('items', []):
            # Same normalization as the extraction step. A missing or null
            # 'abb' is skipped rather than reported as a blank entry.
            # NOTE(review): the original also chained
            # .replace('(', '(').replace(')', ')'), which are no-ops as
            # written; presumably full-width parentheses were intended.
            abb = (item.get('abb') or '').upper().strip().replace(' - ', '-')
            if abb:
                config_abbs.add(abb)

    # An ABB counts as covered when it, or one of a few spelling variants,
    # is a key of the merged mapping.
    missing = []
    for abb in config_abbs:
        if abb in existing:
            continue
        variants = (
            abb.replace('-', ' '),
            abb.replace(' ', '-'),
            abb.replace('%', ''),
            abb + ' COUNT',
            abb + ' TYPE',
        )
        if not any(variant in existing for variant in variants):
            missing.append(abb)

    if missing:
        print(f'\n缺失临床意义的项目 ({len(missing)}):')
        for abb in sorted(missing):
            print(f' {abb}')
    else:
        print('\n所有配置项目都有临床意义!')
|
||
|
||
# Run the extract / merge / coverage-report workflow only when this file is
# executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
|