初始化医疗报告生成项目,添加核心代码文件
This commit is contained in:
551
backend/test_extraction_logic.py
Normal file
551
backend/test_extraction_logic.py
Normal file
@@ -0,0 +1,551 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
Extraction-logic tests - no OCR/DeepSeek API calls, purely local.
Covers:
1. parse_medical_data_v2: OCR text -> parsed test items
2. classify_abb_module: ABB / project name -> module classification (including Chinese keywords)
3. match_with_template: extracted data -> template matching
"""
|
||||
import sys
|
||||
import os
|
||||
import io
|
||||
import json
|
||||
|
||||
# Force UTF-8 output on the console (needed on Windows terminals).
# The streams are reconfigured in place instead of being replaced by new
# TextIOWrapper objects: that keeps the existing stream objects and their
# buffering mode, and it does not crash when a stream has no ``.buffer``
# attribute (for example when stdout is redirected to an in-memory object)
# or is None.
for _stream_name in ("stdout", "stderr"):
    _stream = getattr(sys, _stream_name)
    if hasattr(_stream, "reconfigure"):
        _stream.reconfigure(encoding="utf-8", errors="replace")
    elif hasattr(_stream, "buffer"):
        # Fallback for text streams without reconfigure(): wrap the raw buffer.
        setattr(
            sys,
            _stream_name,
            io.TextIOWrapper(_stream.buffer, encoding="utf-8", errors="replace"),
        )

# Make sure the directory containing this file (backend) is on the import path.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from parse_medical_v2 import parse_medical_data_v2, clean_extracted_data_v2
|
||||
from extract_and_fill_report import classify_abb_module, match_with_template
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 测试1: classify_abb_module - ABB硬编码映射
|
||||
# ============================================================
|
||||
def test_abb_mapping():
    """Check that known ABB codes are classified into the expected module.

    Each case is passed to ``classify_abb_module`` with ``api_key=None``.
    Every mismatch is printed as soon as it is found, and a pass/fail
    summary line is printed at the end.

    Returns:
        bool: True when no case produced an unexpected module name.
    """
    print("\n" + "=" * 70)
    print("[测试1] ABB硬编码映射")
    print("=" * 70)

    # Each entry: (abb, project_name, expected_module)
    cases = [
        # Urine
        ("COLOR", "Color", "Urine Detection"),
        ("PH", "pH", "Urine Detection"),
        ("PRO", "Protein", "Urine Detection"),
        ("SG", "Specific Gravity", "Urine Detection"),
        # Complete blood count
        ("WBC", "White Blood Cell", "Complete Blood Count"),
        ("RBC", "Red Blood Cell", "Complete Blood Count"),
        ("HGB", "Hemoglobin", "Complete Blood Count"),
        ("PLT", "Platelet Count", "Complete Blood Count"),
        ("ESR", "ESR 1 Hour", "Complete Blood Count"),
        # Liver function
        ("ALT", "Alanine Aminotransferase", "Liver Function"),
        ("AST", "Aspartate Aminotransferase", "Liver Function"),
        ("GGT", "Gamma GT", "Liver Function"),
        ("TBIL", "Total Bilirubin", "Liver Function"),
        ("ALB", "Albumin", "Liver Function"),
        # Kidney function
        ("BUN", "Blood Urea Nitrogen", "Kidney Function"),
        ("CREA", "Creatinine", "Kidney Function"),
        ("UA", "Uric Acid", "Kidney Function"),
        # Lipids
        ("TC", "Total Cholesterol", "Lipid Panel"),
        ("TG", "Triglyceride", "Lipid Panel"),
        ("HDL", "HDL Cholesterol", "Lipid Panel"),
        ("LDL", "LDL Cholesterol", "Lipid Panel"),
        # Electrolytes
        ("NA", "Sodium", "Electrolytes"),
        ("K", "Potassium", "Electrolytes"),
        ("CL", "Chloride", "Electrolytes"),
        ("CA", "Calcium", "Electrolytes"),
        # Glucose
        ("FPG", "Fasting Glucose", "Glucose"),
        ("HBA1C", "HbA1c", "Glucose"),
        # Thyroid
        ("TSH", "TSH", "Thyroid"),
        ("FT3", "Free T3", "Thyroid"),
        ("FT4", "Free T4", "Thyroid"),
        # Hormones
        ("E2", "Estradiol", "Hormone"),
        ("FSH", "FSH", "Hormone"),
        ("LH", "LH", "Hormone"),
        ("CORTISOL", "Cortisol", "Hormone"),
        # Tumor markers
        ("AFP", "Alpha Fetoprotein", "Tumor Markers"),
        ("CEA", "CEA", "Tumor Markers"),
        ("CA125", "CA125", "Tumor Markers"),
        ("PSA", "PSA", "Tumor Markers"),
        # Coagulation
        ("PT", "Prothrombin Time", "Coagulation"),
        ("APTT", "APTT", "Coagulation"),
        ("FIB", "Fibrinogen", "Coagulation"),
        # Infectious disease
        ("HBSAG", "HBsAg", "Infectious Disease"),
        ("HIV", "HIV", "Infectious Disease"),
        # Immune function
        ("IGG", "IgG", "Immune Function"),
        ("C3", "Complement C3", "Immune Function"),
        ("CRP", "CRP", "Immune Function"),
        # Bone metabolism
        ("OSTE", "Osteocalcin", "Bone Metabolism"),
        ("PTH", "PTH", "Bone Metabolism"),
        # Heavy metals
        ("PB", "Lead", "Heavy Metals"),
        ("HG", "Mercury", "Heavy Metals"),
        # Vitamins
        ("VITB12", "Vitamin B12", "Vitamin"),
        ("FOLATE", "Folate", "Vitamin"),
        # Homocysteine
        ("HCY", "Homocysteine", "Homocysteine"),
        # Blood type
        ("ABO", "ABO Blood Group", "Blood Type"),
    ]

    failures = 0
    for abb, project, expected in cases:
        actual = classify_abb_module(abb, project, api_key=None)
        if actual == expected:
            continue
        # Report the mismatch immediately so it appears in run order.
        failures += 1
        print(f" [FAIL] ABB={abb}, project={project}")
        print(f" 期望: {expected}, 实际: {actual}")

    successes = len(cases) - failures
    print(f"\n 结果: {successes} 通过, {failures} 失败 / 共 {len(cases)} 项")
    return not failures
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 测试2: classify_abb_module - 中文关键词匹配
|
||||
# ============================================================
|
||||
def test_chinese_keyword_matching():
    """Check that Chinese project names are classified into the expected module.

    Each case is passed to ``classify_abb_module`` with ``api_key=None``.
    Every mismatch is printed as soon as it is found, and a pass/fail
    summary line is printed at the end.

    Returns:
        bool: True when no case produced an unexpected module name.
    """
    print("\n" + "=" * 70)
    print("[测试2] 中文关键词匹配")
    print("=" * 70)

    # The ABB codes below are made-up placeholders; the intent is that they
    # are absent from the ABB mapping so classification has to rely on the
    # keyword match against the project name.
    # Each entry: (abb, project_name_cn, expected_module)
    cases = [
        # Urine
        ("X001", "尿液分析", "Urine Detection"),
        ("X002", "尿检常规", "Urine Detection"),
        ("X003", "隐血试验", "Urine Detection"),
        ("X004", "酮体检测", "Urine Detection"),
        # Complete blood count
        ("X010", "红细胞计数", "Complete Blood Count"),
        ("X011", "白细胞分类", "Complete Blood Count"),
        ("X012", "血红蛋白测定", "Complete Blood Count"),
        ("X013", "血小板计数", "Complete Blood Count"),
        ("X014", "中性粒细胞百分比", "Complete Blood Count"),
        ("X015", "嗜酸性粒细胞", "Complete Blood Count"),
        ("X016", "单核细胞计数", "Complete Blood Count"),
        # Liver function
        ("X020", "肝功能全套", "Liver Function"),
        ("X021", "总蛋白测定", "Liver Function"),
        ("X022", "白蛋白测定", "Liver Function"),
        ("X023", "胆红素测定", "Liver Function"),
        ("X024", "转氨酶检测", "Liver Function"),
        ("X025", "谷氨酰转肽酶", "Liver Function"),
        # Kidney function
        ("X030", "肾功能检测", "Kidney Function"),
        ("X031", "血清肌酐", "Kidney Function"),
        ("X032", "尿素氮测定", "Kidney Function"),
        ("X033", "尿酸检测", "Kidney Function"),
        # Lipids
        ("X040", "总胆固醇", "Lipid Panel"),
        ("X041", "甘油三酯测定", "Lipid Panel"),
        ("X042", "高密度脂蛋白", "Lipid Panel"),
        ("X043", "血脂四项", "Lipid Panel"),
        # Glucose
        ("X050", "空腹血糖测定", "Glucose"),
        ("X051", "糖化血红蛋白检测", "Glucose"),
        ("X052", "随机血糖", "Glucose"),
        # Thyroid
        ("X060", "甲状腺功能", "Thyroid"),
        ("X061", "促甲状腺激素", "Thyroid"),
        # Hormones
        ("X070", "雌二醇测定", "Hormone"),
        ("X071", "孕酮检测", "Hormone"),
        ("X072", "睾酮水平", "Hormone"),
        ("X073", "皮质醇测定", "Hormone"),
        ("X074", "催乳素检测", "Hormone"),
        ("X075", "荷尔蒙全套", "Hormone"),
        ("X076", "促卵泡生成素", "Hormone"),
        ("X077", "促黄体生成素", "Hormone"),
        ("X078", "脱氢表雄酮硫酸盐", "Hormone"),
        ("X079", "胰岛素样生长因子", "Hormone"),
        ("X080", "抗缪勒管激素", "Hormone"),
        # Tumor markers
        ("X090", "肿瘤标志物全套", "Tumor Markers"),
        ("X091", "甲胎蛋白检测", "Tumor Markers"),
        ("X092", "癌胚抗原测定", "Tumor Markers"),
        ("X093", "铁蛋白检测", "Tumor Markers"),
        ("X094", "糖类抗原125", "Tumor Markers"),
        ("X095", "前列腺特异性抗原", "Tumor Markers"),
        ("X096", "鳞状细胞癌抗原", "Tumor Markers"),
        ("X097", "神经元特异性烯醇化酶", "Tumor Markers"),
        # Coagulation
        ("X100", "凝血功能检测", "Coagulation"),
        ("X101", "纤维蛋白原测定", "Coagulation"),
        # Infectious disease
        ("X110", "乙肝五项", "Infectious Disease"),
        ("X111", "丙肝抗体", "Infectious Disease"),
        ("X112", "梅毒筛查", "Infectious Disease"),
        ("X113", "传染病四项", "Infectious Disease"),
        # Immune function
        ("X120", "免疫球蛋白测定", "Immune Function"),
        ("X121", "补体C3检测", "Immune Function"),
        ("X122", "c反应蛋白测定", "Immune Function"),
        ("X123", "抗核抗体检测", "Immune Function"),
        ("X124", "类风湿因子测定", "Immune Function"),
        ("X125", "红细胞沉降速率", "Immune Function"),
        # Bone metabolism
        ("X130", "骨代谢标志物", "Bone Metabolism"),
        ("X131", "骨钙素检测", "Bone Metabolism"),
        ("X132", "甲状旁腺激素", "Bone Metabolism"),
        ("X133", "25-羟维生素d检测", "Bone Metabolism"),
        # Heavy metals
        ("X140", "微量元素检测", "Heavy Metals"),
        ("X141", "重金属筛查", "Heavy Metals"),
        # Homocysteine
        ("X150", "同型半胱氨酸检测", "Homocysteine"),
        # Blood type
        ("X160", "ABO血型鉴定", "Blood Type"),
        # Electrolytes
        ("X170", "电解质全套", "Electrolytes"),
        ("X171", "血清钾测定", "Electrolytes"),
        ("X172", "血清钠检测", "Electrolytes"),
        ("X173", "血清钙测定", "Electrolytes"),
    ]

    failures = 0
    for abb, project, expected in cases:
        actual = classify_abb_module(abb, project, api_key=None)
        if actual == expected:
            continue
        # Report the mismatch immediately so it appears in run order.
        failures += 1
        print(f" [FAIL] project={project}")
        print(f" 期望: {expected}, 实际: {actual}")

    successes = len(cases) - failures
    print(f"\n 结果: {successes} 通过, {failures} 失败 / 共 {len(cases)} 项")
    return not failures
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 测试3: parse_medical_data_v2 - OCR文本解析
|
||||
# ============================================================
|
||||
def test_parse_ocr_text():
    """Parse a sample OCR text and measure how many expected ABBs are found.

    The sample text is fed to ``parse_medical_data_v2`` and the result is
    passed through ``clean_extracted_data_v2``. The set of ``abb`` values in
    the cleaned items is compared with a fixed set of expected ABBs; matched,
    missing and extra codes are reported, then every parsed item is printed
    as a table row sorted by ABB.

    Returns:
        bool: True when at least 70% of the expected ABBs were found.
    """
    print("\n" + "=" * 70)
    print("[测试3] OCR文本解析 (parse_medical_data_v2)")
    print("=" * 70)

    # Simulated OCR output for a typical English-language lab report.
    sample_ocr_text = """Page 1
Patient Name: MR. TEST PATIENT
Sex : Male Age : 45Y
Collected Date/Time: 20 Jan 2025

Complete Blood Count
Total WBC............... 6.50 *10^3/mm3 (4.0-10.0)
Red Blood Cell.......... 4.69 *10^6/mm3 (4.5-5.5)
Hemoglobin(Hb)......... 14.2 g/dL (13.0-17.0)
Hematocrit(HCT)........ 41.3 % (40-54)
MCV.................... 88.1 fL (80-100)
MCH.................... 30.3 pg (27-34)
MCHC................... 34.4 g/dL (32-36)
Platelet Count......... 230 *10^3/mm3 (150-400)
Neutrophil............. 62.3 % (40-70)
Lymphocyte............. 28.5 % (20-40)
Monocyte............... 6.2 % (2-8)
Eosinophil............. 2.5 % (1-6)
Basophil............... 0.5 % (0-1)
ESR 1 Hour............. 8 mm/hr (0-15)

Liver Function
ALT(Alanine Transaminase)...... 25 U/L (0-41)
AST(Aspartate Transaminase).... 22 U/L (0-40)
GGT( Gamma GT)................. 30 U/L (8-61)
ALP(Alkaline Phosphatase)...... 70 U/L (40-130)
Total Bilirubin................ 0.8 mg/dL (0.1-1.2)
Direct Bilirubin............... 0.2 mg/dL (0-0.3)
Total Protein.................. 7.2 g/dL (6.6-8.3)
Albumin........................ 4.5 g/dL (3.5-5.2)
Globulin....................... 2.7 g/dL (2.0-3.5)

Kidney Function
BUN............................ 15 mg/dL (6-20)
Creatinine..................... 0.95 mg/dL (0.67-1.17)
Uric Acid...................... 5.8 mg/dL (3.4-7.0)
eGFR........................... 92 mL/min (>90)

Lipid Profile
Total Cholesterol.............. 195 mg/dL (<200)
Triglyceride................... 120 mg/dL (<150)
HDL-Cholesterol................ 55 mg/dL (>40)
LDL-Cholesterol(Direct)........ 118 mg/dL (<100)

Glucose(Fasting)............... 95 mg/dL (74-100)
HbA1c.......................... 5.7 % (4.0-5.6)

Thyroid Function
TSH............................ 2.15 mIU/L (0.27-4.2)
Free T3........................ 3.2 pg/mL (2.0-4.4)
Free T4........................ 1.25 ng/dL (0.93-1.7)

Hormones
Estradiol(E2).................. 28.5 pg/mL (11.3-43.2)
Testosterone................... 450 ng/dL (249-836)
Cortisol....................... 12.5 ug/dL (6.2-19.4)
FSH............................ 5.85 mIU/mL (1.5-12.4)
LH(Luteinizing Hormone)....... 4.2 mIU/mL (1.7-8.6)
Prolactin...................... 8.5 ng/mL (4.0-15.2)
DHEA-Sulphate.................. 280 ug/dL (88.9-427)
IGF-1.......................... 165 ng/mL (101-267)

Tumor Markers
AFP(Alpha Fetoprotein)......... 3.2 ng/mL (0-7)
CEA(Carcinoembryonic Antigen).. 2.1 ng/mL (0-5)
Total PSA...................... 0.8 ng/mL (0-4)
CA125.......................... 12.5 U/mL (0-35)

Coagulation
Prothrombin Time(PT)........... 12.5 sec (10-14)
APTT........................... 28.3 sec (25-35)
Thrombin Time(TT).............. 16.2 sec (14-21)
Fibrinogen..................... 2.8 g/L (2.0-4.0)
INR............................ 0.93 (0.8-1.2)

Infectious Disease
HBsAg(Hepatitis B Surface Antigen)... Negative
HBsAb(Hepatitis B Surface Antibody).. Positive
HCV Ab (Hepatitis C Antibody)........ Non Reactive
HIV-1/HIV-2 Antibody................. Non Reactive
RPR (Rapid Plasma Reagin)............ Non Reactive

Electrolytes
Sodium......................... 140 mmol/L (136-145)
Potassium...................... 4.2 mmol/L (3.5-5.1)
Chloride....................... 103 mmol/L (98-107)
Calcium........................ 9.5 mg/dL (8.6-10.2)

Immune Function
Immunoglobulin G(IgG).......... 1050 mg/dL (700-1600)
Immunoglobulin A(IgA).......... 220 mg/dL (70-400)
Immunoglobulin M(IgM).......... 95 mg/dL (40-230)
Complement C3(B1C)............. 110 mg/dL (90-180)
Complement C4.................. 28 mg/dL (10-40)
C-Reactive Protein(High Sens).. 0.5 mg/L (<3)

Bone Metabolism
N-mid Osteocalcin.............. 15.2 ng/mL (14-46)
PTH(Intact).................... 35 pg/mL (15-65)
Vitamin D(25-OH Vitamin D Total) 32 ng/mL (30-100)

Blood Type
ABO Group...................... A
Rh Group....................... Positive

Homocysteine................... 10.5 umol/L (5-15)

Vitamin B12.................... 450 pg/mL (197-771)
Folate......................... 12.3 ng/mL (>3.0)
"""

    items = clean_extracted_data_v2(
        parse_medical_data_v2(sample_ocr_text, "test_sample.pdf")
    )

    print(f" 解析出 {len(items)} 个检测项")

    # Key ABBs the parser is expected to recognise in the sample above.
    expected_abbs = {
        'WBC', 'RBC', 'Hb', 'HCT', 'MCV', 'MCH', 'MCHC', 'PLT',
        'NEUT', 'LYMPH', 'MONO', 'EOS', 'BAS', 'ESR',
        'ALT', 'AST', 'GGT', 'ALP', 'TBil', 'DBil', 'TP', 'ALB', 'GLB',
        'BUN', 'Scr', 'UA', 'eGFR',
        'TC', 'TG', 'HDL', 'LDL',
        'FBS', 'HbA1C',
        'TSH', 'FT3', 'FT4',
        'E2', 'T', 'COR', 'FSH', 'LH', 'PRL', 'DHEAS', 'IGF-1',
        'AFP', 'CEA', 'TPSA', 'CA125',
        'PT', 'APTT', 'TT', 'FIB', 'INR',
        'HBsAg', 'HBsAb', 'HCV', 'HIV', 'TRUST',
        'Na', 'K', 'Cl', 'Ca',
        'IgG', 'IgA', 'IgM', 'C3', 'C4', 'hs-CRP',
        'OST', 'PTH', '25-OH-VD2+D3',
        'ABO', 'Rh',
        'Hcy',
        'VitB12', 'Folate',
    }

    found_abbs = {row['abb'] for row in items}
    matched = expected_abbs.intersection(found_abbs)
    missing = expected_abbs.difference(found_abbs)
    extra = found_abbs.difference(expected_abbs)

    print(f" 期望 {len(expected_abbs)} 个ABB")
    print(f" 匹配 {len(matched)} 个")

    if missing:
        print(f" [WARN] 未匹配 {len(missing)} 个: {sorted(missing)}")
    if extra:
        print(f" [INFO] 额外识别 {len(extra)} 个: {sorted(extra)}")

    # Detail table: one row per parsed item, ordered by ABB.
    print(f"\n {'ABB':<15} {'结果':<12} {'标记':<4} {'单位':<20} {'参考范围'}")
    print(" " + "-" * 70)
    for row in sorted(items, key=lambda entry: entry['abb']):
        abb = row['abb']
        # Long values are truncated so the columns stay roughly aligned.
        result = row.get('result', '')[:10]
        point = row.get('point', '')
        unit = row.get('unit', '')[:18]
        ref = row.get('reference', '')[:25]
        # A tick marks rows whose ABB is in the expected set.
        marker = "✓" if abb in expected_abbs else " "
        print(f" {marker} {abb:<13} {result:<12} {point:<4} {unit:<20} {ref}")

    if expected_abbs:
        coverage = len(matched) / len(expected_abbs) * 100
    else:
        coverage = 0
    print(f"\n 覆盖率: {coverage:.1f}% ({len(matched)}/{len(expected_abbs)})")

    # At least 70% coverage counts as a pass.
    return coverage >= 70
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 测试4: 分类 + 模板匹配联合测试
|
||||
# ============================================================
|
||||
def test_classify_with_template():
    """Run mock extracted items through template matching, then classify them.

    Loads ``abb_mapping_config.json`` from this file's directory, passes ten
    mock items plus that config to ``match_with_template``, and prints the
    module that ``classify_abb_module`` assigns to each mock ABB.

    Returns:
        bool: True when the config file is missing (the test is skipped),
        otherwise True when the match result holds at least 8 entries.
    """
    print("\n" + "=" * 70)
    print("[测试4] 分类 → 模板匹配联合测试")
    print("=" * 70)

    # Load the real configuration that sits next to this file.
    config_path = os.path.join(os.path.dirname(__file__), "abb_mapping_config.json")
    if not os.path.exists(config_path):
        print(" [SKIP] 配置文件不存在")
        return True

    with open(config_path, 'r', encoding='utf-8') as handle:
        config = json.load(handle)

    # Mock extracted data as (abb, project, result, unit, reference).
    # "point" is always empty and "source" is always "test.pdf".
    rows = [
        ("WBC", "White Blood Cell", "6.5", "*10^3/mm3", "(4.0-10.0)"),
        ("ALT", "Alanine Aminotransferase", "25", "U/L", "(0-41)"),
        ("TC", "Total Cholesterol", "195", "mg/dL", "(<200)"),
        ("TSH", "TSH", "2.15", "mIU/L", "(0.27-4.2)"),
        ("AFP", "Alpha Fetoprotein", "3.2", "ng/mL", "(0-7)"),
        ("E2", "Estradiol", "28.5", "pg/mL", ""),
        ("PT", "Prothrombin Time", "12.5", "sec", "(10-14)"),
        ("HBsAg", "HBsAg", "Negative", "", ""),
        ("Na", "Sodium", "140", "mmol/L", "(136-145)"),
        ("Hcy", "Homocysteine", "10.5", "umol/L", "(5-15)"),
    ]
    mock_items = [
        {
            "abb": abb,
            "project": project,
            "result": result,
            "point": "",
            "unit": unit,
            "reference": reference,
            "source": "test.pdf",
        }
        for abb, project, result, unit, reference in rows
    ]

    matched = match_with_template(mock_items, config)
    print(f"\n 模板匹配结果: {len(matched)} 个项目")

    # Show the module each mock ABB is classified into.
    for abb in [row[0] for row in rows]:
        entry = matched.get(abb, {})
        project = entry.get('project', '?')
        result = entry.get('result', '?')
        module = classify_abb_module(abb, project, api_key=None)
        print(f" {abb:<8} result={result:<10} → [{module}]")

    return len(matched) >= 8
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 测试5: 边界情况 - 关键词冲突
|
||||
# ============================================================
|
||||
def test_keyword_conflicts():
    """Exercise project names whose keywords could match more than one module.

    Each case is passed to ``classify_abb_module`` with ``api_key=None``.
    One line is printed per case (passing or failing), followed by a
    pass/fail summary.

    Returns:
        bool: True when every case produced the expected module name.
    """
    print("\n" + "=" * 70)
    print("[测试5] 关键词冲突/边界测试")
    print("=" * 70)

    # Each entry: (abb, project_name_cn, expected_module)
    cases = [
        # A longer keyword should take priority over a shorter one.
        ("X200", "红细胞沉降速率测定", "Immune Function"),  # must not fall into CBC via '红细胞'
        ("X201", "红细胞计数", "Complete Blood Count"),  # plain '红细胞' match
        # Albumin vs. white blood cell
        ("X202", "血清白蛋白", "Liver Function"),  # '白蛋白' -> Liver
        ("X203", "白细胞分类计数", "Complete Blood Count"),  # '白细胞' -> CBC
        # Thyroid vs. parathyroid
        ("X204", "甲状旁腺激素检测", "Bone Metabolism"),  # '甲状旁腺' -> Bone
        ("X205", "甲状腺功能五项", "Thyroid"),  # '甲状腺' -> Thyroid
        # Where vitamin D belongs
        ("X206", "25-羟维生素d总量", "Bone Metabolism"),  # '维生素d' -> Bone (not Vitamin)
        # Uric acid must not match "urine"
        ("X207", "血清尿酸", "Kidney Function"),  # '尿酸' -> Kidney
        # Cholesterol must not match bilirubin
        ("X208", "总胆固醇", "Lipid Panel"),  # '胆固醇' -> Lipid
        ("X209", "总胆红素", "Liver Function"),  # '胆红素' -> Liver
        # Immunodeficiency virus
        ("X210", "人类免疫缺陷病毒抗体", "Infectious Disease"),  # must not match '免疫球蛋白'
    ]

    passed = 0
    failed = 0
    for abb, project, expected in cases:
        actual = classify_abb_module(abb, project, api_key=None)
        if actual == expected:
            passed += 1
            icon = "✓"
        else:
            failed += 1
            icon = "✗"
        print(f" {icon} {project:<25} 期望: {expected:<20} 实际: {actual}")

    print(f"\n 结果: {passed} 通过, {failed} 失败 / 共 {len(cases)} 项")
    return failed == 0
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 主函数
|
||||
# ============================================================
|
||||
def main():
    """Run every test function in order and print a summary table.

    Returns:
        int: 0 when all test functions returned a truthy value, 1 otherwise.
    """
    banner = "=" * 70

    print(banner)
    print(" 医疗数据提取逻辑测试")
    print(" (不调用OCR/DeepSeek API,纯本地离线测试)")
    print(banner)

    # (summary label, test function), executed in this order.
    suite = (
        ("ABB硬编码映射", test_abb_mapping),
        ("中文关键词匹配", test_chinese_keyword_matching),
        ("OCR文本解析", test_parse_ocr_text),
        ("分类+模板匹配", test_classify_with_template),
        ("关键词冲突检测", test_keyword_conflicts),
    )
    results = {label: run() for label, run in suite}

    # Summary
    print("\n" + banner)
    print(" 测试汇总")
    print(banner)
    for label, ok in results.items():
        verdict = "✓ PASS" if ok else "✗ FAIL"
        print(f" {verdict} {label}")
    all_pass = all(results.values())

    print(banner)
    if all_pass:
        print(" 所有测试通过!")
    else:
        print(" 存在失败项,请检查上方详情")
    print(banner)

    return 0 if all_pass else 1
|
||||
|
||||
|
||||
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user