初始化医疗报告生成项目,添加核心代码文件
This commit is contained in:
412
backend/config.py
Normal file
412
backend/config.py
Normal file
@@ -0,0 +1,412 @@
|
||||
"""
|
||||
配置文件 - 统一管理路径和参数
|
||||
"""
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
# Project root directories, derived from this file's location.
PROJECT_ROOT = Path(__file__).parent.parent
BACKEND_ROOT = Path(__file__).parent

# ==================== Paths ====================

# PDF input directory (holds the original medical-report PDFs).
# The built-in default is a path on one specific Windows machine, so it can be
# overridden with the PDF_INPUT_DIR environment variable; an unset or empty
# variable keeps the historical default.
PDF_INPUT_DIR = Path(
    os.getenv("PDF_INPUT_DIR") or r"c:\Users\UI\Desktop\医疗报告\医疗报告智能体"
)

# Word template files
TEMPLATE_COMPLETE = BACKEND_ROOT / "template_complete.docx"  # used by extract_and_fill_report.py
TEMPLATE_DOCXTPL = PROJECT_ROOT / "template_docxtpl.docx"  # used by fill_with_docxtpl.py

# ABB mapping configuration file
ABB_MAPPING_CONFIG = BACKEND_ROOT / "abb_mapping_config.json"

# Output directory; created at import time so writers can rely on it existing.
REPORTS_OUTPUT_DIR = BACKEND_ROOT / "reports"
REPORTS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Cache files
EXTRACTED_DATA_FILE = BACKEND_ROOT / "extracted_medical_data.json"
ANALYZED_DATA_FILE = BACKEND_ROOT / "analyzed_medical_data.json"
DEEPSEEK_PROCESSED_DATA_FILE = BACKEND_ROOT / "deepseek_processed_data.json"
DEEPSEEK_CACHE_FILE = BACKEND_ROOT / "deepseek_cache.json"

# ==================== OCR ====================

# Baidu OCR credentials (read from environment variables; empty when unset)
BAIDU_OCR_APP_ID = os.getenv("BAIDU_OCR_APP_ID", "")
BAIDU_OCR_API_KEY = os.getenv("BAIDU_OCR_API_KEY", "")
BAIDU_OCR_SECRET_KEY = os.getenv("BAIDU_OCR_SECRET_KEY", "")

# ==================== LLM ====================

# DeepSeek
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY", "")
DEEPSEEK_API_BASE = os.getenv("DEEPSEEK_API_BASE", "https://api.deepseek.com")

# OpenAI
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE", "")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")

# Ollama
OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5:7b")

# Coze
COZE_API_KEY = os.getenv("COZE_API_KEY", "")
COZE_WORKFLOW_ID = os.getenv("COZE_WORKFLOW_ID", "")

# ==================== Feature switches ====================

# Whether DeepSeek analysis is enabled (used in extract_and_fill_report.py).
# Only the literal value "true" (any letter case) turns it on.
ENABLE_DEEPSEEK_ANALYSIS = os.getenv("ENABLE_DEEPSEEK_ANALYSIS", "false").lower() == "true"
|
||||
|
||||
# ==================== 辅助函数 ====================
|
||||
|
||||
def load_abb_config() -> dict:
    """Load the ABB mapping configuration from ``ABB_MAPPING_CONFIG``.

    Two on-disk layouts are understood:

    * module-based (current): a top-level ``"modules"`` dict mapping a module
      name to ``{"cn_name": ..., "items": [{"abb", "project", "project_cn"}]}``,
      plus optional ``"abb_aliases"`` and ``"module_aliases"`` dicts;
    * flat (legacy): a top-level ``"items"`` list whose entries carry their
      own ``"module"`` field.

    Lookup keys in ``abb_to_module`` / ``abb_to_info`` are the upper-cased ABB,
    except for 'TG' (triglycerides) and 'Tg' (thyroglobulin): these differ
    only by case, so they are always stored under their exact spelling. The
    same rule is applied to both layouts.

    Returns:
        dict with the keys:
            - modules: module configuration dict (module-based layout only)
            - abb_list: every non-empty ABB, in file order
            - abb_to_module: lookup key -> module name
            - abb_to_info: lookup key -> item details
            - abb_aliases: ABB alias mapping (module-based layout only)
            - module_aliases: module-name alias mapping (module-based layout only)
        All values are empty when the file does not exist or matches neither
        layout.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
    """
    import json  # local import keeps this helper self-contained

    result = {
        'modules': {},
        'abb_list': [],
        'abb_to_module': {},
        'abb_to_info': {},
        'abb_aliases': {},
        'module_aliases': {}
    }

    if not ABB_MAPPING_CONFIG.exists():
        return result

    with open(ABB_MAPPING_CONFIG, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # ABBs that collide once upper-cased and therefore need exact matching.
    case_sensitive_abbs = {'TG', 'Tg'}

    def register(info: dict) -> None:
        """Record one item under its lookup key in all three indexes."""
        abb = info['abb']
        key = abb if abb in case_sensitive_abbs else abb.upper()
        result['abb_list'].append(abb)
        result['abb_to_module'][key] = info['module']
        result['abb_to_info'][key] = info

    # Current layout: configuration grouped by module.
    if 'modules' in config and isinstance(config['modules'], dict):
        result['modules'] = config['modules']
        result['abb_aliases'] = config.get('abb_aliases', {})
        result['module_aliases'] = config.get('module_aliases', {})

        for module_name, module_data in config['modules'].items():
            for item in module_data.get('items', []):
                abb = item.get('abb', '')
                if not abb:
                    continue  # entries without an ABB cannot be indexed
                register({
                    'abb': abb,
                    'project': item.get('project', ''),
                    'project_cn': item.get('project_cn', ''),
                    'module': module_name,
                    'module_cn': module_data.get('cn_name', '')
                })

    # Legacy layout: flat list of items.
    elif 'items' in config:
        for item in config['items']:
            abb = item.get('abb', '')
            if not abb:
                continue
            register({
                'abb': abb,
                'project': item.get('project', ''),
                'project_cn': item.get('project_cn', ''),
                'module': item.get('module', '')
            })

    return result
|
||||
|
||||
|
||||
def normalize_abb(abb: str, config: dict = None) -> str:
    """Resolve an ABB to its standard name through the alias table.

    Lookup order:
        1. the ABB exactly as given;
        2. the upper-cased ABB, skipped for 'TG' / 'Tg', which differ only by
           case and must never be folded onto each other.

    Args:
        abb: raw ABB name.
        config: ABB configuration as returned by ``load_abb_config``
            (optional; loaded automatically when omitted).

    Returns:
        The alias target when one is found, otherwise ``abb`` unchanged.
        A non-string ``abb`` with no exact alias is returned unchanged.
    """
    if config is None:
        config = load_abb_config()

    aliases = config.get('abb_aliases') or {}

    # The exact spelling always wins.
    if abb in aliases:
        return aliases[abb]

    # Case-insensitive fallback, but never for the case-sensitive pair:
    # folding 'Tg' to 'TG' would silently swap two different lab items.
    case_sensitive_abbs = {'TG', 'Tg'}
    if isinstance(abb, str) and abb not in case_sensitive_abbs:
        folded = abb.upper()
        if folded in aliases:
            return aliases[folded]

    return abb
|
||||
|
||||
|
||||
def normalize_module_name(module: str, config: dict = None) -> str:
    """Map a module name onto its standard spelling.

    Intended for module names returned by DeepSeek, which may differ from the
    names used in the configuration.

    Args:
        module: raw module name.
        config: ABB configuration as returned by ``load_abb_config``
            (optional; loaded automatically when omitted).

    Returns:
        The standard module name when ``module`` matches an alias (exactly
        first, then ignoring letter case), otherwise ``module`` unchanged.
        A non-string value such as ``None`` is returned unchanged instead of
        raising.
    """
    if config is None:
        config = load_abb_config()

    module_aliases = config.get('module_aliases') or {}

    # Upstream output is not guaranteed to be a string; pass anything else
    # through untouched rather than failing on .lower().
    if not isinstance(module, str):
        return module

    # Exact alias hit.
    if module in module_aliases:
        return module_aliases[module]

    # Case-insensitive alias hit; the first matching alias in table order wins.
    wanted = module.lower()
    for alias, standard in module_aliases.items():
        if alias.lower() == wanted:
            return standard

    return module
|
||||
|
||||
|
||||
def get_standard_module_order(config: dict = None) -> list:
    """Return the module names in standard report order (per the 2.pdf template).

    When at least one module in the configuration carries an ``order`` field,
    all configured modules are returned sorted by it; modules without the
    field sort as 999. Otherwise the built-in default order is returned.

    Args:
        config: ABB configuration as returned by ``load_abb_config``
            (optional; loaded automatically when omitted, which re-reads the
            configuration file).

    Returns:
        List of module names in standard order.
    """
    if config is None:
        config = load_abb_config()

    modules = config.get('modules') or {}

    def rank(entry) -> int:
        """Sort key: the module's ``order`` value, 999 when it has none."""
        module_cfg = entry[1]
        if isinstance(module_cfg, dict):
            return module_cfg.get('order', 999)
        return 999

    # Prefer the explicit ordering from the configuration file.
    if any(isinstance(m, dict) and 'order' in m for m in modules.values()):
        return [name for name, _ in sorted(modules.items(), key=rank)]

    # Built-in default order (matches 2.pdf).
    return [
        'Urine Test',                # 1. Urine test (pp. 16-19)
        'Complete Blood Count',      # 2. Complete blood count (pp. 20-26)
        'Blood Sugar',               # 3. Blood sugar (pp. 27-28)
        'Lipid Profile',             # 4. Blood lipids (pp. 29-31)
        'Blood Type',                # 5. Blood type (pp. 32-33)
        'Blood Coagulation',         # 6. Coagulation function (pp. 34-36)
        'Four Infectious Diseases',  # 7. Four infectious-disease panel (pp. 37-40)
        'Serum Electrolytes',        # 8. Serum electrolytes (pp. 41-43)
        'Liver Function',            # 9. Liver function (pp. 44-47)
        'Kidney Function',           # 10. Kidney function (pp. 48-49)
        'Myocardial Enzyme',         # 11. Myocardial enzyme profile (pp. 50-51)
        'Thyroid Function',          # 12. Thyroid function (pp. 52-54)
        'Thromboembolism',           # 13. Cardio-cerebrovascular risk factors (pp. 55-56)
        'Bone Metabolism',           # 14. Bone metabolism (pp. 57-59)
        'Microelement',              # 15. Trace elements (pp. 60-62)
        'Lymphocyte Subpopulation',  # 16. Lymphocyte subsets (pp. 63-64)
        'Humoral Immunity',          # 17. Humoral immunity (pp. 65-67)
        'Inflammatory Reaction',     # 18. Inflammatory response (pp. 68-69)
        'Autoantibody',              # 19. Autoantibodies (pp. 70-71)
        'Female Hormone',            # 20. Female hormones (pp. 72-75)
        'Male Hormone',              # 21. Male hormones (pp. 76-79)
        'Tumor Markers',             # 22. Tumor markers (pp. 80-84)
        'Imaging',                   # 23. Imaging examinations (pp. 85-88)
        'Female-specific',           # 24. Female-specific examinations (pp. 89-91)
    ]
|
||||
|
||||
|
||||
def get_standard_item_order(module_name: str, config: dict = None) -> list:
    """Return the ABBs of one module in their configured order.

    Items that have no ``abb`` (missing or empty) are left out, so the result
    contains only real ABBs.

    Args:
        module_name: module name as used as a key in the configuration.
        config: ABB configuration as returned by ``load_abb_config``
            (optional; loaded automatically when omitted).

    Returns:
        List of ABB strings in configuration order; empty when the module is
        unknown or has no items.
    """
    if config is None:
        config = load_abb_config()

    module_cfg = config.get('modules', {}).get(module_name)
    if not module_cfg:
        return []

    # An entry without an ABB would otherwise contribute '' to the ordering.
    return [item['abb'] for item in module_cfg.get('items', []) if item.get('abb')]
|
||||
|
||||
|
||||
def sort_items_by_standard_order(items: list, module_name: str, config: dict = None) -> list:
    """Order a module's items according to the configured standard order.

    Each item's position is looked up in four steps, stopping at the first hit:
        1. alias-normalized ABB, exact match;
        2. original ABB, exact match;
        3. alias-normalized ABB, upper-cased;
        4. original ABB, upper-cased.
    The upper-cased table leaves out 'TG' / 'Tg', which need exact matching.

    Args:
        items: list of ``(abb, data)`` pairs.
        module_name: module whose standard order applies.
        config: ABB configuration (optional; loaded automatically when omitted).

    Returns:
        New list of ``(abb, data)`` pairs: items found in the standard order
        first, in that order, followed by the remaining items sorted by
        upper-cased ABB.
    """
    if config is None:
        config = load_abb_config()

    reference = get_standard_item_order(module_name, config)

    # ABBs that only differ by case and so are kept out of the folded table.
    exact_only = {'TG', 'Tg'}
    exact_pos = {}
    folded_pos = {}
    for index, name in enumerate(reference):
        exact_pos[name] = index
        if name not in exact_only:
            folded_pos[name.upper()] = index

    ranked = []    # (position, abb, data) for items present in the standard order
    leftover = []  # (abb, data) for everything else

    for abb, data in items:
        # Resolve aliases before looking the item up.
        canonical = normalize_abb(abb, config)

        # Try each lookup in turn; later ones are only evaluated on a miss.
        position = exact_pos.get(canonical)
        if position is None:
            position = exact_pos.get(abb)
        if position is None:
            position = folded_pos.get(canonical.upper())
        if position is None:
            position = folded_pos.get(abb.upper())

        if position is None:
            leftover.append((abb, data))
        else:
            ranked.append((position, abb, data))

    # Stable sort: items sharing a position keep their input order.
    ranked.sort(key=lambda entry: entry[0])
    # Non-standard items go last, alphabetically by upper-cased ABB.
    leftover.sort(key=lambda pair: pair[0].upper())

    return [(abb, data) for _, abb, data in ranked] + leftover
|
||||
|
||||
|
||||
def get_output_path(prefix: str = "filled_report", suffix: str = ".docx") -> Path:
    """Build the next versioned output path inside ``REPORTS_OUTPUT_DIR``.

    Existing files named ``{prefix}_*{suffix}`` are scanned for a version
    number, and the result uses the highest one found plus one (1 when none is
    found). A version is read from either of two name shapes:

        * ``{prefix}_v<digits>`` - the number after ``_v``;
        * ``{prefix}_<...>_<digits>`` - the last ``_``-separated part, e.g.
          the trailing part of a timestamped name.

    Args:
        prefix: file name prefix.
        suffix: file suffix including the dot; also selects which existing
            files are considered.

    Returns:
        ``REPORTS_OUTPUT_DIR / f"{prefix}_v{N}{suffix}"``. The file itself is
        not created.
    """
    versions = []

    # Scan files with the requested suffix; scanning only *.docx would make
    # every other suffix restart at v1 and overwrite earlier output.
    for existing in REPORTS_OUTPUT_DIR.glob(f"{prefix}_*{suffix}"):
        file_name = existing.name
        # Cut the requested suffix off by length so multi-dot suffixes work.
        stem = file_name[:len(file_name) - len(suffix)]
        if not stem.startswith(prefix):
            continue

        rest = stem[len(prefix):]
        if rest.startswith('_v'):
            candidate = rest[2:]
        elif rest.startswith('_') and len(rest) > 1:
            # Timestamp-style name: take the part after the last underscore.
            candidate = rest.split('_')[-1]
        else:
            continue

        # isdecimal() accepts only characters int() can parse, so no
        # exception handling is needed around the conversion.
        if candidate.isdecimal():
            versions.append(int(candidate))

    version = max(versions, default=0) + 1

    return REPORTS_OUTPUT_DIR / f"{prefix}_v{version}{suffix}"
|
||||
|
||||
|
||||
def check_required_files() -> dict:
    """Report whether each required file or directory is present.

    Returns:
        dict mapping a short label ("template_complete", "template_docxtpl",
        "config_file", "pdf_input_dir") to True when the corresponding
        configured path exists.
    """
    required = (
        ("template_complete", TEMPLATE_COMPLETE),
        ("template_docxtpl", TEMPLATE_DOCXTPL),
        ("config_file", ABB_MAPPING_CONFIG),
        ("pdf_input_dir", PDF_INPUT_DIR),
    )
    return {label: location.exists() for label, location in required}
|
||||
|
||||
|
||||
def print_config_summary():
    """Print an overview of the active configuration to stdout.

    The output has three sections: the configured paths, the presence of each
    required file (from ``check_required_files``), and for each external API
    whether its setting is non-empty.
    """
    rule = "=" * 70

    def configured(setting) -> str:
        """Return the status marker for a truthy / falsy setting."""
        if setting:
            return '[OK] 已配置'
        return '[X] 未配置'

    print("\n" + rule)
    print("配置摘要")
    print(rule)

    # Paths
    print("\n[路径配置]")
    print(f" PDF输入目录: {PDF_INPUT_DIR}")
    print(f" 模板文件 (extract): {TEMPLATE_COMPLETE}")
    print(f" 模板文件 (docxtpl): {TEMPLATE_DOCXTPL}")
    print(f" 配置文件: {ABB_MAPPING_CONFIG}")
    print(f" 输出目录: {REPORTS_OUTPUT_DIR}")

    # Required files
    print("\n[文件检查]")
    for label, present in check_required_files().items():
        if present:
            verdict = "[OK] 存在"
        else:
            verdict = "[X] 缺失"
        print(f" {label}: {verdict}")

    # API settings; Ollama is judged by its host setting, the others by API key.
    print("\n[API配置]")
    print(f" 百度OCR: {configured(BAIDU_OCR_API_KEY)}")
    print(f" DeepSeek: {configured(DEEPSEEK_API_KEY)}")
    print(f" OpenAI: {configured(OPENAI_API_KEY)}")
    print(f" Ollama: {configured(OLLAMA_HOST)}")
    print(f" Coze: {configured(COZE_API_KEY)}")

    print(rule + "\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Running this module directly prints the effective configuration.
    print_config_summary()
|
||||
Reference in New Issue
Block a user