初始化医疗报告生成项目,添加核心代码文件

This commit is contained in:
2026-02-13 18:32:52 +08:00
commit faaf2158d4
69 changed files with 29836 additions and 0 deletions

177
backend/xml_safe_save.py Normal file
View File

@@ -0,0 +1,177 @@
"""
安全保存模块 - 使用 lxml 精确处理 XML 元素
"""
import zipfile
import shutil
import os
import re
from pathlib import Path
from lxml import etree
# Headings that mark the end of the protected front matter.
_SECTION_MARKERS = ('Client Health Program', '客户健康方案')

# Lower-case headings used to locate the data section when the primary
# marker is missing from the processed document.
_FALLBACK_START_KEYWORDS = (
    'health report analysis', '健康报告分析',
    'abnormal index', '异常指标',
    'functional medical health advice', '功能医学健康建议',
    'urine detection', '尿液检测',
)

# Number of leading body elements protected when the template has no marker.
_DEFAULT_PROTECTED_COUNT = 80

_WORD_NS = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
_DOCUMENT_PART = 'word/document.xml'


def _is_sect_pr(elem):
    """Return True when *elem* is a ``w:sectPr`` element."""
    tag = elem.tag
    # Comments and processing instructions expose a non-string ``tag``.
    return isinstance(tag, str) and tag.endswith('}sectPr')


def _index_of_marker(children, markers, lowercase=False):
    """Return the index of the first child whose text contains a marker, or -1."""
    for index, elem in enumerate(children):
        text = ''.join(elem.itertext()).strip()
        if lowercase:
            text = text.lower()
        if any(marker in text for marker in markers):
            return index
    return -1


def _read_document_tree(docx_path):
    """Parse ``word/document.xml`` of *docx_path*; return ``(root, body)``."""
    with zipfile.ZipFile(docx_path, 'r') as archive:
        root = etree.fromstring(archive.read(_DOCUMENT_PART))
    return root, root.find('.//w:body', _WORD_NS)


def safe_save(doc, output_path, template_path):
    """Save *doc* while restoring the template's front matter verbatim.

    Strategy:
      1. Save ``doc`` to a temporary .docx file.
      2. Parse ``word/document.xml`` of both the template and the saved file.
      3. Keep the template's body elements up to and including the
         "Client Health Program" heading (the first
         ``_DEFAULT_PROTECTED_COUNT`` elements if the heading is absent).
      4. Append everything that follows that heading in the processed
         document (falling back to secondary keywords, then to the
         template boundary index).
      5. Re-append the template's ``sectPr`` and write the merged XML into
         a copy of the template package.

    Args:
        doc: Object exposing ``save(path)``; presumably a python-docx
            ``Document`` -- confirm against callers.
        output_path: Destination path of the resulting .docx file.
        template_path: Path of the template .docx whose front matter and
            package parts are reused.

    Returns:
        None.

    If any step fails, the error is printed and ``doc.save(output_path)`` is
    used as a plain fallback; an exception raised by that fallback propagates.
    """
    import copy
    import tempfile
    import traceback

    output_path = Path(output_path)
    template_path = Path(template_path)
    # Staging file next to the destination so the final rename is atomic.
    temp_result = str(output_path) + '.temp.docx'

    temp_fd, temp_path = tempfile.mkstemp(suffix='.docx')
    os.close(temp_fd)
    try:
        # 1. Save the processed document to the temporary file.
        doc.save(temp_path)

        # 2-3. Parse the template and the processed document.
        template_tree, template_body = _read_document_tree(template_path)
        _, modified_body = _read_document_tree(temp_path)
        if template_body is None or modified_body is None:
            print(" [安全保存] 无法解析 XML body")
            shutil.copy(temp_path, output_path)
            return

        template_children = list(template_body)
        modified_children = list(modified_body)

        # 4. Protected boundary in the template (marker element included).
        marker_index = _index_of_marker(template_children, _SECTION_MARKERS)
        if marker_index >= 0:
            boundary_pos = marker_index + 1
        else:
            boundary_pos = min(_DEFAULT_PROTECTED_COUNT, len(template_children))

        # 5. Start of the data section in the processed document. Starting
        #    right after "Client Health Program" (rather than at "health
        #    report analysis") keeps sections such as "Functional Medical
        #    Health Advice".
        marker_index = _index_of_marker(modified_children, _SECTION_MARKERS)
        if marker_index >= 0:
            data_start_pos = marker_index + 1
            print(f" [安全保存] 找到 Client Health Program 位置: {marker_index}")
        else:
            data_start_pos = _index_of_marker(
                modified_children, _FALLBACK_START_KEYWORDS, lowercase=True)
            if data_start_pos < 0:
                data_start_pos = boundary_pos
        print(f" [安全保存] 边界位置:{boundary_pos}, 数据起始:{data_start_pos}")

        # 6. Remember the template's sectPr (holds the footer references),
        #    then empty the body so it can be rebuilt.
        sect_pr = next(
            (elem for elem in template_children if _is_sect_pr(elem)), None)
        for elem in template_children:
            template_body.remove(elem)

        # 7. Re-attach the protected template elements. The detached
        #    originals are reused, so the template is not parsed twice.
        protected = [elem for elem in template_children[:boundary_pos]
                     if not _is_sect_pr(elem)]
        template_body.extend(protected)
        protected_count = len(protected)

        # 8. Append copies of the processed document's data elements.
        data = [copy.deepcopy(elem)
                for elem in modified_children[data_start_pos:]
                if not _is_sect_pr(elem)]
        template_body.extend(data)
        data_count = len(data)

        # 9. sectPr must remain the last child of the body.
        if sect_pr is not None:
            template_body.append(sect_pr)
        print(f" [安全保存] 保护部分:{protected_count}, 数据部分:{data_count}")

        # 10. Serialise the merged document.
        new_xml = etree.tostring(
            template_tree, xml_declaration=True, encoding='UTF-8',
            standalone=True)

        # 11. Build the output package from the template's parts.
        # NOTE(review): every part other than word/document.xml comes from
        # the template, so relationships or media added while processing
        # ``doc`` would presumably not be carried over -- confirm with callers.
        with zipfile.ZipFile(template_path, 'r') as zin:
            with zipfile.ZipFile(temp_result, 'w', zipfile.ZIP_DEFLATED) as zout:
                for item in zin.infolist():
                    if item.filename == _DOCUMENT_PART:
                        zout.writestr(item, new_xml)
                    else:
                        # Read by ZipInfo so duplicate names stay distinct.
                        zout.writestr(item, zin.read(item))

        # 12. Atomically replace any existing output file.
        os.replace(temp_result, output_path)
        print(" [安全保存] ✓ 完成")
    except Exception as e:
        print(f" [安全保存] 错误: {e}")
        traceback.print_exc()
        # Fall back to a plain save so the caller still gets a document.
        doc.save(output_path)
    finally:
        for leftover in (temp_path, temp_result):
            try:
                os.remove(leftover)
            except OSError:
                # Best-effort cleanup: the file may not exist or be locked.
                pass