""" 安全保存模块 - 使用 lxml 精确处理 XML 元素 """ import zipfile import shutil import os import re from pathlib import Path from lxml import etree def safe_save(doc, output_path, template_path): """ 安全保存 - 使用 lxml 精确处理 XML 策略: 1. 先保存文档到临时文件 2. 使用 lxml 解析 XML 3. 从模板复制前四页元素(到 Client Health Program 为止) 4. 从处理后文件复制 Client Health Program 之后的所有内容 5. 合并并保存 """ import tempfile output_path = Path(output_path) template_path = Path(template_path) ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} temp_fd, temp_path = tempfile.mkstemp(suffix='.docx') os.close(temp_fd) try: # 1. 保存到临时文件 doc.save(temp_path) # 2. 读取模板 XML with zipfile.ZipFile(template_path, 'r') as z: template_xml = z.read('word/document.xml') template_tree = etree.fromstring(template_xml) template_body = template_tree.find('.//w:body', ns) # 3. 读取处理后 XML with zipfile.ZipFile(temp_path, 'r') as z: modified_xml = z.read('word/document.xml') modified_tree = etree.fromstring(modified_xml) modified_body = modified_tree.find('.//w:body', ns) if template_body is None or modified_body is None: print(" [安全保存] 无法解析 XML body") shutil.copy(temp_path, output_path) return template_children = list(template_body) modified_children = list(modified_body) # 4. 找到模板中的保护边界(Client Health Program 之后) boundary_pos = -1 for i, elem in enumerate(template_children): text = ''.join(elem.itertext()).strip() if 'Client Health Program' in text or '客户健康方案' in text: boundary_pos = i + 1 # 包括这个元素 break if boundary_pos < 0: # 默认使用 80 个元素 boundary_pos = min(80, len(template_children)) # 5. 找到处理后文件中的数据起始位置 # 关键修改:从 Client Health Program 之后开始,而不是从 health report analysis 开始 # 这样可以保留 Functional Medical Health Advice 等内容 data_start_pos = -1 # 首先尝试找 Client Health Program 的位置 for i, elem in enumerate(modified_children): text = ''.join(elem.itertext()).strip() if 'Client Health Program' in text or '客户健康方案' in text: data_start_pos = i + 1 # 从 Client Health Program 之后开始 print(f" [安全保存] 找到 Client Health Program 位置: {i}") break # 如果找不到,使用备用关键词 if data_start_pos < 0: start_keywords = ['health report analysis', '健康报告分析', 'abnormal index', '异常指标', 'functional medical health advice', '功能医学健康建议', 'urine detection', '尿液检测'] for i, elem in enumerate(modified_children): text = ''.join(elem.itertext()).strip().lower() if any(kw in text for kw in start_keywords): data_start_pos = i break if data_start_pos < 0: data_start_pos = boundary_pos print(f" [安全保存] 边界位置:{boundary_pos}, 数据起始:{data_start_pos}") # 6. 清空模板 body,重新构建 # 保存模板的 sectPr 元素(包含页脚引用) sectPr = None for elem in template_children: if elem.tag.endswith('}sectPr'): sectPr = etree.fromstring(etree.tostring(elem)) break # 清空 body for elem in list(template_body): template_body.remove(elem) # 7. 添加模板的前 boundary_pos 个元素(前四页) # 重新读取模板以获取原始元素 with zipfile.ZipFile(template_path, 'r') as z: orig_template_xml = z.read('word/document.xml') orig_template_tree = etree.fromstring(orig_template_xml) orig_template_body = orig_template_tree.find('.//w:body', ns) orig_template_children = list(orig_template_body) protected_count = 0 for i in range(min(boundary_pos, len(orig_template_children))): elem = orig_template_children[i] if elem.tag.endswith('}sectPr'): continue elem_copy = etree.fromstring(etree.tostring(elem)) template_body.append(elem_copy) protected_count += 1 # 8. 添加处理后文件的数据部分(从 Client Health Program 之后开始) data_count = 0 for i in range(data_start_pos, len(modified_children)): elem = modified_children[i] if elem.tag.endswith('}sectPr'): continue elem_copy = etree.fromstring(etree.tostring(elem)) template_body.append(elem_copy) data_count += 1 # 9. 添加 sectPr if sectPr is not None: template_body.append(sectPr) print(f" [安全保存] 保护部分:{protected_count}, 数据部分:{data_count}") # 10. 保存 XML new_xml = etree.tostring(template_tree, xml_declaration=True, encoding='UTF-8', standalone='yes') # 11. 基于模板创建输出文件 temp_result = str(output_path) + '.temp.docx' with zipfile.ZipFile(template_path, 'r') as zin: with zipfile.ZipFile(temp_result, 'w', zipfile.ZIP_DEFLATED) as zout: for item in zin.infolist(): if item.filename == 'word/document.xml': zout.writestr(item, new_xml) else: zout.writestr(item, zin.read(item.filename)) # 12. 移动到最终位置 if output_path.exists(): output_path.unlink() shutil.move(temp_result, output_path) print(f" [安全保存] ✓ 完成") except Exception as e: print(f" [安全保存] 错误: {e}") import traceback traceback.print_exc() # 回退到普通保存 doc.save(output_path) finally: for f in [temp_path, str(output_path) + '.temp.docx']: if os.path.exists(f): try: os.remove(f) except: pass