"""对比模板和生成文件的格式差异""" from docx import Document from docx.shared import Pt, Inches import os def analyze_document(filepath, name): """分析文档结构""" print(f"\n{'='*60}") print(f"分析: {name}") print(f"文件: {filepath}") print(f"{'='*60}") doc = Document(filepath) # 找到 Urine Detection 模块 found_urine = False urine_start = -1 for i, elem in enumerate(doc.element.body): text = elem.text if hasattr(elem, 'text') and elem.text else '' if 'Urine' in text and 'Detection' in text: urine_start = i found_urine = True break if not found_urine: print("未找到 Urine Detection 模块") return print(f"\n找到 Urine Detection 位置: {urine_start}") print(f"\n从 Urine Detection 开始的前30个元素:") print("-" * 60) for i in range(urine_start, min(urine_start + 30, len(doc.element.body))): elem = doc.element.body[i] tag = elem.tag.split('}')[-1] text = elem.text if hasattr(elem, 'text') else '' text_preview = text[:80].replace('\n', '\\n') if text else '' # 获取更多信息 extra_info = "" if tag == 'p': # 检查段落样式 p_elem = elem style_elem = p_elem.find('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}pStyle') if style_elem is not None: extra_info = f" [style: {style_elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val')}]" # 检查是否有图片 drawings = p_elem.findall('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}drawing') if drawings: extra_info += f" [有图片: {len(drawings)}个]" elif tag == 'tbl': # 统计表格行数和列数 rows = elem.findall('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}tr') extra_info = f" [行数: {len(rows)}]" print(f" [{i}] <{tag}>{extra_info}: {text_preview}") def main(): # 模板文件 template_path = r"../Be.U Wellness Center功能医学健康报告&定制化方案-案例.docx" # 生成的文件 - 找最新的 reports_dir = "reports" if os.path.exists(reports_dir): files = [f for f in os.listdir(reports_dir) if f.startswith('filled_report_') and f.endswith('.docx')] if files: files.sort(reverse=True) generated_path = os.path.join(reports_dir, files[0]) else: print("未找到生成的报告文件") return else: print("reports目录不存在") return # 分析两个文档 analyze_document(template_path, "模板文件") analyze_document(generated_path, "生成文件") if __name__ == "__main__": main()