# Source file: yiliao/backend/compare_format.py
# (repository viewer metadata: 85 lines, 2.9 KiB, Python)

"""对比模板和生成文件的格式差异"""
from docx import Document
from docx.shared import Pt, Inches
import os
def analyze_document(filepath, name):
    """Print a structural dump of a document's "Urine Detection" section.

    A banner with ``name`` and ``filepath`` is printed, the file is opened
    with ``Document``, and the direct children of the document body are
    scanned for the first element whose ``text`` contains both "Urine" and
    "Detection". If no such element exists a message is printed and the
    function returns. Otherwise up to 30 body elements starting at that
    position are listed, one line each: index, tag name, an annotation
    (style id and drawing count for paragraphs, row count for tables) and
    the first 80 characters of the element's text.

    Args:
        filepath: Path of the .docx file, passed straight to ``Document``.
        name: Label shown in the banner.

    Returns:
        None. Everything is written to stdout.
    """
    # WordprocessingML namespace in Clark notation, as used in element tags.
    w_ns = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    separator = '=' * 60

    print(f"\n{separator}")
    print(f"分析: {name}")
    print(f"文件: {filepath}")
    print(f"{separator}")

    doc = Document(filepath)
    children = list(doc.element.body)

    # Locate the first body element mentioning both keywords.
    # NOTE(review): on a plain lxml element `.text` is only the text directly
    # inside that element; this presumably relies on python-docx exposing the
    # full paragraph text on paragraph elements - confirm for the installed
    # version.
    for section_index, node in enumerate(children):
        node_text = getattr(node, 'text', None) or ''
        if 'Urine' in node_text and 'Detection' in node_text:
            break
    else:
        print("未找到 Urine Detection 模块")
        return

    print(f"\n找到 Urine Detection 位置: {section_index}")
    print(f"\n从 Urine Detection 开始的前30个元素:")
    print("-" * 60)

    window = children[section_index:section_index + 30]
    for position, node in enumerate(window, start=section_index):
        # Strip the "{namespace}" prefix to get the bare tag name.
        local_tag = node.tag.split('}')[-1]

        raw_text = getattr(node, 'text', '')
        preview = raw_text[:80].replace('\n', '\\n') if raw_text else ''

        notes = []
        if local_tag == 'p':
            # Paragraph: report its style id and any embedded drawings.
            style_node = node.find(f'.//{w_ns}pStyle')
            if style_node is not None:
                style_id = style_node.get(f'{w_ns}val')
                notes.append(f" [style: {style_id}]")
            drawing_count = len(node.findall(f'.//{w_ns}drawing'))
            if drawing_count:
                notes.append(f" [有图片: {drawing_count}个]")
        elif local_tag == 'tbl':
            # Table: count every <w:tr> descendant (rows of nested tables
            # are included by the descendant search).
            row_count = len(node.findall(f'.//{w_ns}tr'))
            notes.append(f" [行数: {row_count}]")

        print(f" [{position}] <{local_tag}>{''.join(notes)}: {preview}")
def main():
    """Analyze the template document and the newest generated report.

    All paths are relative to the current working directory. Generated
    reports are looked up in ``reports`` as files named
    ``filled_report_*.docx``; the one whose name sorts last is used.
    If the reports directory, a matching report, or the template file is
    missing, a message is printed and the function returns without
    analyzing anything.

    Returns:
        None. Results are printed by ``analyze_document``.
    """
    # Template document.
    template_path = r"../Be.U Wellness Center功能医学健康报告&定制化方案-案例.docx"

    # Directory holding the generated reports.
    reports_dir = "reports"
    # isdir rather than exists: a regular file named "reports" would pass an
    # existence check and then make os.listdir raise NotADirectoryError.
    if not os.path.isdir(reports_dir):
        print("reports目录不存在")
        return

    candidates = [
        entry for entry in os.listdir(reports_dir)
        if entry.startswith('filled_report_') and entry.endswith('.docx')
    ]
    if not candidates:
        print("未找到生成的报告文件")
        return

    # The lexicographically greatest name is taken as the latest report.
    # NOTE(review): presumably the file names embed a sortable timestamp -
    # confirm against the code that writes them.
    generated_path = os.path.join(reports_dir, max(candidates))

    # Report a missing template the same way as the other missing inputs
    # instead of letting the document loader fail with a traceback.
    if not os.path.isfile(template_path):
        print(f"模板文件不存在: {template_path}")
        return

    # Analyze both documents.
    analyze_document(template_path, "模板文件")
    analyze_document(generated_path, "生成文件")
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()