"""Compare formatting differences between the template and the generated file."""
|
|
from docx import Document
|
|
from docx.shared import Pt, Inches
|
|
import os
|
|
|
|
def analyze_document(filepath, name):
    """Print a structural summary of a document's "Urine Detection" section.

    Opens ``filepath`` with ``Document``, scans the children of the document
    body for the first element whose ``text`` contains both "Urine" and
    "Detection", then prints one line for that element and for each element
    after it, up to 30 elements in total.

    Args:
        filepath: Path of the document handed to ``Document``.
        name: Label printed in the banner to identify this document.

    Returns:
        None. When no matching element is found, a notice is printed and the
        function returns early.
    """
    w_ns = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    banner = '=' * 60

    def text_of(node):
        # A missing or falsy ``text`` attribute is treated as an empty string.
        return getattr(node, 'text', None) or ''

    print(f"\n{banner}")
    print(f"分析: {name}")
    print(f"文件: {filepath}")
    print(f"{banner}")

    doc = Document(filepath)
    body = doc.element.body

    # Locate the Urine Detection module: index of the first body child whose
    # text mentions both keywords, or -1 when there is none.
    urine_start = next(
        (
            idx
            for idx, node in enumerate(body)
            if 'Urine' in text_of(node) and 'Detection' in text_of(node)
        ),
        -1,
    )

    if urine_start < 0:
        print("未找到 Urine Detection 模块")
        return

    print(f"\n找到 Urine Detection 位置: {urine_start}")
    print("\n从 Urine Detection 开始的前30个元素:")
    print("-" * 60)

    window = body[urine_start:urine_start + 30]
    for idx, node in enumerate(window, start=urine_start):
        # Strip the "{namespace}" prefix so only the local tag name remains.
        tag = node.tag.split('}')[-1]
        preview = text_of(node)[:80].replace('\n', '\\n')

        # Collect extra per-element details.
        details = []
        if tag == 'p':
            # Paragraph style, if one is declared anywhere below the element.
            style_node = node.find('.//' + w_ns + 'pStyle')
            if style_node is not None:
                style_val = style_node.get(w_ns + 'val')
                details.append(f" [style: {style_val}]")

            # Embedded drawings (pictures) inside the paragraph.
            pictures = node.findall('.//' + w_ns + 'drawing')
            if pictures:
                details.append(f" [有图片: {len(pictures)}个]")
        elif tag == 'tbl':
            # Row count; the descendant search also counts rows of any
            # tables nested inside this one.
            row_nodes = node.findall('.//' + w_ns + 'tr')
            details.append(f" [行数: {len(row_nodes)}]")

        extra_info = ''.join(details)
        print(f" [{idx}] <{tag}>{extra_info}: {preview}")
|
|
|
|
def main():
    """Analyze the template document and the latest generated report.

    Looks in the ``reports`` directory (relative to the current working
    directory) for files named ``filled_report_*.docx``, picks the one whose
    file name sorts last, and runs ``analyze_document`` on the template and
    on that file.

    Returns:
        None. Prints a notice and returns early when ``reports`` is not a
        directory or holds no matching file.
    """
    # Template file
    template_path = r"../Be.U Wellness Center功能医学健康报告&定制化方案-案例.docx"

    # Generated file: pick the latest one
    reports_dir = "reports"

    # isdir rather than exists: a regular file named "reports" passes an
    # existence check and would then make os.listdir raise
    # NotADirectoryError instead of reporting the problem.
    if not os.path.isdir(reports_dir):
        print("reports目录不存在")
        return

    candidates = [
        entry
        for entry in os.listdir(reports_dir)
        if entry.startswith('filled_report_') and entry.endswith('.docx')
    ]
    if not candidates:
        print("未找到生成的报告文件")
        return

    # "Latest" here means the greatest file name, not the newest mtime;
    # presumably the names embed a sortable timestamp — TODO confirm.
    generated_path = os.path.join(reports_dir, max(candidates))

    # Analyze both documents
    analyze_document(template_path, "模板文件")
    analyze_document(generated_path, "生成文件")
|
|
|
|
# Run the analysis only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
|