"""分析生成文件的结构问题""" from docx import Document from lxml import etree import zipfile import os def analyze_file(filepath, name): """分析文件结构""" print(f"\n{'='*70}") print(f"分析: {name}") print(f"文件: {filepath}") print(f"{'='*70}") # 读取 XML with zipfile.ZipFile(filepath, 'r') as z: xml_content = z.read('word/document.xml') tree = etree.fromstring(xml_content) ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} body = tree.find('.//w:body', ns) # 找到 Urine Detection 相关的元素 print("\n搜索 'Urine Detection' 相关元素:") print("-" * 70) urine_positions = [] for i, elem in enumerate(body): text = ''.join(elem.itertext()).strip() if 'Urine' in text and 'Detection' in text: tag = elem.tag.split('}')[-1] text_preview = text[:100].replace('\n', '\\n') print(f" [{i}] <{tag}>: {text_preview}...") urine_positions.append(i) if not urine_positions: print(" 未找到") return # 分析第一个 Urine Detection 位置前后的元素 first_pos = urine_positions[0] print(f"\n从第一个 Urine Detection (位置 {first_pos}) 开始的40个元素:") print("-" * 70) for i in range(first_pos, min(first_pos + 40, len(body))): elem = body[i] tag = elem.tag.split('}')[-1] text = ''.join(elem.itertext()).strip() text_preview = text[:80].replace('\n', '\\n') if text else '[空]' # 额外信息 extra = "" if tag == 'tbl': rows = elem.findall('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}tr') extra = f" [行数:{len(rows)}]" # 检查是否是表头 if len(rows) == 1 and ('Abb' in text or 'Project' in text): extra += " [表头]" elif tag == 'p': # 检查是否有分页符 page_breaks = elem.findall('.//{http://schemas.openxmlformats.org/wordprocessingml/2006/main}br') for br in page_breaks: br_type = br.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type') if br_type == 'page': extra = " [分页符]" break print(f" [{i}] <{tag}>{extra}: {text_preview}") def main(): # 模板 template_path = r"../Be.U Wellness Center功能医学健康报告&定制化方案-案例.docx" # 最新生成的文件 generated_path = "reports/filled_report_20260115_204528.docx" analyze_file(template_path, "模板") analyze_file(generated_path, "生成文件") if __name__ == "__main__": main()