初始化医疗报告生成项目,添加核心代码文件
This commit is contained in:
80
backend/analyze_output.py
Normal file
80
backend/analyze_output.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""分析生成文件的结构问题"""
|
||||
from docx import Document
|
||||
from lxml import etree
|
||||
import zipfile
|
||||
import os
|
||||
|
||||
# WordprocessingML main namespace, shared by every tag/attribute lookup below.
_W_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'


def _local_tag(elem):
    """Return the element's tag without its namespace, or None for non-element nodes."""
    tag = elem.tag
    # lxml yields comments and processing instructions as children whose
    # ``tag`` is not a string; they have no tag name to report.
    if not isinstance(tag, str):
        return None
    return tag.split('}')[-1]


def _element_text(elem):
    """Return all text nested under the element, concatenated and stripped."""
    return ''.join(elem.itertext()).strip()


def _extra_info(elem, tag, text):
    """Return the bracketed annotation printed after the tag name ("" when none applies)."""
    if tag == 'tbl':
        # Counts every <w:tr> below the table, including rows of nested tables.
        rows = elem.findall(f'.//{{{_W_NS}}}tr')
        extra = f" [行数:{len(rows)}]"
        # A single-row table mentioning 'Abb' or 'Project' is flagged as a header.
        if len(rows) == 1 and ('Abb' in text or 'Project' in text):
            extra += " [表头]"
        return extra
    if tag == 'p':
        type_attr = f'{{{_W_NS}}}type'
        breaks = elem.findall(f'.//{{{_W_NS}}}br')
        # Only <w:br w:type="page"> counts; other <w:br> elements are ignored.
        if any(br.get(type_attr) == 'page' for br in breaks):
            return " [分页符]"
    return ""


def analyze_file(filepath: str, name: str) -> None:
    """Print the body structure of a .docx file around its first 'Urine Detection' element.

    The function reads ``word/document.xml`` from the archive, lists every
    direct child of ``<w:body>`` whose text contains both 'Urine' and
    'Detection', and then prints up to 40 body children starting at the first
    match. Tables are annotated with their row count (and a header marker),
    paragraphs with a page-break marker.

    Args:
        filepath: Path of the .docx (zip) file to inspect.
        name: Label printed in the report header.

    Returns:
        None. All results are written to standard output.

    Raises:
        Nothing is caught here: errors raised while opening the archive,
        reading ``word/document.xml`` or parsing the XML propagate to the
        caller unchanged.
    """
    print(f"\n{'='*70}")
    print(f"分析: {name}")
    print(f"文件: {filepath}")
    print(f"{'='*70}")

    # Read the main document part out of the .docx archive.
    with zipfile.ZipFile(filepath, 'r') as archive:
        xml_content = archive.read('word/document.xml')

    root = etree.fromstring(xml_content)
    body = root.find('.//w:body', {'w': _W_NS})
    # A document part without <w:body> has nothing to scan. Compare against
    # None explicitly: an element without children can be falsy.
    children = list(body) if body is not None else []

    # Locate the body children that mention Urine Detection.
    print("\n搜索 'Urine Detection' 相关元素:")
    print("-" * 70)

    urine_positions = []
    for i, elem in enumerate(children):
        tag = _local_tag(elem)
        if tag is None:
            continue
        text = _element_text(elem)
        if 'Urine' in text and 'Detection' in text:
            text_preview = text[:100].replace('\n', '\\n')
            print(f" [{i}] <{tag}>: {text_preview}...")
            urine_positions.append(i)

    if not urine_positions:
        print(" 未找到")
        return

    # Dump the elements that follow the first match, keeping body positions.
    first_pos = urine_positions[0]
    print(f"\n从第一个 Urine Detection (位置 {first_pos}) 开始的40个元素:")
    print("-" * 70)

    window = children[first_pos:first_pos + 40]
    for i, elem in enumerate(window, start=first_pos):
        tag = _local_tag(elem)
        if tag is None:
            continue
        text = _element_text(elem)
        text_preview = text[:80].replace('\n', '\\n') if text else '[空]'
        extra = _extra_info(elem, tag, text)
        print(f" [{i}] <{tag}>{extra}: {text_preview}")
|
||||
|
||||
def main():
    """Analyze the reference template first, then the generated report."""
    targets = (
        # Reference template.
        (r"../Be.U Wellness Center功能医学健康报告&定制化方案-案例.docx", "模板"),
        # Most recently generated report (file name is hard-coded).
        ("reports/filled_report_20260115_204528.docx", "生成文件"),
    )
    for path, label in targets:
        analyze_file(path, label)
|
||||
|
||||
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user