初始化医疗报告生成项目,添加核心代码文件

This commit is contained in:
2026-02-13 18:32:52 +08:00
commit faaf2158d4
69 changed files with 29836 additions and 0 deletions

View File

@@ -0,0 +1,80 @@
from docx import Document
from lxml import etree
# WordprocessingML main namespace URI, plus the prefix map handed to
# find()/findall() so that 'w:...' paths resolve against it.
w = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
ns = {'w': w}

# Open the generated report and snapshot the direct children of its body
# element so the lookups further down can index into them.
doc = Document(r'C:\Users\UI\Desktop\医疗报告\backend\reports\filled_report_20260212_165326.docx')
body = doc.element.body
children = [*body]
# WordprocessingML main namespace. Kept private to this helper so the function
# does not rely on script-level globals having been defined before it is called.
_W_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
_W_NSMAP = {'w': _W_NS}
# Attribute values that switch an on/off ("toggle") property such as w:b off.
_W_OFF_VALUES = frozenset({'0', 'false', 'off'})


def _w_attr(node, name):
    """Return the ``w:``-namespaced attribute *name* of *node*, or None."""
    return node.get(f'{{{_W_NS}}}{name}')


def _toggle_label(node):
    """Return 'yes' or 'no' for an on/off property element (e.g. ``w:b``).

    A missing ``w:val`` means the property is on; only an explicit
    off value disables it.
    """
    return 'no' if _w_attr(node, 'val') in _W_OFF_VALUES else 'yes'


def _format_size(raw):
    """Render a ``w:sz`` value, appending the point size when it is numeric.

    ``w:sz`` is expressed in half-points, so true division is used to keep
    sizes such as 10.5pt instead of truncating them.
    """
    try:
        half_points = int(raw)
    except (TypeError, ValueError):
        # Missing or non-integer value: show it verbatim rather than crash.
        return f'{raw}'
    return f'{raw} (={half_points / 2:g}pt)'


def _print_run_props(rPr):
    """Print the font, size, bold and colour settings found in a ``w:rPr``."""
    rFonts = rPr.find('w:rFonts', _W_NSMAP)
    sz = rPr.find('w:sz', _W_NSMAP)
    szCs = rPr.find('w:szCs', _W_NSMAP)
    b = rPr.find('w:b', _W_NSMAP)
    bCs = rPr.find('w:bCs', _W_NSMAP)
    color = rPr.find('w:color', _W_NSMAP)
    if rFonts is not None:
        # Only report the font slots that are actually set.
        fonts = {}
        for attr in ('ascii', 'hAnsi', 'eastAsia', 'cs'):
            value = _w_attr(rFonts, attr)
            if value:
                fonts[attr] = value
        print(f' fonts: {fonts}')
    if sz is not None:
        print(f' sz: {_format_size(_w_attr(sz, "val"))}')
    if szCs is not None:
        print(f' szCs: {_w_attr(szCs, "val")}')
    if b is not None:
        print(f' bold: {_toggle_label(b)}')
    if bCs is not None:
        print(f' boldCs: {_toggle_label(bCs)}')
    if color is not None:
        print(f' color: {_w_attr(color, "val")}')


def show_para_format(elem, label):
    """Print a formatting summary of one WordprocessingML element.

    Writes to stdout: a header with *label*, the first 80 characters of the
    element's text, the paragraph alignment (``w:jc``) and style
    (``w:pStyle``) when present, and then, for every direct ``w:r`` child
    that has non-blank text, the first 50 characters of the run text
    followed by its run properties.

    Args:
        elem: An element exposing the ElementTree API (``find``, ``findall``,
            ``itertext``, ``get``), typically a ``w:p`` node.
        label: Heading text printed between ``===`` markers.

    Returns:
        None. All output goes to stdout.
    """
    text = ''.join(elem.itertext()).strip()
    print(f'=== {label} ===')
    print(f'Text: {text[:80]}')

    # Paragraph-level properties.
    pPr = elem.find('w:pPr', _W_NSMAP)
    if pPr is not None:
        jc = pPr.find('w:jc', _W_NSMAP)
        if jc is not None:
            print(f' jc: {_w_attr(jc, "val")}')
        pStyle = pPr.find('w:pStyle', _W_NSMAP)
        if pStyle is not None:
            print(f' pStyle: {_w_attr(pStyle, "val")}')

    # Run-level properties; runs without visible text are skipped.
    for r in elem.findall('w:r', _W_NSMAP):
        rt = ''.join(r.itertext()).strip()
        if not rt:
            continue
        print(f' Run: "{rt[:50]}"')
        rPr = r.find('w:rPr', _W_NSMAP)
        if rPr is None:
            print(f' (no rPr)')
        else:
            _print_run_props(rPr)
# Headings to inspect, in output order. Each entry is:
#   (label, substrings that must all occur in the text, exclusive length cap)
# The length cap keeps long body paragraphs that merely mention the heading
# from matching.
lookups = (
    ('Overall Health Assessment', ('Overall Health', 'Assessment'), 200),
    ('Medical Intervention', ('Medical Intervention', '医学干预'), 200),
    ('FHA Title', ('Functional Medical Health Advice', '功能医学健康建议'), 300),
)
for order, (heading, needles, limit) in enumerate(lookups):
    if order:
        # Blank separator line between consecutive sections.
        print()
    for idx, node in enumerate(children):
        content = ''.join(node.itertext()).strip()
        if len(content) < limit and all(piece in content for piece in needles):
            # Only the first matching body child is reported.
            show_para_format(node, f'{heading} [{idx}]')
            break