From 91ff28bdcf53e7493fcd801880da9fa4a87e8a76 Mon Sep 17 00:00:00 2001
From: wangys <3401275564@qq.com>
Date: Fri, 6 Mar 2026 14:50:43 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.gitignore | 4 +-
.vscode/launch.json | 27 --
ai-management-platform | 2 +-
difyPlugin/pdf/manifest.yaml | 3 +
difyPlugin/pdf/provider/pdf.yaml | 6 +-
difyPlugin/pdf/requirements.txt | 2 +-
difyPlugin/pdf/tools/pdf.py | 61 ----
difyPlugin/pdf/tools/pdf_column_range.py | 107 ++++++
.../tools/{pdf.yaml => pdf_column_range.yaml} | 2 +-
difyPlugin/pdf/tools/pdf_extract_range.py | 48 +++
difyPlugin/pdf/tools/pdf_extract_range.yaml | 68 ++++
difyPlugin/pdf/tools/pdf_single_page.py | 23 +-
difyPlugin/pdf/tools/pdf_summary.py | 209 +++++++++++
difyPlugin/pdf/tools/pdf_summary.yaml | 99 ++++++
difyPlugin/pdf/tools/pdf_to_markdown.py | 335 ++++++++++++++++++
difyPlugin/pdf/tools/pdf_to_markdown.yaml | 68 ++++
difyPlugin/pdf/tools/pdf_toc.py | 273 ++++++++++++++
difyPlugin/pdf/tools/pdf_toc.yaml | 79 +++++
18 files changed, 1316 insertions(+), 100 deletions(-)
delete mode 100644 difyPlugin/pdf/tools/pdf.py
create mode 100644 difyPlugin/pdf/tools/pdf_column_range.py
rename difyPlugin/pdf/tools/{pdf.yaml => pdf_column_range.yaml} (96%)
create mode 100644 difyPlugin/pdf/tools/pdf_extract_range.py
create mode 100644 difyPlugin/pdf/tools/pdf_extract_range.yaml
create mode 100644 difyPlugin/pdf/tools/pdf_summary.py
create mode 100644 difyPlugin/pdf/tools/pdf_summary.yaml
create mode 100644 difyPlugin/pdf/tools/pdf_to_markdown.py
create mode 100644 difyPlugin/pdf/tools/pdf_to_markdown.yaml
create mode 100644 difyPlugin/pdf/tools/pdf_toc.py
create mode 100644 difyPlugin/pdf/tools/pdf_toc.yaml
diff --git a/.gitignore b/.gitignore
index 4cbe3caa..195f9bd6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,6 @@
.tmp
.trae
-**/*.difypkg
\ No newline at end of file
+**/*.difypkg
+urbanLifeServ/*
+*/.data
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 203429db..e69de29b 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -1,27 +0,0 @@
-{
- "version": "0.2.0",
- "configurations": [
- {
- "name": "Python: FastAPI Server",
- "type": "python",
- "request": "launch",
- "program": "${workspaceFolder}/difyPlugin/main.py",
- "console": "integratedTerminal",
- "justMyCode": true,
- "env": {
- "PYTHONUNBUFFERED": "1"
- },
- "cwd": "${workspaceFolder}/difyPlugin",
- "args": []
- },
- {
- "name": "Python: Debug Plugin",
- "type": "python",
- "request": "launch",
- "program": "${workspaceFolder}/difyPlugin/app/plugins/pdf/__init__.py",
- "console": "integratedTerminal",
- "justMyCode": true,
- "cwd": "${workspaceFolder}/difyPlugin"
- }
- ]
-}
\ No newline at end of file
diff --git a/ai-management-platform b/ai-management-platform
index 199d8180..96f7c3aa 160000
--- a/ai-management-platform
+++ b/ai-management-platform
@@ -1 +1 @@
-Subproject commit 199d8180a698c62d79c5c853302733050fe9c0fa
+Subproject commit 96f7c3aa4c9ac8b00e0b98b5a4998b5f910d5337
diff --git a/difyPlugin/pdf/manifest.yaml b/difyPlugin/pdf/manifest.yaml
index fb7631bf..27f075f3 100644
--- a/difyPlugin/pdf/manifest.yaml
+++ b/difyPlugin/pdf/manifest.yaml
@@ -19,6 +19,9 @@ resource:
permission:
tool:
enabled: true
+ model:
+ enabled: true
+ llm: true
plugins:
tools:
- provider/pdf.yaml
diff --git a/difyPlugin/pdf/provider/pdf.yaml b/difyPlugin/pdf/provider/pdf.yaml
index 83a55577..c7473239 100644
--- a/difyPlugin/pdf/provider/pdf.yaml
+++ b/difyPlugin/pdf/provider/pdf.yaml
@@ -56,8 +56,12 @@ identity:
# en_US: "Access Token"
tools:
- - tools/pdf.yaml
+ - tools/pdf_column_range.yaml
- tools/pdf_single_page.yaml
+ - tools/pdf_summary.yaml
+ - tools/pdf_toc.yaml
+ - tools/pdf_extract_range.yaml
+ - tools/pdf_to_markdown.yaml
extra:
python:
source: provider/pdf.py
diff --git a/difyPlugin/pdf/requirements.txt b/difyPlugin/pdf/requirements.txt
index e9cf72f9..80735ec2 100644
--- a/difyPlugin/pdf/requirements.txt
+++ b/difyPlugin/pdf/requirements.txt
@@ -1,2 +1,2 @@
dify_plugin>=0.4.0,<0.7.0
-PyPDF2>=3.0.1
+pymupdf>=1.27.1
\ No newline at end of file
diff --git a/difyPlugin/pdf/tools/pdf.py b/difyPlugin/pdf/tools/pdf.py
deleted file mode 100644
index fc226c04..00000000
--- a/difyPlugin/pdf/tools/pdf.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import re
-from collections.abc import Generator
-from io import BytesIO
-from typing import Any
-
-import PyPDF2
-from dify_plugin import Tool
-from dify_plugin.entities.tool import ToolInvokeMessage
-
-
-class PdfTool(Tool):
- def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
- file = tool_parameters.get("file")
- if not file:
- yield self.create_text_message("Error: file is required")
- return
-
- # file.blob returns bytes
- pdf_bytes = file.blob
- reader = PyPDF2.PdfReader(BytesIO(pdf_bytes))
- num_pages = len(reader.pages)
-
- toc_start = None
- toc_end = None
-
- toc_patterns = [
- r'目录',
- r'Table of Contents',
- r'Contents',
- r'目次'
- ]
-
- for page_num in range(num_pages):
- page = reader.pages[page_num]
- text = page.extract_text() or ""
-
- if any(re.search(pattern, text, re.IGNORECASE) for pattern in toc_patterns):
- if toc_start is None:
- toc_start = page_num
- toc_end = page_num
- elif toc_start is not None and toc_end is not None:
- break
-
- if toc_start is None:
- yield self.create_json_message({
- "start": None,
- "end": None,
- "pages": []
- })
- return
-
- toc_pages = []
- for page_num in range(toc_start, toc_end + 1):
- page = reader.pages[page_num]
- toc_pages.append(page.extract_text() or "")
-
- yield self.create_json_message({
- "start": toc_start,
- "end": toc_end,
- "pages": toc_pages
- })
\ No newline at end of file
diff --git a/difyPlugin/pdf/tools/pdf_column_range.py b/difyPlugin/pdf/tools/pdf_column_range.py
new file mode 100644
index 00000000..5d5f5db8
--- /dev/null
+++ b/difyPlugin/pdf/tools/pdf_column_range.py
@@ -0,0 +1,107 @@
+import json
+import re
+from collections.abc import Generator
+from io import BytesIO
+from typing import Any
+
+import fitz # PyMuPDF 核心库
+from dify_plugin import Tool
+from dify_plugin.entities.tool import ToolInvokeMessage
+
+
+class PdfTool(Tool):
+    """Locate the table-of-contents pages of a PDF and return their text."""
+
+    # Matches 目录 (optionally split by an ASCII or ideographic space), 目次, Contents.
+    _TOC_RE = re.compile(
+        r"目[ \u3000]?录|Table of Contents|Contents|目次", re.IGNORECASE
+    )
+
+    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
+        """Scan pages in order and emit the first run of TOC-looking pages.
+
+        Args:
+            tool_parameters: must contain ``file``, an object whose ``blob``
+                attribute is handed to PyMuPDF as the PDF stream.
+
+        Yields:
+            A text message (JSON string) followed by a JSON message, both
+            carrying ``start``/``end`` (0-based page indexes, or ``None`` when
+            no page matches), ``pages`` and ``pages_text``.
+        """
+        file = tool_parameters.get("file")
+        if not file:
+            yield self.create_text_message("Error: file is required")
+            return
+
+        toc_start = None
+        toc_end = None
+        toc_pages: list[str] = []
+
+        doc = fitz.open(stream=file.blob, filetype="pdf")
+        try:
+            for page_num, page in enumerate(doc):
+                text = page.get_text() or ""
+                if self._TOC_RE.search(text):
+                    if toc_start is None:
+                        toc_start = page_num
+                    toc_end = page_num
+                    # Keep the text now so TOC pages are not extracted twice.
+                    toc_pages.append(text)
+                elif toc_start is not None:
+                    # First non-matching page after the run ends the TOC.
+                    break
+        finally:
+            # Release the document even if text extraction raises.
+            doc.close()
+
+        result = {
+            "start": toc_start,
+            "end": toc_end,
+            "pages": toc_pages,
+            "pages_text": "\n".join(toc_pages) if toc_pages else "",
+        }
+        # Same payload twice: as a JSON string and as a structured message.
+        yield self.create_text_message(json.dumps(result, ensure_ascii=False))
+        yield self.create_json_message(result)
+
+
+if __name__ == "__main__":
+ # Manual smoke test using PyMuPDF; NOTE(review): the path below is machine-specific
+ pdf_path = r"F:\Project\urbanLifeline\docs\AI训练资料\菱重S12R发动机说明书.pdf"
+ doc = fitz.open(pdf_path) # open the local file directly by path
+ num_pages = len(doc)
+
+ toc_start = None
+ toc_end = None
+
+ toc_patterns = [
+ r'目录',
+ r'目 录',
+ r'目\u3000录',
+ r'Table of Contents',
+ r'Contents',
+ r'目次'
+ ]
+
+ # Scan pages in order for the first run of pages matching a TOC keyword
+ for page_num in range(num_pages):
+ page = doc[page_num]
+ text = page.get_text() or ""
+ if any(re.search(pattern, text, re.IGNORECASE) for pattern in toc_patterns):
+ if toc_start is None:
+ toc_start = page_num
+ toc_end = page_num
+ elif toc_start is not None and toc_end is not None:
+ break
+
+ # Extract the text of the TOC pages
+ toc_pages = []
+ toc_start = toc_start if toc_start is not None else 18  # fallback start when no page matched
+ toc_end = toc_end if toc_end is not None else toc_start + 9
+ for page_num in range(toc_start, toc_end):  # NOTE(review): excludes toc_end, unlike PdfTool._invoke (toc_end + 1); fallback range is not checked against num_pages — confirm intent
+ page = doc[page_num]
+ toc_pages.append(page.get_text() or "")
+
+ print(toc_start, toc_end, toc_pages)
+ doc.close() # close the document
\ No newline at end of file
diff --git a/difyPlugin/pdf/tools/pdf.yaml b/difyPlugin/pdf/tools/pdf_column_range.yaml
similarity index 96%
rename from difyPlugin/pdf/tools/pdf.yaml
rename to difyPlugin/pdf/tools/pdf_column_range.yaml
index fe18f6ab..8f758dd7 100644
--- a/difyPlugin/pdf/tools/pdf.yaml
+++ b/difyPlugin/pdf/tools/pdf_column_range.yaml
@@ -33,4 +33,4 @@ parameters:
- "pdf"
extra:
python:
- source: tools/pdf.py
\ No newline at end of file
+ source: tools/pdf_column_range.py
\ No newline at end of file
diff --git a/difyPlugin/pdf/tools/pdf_extract_range.py b/difyPlugin/pdf/tools/pdf_extract_range.py
new file mode 100644
index 00000000..fbaa3927
--- /dev/null
+++ b/difyPlugin/pdf/tools/pdf_extract_range.py
@@ -0,0 +1,48 @@
+import json
+from collections.abc import Generator
+from typing import Any
+
+import fitz # PyMuPDF
+from dify_plugin import Tool
+from dify_plugin.entities.tool import ToolInvokeMessage
+
+
+class PdfExtractRangeTool(Tool):
+    """Extract plain text from an inclusive, 0-based page range of a PDF."""
+
+    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
+        """Yield the range text as a JSON string message, then as a JSON message.
+
+        ``start_page``/``end_page`` are clamped into the document; an end
+        before the start collapses the range to the single start page.
+        """
+        file = tool_parameters.get("file")
+        if not file:
+            yield self.create_text_message("Error: file is required")
+            return
+
+        # ``or 0`` also covers a parameter that is present but null.
+        start_page = int(tool_parameters.get("start_page") or 0)
+        end_page = int(tool_parameters.get("end_page") or 0)
+
+        doc = fitz.open(stream=file.blob, filetype="pdf")
+        try:
+            num_pages = len(doc)
+            if num_pages == 0:
+                # Clamping would still index page 0 and raise IndexError.
+                yield self.create_text_message("Error: PDF has no pages")
+                return
+            start_page = max(0, min(start_page, num_pages - 1))
+            end_page = max(start_page, min(end_page, num_pages - 1))
+            page_texts = [
+                doc[i].get_text("text", sort=True) or ""
+                for i in range(start_page, end_page + 1)
+            ]
+        finally:
+            doc.close()  # released even when extraction raises
+
+        text = "\n\n--- 分页 ---\n\n".join(page_texts)
+        total = end_page - start_page + 1
+        result = {"start": start_page, "end": end_page, "total_pages": total, "text": text}
+        yield self.create_text_message(json.dumps(result, ensure_ascii=False))
+        yield self.create_json_message(result)
diff --git a/difyPlugin/pdf/tools/pdf_extract_range.yaml b/difyPlugin/pdf/tools/pdf_extract_range.yaml
new file mode 100644
index 00000000..0bc10b6f
--- /dev/null
+++ b/difyPlugin/pdf/tools/pdf_extract_range.yaml
@@ -0,0 +1,68 @@
+identity:
+ name: "pdf_extract_range"
+ author: "yslg"
+ label:
+ en_US: "Extract Page Range Text"
+ zh_Hans: "提取页面范围文本"
+ pt_BR: "Extrair Texto do Intervalo de Páginas"
+ ja_JP: "ページ範囲テキスト抽出"
+description:
+ human:
+ en_US: "Extract plain text from a specified page range of a PDF file"
+ zh_Hans: "从PDF文件的指定页码范围提取纯文本"
+ pt_BR: "Extrair texto simples de um intervalo de páginas especificado de um arquivo PDF"
+ ja_JP: "PDFファイルの指定ページ範囲からプレーンテキストを抽出"
+ llm: "Extract plain text from PDF pages in the given start-end range. Returns concatenated text of all pages in range."
+parameters:
+ - name: file
+ type: file
+ required: true
+ label:
+ en_US: PDF File
+ zh_Hans: PDF 文件
+ pt_BR: Arquivo PDF
+ ja_JP: PDFファイル
+ human_description:
+ en_US: "PDF file to extract text from"
+ zh_Hans: "要提取文本的 PDF 文件"
+ pt_BR: "Arquivo PDF para extrair texto"
+ ja_JP: "テキストを抽出するPDFファイル"
+ llm_description: "PDF file to extract page range text from"
+ form: llm
+ fileTypes:
+ - "pdf"
+ - name: start_page
+ type: number
+ required: true
+ label:
+ en_US: Start Page
+ zh_Hans: 起始页码
+ pt_BR: Página Inicial
+ ja_JP: 開始ページ
+ human_description:
+ en_US: "Start page index (0-based)"
+ zh_Hans: "起始页码(从0开始)"
+ pt_BR: "Índice da página inicial (base 0)"
+ ja_JP: "開始ページ番号(0始まり)"
+ llm_description: "Start page index (0-based)"
+ form: llm
+ default: 0
+ - name: end_page
+ type: number
+ required: true
+ label:
+ en_US: End Page
+ zh_Hans: 结束页码
+ pt_BR: Página Final
+ ja_JP: 終了ページ
+ human_description:
+ en_US: "End page index (0-based, inclusive)"
+ zh_Hans: "结束页码(从0开始,包含该页)"
+ pt_BR: "Índice da página final (base 0, inclusivo)"
+ ja_JP: "終了ページ番号(0始まり、含む)"
+ llm_description: "End page index (0-based, inclusive)"
+ form: llm
+ default: 0
+extra:
+ python:
+ source: tools/pdf_extract_range.py
diff --git a/difyPlugin/pdf/tools/pdf_single_page.py b/difyPlugin/pdf/tools/pdf_single_page.py
index 5ed41ecf..0fa67660 100644
--- a/difyPlugin/pdf/tools/pdf_single_page.py
+++ b/difyPlugin/pdf/tools/pdf_single_page.py
@@ -1,8 +1,9 @@
+import json
from collections.abc import Generator
from io import BytesIO
from typing import Any
-import PyPDF2
+import fitz # PyMuPDF 核心库
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
@@ -16,21 +17,29 @@ class PdfSinglePageTool(Tool):
yield self.create_text_message("Error: file is required")
return
+ # 从字节流加载 PDF(替换 PyPDF2 的 PdfReader)
pdf_bytes = file.blob
- reader = PyPDF2.PdfReader(BytesIO(pdf_bytes))
- num_pages = len(reader.pages)
+ doc = fitz.open(stream=pdf_bytes, filetype="pdf") # 字节流方式打开
+ num_pages = len(doc)
+ # 页码边界处理(逻辑与原代码一致)
page_index = int(page)
if page_index < 0:
page_index = 0
if page_index >= num_pages:
page_index = num_pages - 1
- selected_page = reader.pages[page_index]
- text = selected_page.extract_text() or ""
+ # 提取指定页面文本(PyMuPDF 方式)
+ selected_page = doc[page_index]
+ text = selected_page.get_text() or "" # get_text() 提取文本,比 PyPDF2 更精准
- yield self.create_json_message({
+ # 关闭文档释放资源
+ doc.close()
+
+ result = {
"start": page_index,
"end": page_index,
"pages": [text]
- })
+ }
+ yield self.create_text_message(json.dumps(result, ensure_ascii=False))
+ yield self.create_json_message(result)
\ No newline at end of file
diff --git a/difyPlugin/pdf/tools/pdf_summary.py b/difyPlugin/pdf/tools/pdf_summary.py
new file mode 100644
index 00000000..684914c7
--- /dev/null
+++ b/difyPlugin/pdf/tools/pdf_summary.py
@@ -0,0 +1,209 @@
+import json
+import re
+from collections.abc import Generator
+from typing import Any
+
+import fitz
+from dify_plugin import Tool
+from dify_plugin.entities.model.llm import LLMModelConfig
+from dify_plugin.entities.model.message import SystemPromptMessage, UserPromptMessage
+from dify_plugin.entities.tool import ToolInvokeMessage
+
+
+class PdfSummaryTool(Tool):
+ """Fast PDF page summary tool.
+
+ Default behavior is optimized for throughput in large workflows:
+ - Extract plain text and lightweight table data only.
+ - Skip expensive image base64 and drawing path extraction.
+ - Skip LLM by default unless `use_llm=true` is explicitly passed.
+ """
+
+ def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
+ file = tool_parameters.get("file")
+ if not file:
+ yield self.create_text_message("Error: file is required")
+ return
+
+ start_page = self._to_int(tool_parameters.get("pdf_start_page"), 0)  # _to_int is defined outside this view; second argument is presumably the fallback — confirm
+ end_page = self._to_int(tool_parameters.get("pdf_end_page"), 0)
+ model_config = tool_parameters.get("model")
+ use_llm = self._to_bool(tool_parameters.get("use_llm"), False)
+
+ max_chars_per_page = self._to_int(tool_parameters.get("max_chars_per_page"), 6000)
+ max_chars_per_page = max(800, min(max_chars_per_page, 20000))  # clamp to [800, 20000]
+
+ llm_prompt = tool_parameters.get(  # default applies only when the key is absent; NOTE(review): an explicit None/"" passes through
+ "llm_prompt",
+ "请基于输入的PDF页面文本做简洁准确摘要,输出中文要点。不要输出思考过程。",
+ )
+
+ pdf_bytes = file.blob
+ doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+ try:
+ num_pages = len(doc)
+ start_page = max(0, min(start_page, num_pages - 1))  # clamp into [0, num_pages - 1]
+ end_page = max(start_page, min(end_page, num_pages - 1))  # never before start_page; NOTE(review): with num_pages == 0 both become 0 and doc[0] is indexed below — confirm empty PDFs cannot reach here
+
+ pages_data: list[dict[str, Any]] = []
+ for page_idx in range(start_page, end_page + 1):  # end_page is inclusive
+ page = doc[page_idx]
+ page_data = self._extract_page_fast(page, page_idx, max_chars_per_page)
+ pages_data.append(page_data)
+
+ result = {
+ "total_pages_extracted": len(pages_data),
+ "page_range": {"start": start_page, "end": end_page},
+ "pages": pages_data,
+ }
+ yield self.create_json_message(result)  # structured result is emitted before any summary text
+
+ # Fast local summary first (deterministic, no model latency)
+ local_text = self._build_local_summary(pages_data)
+
+ # Optional LLM refinement, explicitly enabled only
+ if use_llm and model_config:
+ refined = self._summarize_with_llm(local_text, llm_prompt, model_config)
+ final_text = refined if refined else local_text  # fall back to the local text when the LLM returns nothing
+ else:
+ final_text = local_text
+
+ if final_text:  # no text message at all when there is nothing to say
+ yield self.create_text_message(final_text)
+ finally:
+ doc.close()  # always release the document
+
+ def _extract_page_fast(self, page: fitz.Page, page_idx: int, max_chars_per_page: int) -> dict[str, Any]:
+ text = (page.get_text("text") or "").strip()
+ if len(text) > max_chars_per_page:
+ text = text[:max_chars_per_page] + "\n...[truncated]"  # marker is appended after the cut, so the result may exceed the limit
+
+ tables: list[dict[str, Any]] = []
+ try:
+ tabs = page.find_tables()
+ for tab_idx, tab in enumerate(tabs.tables[:3]):  # at most the first 3 tables per page
+ cells = tab.extract() or []
+ tables.append(
+ {
+ "index": tab_idx,
+ "rows": tab.row_count,
+ "cols": tab.col_count,
+ "cells": cells[:10],  # only the first 10 entries of the extracted cells
+ }
+ )
+ except Exception:  # best-effort: any failure during table detection is swallowed
+ pass  # tables appended before the failure are kept
+
+ return {
+ "page_number": page_idx,
+ "text": text,
+ "tables": tables,
+ "images": [],  # always empty in this method
+ "drawings_summary": [],  # always empty in this method
+ "text_blocks": [],  # always empty in this method
+ "width": float(page.rect.width),
+ "height": float(page.rect.height),
+ }
+
+ def _build_local_summary(self, pages_data: list[dict[str, Any]]) -> str:
+ """Output actual page content as Markdown (text + tables).
+
+ No LLM needed downstream — the text is already usable Markdown.
+ """
+ parts: list[str] = []
+ for page in pages_data:
+ text = (page.get("text") or "").strip()
+ tables = page.get("tables") or []
+
+ page_parts: list[str] = []
+ if text:
+ page_parts.append(text)
+
+ for tab in tables:
+ cells = tab.get("cells") or []
+ if len(cells) >= 2:  # needs a header row plus at least one more row
+ md = self._cells_to_md_table(cells)
+ if md:
+ page_parts.append(md)
+
+ if page_parts:  # pages with no text and no rendered table are omitted
+ parts.append("\n\n".join(page_parts))
+
+ return "\n\n--- 分页 ---\n\n".join(parts)  # literal separator line between pages
+
+ @staticmethod
+ def _cells_to_md_table(cells: list) -> str:
+ if not cells:
+ return ""
+ header = cells[0]  # first row is treated as the header
+ ncols = len(header)
+ if ncols == 0:
+ return ""
+ clean = lambda c: str(c or "").replace("|", "\\|").replace("\n", " ").strip()  # escape pipes, flatten newlines; falsy cells become ""
+ lines = [
+ "| " + " | ".join(clean(c) for c in header) + " |",
+ "| " + " | ".join("---" for _ in range(ncols)) + " |",  # Markdown header separator row
+ ]
+ for row in cells[1:]:
+ padded = list(row) + [""] * max(0, ncols - len(row))  # pad short rows up to the header width
+ lines.append("| " + " | ".join(clean(c) for c in padded[:ncols]) + " |")  # truncate long rows to the header width
+ return "\n".join(lines)
+
+ def _summarize_with_llm(self, local_text: str, llm_prompt: str, model_config: dict[str, Any]) -> str:
+ response = self.session.model.llm.invoke(
+ model_config=LLMModelConfig(**model_config),  # model_config keys are passed through as keyword arguments
+ prompt_messages=[
+ SystemPromptMessage(content=llm_prompt),
+ UserPromptMessage(content=local_text),
+ ],
+ stream=False,  # a single response object is read below, not a stream
+ )
+
+ llm_text = ""
+ if hasattr(response, "message") and response.message:
+ content = response.message.content
+ if isinstance(content, str):
+ llm_text = content
+ elif isinstance(content, list):  # list content: concatenate the parts
+ llm_text = "".join(
+ item.data if hasattr(item, "data") else str(item)  # use .data when present, else str()
+ for item in content
+ )
+
+ return self._extract_visible_answer(llm_text)  # post-processed by _extract_visible_answer; llm_text stays "" when no usable message was found
+
+ @staticmethod
+ def _extract_visible_answer(text: str) -> str:
+ if not text:
+ return ""
+
+ box_match = re.search(r"<\|begin_of_box\|>([\s\S]*?)<\|end_of_box\|>", text)
+ if box_match:
+ text = box_match.group(1)
+ else:
+ text = re.sub(r"