This commit is contained in:
2026-03-06 14:50:43 +08:00
parent 843146cdd7
commit 91ff28bdcf
18 changed files with 1316 additions and 100 deletions

2
.gitignore vendored
View File

@@ -5,3 +5,5 @@
.trae
**/*.difypkg
urbanLifeServ/*
*/.data

27
.vscode/launch.json vendored
View File

@@ -1,27 +0,0 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Python: FastAPI Server",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/difyPlugin/main.py",
"console": "integratedTerminal",
"justMyCode": true,
"env": {
"PYTHONUNBUFFERED": "1"
},
"cwd": "${workspaceFolder}/difyPlugin",
"args": []
},
{
"name": "Python: Debug Plugin",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/difyPlugin/app/plugins/pdf/__init__.py",
"console": "integratedTerminal",
"justMyCode": true,
"cwd": "${workspaceFolder}/difyPlugin"
}
]
}

Submodule ai-management-platform updated: 199d8180a6...96f7c3aa4c

View File

@@ -19,6 +19,9 @@ resource:
permission:
tool:
enabled: true
model:
enabled: true
llm: true
plugins:
tools:
- provider/pdf.yaml

View File

@@ -56,8 +56,12 @@ identity:
# en_US: "Access Token"
tools:
- tools/pdf.yaml
- tools/pdf_column_range.yaml
- tools/pdf_single_page.yaml
- tools/pdf_summary.yaml
- tools/pdf_toc.yaml
- tools/pdf_extract_range.yaml
- tools/pdf_to_markdown.yaml
extra:
python:
source: provider/pdf.py

View File

@@ -1,2 +1,2 @@
dify_plugin>=0.4.0,<0.7.0
PyPDF2>=3.0.1
pymupdf>=1.27.1

View File

@@ -1,61 +0,0 @@
import re
from collections.abc import Generator
from io import BytesIO
from typing import Any
import PyPDF2
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
class PdfTool(Tool):
    """Locate table-of-contents pages in a PDF (legacy PyPDF2 implementation).

    Scans every page for a TOC heading and yields one JSON message:
      - "start"/"end": 0-based indices of the first/last TOC page, or None
      - "pages": extracted text of each TOC page (empty when no TOC found)
    """

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        file = tool_parameters.get("file")
        if not file:
            yield self.create_text_message("Error: file is required")
            return
        # file.blob returns bytes
        pdf_bytes = file.blob
        reader = PyPDF2.PdfReader(BytesIO(pdf_bytes))
        num_pages = len(reader.pages)
        toc_start = None
        toc_end = None
        # Headings that mark a table-of-contents page (Chinese and English).
        toc_patterns = [
            r'目录',
            r'Table of Contents',
            r'Contents',
            r'目次'
        ]
        # A contiguous run of matching pages is treated as the TOC; the first
        # non-matching page after the run ends the scan.
        for page_num in range(num_pages):
            page = reader.pages[page_num]
            text = page.extract_text() or ""
            if any(re.search(pattern, text, re.IGNORECASE) for pattern in toc_patterns):
                if toc_start is None:
                    toc_start = page_num
                toc_end = page_num
            elif toc_start is not None and toc_end is not None:
                break
        if toc_start is None:
            # No TOC detected: return an explicit empty result.
            yield self.create_json_message({
                "start": None,
                "end": None,
                "pages": []
            })
            return
        # Re-extract text for just the TOC pages (inclusive range).
        toc_pages = []
        for page_num in range(toc_start, toc_end + 1):
            page = reader.pages[page_num]
            toc_pages.append(page.extract_text() or "")
        yield self.create_json_message({
            "start": toc_start,
            "end": toc_end,
            "pages": toc_pages
        })

View File

@@ -0,0 +1,107 @@
import json
import re
from collections.abc import Generator
from io import BytesIO
from typing import Any
import fitz # PyMuPDF 核心库
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
class PdfTool(Tool):
    """Locate table-of-contents pages in a PDF using PyMuPDF.

    Scans pages for TOC headings (Chinese/English) and emits the result twice,
    as a JSON message and as a JSON-encoded text message:
      - "start"/"end": 0-based indices of the first/last TOC page, or None
      - "pages": extracted text of each TOC page
      - "pages_text": the same texts joined with newlines ("" when no TOC)
    """

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        file = tool_parameters.get("file")
        if not file:
            yield self.create_text_message("Error: file is required")
            return
        # Open the PDF from the uploaded bytes (PyMuPDF replaces PyPDF2 here).
        pdf_bytes = file.blob
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            num_pages = len(doc)
            toc_start = None
            toc_end = None
            # Headings that mark a TOC page (includes full-width-space variants).
            toc_patterns = [
                r'目录',
                r'目 录',
                r'\u3000录',
                r'Table of Contents',
                r'Contents',
                r'目次'
            ]
            # A contiguous run of matching pages is the TOC; the first
            # non-matching page after the run ends the scan.
            for page_num in range(num_pages):
                page = doc[page_num]
                text = page.get_text() or ""  # PyMuPDF text extraction
                if any(re.search(pattern, text, re.IGNORECASE) for pattern in toc_patterns):
                    if toc_start is None:
                        toc_start = page_num
                    toc_end = page_num
                elif toc_start is not None and toc_end is not None:
                    break
            # Extract text of the detected TOC pages (inclusive range).
            toc_pages = []
            if toc_start is not None and toc_end is not None:
                for page_num in range(toc_start, toc_end + 1):
                    page = doc[page_num]
                    toc_pages.append(page.get_text() or "")
        finally:
            # Fix: close the document even if page access/extraction raises;
            # the original only closed on the success path.
            doc.close()
        result = {
            "start": toc_start,
            "end": toc_end,
            "pages": toc_pages,
            "pages_text": "\n".join(toc_pages) if toc_pages else "",
        }
        yield self.create_text_message(json.dumps(result, ensure_ascii=False))
        yield self.create_json_message(result)
if __name__ == "__main__":
    # Manual smoke test against a local PDF (PyMuPDF version).
    pdf_path = r"F:\Project\urbanLifeline\docs\AI训练资料\菱重S12R发动机说明书.pdf"
    doc = fitz.open(pdf_path)  # local file opened directly
    try:
        num_pages = len(doc)
        toc_start = None
        toc_end = None
        toc_patterns = [
            r'目录',
            r'目 录',
            r'\u3000录',
            r'Table of Contents',
            r'Contents',
            r'目次'
        ]
        # Walk pages to find the contiguous TOC run.
        for page_num in range(num_pages):
            page = doc[page_num]
            text = page.get_text() or ""
            if any(re.search(pattern, text, re.IGNORECASE) for pattern in toc_patterns):
                if toc_start is None:
                    toc_start = page_num
                toc_end = page_num
            elif toc_start is not None and toc_end is not None:
                break
        # Extract TOC page text; fall back to a hard-coded range when detection fails.
        toc_pages = []
        toc_start = toc_start if toc_start is not None else 18
        toc_end = toc_end if toc_end is not None else toc_start + 9
        # Fix: inclusive end (+ 1) to match the tool's behavior — the original
        # `range(toc_start, toc_end)` silently dropped the last TOC page.
        for page_num in range(toc_start, toc_end + 1):
            page = doc[page_num]
            toc_pages.append(page.get_text() or "")
        print(toc_start, toc_end, toc_pages)
    finally:
        doc.close()  # release the document even if extraction raises

View File

@@ -33,4 +33,4 @@ parameters:
- "pdf"
extra:
python:
source: tools/pdf.py
source: tools/pdf_column_range.py

View File

@@ -0,0 +1,48 @@
import json
from collections.abc import Generator
from typing import Any
import fitz # PyMuPDF
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
class PdfExtractRangeTool(Tool):
    """Extract plain text from an inclusive, 0-based page range of a PDF.

    Yields the result twice — as a JSON-encoded text message and as a JSON
    message — with keys: "start", "end" (clamped indices actually used),
    "total_pages", and "text" (pages joined by a page-break separator).
    """

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        file = tool_parameters.get("file")
        if not file:
            yield self.create_text_message("Error: file is required")
            return
        start_page = int(tool_parameters.get("start_page", 0))
        end_page = int(tool_parameters.get("end_page", 0))
        # Open the PDF from the uploaded bytes.
        pdf_bytes = file.blob
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            num_pages = len(doc)
            # Clamp both bounds into [0, num_pages - 1]; end never precedes start.
            start_page = max(0, min(start_page, num_pages - 1))
            end_page = max(start_page, min(end_page, num_pages - 1))
            # Extract each page's text; sort=True yields natural reading order.
            page_texts = []
            for page_idx in range(start_page, end_page + 1):
                page = doc[page_idx]
                text = page.get_text("text", sort=True) or ""
                page_texts.append(text)
        finally:
            # Fix: close the document even if extraction raises; the original
            # only closed on the success path.
            doc.close()
        # Join all pages with an explicit page-break marker.
        full_text = "\n\n--- 分页 ---\n\n".join(page_texts)
        result = {
            "start": start_page,
            "end": end_page,
            "total_pages": end_page - start_page + 1,
            "text": full_text,
        }
        yield self.create_text_message(json.dumps(result, ensure_ascii=False))
        yield self.create_json_message(result)

View File

@@ -0,0 +1,68 @@
identity:
name: "pdf_extract_range"
author: "yslg"
label:
en_US: "Extract Page Range Text"
zh_Hans: "提取页面范围文本"
pt_BR: "Extrair Texto do Intervalo de Páginas"
ja_JP: "ページ範囲テキスト抽出"
description:
human:
en_US: "Extract plain text from a specified page range of a PDF file"
zh_Hans: "从PDF文件的指定页码范围提取纯文本"
pt_BR: "Extrair texto simples de um intervalo de páginas especificado de um arquivo PDF"
ja_JP: "PDFファイルの指定ページ範囲からプレーンテキストを抽出"
llm: "Extract plain text from PDF pages in the given start-end range. Returns concatenated text of all pages in range."
parameters:
- name: file
type: file
required: true
label:
en_US: PDF File
zh_Hans: PDF 文件
pt_BR: Arquivo PDF
ja_JP: PDFファイル
human_description:
en_US: "PDF file to extract text from"
zh_Hans: "要提取文本的 PDF 文件"
pt_BR: "Arquivo PDF para extrair texto"
ja_JP: "テキストを抽出するPDFファイル"
llm_description: "PDF file to extract page range text from"
form: llm
fileTypes:
- "pdf"
- name: start_page
type: number
required: true
label:
en_US: Start Page
zh_Hans: 起始页码
pt_BR: Página Inicial
ja_JP: 開始ページ
human_description:
en_US: "Start page index (0-based)"
zh_Hans: "起始页码从0开始"
pt_BR: "Índice da página inicial (base 0)"
ja_JP: "開始ページ番号0始まり"
llm_description: "Start page index (0-based)"
form: llm
default: 0
- name: end_page
type: number
required: true
label:
en_US: End Page
zh_Hans: 结束页码
pt_BR: Página Final
ja_JP: 終了ページ
human_description:
en_US: "End page index (0-based, inclusive)"
zh_Hans: "结束页码从0开始包含该页"
pt_BR: "Índice da página final (base 0, inclusivo)"
ja_JP: "終了ページ番号0始まり、含む"
llm_description: "End page index (0-based, inclusive)"
form: llm
default: 0
extra:
python:
source: tools/pdf_extract_range.py

View File

@@ -1,8 +1,9 @@
import json
from collections.abc import Generator
from io import BytesIO
from typing import Any
import PyPDF2
import fitz # PyMuPDF 核心库
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
@@ -16,21 +17,29 @@ class PdfSinglePageTool(Tool):
yield self.create_text_message("Error: file is required")
return
# 从字节流加载 PDF替换 PyPDF2 的 PdfReader
pdf_bytes = file.blob
reader = PyPDF2.PdfReader(BytesIO(pdf_bytes))
num_pages = len(reader.pages)
doc = fitz.open(stream=pdf_bytes, filetype="pdf") # 字节流方式打开
num_pages = len(doc)
# 页码边界处理(逻辑与原代码一致)
page_index = int(page)
if page_index < 0:
page_index = 0
if page_index >= num_pages:
page_index = num_pages - 1
selected_page = reader.pages[page_index]
text = selected_page.extract_text() or ""
# 提取指定页面文本PyMuPDF 方式)
selected_page = doc[page_index]
text = selected_page.get_text() or "" # get_text() 提取文本,比 PyPDF2 更精准
yield self.create_json_message({
# 关闭文档释放资源
doc.close()
result = {
"start": page_index,
"end": page_index,
"pages": [text]
})
}
yield self.create_text_message(json.dumps(result, ensure_ascii=False))
yield self.create_json_message(result)

View File

@@ -0,0 +1,209 @@
import json
import re
from collections.abc import Generator
from typing import Any
import fitz
from dify_plugin import Tool
from dify_plugin.entities.model.llm import LLMModelConfig
from dify_plugin.entities.model.message import SystemPromptMessage, UserPromptMessage
from dify_plugin.entities.tool import ToolInvokeMessage
class PdfSummaryTool(Tool):
    """Fast PDF page summary tool.

    Default behavior is optimized for throughput in large workflows:
    - Extract plain text and lightweight table data only.
    - Skip expensive image base64 and drawing path extraction.
    - Skip LLM by default unless `use_llm=true` is explicitly passed.
    """

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Extract a page range, emit structured page data, then a text summary.

        Yields a JSON message with per-page data, followed by a text message
        containing either the deterministic local Markdown summary or (when
        `use_llm=true` and a model is configured) its LLM refinement.
        """
        file = tool_parameters.get("file")
        if not file:
            yield self.create_text_message("Error: file is required")
            return
        start_page = self._to_int(tool_parameters.get("pdf_start_page"), 0)
        end_page = self._to_int(tool_parameters.get("pdf_end_page"), 0)
        model_config = tool_parameters.get("model")
        use_llm = self._to_bool(tool_parameters.get("use_llm"), False)
        # Per-page text cap, clamped to [800, 20000] characters.
        max_chars_per_page = self._to_int(tool_parameters.get("max_chars_per_page"), 6000)
        max_chars_per_page = max(800, min(max_chars_per_page, 20000))
        llm_prompt = tool_parameters.get(
            "llm_prompt",
            "请基于输入的PDF页面文本做简洁准确摘要输出中文要点。不要输出思考过程。",
        )
        pdf_bytes = file.blob
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            num_pages = len(doc)
            # Clamp the requested range into [0, num_pages - 1], end >= start.
            start_page = max(0, min(start_page, num_pages - 1))
            end_page = max(start_page, min(end_page, num_pages - 1))
            pages_data: list[dict[str, Any]] = []
            for page_idx in range(start_page, end_page + 1):
                page = doc[page_idx]
                page_data = self._extract_page_fast(page, page_idx, max_chars_per_page)
                pages_data.append(page_data)
            result = {
                "total_pages_extracted": len(pages_data),
                "page_range": {"start": start_page, "end": end_page},
                "pages": pages_data,
            }
            yield self.create_json_message(result)
            # Fast local summary first (deterministic, no model latency)
            local_text = self._build_local_summary(pages_data)
            # Optional LLM refinement, explicitly enabled only
            if use_llm and model_config:
                refined = self._summarize_with_llm(local_text, llm_prompt, model_config)
                final_text = refined if refined else local_text
            else:
                final_text = local_text
            if final_text:
                yield self.create_text_message(final_text)
        finally:
            doc.close()

    def _extract_page_fast(self, page: fitz.Page, page_idx: int, max_chars_per_page: int) -> dict[str, Any]:
        """Extract truncated text plus up to 3 lightweight tables from one page.

        Image/drawing/block fields are emitted empty by design (speed).
        Table detection failures are swallowed — tables are best-effort.
        """
        text = (page.get_text("text") or "").strip()
        if len(text) > max_chars_per_page:
            text = text[:max_chars_per_page] + "\n...[truncated]"
        tables: list[dict[str, Any]] = []
        try:
            tabs = page.find_tables()
            # Cap at 3 tables per page and 10 rows per table to bound payload size.
            for tab_idx, tab in enumerate(tabs.tables[:3]):
                cells = tab.extract() or []
                tables.append(
                    {
                        "index": tab_idx,
                        "rows": tab.row_count,
                        "cols": tab.col_count,
                        "cells": cells[:10],
                    }
                )
        except Exception:
            pass
        return {
            "page_number": page_idx,
            "text": text,
            "tables": tables,
            "images": [],
            "drawings_summary": [],
            "text_blocks": [],
            "width": float(page.rect.width),
            "height": float(page.rect.height),
        }

    def _build_local_summary(self, pages_data: list[dict[str, Any]]) -> str:
        """Output actual page content as Markdown (text + tables).

        No LLM needed downstream — the text is already usable Markdown.
        Pages are separated by an explicit page-break marker.
        """
        parts: list[str] = []
        for page in pages_data:
            text = (page.get("text") or "").strip()
            tables = page.get("tables") or []
            page_parts: list[str] = []
            if text:
                page_parts.append(text)
            for tab in tables:
                cells = tab.get("cells") or []
                # Need at least header + one data row to form a table.
                if len(cells) >= 2:
                    md = self._cells_to_md_table(cells)
                    if md:
                        page_parts.append(md)
            if page_parts:
                parts.append("\n\n".join(page_parts))
        return "\n\n--- 分页 ---\n\n".join(parts)

    @staticmethod
    def _cells_to_md_table(cells: list) -> str:
        """Render extracted table cells as a Markdown table (row 0 = header).

        Short rows are right-padded and long rows truncated to the header width;
        pipes and newlines inside cells are escaped/flattened.
        """
        if not cells:
            return ""
        header = cells[0]
        ncols = len(header)
        if ncols == 0:
            return ""
        clean = lambda c: str(c or "").replace("|", "\\|").replace("\n", " ").strip()
        lines = [
            "| " + " | ".join(clean(c) for c in header) + " |",
            "| " + " | ".join("---" for _ in range(ncols)) + " |",
        ]
        for row in cells[1:]:
            padded = list(row) + [""] * max(0, ncols - len(row))
            lines.append("| " + " | ".join(clean(c) for c in padded[:ncols]) + " |")
        return "\n".join(lines)

    def _summarize_with_llm(self, local_text: str, llm_prompt: str, model_config: dict[str, Any]) -> str:
        """Refine the local summary via a non-streaming LLM call.

        Returns the cleaned visible answer ("" when the model yields nothing
        usable, in which case the caller falls back to the local summary).
        """
        response = self.session.model.llm.invoke(
            model_config=LLMModelConfig(**model_config),
            prompt_messages=[
                SystemPromptMessage(content=llm_prompt),
                UserPromptMessage(content=local_text),
            ],
            stream=False,
        )
        llm_text = ""
        if hasattr(response, "message") and response.message:
            content = response.message.content
            if isinstance(content, str):
                llm_text = content
            elif isinstance(content, list):
                # Multi-part content: join each part's data (or its repr).
                llm_text = "".join(
                    item.data if hasattr(item, "data") else str(item)
                    for item in content
                )
        return self._extract_visible_answer(llm_text)

    @staticmethod
    def _extract_visible_answer(text: str) -> str:
        """Strip model scaffolding from raw LLM output.

        Prefers the content inside <|begin_of_box|>..<|end_of_box|> when
        present; otherwise removes <think> blocks, then drops any remaining
        <|...|> special tokens.
        """
        if not text:
            return ""
        box_match = re.search(r"<\|begin_of_box\|>([\s\S]*?)<\|end_of_box\|>", text)
        if box_match:
            text = box_match.group(1)
        else:
            text = re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE)
        text = re.sub(r"<\|[^>]+\|>", "", text)
        return text.strip()

    @staticmethod
    def _to_int(value: Any, default: int) -> int:
        """Coerce value to int, returning default for None/""/unparseable input."""
        try:
            if value is None or value == "":
                return default
            return int(value)
        except Exception:
            return default

    @staticmethod
    def _to_bool(value: Any, default: bool) -> bool:
        """Coerce common truthy/falsy strings to bool; default when ambiguous."""
        if value is None:
            return default
        if isinstance(value, bool):
            return value
        s = str(value).strip().lower()
        if s in {"1", "true", "yes", "on"}:
            return True
        if s in {"0", "false", "no", "off"}:
            return False
        return default

View File

@@ -0,0 +1,99 @@
identity:
name: "pdf_summary"
author: "yslg"
label:
en_US: "PDF Page Summary"
zh_Hans: "PDF页面概述"
pt_BR: "Resumo de Página PDF"
ja_JP: "PDFページ概要"
description:
human:
en_US: "Extract core elements (text, image, table, path) from PDF pages with coordinates, then summarize via LLM"
zh_Hans: "提取PDF页面核心元素文本、图片、表格、路径及坐标并通过LLM进行概述"
pt_BR: "Extrair elementos principais (texto, imagem, tabela, caminho) de páginas PDF com coordenadas e resumir via LLM"
ja_JP: "PDFページからコア要素テキスト、画像、テーブル、パスを座標付きで抽出し、LLMで要約"
llm: "Extract core elements (text, image, table, drawing path) with coordinates from specified PDF page range, then use LLM to summarize the content"
parameters:
- name: file
type: file
required: true
label:
en_US: PDF File
zh_Hans: PDF 文件
pt_BR: Arquivo PDF
ja_JP: PDFファイル
human_description:
en_US: "PDF file to process"
zh_Hans: "要处理的 PDF 文件"
pt_BR: "Arquivo PDF para processar"
ja_JP: "処理するPDFファイル"
llm_description: "PDF file to extract elements from and summarize"
form: llm
fileTypes:
- "pdf"
- name: pdf_start_page
type: number
required: true
label:
en_US: Start Page
zh_Hans: 起始页码
pt_BR: Página Inicial
ja_JP: 開始ページ
human_description:
en_US: "Start page index (0-based)"
zh_Hans: "起始页码从0开始"
pt_BR: "Índice da página inicial (base 0)"
ja_JP: "開始ページ番号0始まり"
llm_description: "Start page index (0-based) for element extraction"
form: llm
default: 0
- name: pdf_end_page
type: number
required: true
label:
en_US: End Page
zh_Hans: 结束页码
pt_BR: Página Final
ja_JP: 終了ページ
human_description:
en_US: "End page index (0-based, inclusive)"
zh_Hans: "结束页码从0开始包含该页"
pt_BR: "Índice da página final (base 0, inclusivo)"
ja_JP: "終了ページ番号0始まり、含む"
llm_description: "End page index (0-based, inclusive) for element extraction"
form: llm
default: 0
- name: model
type: model-selector
scope: llm
required: true
label:
en_US: LLM Model
zh_Hans: LLM 模型
pt_BR: Modelo LLM
ja_JP: LLMモデル
human_description:
en_US: "LLM model used for summarizing extracted content"
zh_Hans: "用于概述提取内容的 LLM 模型"
pt_BR: "Modelo LLM usado para resumir o conteúdo extraído"
ja_JP: "抽出内容の要約に使用するLLMモデル"
form: form
- name: llm_prompt
type: string
required: false
label:
en_US: LLM Prompt
zh_Hans: LLM 提示词
pt_BR: Prompt do LLM
ja_JP: LLMプロンプト
human_description:
en_US: "System prompt for LLM summarization"
zh_Hans: "LLM 概述的系统提示词"
pt_BR: "Prompt do sistema para resumo LLM"
ja_JP: "LLM要約用のシステムプロンプト"
llm_description: "System prompt guiding LLM on how to summarize the extracted PDF content"
form: form
default: "你是一个专业的文档分析助手。请根据以下从PDF页面中提取的结构化内容包含文本、图片信息、表格和矢量图形对每页内容进行准确、简洁的概述。"
extra:
python:
source: tools/pdf_summary.py

View File

@@ -0,0 +1,335 @@
import base64
import re
from collections import OrderedDict
from collections.abc import Generator
from typing import Any
import fitz
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
class PdfToMarkdownTool(Tool):
    """Convert PDF to a single Markdown file. No LLM needed.

    - Auto-detect TOC and organize content by chapters.
    - Extract text and tables as Markdown.
    - Embed raster images as base64.
    - Render vector drawings as base64 PNG.
    - Output one .md file via create_blob_message.
    """

    # Headings that identify a printed table-of-contents page.
    _TOC_PATTERNS = [
        r"目录", r"目 录", r"\u3000录",
        r"Table of Contents", r"Contents", r"目次",
    ]

    # ── entry point ──────────────────────────────────────────
    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Convert the uploaded PDF and yield the Markdown as text + .md blob."""
        file = tool_parameters.get("file")
        if not file:
            yield self.create_text_message("Error: file is required")
            return
        include_images = self._to_bool(tool_parameters.get("include_images"), True)
        # Rendering DPI for vector drawings, clamped to [72, 300].
        image_dpi = self._to_int(tool_parameters.get("image_dpi"), 150)
        image_dpi = max(72, min(image_dpi, 300))
        max_image_bytes = 2 * 1024 * 1024  # skip images > 2 MB raw
        doc = fitz.open(stream=file.blob, filetype="pdf")
        try:
            num_pages = len(doc)
            # 1) Build chapter map (metadata TOC → printed TOC → none)
            chapters, content_offset = self._build_chapter_map(doc, num_pages)
            # 2) Convert every page
            page_mds: list[str] = []
            for idx in range(num_pages):
                md = self._page_to_markdown(
                    doc, doc[idx], idx,
                    include_images, image_dpi, max_image_bytes,
                )
                page_mds.append(md)
            # 3) Assemble
            if chapters:
                final_md = self._assemble_by_chapters(
                    chapters, page_mds, content_offset, num_pages,
                )
            else:
                # No chapter structure: just join non-empty pages with rules.
                final_md = "\n\n---\n\n".join(m for m in page_mds if m.strip())
            # 4) Output: text (for variable aggregation) + blob (.md file)
            yield self.create_text_message(final_md)
            md_bytes = final_md.encode("utf-8")
            yield self.create_blob_message(
                blob=md_bytes,
                meta={"mime_type": "text/markdown"},
            )
        finally:
            doc.close()

    # ── chapter detection ────────────────────────────────────
    def _build_chapter_map(
        self, doc: fitz.Document, num_pages: int,
    ) -> tuple[dict, int]:
        """Return (chapters_dict, content_offset).

        Try embedded PDF TOC metadata first (reliable page mapping).
        Fall back to scanning printed TOC pages. content_offset maps printed
        page numbers to physical page indices (0 for metadata TOC).
        """
        toc = doc.get_toc()
        if toc:
            chapters = self._chapters_from_metadata(toc, num_pages)
            if chapters:
                return chapters, 0
        toc_start, toc_end = self._find_toc_pages(doc, num_pages)
        if toc_start is not None and toc_end is not None:
            toc_text = "\n".join(
                doc[i].get_text() or "" for i in range(toc_start, toc_end + 1)
            )
            chapters = self._parse_toc_lines(toc_text)
            if chapters:
                offset = self._guess_offset(chapters, toc_end)
                return chapters, offset
        return {}, 0

    def _chapters_from_metadata(
        self, toc: list, num_pages: int,
    ) -> dict[str, dict[str, int]]:
        """Build a chapter→{start,end} map from the embedded TOC metadata.

        Keeps levels 1-2 only; each entry's end is the page before the next
        entry's start (last entry runs to the final page). Metadata pages are
        1-based, hence the `p - 1` conversion.
        """
        top = [(t, max(0, p - 1)) for lvl, t, p in toc if lvl <= 2 and p >= 1]
        if not top:
            return {}
        chapters: dict[str, dict[str, int]] = OrderedDict()
        for i, (title, start) in enumerate(top):
            end = top[i + 1][1] - 1 if i + 1 < len(top) else num_pages - 1
            chapters[title] = {"start": start, "end": max(start, end)}
        return chapters

    def _find_toc_pages(self, doc, num_pages):
        """Find the contiguous run of printed-TOC pages in the first 30 pages.

        Returns (start, end) 0-based indices, or (None, None) when absent.
        """
        toc_start = toc_end = None
        for pn in range(min(num_pages, 30)):
            text = doc[pn].get_text() or ""
            if any(re.search(p, text, re.IGNORECASE) for p in self._TOC_PATTERNS):
                if toc_start is None:
                    toc_start = pn
                toc_end = pn
            elif toc_start is not None:
                break
        return toc_start, toc_end

    def _parse_toc_lines(self, text: str) -> dict[str, dict[str, int]]:
        """Parse printed-TOC text into chapter→{start,end} (printed page nos).

        Truncates at figure/table list headings, matches "Title .... 123"
        lines, drops noise entries, deduplicates by first appearance, and
        derives each end page from the next chapter's start.
        """
        # Cut off figure/table lists that would pollute the chapter map.
        m = re.search(
            r"^(List\s+of\s+Figures|List\s+of\s+Tables|图目录|表目录)",
            text, re.IGNORECASE | re.MULTILINE,
        )
        if m:
            text = text[: m.start()]
        pat = re.compile(
            r"^\s*(?P<title>.+?)\s*(?:\.{2,}|\s)\s*(?P<page>\d{1,5})\s*$"
        )
        entries: list[tuple[str, int]] = []
        for raw in text.splitlines():
            line = raw.strip()
            # Skip blanks, very short lines, and bare page numbers.
            if not line or len(line) < 3 or re.fullmatch(r"\d+", line):
                continue
            m2 = pat.match(line)
            if not m2:
                continue
            title = re.sub(r"\s+", " ", m2.group("title")).strip("-_: ")
            page = self._to_int(m2.group("page"), None)
            if not title or page is None or len(title) <= 1:
                continue
            if title.lower() in {"page", "pages", "目录", "contents"}:
                continue
            entries.append((title, page))
        if not entries:
            return {}
        # Deduplicate, keeping the earliest page for each title.
        dedup: OrderedDict[str, int] = OrderedDict()
        for t, p in entries:
            dedup.setdefault(t, p)
        titles = list(dedup.keys())
        pages = [dedup[t] for t in titles]
        catalog: dict[str, dict[str, int]] = OrderedDict()
        for i, t in enumerate(titles):
            s = pages[i]
            e = max(s, pages[i + 1] - 1) if i + 1 < len(pages) else s
            catalog[t] = {"start": s, "end": e}
        return catalog

    @staticmethod
    def _guess_offset(chapters: dict, toc_end: int) -> int:
        """Estimate printed-page→physical-index offset.

        Assumes the first chapter starts on the page right after the TOC;
        NOTE(review): this heuristic breaks when front matter follows the TOC.
        """
        first_page = None
        for info in chapters.values():
            s = info["start"]
            if first_page is None or s < first_page:
                first_page = s
        if first_page is None:
            return 0
        return (toc_end + 1) - first_page

    # ── per-page conversion ──────────────────────────────────
    def _page_to_markdown(
        self,
        doc: fitz.Document,
        page: fitz.Page,
        page_idx: int,
        include_images: bool,
        image_dpi: int,
        max_image_bytes: int,
    ) -> str:
        """Convert one page to Markdown: text, tables, raster images, drawings.

        All image/table extraction is best-effort; failures are swallowed so a
        bad page never aborts the whole conversion.
        """
        parts: list[str] = []
        # ── text ──
        text = (page.get_text("text", sort=True) or "").strip()
        if text:
            parts.append(text)
        # ── tables → Markdown ──
        try:
            # Cap at 5 tables per page; need header + at least one data row.
            for tab in (page.find_tables().tables or [])[:5]:
                cells = tab.extract() or []
                if len(cells) >= 2:
                    md = self._cells_to_md_table(cells)
                    if md:
                        parts.append(md)
        except Exception:
            pass
        if not include_images:
            return "\n\n".join(parts)
        # ── embedded raster images ──
        try:
            for img_idx, img_info in enumerate(page.get_images(full=True)):
                xref = img_info[0]
                try:
                    data = doc.extract_image(xref)
                    if not data or not data.get("image"):
                        continue
                    raw = data["image"]
                    if len(raw) > max_image_bytes:
                        continue
                    # skip tiny icons (< 20x20)
                    w = data.get("width", 0)
                    h = data.get("height", 0)
                    if w < 20 and h < 20:
                        continue
                    ext = data.get("ext", "png")
                    mime = "image/jpeg" if ext in ("jpg", "jpeg") else f"image/{ext}"
                    b64 = base64.b64encode(raw).decode("ascii")
                    parts.append(
                        f"![img-p{page_idx}-{img_idx}](data:{mime};base64,{b64})"
                    )
                except Exception:
                    pass
        except Exception:
            pass
        # ── vector drawings → render as PNG ──
        try:
            drawings = page.get_drawings()
            # Require at least 3 drawing ops so stray rules/underlines are ignored.
            if len(drawings) >= 3:
                valid_rects: list[fitz.Rect] = []
                for d in drawings:
                    r = d.get("rect")
                    if r:
                        try:
                            rect = fitz.Rect(r)
                            if rect.is_valid and not rect.is_empty:
                                valid_rects.append(rect)
                        except Exception:
                            pass
                if valid_rects:
                    # Union of all drawing rects, clipped to the page.
                    bbox = valid_rects[0]
                    for r in valid_rects[1:]:
                        bbox |= r
                    bbox &= page.rect
                    if bbox.width > 30 and bbox.height > 30:
                        scale = image_dpi / 72
                        mat = fitz.Matrix(scale, scale)
                        pix = page.get_pixmap(matrix=mat, clip=bbox)
                        png = pix.tobytes("png")
                        if len(png) <= max_image_bytes:
                            b64 = base64.b64encode(png).decode("ascii")
                            parts.append(
                                f"![drawing-p{page_idx}](data:image/png;base64,{b64})"
                            )
        except Exception:
            pass
        return "\n\n".join(parts)

    # ── assembly ─────────────────────────────────────────────
    def _assemble_by_chapters(
        self,
        chapters: dict[str, dict[str, int]],
        page_mds: list[str],
        offset: int,
        num_pages: int,
    ) -> str:
        """Group per-page Markdown under `# chapter` headings.

        Applies the printed→physical offset, clamps ranges to valid pages,
        and separates chapters with horizontal rules.
        """
        parts: list[str] = []
        for name, info in chapters.items():
            s = info["start"] + offset
            e = info["end"] + offset
            s = max(0, min(s, num_pages - 1))
            e = max(s, min(e, num_pages - 1))
            ch: list[str] = [f"# {name}\n"]
            for idx in range(s, e + 1):
                if idx < len(page_mds) and page_mds[idx].strip():
                    ch.append(page_mds[idx])
            parts.append("\n\n".join(ch))
        return "\n\n---\n\n".join(parts)

    # ── helpers ──────────────────────────────────────────────
    @staticmethod
    def _cells_to_md_table(cells: list) -> str:
        """Render extracted table cells as a Markdown table (row 0 = header)."""
        if not cells:
            return ""
        header = cells[0]
        ncols = len(header)
        if ncols == 0:
            return ""
        clean = lambda c: str(c or "").replace("|", "\\|").replace("\n", " ").strip()
        lines = [
            "| " + " | ".join(clean(c) for c in header) + " |",
            "| " + " | ".join("---" for _ in range(ncols)) + " |",
        ]
        for row in cells[1:]:
            # Pad short rows / truncate long rows to the header width.
            padded = list(row) + [""] * max(0, ncols - len(row))
            lines.append("| " + " | ".join(clean(c) for c in padded[:ncols]) + " |")
        return "\n".join(lines)

    @staticmethod
    def _to_int(value: Any, default: int | None) -> int | None:
        """Coerce value to int, returning default for None/""/unparseable input."""
        try:
            if value is None or value == "":
                return default
            return int(value)
        except Exception:
            return default

    @staticmethod
    def _to_bool(value: Any, default: bool) -> bool:
        """Coerce common truthy/falsy strings to bool; default when ambiguous."""
        if value is None:
            return default
        if isinstance(value, bool):
            return value
        s = str(value).strip().lower()
        if s in {"1", "true", "yes", "on"}:
            return True
        if s in {"0", "false", "no", "off"}:
            return False
        return default

View File

@@ -0,0 +1,68 @@
identity:
name: "pdf_to_markdown"
author: "yslg"
label:
en_US: "PDF to Markdown"
zh_Hans: "PDF转Markdown"
pt_BR: "PDF para Markdown"
ja_JP: "PDFからMarkdown"
description:
human:
en_US: "Convert PDF to a single Markdown file with embedded base64 images. No LLM needed."
zh_Hans: "将PDF转换为单个Markdown文件图片以base64嵌入无需大模型"
pt_BR: "Converter PDF em um arquivo Markdown com imagens base64 incorporadas. Sem LLM."
ja_JP: "PDFをbase64画像埋め込みの単一Markdownファイルに変換。LLM不要。"
llm: "Convert a PDF file into a single Markdown (.md) file. Extracts text, tables, images (base64), and vector drawings. Auto-detects TOC and organizes by chapters. No LLM needed."
parameters:
- name: file
type: file
required: true
label:
en_US: PDF File
zh_Hans: PDF 文件
pt_BR: Arquivo PDF
ja_JP: PDFファイル
human_description:
en_US: "PDF file to convert"
zh_Hans: "要转换的 PDF 文件"
pt_BR: "Arquivo PDF para converter"
ja_JP: "変換するPDFファイル"
llm_description: "PDF file to convert to Markdown"
form: llm
fileTypes:
- "pdf"
- name: include_images
type: boolean
required: false
label:
en_US: Include Images
zh_Hans: 包含图片
pt_BR: Incluir Imagens
ja_JP: 画像を含める
human_description:
en_US: "Whether to embed images as base64 in the Markdown output (default: true)"
zh_Hans: "是否将图片以base64嵌入Markdown输出默认true"
pt_BR: "Se deve incorporar imagens como base64 na saída Markdown (padrão: verdadeiro)"
ja_JP: "Markdown出力にbase64として画像を埋め込むかどうかデフォルトはい"
llm_description: "Set to true to embed images as base64, false to skip images"
form: form
default: true
- name: image_dpi
type: number
required: false
label:
en_US: Image DPI
zh_Hans: 图片DPI
pt_BR: DPI da Imagem
ja_JP: 画像DPI
human_description:
en_US: "DPI for rendering vector drawings (72-300, default: 150)"
zh_Hans: "矢量图渲染DPI72-300默认150"
pt_BR: "DPI para renderizar desenhos vetoriais (72-300, padrão: 150)"
ja_JP: "ベクター描画のレンダリングDPI72-300、デフォルト150"
llm_description: "Resolution for rendering vector drawings as images. Range 72-300, default 150."
form: form
default: 150
extra:
python:
source: tools/pdf_to_markdown.py

View File

@@ -0,0 +1,273 @@
import json
import re
from collections import OrderedDict
from collections.abc import Generator
from typing import Any
from dify_plugin import Tool
from dify_plugin.entities.model.llm import LLMModelConfig
from dify_plugin.entities.model.message import SystemPromptMessage, UserPromptMessage
from dify_plugin.entities.tool import ToolInvokeMessage
# System prompt for the optional LLM TOC-parsing fallback: instructs the model
# to emit a bare JSON object mapping chapter titles to printed page ranges.
_SYSTEM_PROMPT = """You parse PDF table-of-contents text.
Return only valid JSON object, no markdown fences, no explanation.
Output schema:
{
"Chapter Name": {"start": 1, "end": 5},
"Another": {"start": 6, "end": 20}
}
Rules:
- start/end are integer printed page numbers from TOC.
- If end is unknown, use same value as start.
- Keep chapter names exactly as in TOC text.
"""
class PdfTocTool(Tool):
def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
toc_start = self._to_int(tool_parameters.get("toc_start"), None)
toc_end = self._to_int(tool_parameters.get("toc_end"), None)
toc_pages = (tool_parameters.get("toc_pages") or "").strip()
model_config = tool_parameters.get("model")
if toc_start is None or toc_end is None:
yield self.create_text_message("Error: toc_start and toc_end are required")
return
if not toc_pages:
yield self.create_text_message("Error: toc_pages text is empty")
return
cleaned = self._strip_index_lists(toc_pages)
# 1) deterministic parser first
catalog = self._parse_toc_lines(cleaned)
# 2) optional LLM fallback/enhance only when deterministic parser gives no result
llm_raw_output = ""
llm_error = None
if not catalog and model_config:
llm_catalog, llm_raw_output, llm_error = self._parse_with_llm(
toc_start=toc_start,
toc_end=toc_end,
toc_pages=cleaned,
model_config=model_config,
)
if llm_catalog:
catalog = self._normalize_catalog(llm_catalog)
result: dict[str, Any] = {
"toc_start": toc_start,
"toc_end": toc_end,
"catalog": catalog,
"meta": {
"catalog_size": len(catalog),
"parser": "rule" if catalog else "none",
},
}
if llm_raw_output:
result["meta"]["llm_used"] = True
if llm_error:
result["meta"]["llm_error"] = llm_error
# always return valid json text payload for downstream json.loads
yield self.create_text_message(json.dumps(result, ensure_ascii=False))
yield self.create_json_message(result)
def _parse_with_llm(
self,
toc_start: int,
toc_end: int,
toc_pages: str,
model_config: dict[str, Any],
) -> tuple[dict[str, Any] | None, str, str | None]:
user_content = (
f"TOC page index range: {toc_start}..{toc_end}\n\n"
f"TOC raw text:\n{toc_pages}"
)
response = self.session.model.llm.invoke(
model_config=LLMModelConfig(**model_config),
prompt_messages=[
SystemPromptMessage(content=_SYSTEM_PROMPT),
UserPromptMessage(content=user_content),
],
stream=False,
)
llm_text = ""
if hasattr(response, "message") and response.message:
content = response.message.content
if isinstance(content, str):
llm_text = content
elif isinstance(content, list):
llm_text = "".join(
item.data if hasattr(item, "data") else str(item) for item in content
)
parsed = self._extract_json_object(llm_text)
if parsed is None:
return None, llm_text, "Failed to parse LLM output as JSON"
if not isinstance(parsed, dict):
return None, llm_text, "LLM output JSON is not an object"
return parsed, llm_text, None
@staticmethod
def _strip_index_lists(text: str) -> str:
# Stop before common appendix lists that pollute TOC parsing.
pattern = re.compile(
r"^(List\s+of\s+Figures|List\s+of\s+Tables|图目录|表目录)",
re.IGNORECASE | re.MULTILINE,
)
m = pattern.search(text)
return text[: m.start()].rstrip() if m else text
def _parse_toc_lines(self, text: str) -> dict[str, dict[str, int]]:
"""Parse lines like:
1.2 Engine Overview ........ 35
Appendix A 120
"""
line_pattern = re.compile(
r"^\s*(?P<title>.+?)\s*(?:\.{2,}|\s)\s*(?P<page>\d{1,5})\s*$"
)
entries: list[tuple[str, int]] = []
for raw in text.splitlines():
line = raw.strip()
if not line or len(line) < 3:
continue
if re.fullmatch(r"\d+", line):
continue
m = line_pattern.match(line)
if not m:
continue
title = re.sub(r"\s+", " ", m.group("title")).strip("-_: ")
page = self._to_int(m.group("page"), None)
if not title or page is None:
continue
# Skip obvious noise.
if len(title) <= 1 or title.lower() in {"page", "pages", "目录", "contents"}:
continue
entries.append((title, page))
if not entries:
return {}
# Deduplicate keeping earliest appearance.
dedup: OrderedDict[str, int] = OrderedDict()
for title, page in entries:
if title not in dedup:
dedup[title] = page
titles = list(dedup.keys())
pages = [dedup[t] for t in titles]
catalog: dict[str, dict[str, int]] = {}
for i, title in enumerate(titles):
start = pages[i]
if i + 1 < len(pages):
next_start = pages[i + 1]
end = max(start, next_start - 1)
else:
end = start
catalog[title] = {"start": int(start), "end": int(end)}
return catalog
def _normalize_catalog(self, raw: dict[str, Any]) -> dict[str, dict[str, int]]:
catalog: dict[str, dict[str, int]] = {}
source = raw.get("catalog") if isinstance(raw.get("catalog"), dict) else raw
if not isinstance(source, dict):
return catalog
for name, value in source.items():
if not isinstance(name, str) or not isinstance(value, dict):
continue
start = self._to_int(value.get("start"), None)
end = self._to_int(value.get("end"), start)
if start is None:
continue
if end is None:
end = start
catalog[name] = {"start": int(start), "end": int(max(start, end))}
return catalog
@staticmethod
def _extract_json_object(text: str) -> Any:
    """Best-effort extraction of a JSON value from raw LLM output.

    Candidates are tried in order: fenced ```json code blocks, the first
    balanced {...} region, then the whole text.  The first candidate that
    parses (possibly after minor repair) is returned, else None.
    """
    if not text:
        return None
    candidates: list[str] = []
    for block in re.findall(
        r"```(?:json)?\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE
    ):
        stripped = block.strip()
        if stripped:
            candidates.append(stripped)
    first_object = PdfTocTool._extract_first_brace_object(text)
    if first_object:
        candidates.append(first_object)
    candidates.append(text.strip())
    for candidate in candidates:
        result = PdfTocTool._json_try_parse(candidate)
        if result is not None:
            return result
    return None
@staticmethod
def _extract_first_brace_object(text: str) -> str | None:
start = text.find("{")
if start < 0:
return None
depth = 0
in_str = False
escape = False
for i in range(start, len(text)):
ch = text[i]
if in_str:
if escape:
escape = False
elif ch == "\\":
escape = True
elif ch == '"':
in_str = False
continue
if ch == '"':
in_str = True
elif ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
return text[start : i + 1]
return None
@staticmethod
def _json_try_parse(text: str) -> Any:
try:
return json.loads(text)
except Exception:
pass
# Minimal repair: remove trailing commas before } or ]
repaired = re.sub(r",\s*([}\]])", r"\1", text)
try:
return json.loads(repaired)
except Exception:
return None
@staticmethod
def _to_int(value: Any, default: int | None) -> int | None:
try:
if value is None or value == "":
return default
return int(value)
except Exception:
return default

View File

@@ -0,0 +1,79 @@
identity:
name: "pdf_toc"
author: "yslg"
label:
en_US: "PDF TOC Parser"
zh_Hans: "PDF目录解析"
pt_BR: "Analisador de Sumário PDF"
ja_JP: "PDF目次解析"
description:
human:
en_US: "Parse PDF table-of-contents text (from pdf_column_range) into structured JSON catalog via LLM"
    zh_Hans: "通过LLM将PDF目录文本(来自目录页提取工具的输出)解析为结构化JSON目录"
pt_BR: "Analisar texto do sumário PDF em catálogo JSON estruturado via LLM"
ja_JP: "LLMを使用してPDF目次テキストを構造化JSONカタログに解析"
llm: "Parse PDF table-of-contents text into structured JSON with chapter names and page ranges. Input is the output of pdf_column_range tool (start/end/pages)."
parameters:
- name: toc_start
type: number
required: true
label:
en_US: TOC Start Page
zh_Hans: 目录起始页
pt_BR: Página Inicial do Sumário
ja_JP: 目次開始ページ
human_description:
en_US: "Start page index of TOC (from pdf_column_range output)"
      zh_Hans: "目录起始页码(来自目录页提取工具输出的 start)"
pt_BR: "Índice da página inicial do sumário"
ja_JP: "目次の開始ページ番号"
llm_description: "Start page index of TOC section, from pdf_column_range output field 'start'"
form: llm
- name: toc_end
type: number
required: true
label:
en_US: TOC End Page
zh_Hans: 目录结束页
pt_BR: Página Final do Sumário
ja_JP: 目次終了ページ
human_description:
en_US: "End page index of TOC (from pdf_column_range output)"
      zh_Hans: "目录结束页码(来自目录页提取工具输出的 end)"
pt_BR: "Índice da página final do sumário"
ja_JP: "目次の終了ページ番号"
llm_description: "End page index of TOC section, from pdf_column_range output field 'end'"
form: llm
- name: toc_pages
type: string
required: true
label:
en_US: TOC Page Text
zh_Hans: 目录页文本
pt_BR: Texto das Páginas do Sumário
ja_JP: 目次ページテキスト
human_description:
en_US: "Raw text content of TOC pages (from pdf_column_range output 'pages' array, joined)"
zh_Hans: "目录页原始文本内容(来自目录页提取工具输出的 pages 数组)"
pt_BR: "Conteúdo de texto bruto das páginas do sumário"
ja_JP: "目次ページの生テキスト内容"
llm_description: "Raw text content extracted from TOC pages, from pdf_column_range output field 'pages'"
form: llm
- name: model
type: model-selector
scope: llm
required: true
label:
en_US: LLM Model
zh_Hans: LLM 模型
pt_BR: Modelo LLM
ja_JP: LLMモデル
human_description:
en_US: "LLM model for parsing TOC into structured JSON"
zh_Hans: "用于解析目录的 LLM 模型"
pt_BR: "Modelo LLM para análise do sumário"
ja_JP: "目次解析用のLLMモデル"
form: form
extra:
python:
source: tools/pdf_toc.py