import json
from collections.abc import Generator
from io import BytesIO
from typing import Any

import fitz  # PyMuPDF core library
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage


class PdfSinglePageTool(Tool):
    """Dify tool that extracts the text of one page of a PDF file."""

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Extract the text of a single PDF page and emit it as text and JSON.

        Args:
            tool_parameters: Tool inputs. Reads two keys:
                ``"file"`` - required; an object whose ``blob`` attribute
                holds the PDF content, which is handed to PyMuPDF as a stream.
                ``"page"`` - optional zero-based page index (default 0).
                Values outside the document are clamped to the first/last
                page.

        Yields:
            On success, a text message containing the JSON-encoded result
            followed by a JSON message with the same result. The result has
            the keys ``"start"`` and ``"end"`` (both the resolved page index)
            and ``"pages"`` (a one-element list with the page text).
            On invalid input, a single text message starting with ``Error:``.

        Errors raised by PyMuPDF while opening or reading the document are
        not caught here and propagate to the caller.
        """
        file = tool_parameters.get("file")
        page = tool_parameters.get("page", 0)

        if not file:
            yield self.create_text_message("Error: file is required")
            return

        # An explicit null for "page" bypasses the .get() default, so treat
        # it the same as an omitted parameter.
        if page is None:
            page = 0
        try:
            requested_index = int(page)
        except (TypeError, ValueError):
            yield self.create_text_message("Error: page must be an integer")
            return

        # Open the PDF from its in-memory bytes. The context manager closes
        # the document even if page access or text extraction raises.
        with fitz.open(stream=file.blob, filetype="pdf") as doc:
            num_pages = len(doc)
            if num_pages > 0:
                # Clamp the requested index into [0, num_pages - 1].
                page_index = max(0, min(requested_index, num_pages - 1))
                text = doc[page_index].get_text() or ""

        # Emit messages only after the document has been closed so that the
        # generator never stays suspended while holding the open document.
        if num_pages == 0:
            yield self.create_text_message("Error: PDF has no pages")
            return

        result = {
            "start": page_index,
            "end": page_index,
            "pages": [text],
        }
        yield self.create_text_message(json.dumps(result, ensure_ascii=False))
        yield self.create_json_message(result)