Files
urbanLifeline/difyPlugin/pdf/tools/pdf_single_page.py
2026-03-06 14:50:43 +08:00

45 lines
1.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
from collections.abc import Generator
from io import BytesIO
from typing import Any
import fitz # PyMuPDF 核心库
from dify_plugin import Tool
from dify_plugin.entities.tool import ToolInvokeMessage
class PdfSinglePageTool(Tool):
    """Dify tool that extracts the text of a single page from a PDF file."""

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Extract one page's text and emit it as a text and a JSON message.

        Args:
            tool_parameters: Tool inputs. ``file`` is the uploaded PDF; its
                ``blob`` attribute is read as the raw PDF bytes. ``page`` is
                the zero-based page index (defaults to 0). Out-of-range
                indexes are clamped to the first/last page.

        Yields:
            On success, a text message containing the JSON-encoded result
            followed by a JSON message with the same result. The result has
            the keys ``start`` and ``end`` (both the resolved page index) and
            ``pages`` (a one-element list holding the page text).
            On invalid input, a single text message starting with ``Error:``.
        """
        file = tool_parameters.get("file")
        if not file:
            yield self.create_text_message("Error: file is required")
            return

        # The "page" key may be present but unset (None / empty string);
        # treat that the same as the documented default of page 0.
        raw_page = tool_parameters.get("page", 0)
        if raw_page is None or raw_page == "":
            page_index = 0
        else:
            try:
                page_index = int(raw_page)
            except (TypeError, ValueError):
                yield self.create_text_message("Error: page must be an integer")
                return

        # Load the PDF from the in-memory byte stream. The context manager
        # guarantees the document is closed even if text extraction raises.
        with fitz.open(stream=file.blob, filetype="pdf") as doc:
            num_pages = len(doc)
            text = ""
            if num_pages > 0:
                # Clamp the requested index into [0, num_pages - 1].
                page_index = max(0, min(page_index, num_pages - 1))
                text = doc[page_index].get_text() or ""

        # Without this guard an empty document would clamp the index to -1
        # and fail with an opaque indexing error.
        if num_pages == 0:
            yield self.create_text_message("Error: PDF has no pages")
            return

        result = {
            "start": page_index,
            "end": page_index,
            "pages": [text],
        }
        yield self.create_text_message(json.dumps(result, ensure_ascii=False))
        yield self.create_json_message(result)