37 lines
1.0 KiB
Python
37 lines
1.0 KiB
Python
|
|
from collections.abc import Generator
|
||
|
|
from io import BytesIO
|
||
|
|
from typing import Any
|
||
|
|
|
||
|
|
import PyPDF2
|
||
|
|
from dify_plugin import Tool
|
||
|
|
from dify_plugin.entities.tool import ToolInvokeMessage
|
||
|
|
|
||
|
|
|
||
|
|
class PdfSinglePageTool(Tool):
    """Tool that extracts the text of a single page from a PDF file."""

    def _invoke(self, tool_parameters: dict[str, Any]) -> Generator[ToolInvokeMessage]:
        """Yield the extracted text of one PDF page as a JSON message.

        Args:
            tool_parameters: Invocation parameters. Reads two keys:
                ``"file"`` - an object whose ``.blob`` attribute holds the
                raw PDF bytes (required);
                ``"page"`` - zero-based page index, default ``0``. Values
                outside the document are clamped to the first/last page.

        Yields:
            A text message describing the problem when the file is missing,
            the page value is not an integer, or the PDF contains no pages.
            Otherwise a JSON message with keys ``"start"`` and ``"end"``
            (both the resolved page index) and ``"pages"`` (a one-element
            list holding that page's text, ``""`` when nothing is extracted).
        """
        file = tool_parameters.get("file")
        if not file:
            yield self.create_text_message("Error: file is required")
            return

        # A key that is present but None would bypass the .get() default,
        # so treat None the same as "not supplied".
        raw_page = tool_parameters.get("page", 0)
        try:
            page_index = 0 if raw_page is None else int(raw_page)
        except (TypeError, ValueError, OverflowError):
            yield self.create_text_message("Error: page must be an integer")
            return

        reader = PyPDF2.PdfReader(BytesIO(file.blob))
        num_pages = len(reader.pages)
        if num_pages == 0:
            # Without this guard the clamp below would produce index -1 and
            # the page lookup would raise IndexError on an empty document.
            yield self.create_text_message("Error: PDF has no pages")
            return

        # Clamp the requested index into [0, num_pages - 1].
        page_index = max(0, min(page_index, num_pages - 1))

        # extract_text() may return None/empty; normalise to "".
        text = reader.pages[page_index].extract_text() or ""

        yield self.create_json_message({
            "start": page_index,
            "end": page_index,
            "pages": [text],
        })
|