This commit is contained in:
2026-03-06 14:50:43 +08:00
parent 843146cdd7
commit 91ff28bdcf
18 changed files with 1316 additions and 100 deletions

View File

@@ -0,0 +1,68 @@
# Tool identity: machine-readable name, author, and the localized display label.
identity:
  name: "pdf_extract_range"
  author: "yslg"
  # Display name, one entry per supported locale.
  label:
    en_US: "Extract Page Range Text"
    zh_Hans: "提取页面范围文本"
    pt_BR: "Extrair Texto do Intervalo de Páginas"
    ja_JP: "ページ範囲テキスト抽出"
# Tool description: localized text for people, plus a single English
# description under the `llm` key.
description:
  human:
    en_US: "Extract plain text from a specified page range of a PDF file"
    zh_Hans: "从PDF文件的指定页码范围提取纯文本"
    pt_BR: "Extrair texto simples de um intervalo de páginas especificado de um arquivo PDF"
    ja_JP: "PDFファイルの指定ページ範囲からプレーンテキストを抽出"
  llm: "Extract plain text from PDF pages in the given start-end range. Returns concatenated text of all pages in range."
# Tool inputs: the PDF file plus a 0-based, inclusive page range.
parameters:
  # The PDF document to read.
  - name: file
    type: file
    required: true
    label:
      en_US: "PDF File"
      zh_Hans: "PDF 文件"
      pt_BR: "Arquivo PDF"
      ja_JP: "PDFファイル"
    human_description:
      en_US: "PDF file to extract text from"
      zh_Hans: "要提取文本的 PDF 文件"
      pt_BR: "Arquivo PDF para extrair texto"
      ja_JP: "テキストを抽出するPDFファイル"
    llm_description: "PDF file to extract page range text from"
    form: llm
    # NOTE(review): confirm that `fileTypes` is a key the plugin parameter
    # schema actually recognises; unknown keys may be ignored silently.
    fileTypes:
      - "pdf"

  # First page of the range (0-based index).
  - name: start_page
    type: number
    required: true
    label:
      en_US: "Start Page"
      zh_Hans: "起始页码"
      pt_BR: "Página Inicial"
      ja_JP: "開始ページ"
    human_description:
      en_US: "Start page index (0-based)"
      zh_Hans: "起始页码（从0开始）"
      pt_BR: "Índice da página inicial (base 0)"
      ja_JP: "開始ページ番号（0始まり）"
    llm_description: "Start page index (0-based)"
    form: llm
    default: 0

  # Last page of the range (0-based index, inclusive). With both defaults
  # left at 0, the described range is the first page only.
  - name: end_page
    type: number
    required: true
    label:
      en_US: "End Page"
      zh_Hans: "结束页码"
      pt_BR: "Página Final"
      ja_JP: "終了ページ"
    human_description:
      en_US: "End page index (0-based, inclusive)"
      zh_Hans: "结束页码（从0开始，包含该页）"
      pt_BR: "Índice da página final (base 0, inclusivo)"
      ja_JP: "終了ページ番号（0始まり、含む）"
    llm_description: "End page index (0-based, inclusive)"
    form: llm
    default: 0
# Path of the Python source file associated with this tool definition
# (presumably the module that implements it; verify against the plugin loader).
extra:
  python:
    source: "tools/pdf_extract_range.py"