Files
urbanLifeline/difyPlugin/pdf/tools/pdf_toc.yaml
2026-03-06 14:50:43 +08:00

80 lines
3.0 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Tool identity: machine-readable name, author, and localized display labels.
identity:
  name: 'pdf_toc'
  author: 'yslg'
  # One display label per supported locale.
  label:
    en_US: 'PDF TOC Parser'
    zh_Hans: 'PDF目录解析'
    pt_BR: 'Analisador de Sumário PDF'
    ja_JP: 'PDF目次解析'
# Tool descriptions: localized text under `human`, one English string under `llm`.
description:
  human:
    en_US: 'Parse PDF table-of-contents text (from pdf_column_range) into structured JSON catalog via LLM'
    # Parentheses restored around the "from the TOC-page extraction tool's output"
    # aside, mirroring the en_US wording.
    zh_Hans: '通过LLM将PDF目录文本(来自目录页提取工具的输出)解析为结构化JSON目录'
    pt_BR: 'Analisar texto do sumário PDF em catálogo JSON estruturado via LLM'
    ja_JP: 'LLMを使用してPDF目次テキストを構造化JSONカタログに解析'
  # Folded scalar (`>-`): the lines below join with single spaces into one string
  # with no trailing newline.
  llm: >-
    Parse PDF table-of-contents text into structured JSON with chapter names
    and page ranges. Input is the output of pdf_column_range tool
    (start/end/pages).
# Tool inputs. The first three carry the fields of the pdf_column_range output
# (start, end, pages); the fourth selects the LLM used for parsing.
parameters:
  # Start page of the TOC section ('start' field of pdf_column_range output).
  - name: toc_start
    type: number
    required: true
    label:
      en_US: TOC Start Page
      zh_Hans: 目录起始页
      pt_BR: Página Inicial do Sumário
      ja_JP: 目次開始ページ
    human_description:
      en_US: 'Start page index of TOC (from pdf_column_range output)'
      # Closing parenthesis added; the original string left "(" unbalanced.
      zh_Hans: '目录起始页码(来自目录页提取工具输出的 start)'
      pt_BR: 'Índice da página inicial do sumário'
      ja_JP: '目次の開始ページ番号'
    # Double quotes kept because the text itself contains single quotes.
    llm_description: "Start page index of TOC section, from pdf_column_range output field 'start'"
    form: llm

  # End page of the TOC section ('end' field of pdf_column_range output).
  - name: toc_end
    type: number
    required: true
    label:
      en_US: TOC End Page
      zh_Hans: 目录结束页
      pt_BR: Página Final do Sumário
      ja_JP: 目次終了ページ
    human_description:
      en_US: 'End page index of TOC (from pdf_column_range output)'
      # Closing parenthesis added; the original string left "(" unbalanced.
      zh_Hans: '目录结束页码(来自目录页提取工具输出的 end)'
      pt_BR: 'Índice da página final do sumário'
      ja_JP: '目次の終了ページ番号'
    llm_description: "End page index of TOC section, from pdf_column_range output field 'end'"
    form: llm

  # Raw text of the TOC pages ('pages' field of pdf_column_range output),
  # passed as a single string.
  - name: toc_pages
    type: string
    required: true
    label:
      en_US: TOC Page Text
      zh_Hans: 目录页文本
      pt_BR: Texto das Páginas do Sumário
      ja_JP: 目次ページテキスト
    human_description:
      en_US: "Raw text content of TOC pages (from pdf_column_range output 'pages' array, joined)"
      zh_Hans: '目录页原始文本内容(来自目录页提取工具输出的 pages 数组)'
      pt_BR: 'Conteúdo de texto bruto das páginas do sumário'
      ja_JP: '目次ページの生テキスト内容'
    llm_description: "Raw text content extracted from TOC pages, from pdf_column_range output field 'pages'"
    form: llm

  # LLM used to turn the TOC text into structured JSON.
  # NOTE(review): unlike the parameters above this one uses `form: form` and has
  # no llm_description — presumably it is set in the tool configuration UI
  # rather than by the calling model; confirm against the plugin schema.
  - name: model
    type: model-selector
    scope: llm
    required: true
    label:
      en_US: LLM Model
      zh_Hans: LLM 模型
      pt_BR: Modelo LLM
      ja_JP: LLMモデル
    human_description:
      en_US: 'LLM model for parsing TOC into structured JSON'
      zh_Hans: '用于解析目录的 LLM 模型'
      pt_BR: 'Modelo LLM para análise do sumário'
      ja_JP: '目次解析用のLLMモデル'
    form: form
# Implementation hook: path to the Python source file backing this tool.
extra:
  python:
    source: 'tools/pdf_toc.py'