# Tool manifest for "pdf_toc": parses PDF table-of-contents text into a
# structured JSON catalog via an LLM. The page-range and text inputs are
# described below as coming from the output of the pdf_column_range tool.
#
# NOTE(review): nesting was reconstructed from the key order after the
# original indentation was lost; confirm against the plugin schema.

# Tool identity and localized display names.
identity:
  name: "pdf_toc"
  author: "yslg"
  label:
    en_US: "PDF TOC Parser"
    zh_Hans: "PDF目录解析"
    pt_BR: "Analisador de Sumário PDF"
    ja_JP: "PDF目次解析"

# Descriptions: `human` holds the localized user-facing text, `llm` the
# description given to the model.
description:
  human:
    en_US: "Parse PDF table-of-contents text (from pdf_column_range) into structured JSON catalog via LLM"
    zh_Hans: "通过LLM将PDF目录文本(来自目录页提取工具的输出)解析为结构化JSON目录"
    pt_BR: "Analisar texto do sumário PDF em catálogo JSON estruturado via LLM"
    ja_JP: "LLMを使用してPDF目次テキストを構造化JSONカタログに解析"
  llm: "Parse PDF table-of-contents text into structured JSON with chapter names and page ranges. Input is the output of pdf_column_range tool (start/end/pages)."

# Input parameters. All four are required.
# NOTE(review): `form: llm` presumably means the value is supplied by the
# calling model at runtime, and `form: form` that it is set in the tool's
# configuration UI — confirm against the plugin documentation.
parameters:
  # Start page index of the TOC section (pdf_column_range field 'start').
  - name: toc_start
    type: number
    required: true
    label:
      en_US: TOC Start Page
      zh_Hans: 目录起始页
      pt_BR: Página Inicial do Sumário
      ja_JP: 目次開始ページ
    human_description:
      en_US: "Start page index of TOC (from pdf_column_range output)"
      zh_Hans: "目录起始页码(来自目录页提取工具输出的 start)"
      pt_BR: "Índice da página inicial do sumário"
      ja_JP: "目次の開始ページ番号"
    llm_description: "Start page index of TOC section, from pdf_column_range output field 'start'"
    form: llm

  # End page index of the TOC section (pdf_column_range field 'end').
  - name: toc_end
    type: number
    required: true
    label:
      en_US: TOC End Page
      zh_Hans: 目录结束页
      pt_BR: Página Final do Sumário
      ja_JP: 目次終了ページ
    human_description:
      en_US: "End page index of TOC (from pdf_column_range output)"
      zh_Hans: "目录结束页码(来自目录页提取工具输出的 end)"
      pt_BR: "Índice da página final do sumário"
      ja_JP: "目次の終了ページ番号"
    llm_description: "End page index of TOC section, from pdf_column_range output field 'end'"
    form: llm

  # Raw text of the TOC pages (pdf_column_range field 'pages'), passed
  # as a single string.
  - name: toc_pages
    type: string
    required: true
    label:
      en_US: TOC Page Text
      zh_Hans: 目录页文本
      pt_BR: Texto das Páginas do Sumário
      ja_JP: 目次ページテキスト
    human_description:
      en_US: "Raw text content of TOC pages (from pdf_column_range output 'pages' array, joined)"
      zh_Hans: "目录页原始文本内容(来自目录页提取工具输出的 pages 数组)"
      pt_BR: "Conteúdo de texto bruto das páginas do sumário"
      ja_JP: "目次ページの生テキスト内容"
    llm_description: "Raw text content extracted from TOC pages, from pdf_column_range output field 'pages'"
    form: llm

  # LLM used to parse the TOC text; selector is scoped to LLM models.
  - name: model
    type: model-selector
    scope: llm
    required: true
    label:
      en_US: LLM Model
      zh_Hans: LLM 模型
      pt_BR: Modelo LLM
      ja_JP: LLMモデル
    human_description:
      en_US: "LLM model for parsing TOC into structured JSON"
      zh_Hans: "用于解析目录的 LLM 模型"
      pt_BR: "Modelo LLM para análise do sumário"
      ja_JP: "目次解析用のLLMモデル"
    form: form

# Path of the Python source file that implements this tool.
extra:
  python:
    source: tools/pdf_toc.py