# urbanLifeline/difyPlugin/pdf/tools/pdf_toc.yaml

# Tool identity: internal name, author, and the localized display label.
identity:
  name: pdf_toc
  author: yslg
  # One display label per supported locale.
  label:
    en_US: 'PDF TOC'
    zh_Hans: 'PDF 目录提取'
    pt_BR: 'PDF TOC'
    ja_JP: 'PDF TOC'
# Tool description: `human` holds per-locale text, `llm` holds a single
# English string that also sketches the shape of the returned JSON.
description:
  human:
    en_US: 'Extract the catalog array from a PDF file using metadata or LLM.'
    zh_Hans: '从PDF文件中提取目录数组，优先使用元数据，回退使用LLM解析。'
    pt_BR: 'Extrair o array de catálogo de um arquivo PDF.'
    ja_JP: 'PDFファイルからカタログ配列を抽出する。'
  # Folded scalar (`>-`): the lines below are joined with single spaces and
  # the trailing newline is stripped, giving one long single-line string.
  llm: >-
    Extract a catalog array from a PDF file.
    Returns JSON text like
    [{title,start,end,page_start_index,page_end_index}].
# Input parameters accepted by the tool.
parameters:
  # The PDF document whose catalog (table of contents) is extracted.
  - name: file
    type: file
    required: true
    label:
      en_US: PDF File
      zh_Hans: PDF 文件
      pt_BR: Arquivo PDF
      ja_JP: PDFファイル
    human_description:
      en_US: "PDF file to inspect"
      zh_Hans: "要解析的PDF文件"
      pt_BR: "Arquivo PDF a ser analisado"
      ja_JP: "解析するPDFファイル"
    llm_description: "PDF file to extract catalog from"
    form: llm
    # NOTE(review): confirm that the plugin loader actually reads `fileTypes`;
    # if it does not, this restriction to PDF has no effect.
    fileTypes:
      - "pdf"
  # LLM chosen via a model selector; per the descriptions below it is used
  # for parsing the TOC when metadata is unavailable.
  - name: model
    type: model-selector
    scope: llm
    required: true
    label:
      en_US: 'LLM Model'
      zh_Hans: 'LLM 模型'
      pt_BR: 'Modelo LLM'
      ja_JP: 'LLMモデル'
    human_description:
      en_US: 'LLM model used for parsing TOC when metadata is unavailable'
      zh_Hans: '当元数据不可用时，用于解析目录的LLM模型'
      pt_BR: 'Modelo LLM para análise de TOC'
      ja_JP: 'メタデータが利用できない場合のTOC解析用LLMモデル'
    form: form
# Extra settings: points at the Python source file for this tool.
extra:
  python:
    # NOTE(review): presumably resolved relative to the plugin root — confirm.
    source: 'tools/pdf_toc.py'