This commit is contained in:
2026-03-15 13:00:30 +08:00
parent 91ff28bdcf
commit 136ddc270c
15 changed files with 1459 additions and 1276 deletions

View File

@@ -1,68 +1,51 @@
# Tool identity: machine name, author, and per-locale display labels.
# NOTE(review): this text looks like a rendered diff whose +/- markers and
# indentation were lost, so several keys appear twice (the earlier line is
# presumably the pre-commit value and the later line the post-commit value).
# Confirm against the repository before treating it as a loadable manifest.
identity:
identity:
name: "pdf_to_markdown"
author: "yslg"
label:
en_US: "PDF to Markdown"
# NOTE(review): "PDFMarkdown" may be missing a character between "PDF" and
# "Markdown" — TODO confirm against the original file.
zh_Hans: "PDFMarkdown"
# This second zh_Hans entry carries English text under the zh_Hans key.
zh_Hans: "PDF to Markdown"
pt_BR: "PDF para Markdown"
ja_JP: "PDFからMarkdown"
# This second ja_JP entry carries English text under the ja_JP key.
ja_JP: "PDF to Markdown"
# Tool descriptions: per-locale text listed after `human`, plus one `llm`
# string. Two complete sets of values appear back to back; this looks like
# old and new revisions from a diff whose markers and indentation were lost
# (presumably the first set is the replaced one — confirm).
description:
human:
# First set: describes a single Markdown file with embedded base64 images.
en_US: "Convert PDF to a single Markdown file with embedded base64 images. No LLM needed."
# NOTE(review): this zh_Hans sentence has no punctuation between clauses;
# separators may have been stripped — TODO confirm.
zh_Hans: "将PDF转换为单个Markdown文件图片以base64嵌入无需大模型"
pt_BR: "Converter PDF em um arquivo Markdown com imagens base64 incorporadas. Sem LLM."
ja_JP: "PDFをbase64画像埋め込みの単一Markdownファイルに変換。LLM不要。"
llm: "Convert a PDF file into a single Markdown (.md) file. Extracts text, tables, images (base64), and vector drawings. Auto-detects TOC and organizes by chapters. No LLM needed."
# Second set: describes catalog-driven conversion that ignores images and
# graphics. All four locale keys carry the same English sentence.
en_US: "Convert PDF to Markdown using a catalog array. Images and graphics are ignored."
zh_Hans: "Convert PDF to Markdown using a catalog array. Images and graphics are ignored."
pt_BR: "Convert PDF to Markdown using a catalog array. Images and graphics are ignored."
ja_JP: "Convert PDF to Markdown using a catalog array. Images and graphics are ignored."
llm: "Convert a PDF file into Markdown using a catalog JSON array. Ignore images and graphics."
# Tool parameters. Four parameter names appear below: file, include_images,
# catalog, and image_dpi.
# NOTE(review): this section looks like a diff whose +/- markers and
# indentation were lost, so lines from two revisions are interleaved and
# some keys repeat. Which lines belong to which revision is an inference
# from the text itself — confirm against the repository.
parameters:
# `file`: a required parameter of type `file`.
- name: file
type: file
required: true
label:
en_US: PDF File
# Localized labels, followed by a second set that repeats the English
# label under the same three locale keys.
zh_Hans: PDF 文件
pt_BR: Arquivo PDF
ja_JP: PDFファイル
zh_Hans: PDF File
pt_BR: PDF File
ja_JP: PDF File
human_description:
en_US: "PDF file to convert"
# Same pattern: localized descriptions, then English text repeated under
# the zh_Hans / pt_BR / ja_JP keys.
zh_Hans: "要转换的 PDF 文件"
pt_BR: "Arquivo PDF para converter"
ja_JP: "変換するPDFファイル"
zh_Hans: "PDF file to convert"
pt_BR: "PDF file to convert"
ja_JP: "PDF file to convert"
llm_description: "PDF file to convert to Markdown"
form: llm
fileTypes:
- "pdf"
# Two parameter headers follow back to back: `include_images` (optional
# boolean) and `catalog` (required string). Presumably `catalog` replaces
# `include_images` in the newer revision — confirm.
- name: include_images
type: boolean
required: false
- name: catalog
type: string
required: true
label:
# The first four labels read as `include_images` labels; the next four
# ("Catalog JSON") read as `catalog` labels.
en_US: Include Images
zh_Hans: 包含图片
pt_BR: Incluir Imagens
ja_JP: 画像を含める
en_US: Catalog JSON
zh_Hans: Catalog JSON
pt_BR: Catalog JSON
ja_JP: Catalog JSON
human_description:
# These descriptions talk about embedding images as base64, so they
# presumably belong to `include_images`.
# NOTE(review): the zh_Hans and ja_JP strings end abruptly compared with
# the en_US and pt_BR ones, which end in a parenthesised default.
# Punctuation may have been stripped — TODO confirm.
en_US: "Whether to embed images as base64 in the Markdown output (default: true)"
zh_Hans: "是否将图片以base64嵌入Markdown输出默认"
pt_BR: "Se deve incorporar imagens como base64 na saída Markdown (padrão: verdadeiro)"
ja_JP: "Markdown出力にbase64として画像を埋め込むかどうかデフォルトはい"
llm_description: "Set to true to embed images as base64, false to skip images"
form: form
default: true
# `image_dpi`: an optional number with default 150. The description text
# gives a 72-300 range; no line here enforces that range.
- name: image_dpi
type: number
required: false
label:
en_US: Image DPI
zh_Hans: 图片DPI
pt_BR: DPI da Imagem
ja_JP: 画像DPI
human_description:
# NOTE(review): the zh_Hans and ja_JP strings lack the parentheses seen
# in the en_US and pt_BR ones; possibly stripped — TODO confirm.
en_US: "DPI for rendering vector drawings (72-300, default: 150)"
zh_Hans: "矢量图渲染DPI72-300默认150"
pt_BR: "DPI para renderizar desenhos vetoriais (72-300, padrão: 150)"
ja_JP: "ベクター描画のレンダリングDPI72-300、デフォルト150"
llm_description: "Resolution for rendering vector drawings as images. Range 72-300, default 150."
form: form
default: 150
# The remaining lines describe a catalog JSON array, so they presumably
# complete the `catalog` parameter declared earlier — confirm. All four
# locale keys carry the same English text. The llm_description names
# `pdf_toc` as the source of the catalog; that tool is not visible here.
en_US: "Catalog JSON array like [{title,start,end,page_start_index,page_end_index}]"
zh_Hans: "Catalog JSON array like [{title,start,end,page_start_index,page_end_index}]"
pt_BR: "Catalog JSON array like [{title,start,end,page_start_index,page_end_index}]"
ja_JP: "Catalog JSON array like [{title,start,end,page_start_index,page_end_index}]"
llm_description: "Catalog JSON array returned by pdf_toc"
form: llm
# Implementation pointer: gives the path of the Python source for this tool.
# NOTE(review): indentation is absent in this text, so the nesting
# (presumably extra -> python -> source) is an assumption — confirm.
extra:
python:
source: tools/pdf_to_markdown.py