This commit is contained in:
2026-03-06 14:50:43 +08:00
parent 843146cdd7
commit 91ff28bdcf
18 changed files with 1316 additions and 100 deletions

View File

@@ -0,0 +1,68 @@
# Tool identity: internal name, author, and the display label for each locale.
identity:
  name: "pdf_to_markdown"
  author: "yslg"
  label:
    en_US: "PDF to Markdown"
    zh_Hans: "PDF转Markdown"
    pt_BR: "PDF para Markdown"
    ja_JP: "PDFからMarkdown"
# Tool description: one localized text per locale under `human`, plus a single
# English text under `llm`.
description:
  human:
    en_US: "Convert PDF to a single Markdown file with embedded base64 images. No LLM needed."
    zh_Hans: "将PDF转换为单个Markdown文件，图片以base64嵌入，无需大模型"
    pt_BR: "Converter PDF em um arquivo Markdown com imagens base64 incorporadas. Sem LLM."
    ja_JP: "PDFをbase64画像埋め込みの単一Markdownファイルに変換。LLM不要。"
  llm: "Convert a PDF file into a single Markdown (.md) file. Extracts text, tables, images (base64), and vector drawings. Auto-detects TOC and organizes by chapters. No LLM needed."
# Input parameters accepted by the tool. Each list item is one parameter with
# its type, required flag, localized label/description, form mode and default.
parameters:
  # The PDF to convert; the only required parameter.
  - name: file
    type: file
    required: true
    label:
      en_US: PDF File
      zh_Hans: PDF 文件
      pt_BR: Arquivo PDF
      ja_JP: PDFファイル
    human_description:
      en_US: "PDF file to convert"
      zh_Hans: "要转换的 PDF 文件"
      pt_BR: "Arquivo PDF para converter"
      ja_JP: "変換するPDFファイル"
    llm_description: "PDF file to convert to Markdown"
    form: llm
    # NOTE(review): camelCase key, unlike the snake_case keys used elsewhere
    # in this file — confirm the consumer actually reads it.
    fileTypes:
      - "pdf"

  # Optional switch for embedding images as base64; defaults to true.
  - name: include_images
    type: boolean
    required: false
    label:
      en_US: Include Images
      zh_Hans: 包含图片
      pt_BR: Incluir Imagens
      ja_JP: 画像を含める
    human_description:
      en_US: "Whether to embed images as base64 in the Markdown output (default: true)"
      zh_Hans: "是否将图片以base64嵌入Markdown输出（默认：是）"
      pt_BR: "Se deve incorporar imagens como base64 na saída Markdown (padrão: verdadeiro)"
      ja_JP: "Markdown出力にbase64として画像を埋め込むかどうか（デフォルト：はい）"
    llm_description: "Set to true to embed images as base64, false to skip images"
    form: form
    default: true

  # Optional DPI used when rendering vector drawings; defaults to 150.
  # NOTE(review): the 72-300 range is stated only in the description texts;
  # no min/max keys are declared here, so confirm where the range is enforced.
  - name: image_dpi
    type: number
    required: false
    label:
      en_US: Image DPI
      zh_Hans: 图片DPI
      pt_BR: DPI da Imagem
      ja_JP: 画像DPI
    human_description:
      en_US: "DPI for rendering vector drawings (72-300, default: 150)"
      zh_Hans: "矢量图渲染DPI（72-300，默认：150）"
      pt_BR: "DPI para renderizar desenhos vetoriais (72-300, padrão: 150)"
      ja_JP: "ベクター描画のレンダリングDPI（72-300、デフォルト：150）"
    llm_description: "Resolution for rendering vector drawings as images. Range 72-300, default 150."
    form: form
    default: 150
# Path of the Python source file associated with this tool definition.
extra:
  python:
    source: tools/pdf_to_markdown.py