# Listing metadata from the file viewer: 1001 lines, 28 KiB, YAML.
# Application metadata: display name, description, icon and run mode.
app:
  description: '优化版:支持大文件PDF处理,跨页表格/段落智能识别合并'
  icon: 🤖
  icon_background: '#FFEAD5'
  mode: workflow
  name: 数据清洗-大文件处理
  use_icon_as_answer_icon: false
# Plugins this app declares: three marketplace plugins and one local package.
# Each identifier has the form <author>/<plugin>:<version>@<sha256>.
dependencies:
  - current_identifier: null
    type: marketplace
    value:
      marketplace_plugin_unique_identifier: samanhappy/word_process:0.0.1@003ecc76645cf2d5160d4e009a29d8eba2946eaaf7134c49971c3b9fedbfab0d
      version: null
  - current_identifier: null
    type: marketplace
    value:
      marketplace_plugin_unique_identifier: langgenius/siliconflow:0.0.44@9dac23fe837d6da24a2cd9ef959c1c93e4e094b7562ad8a2fd3d4cc86c0e3e89
      version: null
  - current_identifier: null
    type: marketplace
    value:
      marketplace_plugin_unique_identifier: bowenliang123/md_exporter:3.6.9@3f027d63e80b44d5d5a9f706871afaef37905b8f8a89a2d152dc530211a8acb1
      version: null
  - current_identifier: null
    type: package
    value:
      plugin_unique_identifier: yslg/pdf:0.0.1@5e83b87d38ad55c2a1e929311d21a86cef5f9e04394b977b3ba16eb34de08b36
      version: null
# Document kind and version string.
# NOTE(review): presumably the export-format version, not the app's own — confirm.
kind: app
version: '0.5.0'
# Workflow definition: feature flags, the node/edge graph, and editor viewport.
#
# Routing, as wired by `graph.edges`:
#   start (用户输入) -> if-else 条件分支 on the uploaded file's `type`
#     image    -> LLM 图片理解 ----------------------------.
#     video    -> LLM 视频理解 ----------------------------+-> 文件提取聚合
#     document -> if-else 条件分支 2 on the file's `extension`   |
#         contains "pdf" -> tool PDF转Markdown -> end 输出 2      |
#         contains "doc" -> tool Word提取器 --.                   |
#         else           -> 文档提取器 -------+-> 文档提取聚合 --'
#   文件提取聚合 -> LLM 数据清洗与跨页合并 -> tool Markdown ⮕ MD -> end 输出
workflow:
  conversation_variables: []
  environment_variables: []
  features:
    file_upload:
      allowed_file_extensions:
        - .JPG
        - .JPEG
        - .PNG
        - .GIF
        - .WEBP
        - .SVG
        - .PDF
        - .pdf
      allowed_file_types:
        - image
        - document
      allowed_file_upload_methods:
        - local_file
        - remote_url
      enabled: false
      fileUploadConfig:
        audio_file_size_limit: 50
        batch_count_limit: 5
        file_size_limit: 500
        image_file_batch_limit: 10
        image_file_size_limit: 10
        single_chunk_attachment_limit: 10
        video_file_size_limit: 100
        workflow_file_upload_limit: 10
      image:
        enabled: false
        number_limits: 3
        transfer_methods:
          - local_file
          - remote_url
      number_limits: 3
    opening_statement: ''
    retriever_resource:
      enabled: true
    sensitive_word_avoidance:
      enabled: false
    speech_to_text:
      enabled: false
    suggested_questions: []
    suggested_questions_after_answer:
      enabled: false
    text_to_speech:
      enabled: false
      language: ''
      voice: ''
  graph:
    edges:
      # start -> if-else (file type)
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: start
          targetType: if-else
        id: 1770703294598-source-1770703342256-target
        selected: false
        source: '1770703294598'
        sourceHandle: source
        target: '1770703342256'
        targetHandle: target
        type: custom
        zIndex: 0
      # if-else (file type), case 'true' (image) -> image LLM
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: if-else
          targetType: llm
        id: 1770703342256-true-1770703393190-target
        selected: false
        source: '1770703342256'
        sourceHandle: 'true'
        target: '1770703393190'
        targetHandle: target
        type: custom
        zIndex: 0
      # if-else (file type), video case -> video LLM
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: if-else
          targetType: llm
        id: 1770703342256-93d5294c-5984-4bc0-b30d-cd9e2ffba28d-1770703524412-target
        selected: false
        source: '1770703342256'
        sourceHandle: 93d5294c-5984-4bc0-b30d-cd9e2ffba28d
        target: '1770703524412'
        targetHandle: target
        type: custom
        zIndex: 0
      # image LLM -> final aggregator
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: llm
          targetType: variable-aggregator
        id: 1770703393190-source-1770703625287-target
        selected: false
        source: '1770703393190'
        sourceHandle: source
        target: '1770703625287'
        targetHandle: target
        type: custom
        zIndex: 0
      # video LLM -> final aggregator
      - data:
          isInLoop: false
          sourceType: llm
          targetType: variable-aggregator
        id: 1770703524412-source-1770703625287-target
        selected: false
        source: '1770703524412'
        sourceHandle: source
        target: '1770703625287'
        targetHandle: target
        type: custom
        zIndex: 0
      # if-else (file type), document case -> if-else (extension)
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: if-else
          targetType: if-else
        id: 1770703342256-6556b05e-3266-4aa7-b196-ec41f5dd766b-1772348592076-target
        selected: false
        source: '1770703342256'
        sourceHandle: 6556b05e-3266-4aa7-b196-ec41f5dd766b
        target: '1772348592076'
        targetHandle: target
        type: custom
        zIndex: 0
      # if-else (extension), else branch -> document extractor
      - data:
          isInLoop: false
          sourceType: if-else
          targetType: document-extractor
        id: 1772348592076-false-1770703633813-target
        selected: false
        source: '1772348592076'
        sourceHandle: 'false'
        target: '1770703633813'
        targetHandle: target
        type: custom
        zIndex: 0
      # if-else (extension), "doc" case -> Word extractor tool
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: if-else
          targetType: tool
        id: 1772348592076-0b4fd2d4-a592-4421-acbb-822db3004219-1772349027446-target
        selected: false
        source: '1772348592076'
        sourceHandle: 0b4fd2d4-a592-4421-acbb-822db3004219
        target: '1772349027446'
        targetHandle: target
        type: custom
        zIndex: 0
      # document extractor -> document aggregator
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: document-extractor
          targetType: variable-aggregator
        id: 1770703633813-source-1772348969241-target
        selected: false
        source: '1770703633813'
        sourceHandle: source
        target: '1772348969241'
        targetHandle: target
        type: custom
        zIndex: 0
      # Word extractor tool -> document aggregator
      - data:
          isInLoop: false
          sourceType: tool
          targetType: variable-aggregator
        id: 1772349027446-source-1772348969241-target
        selected: false
        source: '1772349027446'
        sourceHandle: source
        target: '1772348969241'
        targetHandle: target
        type: custom
        zIndex: 0
      # final aggregator -> clean-up/merge LLM
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: variable-aggregator
          targetType: llm
        id: 1770703625287-source-1770703671732-target
        selected: false
        source: '1770703625287'
        sourceHandle: source
        target: '1770703671732'
        targetHandle: target
        type: custom
        zIndex: 0
      # clean-up/merge LLM -> Markdown export tool
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: llm
          targetType: tool
        id: 1770703671732-source-1770704285657-target
        selected: false
        source: '1770703671732'
        sourceHandle: source
        target: '1770704285657'
        targetHandle: target
        type: custom
        zIndex: 0
      # if-else (extension), case 'true' (pdf) -> PDF-to-Markdown tool
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: if-else
          targetType: tool
        id: 1772348592076-true-1772527425324-target
        selected: false
        source: '1772348592076'
        sourceHandle: 'true'
        target: '1772527425324'
        targetHandle: target
        type: custom
        zIndex: 0
      # document aggregator -> final aggregator
      - data:
          isInLoop: false
          sourceType: variable-aggregator
          targetType: variable-aggregator
        id: 1772348969241-source-1770703625287-target
        source: '1772348969241'
        sourceHandle: source
        target: '1770703625287'
        targetHandle: target
        type: custom
        zIndex: 0
      # Markdown export tool -> end (输出)
      - data:
          isInLoop: false
          sourceType: tool
          targetType: end
        id: 1770704285657-source-1770704288628-target
        source: '1770704285657'
        sourceHandle: source
        target: '1770704288628'
        targetHandle: target
        type: custom
        zIndex: 0
      # PDF-to-Markdown tool -> end (输出 2)
      - data:
          isInIteration: false
          isInLoop: false
          sourceType: tool
          targetType: end
        id: 1772527425324-source-1772779766541-target
        source: '1772527425324'
        sourceHandle: source
        target: '1772779766541'
        targetHandle: target
        type: custom
        zIndex: 0
    nodes:
      # Start node: one required file input named `file`.
      - data:
          selected: false
          title: 用户输入
          type: start
          variables:
            - allowed_file_extensions: []
              allowed_file_types:
                - image
                - document
                - video
              allowed_file_upload_methods:
                - local_file
                - remote_url
              default: ''
              hint: ''
              label: 文件
              max_length: 48
              options: []
              placeholder: ''
              required: true
              type: file
              variable: file
        height: 109
        id: '1770703294598'
        position:
          x: 0
          y: 55
        positionAbsolute:
          x: 0
          y: 55
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # If-else on the uploaded file's `type`: image / video / document.
      - data:
          cases:
            - case_id: 'true'
              conditions:
                - comparison_operator: in
                  id: f88f279e-5736-4b1b-98cf-f8a9621531a0
                  value:
                    - image
                  varType: file
                  variable_selector:
                    - '1770703294598'
                    - file
                    - type
              id: 'true'
              logical_operator: and
            - case_id: 93d5294c-5984-4bc0-b30d-cd9e2ffba28d
              conditions:
                - comparison_operator: in
                  id: 48e8d32a-59c5-4573-8e8a-355dc73a39fc
                  value:
                    - video
                  varType: file
                  variable_selector:
                    - '1770703294598'
                    - file
                    - type
              id: 93d5294c-5984-4bc0-b30d-cd9e2ffba28d
              logical_operator: and
            - case_id: 6556b05e-3266-4aa7-b196-ec41f5dd766b
              conditions:
                - comparison_operator: in
                  id: 9916110c-edf7-4a4a-b324-2f8d85c73299
                  value:
                    - document
                  varType: file
                  variable_selector:
                    - '1770703294598'
                    - file
                    - type
              id: 6556b05e-3266-4aa7-b196-ec41f5dd766b
              logical_operator: and
          selected: false
          title: 条件分支
          type: if-else
        height: 220
        id: '1770703342256'
        position:
          x: 342
          y: 0
        positionAbsolute:
          x: 342
          y: 0
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # LLM (vision enabled) that describes an uploaded image.
      - data:
          context:
            enabled: false
            variable_selector: []
          model:
            completion_params:
              enable_thinking: true
              temperature: 0.7
            mode: chat
            name: zai-org/GLM-4.6V
            provider: langgenius/siliconflow/siliconflow
          prompt_template:
            - id: 4b1706f6-3216-4fb7-a6dc-978ce43ff491
              role: system
              text: 识别图片中所有内容和文字,并进行合理的描述编排
          reasoning_format: separated
          selected: false
          title: 图片理解
          type: llm
          vision:
            configs:
              detail: high
              variable_selector:
                - '1770703294598'
                - file
            enabled: true
        height: 88
        id: '1770703393190'
        position:
          x: 2772
          y: 82
        positionAbsolute:
          x: 2772
          y: 82
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # LLM (vision enabled) that describes an uploaded video.
      - data:
          context:
            enabled: false
            variable_selector: []
          model:
            completion_params: {}
            mode: chat
            name: Pro/moonshotai/Kimi-K2.5
            provider: langgenius/siliconflow/siliconflow
          prompt_template:
            - id: 497bebc3-5e75-4c2b-940c-ba485dc1e51a
              role: system
              text: 识别视频中所有内容和文字,并进行合理的描述编排
          reasoning_format: separated
          selected: false
          title: 视频理解
          type: llm
          vision:
            configs:
              detail: high
              variable_selector:
                - '1770703294598'
                - file
            enabled: true
        height: 88
        id: '1770703524412'
        position:
          x: 1770
          y: 177
        positionAbsolute:
          x: 1770
          y: 177
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # If-else on the file's `extension`: contains "pdf" / contains "doc" / else.
      - data:
          cases:
            - case_id: 'true'
              conditions:
                - comparison_operator: contains
                  id: 7a6d2b1e-9704-41f3-aeba-40c6e2484d56
                  value: pdf
                  varType: string
                  variable_selector:
                    - '1770703294598'
                    - file
                    - extension
              id: 'true'
              logical_operator: and
            # NOTE(review): unlike the sibling cases this one has no `id` key and
            # declares `varType: file` for the same `extension` attribute — confirm
            # the consumer tolerates both before normalizing.
            - case_id: 0b4fd2d4-a592-4421-acbb-822db3004219
              conditions:
                - comparison_operator: contains
                  id: 67767b34-ad03-48f4-80ef-100eb78e13ab
                  value: doc
                  varType: file
                  variable_selector:
                    - '1770703294598'
                    - file
                    - extension
              logical_operator: and
          selected: false
          title: 条件分支 2
          type: if-else
        height: 172
        id: '1772348592076'
        position:
          x: 704
          y: 424
        positionAbsolute:
          x: 704
          y: 424
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # Document extractor reading the uploaded file (else branch of 条件分支 2).
      - data:
          is_array_file: false
          selected: false
          title: 文档提取器
          type: document-extractor
          variable_selector:
            - '1770703294598'
            - file
        height: 104
        id: '1770703633813'
        position:
          x: 1066
          y: 337
        positionAbsolute:
          x: 1066
          y: 337
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # Tool node: Word extractor plugin, fed the uploaded file.
      - data:
          is_team_authorization: true
          paramSchemas:
            - auto_generate: null
              default: null
              form: llm
              human_description:
                en_US: Word file to extract text and images from
                ja_JP: Word file to extract text and images from
                pt_BR: Word file to extract text and images from
                zh_Hans: 要提取文本和图片的Word文件
              label:
                en_US: Word Content
                ja_JP: Word Content
                pt_BR: Word Content
                zh_Hans: Word 内容
              llm_description: Word file content to be extracted
              max: null
              min: null
              name: word_content
              options: []
              placeholder: null
              precision: null
              required: true
              scope: null
              template: null
              type: file
          params:
            word_content: ''
          plugin_id: samanhappy/word_process
          plugin_unique_identifier: samanhappy/word_process:0.0.1@003ecc76645cf2d5160d4e009a29d8eba2946eaaf7134c49971c3b9fedbfab0d
          provider_icon: https://dify.org.xyzh.yslg/console/api/workspaces/current/plugin/icon?tenant_id=fe3bcf55-9a04-4850-8473-7f97e1c09b97&filename=cb0643689e2f8152d38c44a267a459fae99ff208b0bc164e27ccb053fc1844cd.svg
          provider_id: samanhappy/word_process/word_process
          provider_name: samanhappy/word_process/word_process
          provider_type: builtin
          selected: false
          title: Word提取器
          tool_configurations: {}
          tool_description: 一个将Word文件提取为文本和图片的工具
          tool_label: Word提取器
          tool_name: word_extractor
          tool_node_version: '2'
          tool_parameters:
            word_content:
              type: variable
              value:
                - '1770703294598'
                - file
          type: tool
        height: 52
        id: '1772349027446'
        position:
          x: 1066
          y: 521
        positionAbsolute:
          x: 1066
          y: 521
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # Aggregator over the two document paths (Word tool text, extractor text).
      - data:
          output_type: string
          selected: false
          title: 文档提取聚合
          type: variable-aggregator
          variables:
            - - '1772349027446'
              - text
            - - '1770703633813'
              - text
        height: 134
        id: '1772348969241'
        position:
          x: 1428
          y: 344
        positionAbsolute:
          x: 1428
          y: 344
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # Final aggregator over image LLM, video LLM and the document aggregator.
      - data:
          advanced_settings:
            group_enabled: false
            groups:
              - groupId: 058efed3-3c6a-44d6-8f40-704abda8c413
                group_name: Group1
                output_type: string
                variables:
                  - - '1770703393190'
                    - text
                  - - '1770703524412'
                    - text
                  # Was ['1772349100004', result]: no node with that id exists in
                  # this graph. Aligned with the node's top-level `variables`
                  # list so the (currently disabled) group has no dangling ref.
                  - - '1772348969241'
                    - output
          output_type: string
          selected: false
          title: 文件提取聚合
          type: variable-aggregator
          variables:
            - - '1770703393190'
              - text
            - - '1770703524412'
              - text
            - - '1772348969241'
              - output
        height: 160
        id: '1770703625287'
        position:
          x: 3134
          y: 291
        positionAbsolute:
          x: 3134
          y: 291
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # LLM that merges cross-page tables/paragraphs/lists, de-duplicates, and
      # strips helper markers from the aggregated text (see the prompt below).
      - data:
          context:
            enabled: false
            variable_selector: []
          model:
            completion_params:
              temperature: 0.3
            mode: chat
            name: Qwen/Qwen3-32B
            provider: langgenius/siliconflow/siliconflow
          prompt_template:
            - id: 48ec1856-fdd7-4f4a-9ce5-1aa635822550
              role: system
              # Literal block, no trailing newline; the last line interpolates
              # the final aggregator's `output`.
              text: |-
                你是一个专业的文档整理和合并专家。以下内容是从文档中分块提取并格式化的Markdown文本。由于分块处理,各块之间可能存在跨页断裂和重复内容,需要你进行智能合并。

                ## 你的任务

                ### 1. 合并跨页表格
                - 找到所有 `<!-- TABLE_CONTINUES -->` 和对应的 `<!-- TABLE_CONTINUED_FROM_PREV -->` 标记
                - 将前一块末尾的不完整表格和后一块开头的延续表格合并为一个完整表格
                - 确保表头只保留一份,数据行完整拼接,表格结构正确

                ### 2. 合并跨页段落
                - 找到所有 `<!-- PARA_CONTINUES -->` 和 `<!-- PARA_CONTINUED_FROM_PREV -->` 标记
                - 将被截断的段落拼接为语义完整的段落

                ### 3. 合并跨页列表
                - 找到所有 `<!-- LIST_CONTINUES -->` 和 `<!-- LIST_CONTINUED_FROM_PREV -->` 标记
                - 将被截断的列表合并为完整列表,确保编号连续

                ### 4. 去除重复内容
                - 由于分块时存在页面重叠,相邻块之间可能有重复的段落、表格行或列表项
                - 识别并去除这些重复内容,每段内容只保留一份

                ### 5. 清理所有辅助标记
                - 移除所有 `<!-- ... -->` 形式的辅助标记和块分隔符
                - 确保最终输出中不包含任何HTML注释或处理标记

                ### 6. 格式规范化
                - 确保标题层级正确且连续
                - 确保表格格式完整(有表头行和分隔行)
                - 确保列表编号连续
                - 统一全文格式风格

                直接输出最终的Markdown内容,不要用```markdown```包裹。

                以下是需要整理合并的内容:
                {{#1770703625287.output#}}
          reasoning_format: separated
          selected: false
          title: 数据清洗与跨页合并
          type: llm
          vision:
            enabled: false
        height: 88
        id: '1770703671732'
        position:
          x: 3660
          y: 327
        positionAbsolute:
          x: 3660
          y: 327
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # Tool node: md_exporter `md_to_md`, fed the merge LLM's text.
      - data:
          is_team_authorization: true
          paramSchemas:
            - auto_generate: null
              default: null
              form: llm
              human_description:
                en_US: Markdown text
                ja_JP: Markdown text
                pt_BR: Markdown text
                zh_Hans: Markdown格式文本
              label:
                en_US: Markdown text
                ja_JP: Markdown text
                pt_BR: Markdown text
                zh_Hans: Markdown格式文本
              llm_description: ''
              max: null
              min: null
              name: md_text
              options: []
              placeholder: null
              precision: null
              required: true
              scope: null
              template: null
              type: string
            - auto_generate: null
              default: null
              form: llm
              human_description:
                en_US: Optional custom output file name, and the filename suffix is not required.
                ja_JP: Optional custom output file name, and the filename suffix is not required.
                pt_BR: Optional custom output file name, and the filename suffix is not required.
                zh_Hans: 可选的自定义输出文件名,后缀名无需指定
              label:
                en_US: Output Filename
                ja_JP: Output Filename
                pt_BR: Output Filename
                zh_Hans: 输出文件名
              llm_description: ''
              max: null
              min: null
              name: output_filename
              options: []
              placeholder: null
              precision: null
              required: false
              scope: null
              template: null
              type: string
          params:
            md_text: ''
            output_filename: ''
          plugin_id: bowenliang123/md_exporter
          # NOTE(review): pinned to 3.4.0@a5ce…, while top-level `dependencies`
          # lists md_exporter 3.6.9@3f02… — confirm which version is intended.
          plugin_unique_identifier: bowenliang123/md_exporter:3.4.0@a5ce3ac3114f3dd6ab4fe49f0bb931a31af49ff555e479ec45e8aaa5d44157ee
          provider_icon: https://dify.org.xyzh.yslg/console/api/workspaces/current/plugin/icon?tenant_id=fe3bcf55-9a04-4850-8473-7f97e1c09b97&filename=f0bad95cda1671b4e49f0e05df6122ef9ec5d554e138f128795d11d3806c00ef.svg
          provider_id: bowenliang123/md_exporter/md_exporter
          provider_name: bowenliang123/md_exporter/md_exporter
          provider_type: builtin
          selected: false
          title: Markdown ⮕ MD
          tool_configurations: {}
          tool_description: 将 Markdown 转换为 .md 文件的工具
          tool_label: Markdown ⮕ MD
          tool_name: md_to_md
          tool_node_version: '2'
          tool_parameters:
            md_text:
              type: mixed
              value: '{{#1770703671732.text#}}'
            output_filename:
              type: mixed
              value: ''
          type: tool
        height: 52
        id: '1770704285657'
        position:
          x: 4231.079190350343
          y: 573.1529224498603
        positionAbsolute:
          x: 4231.079190350343
          y: 573.1529224498603
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # End node for the image/video/Word/other-document path.
      - data:
          outputs:
            - value_selector:
                - '1770704285657'
                - files
              value_type: array[file]
              variable: _
          selected: false
          title: 输出
          type: end
        height: 88
        id: '1770704288628'
        position:
          x: 5142.505374898874
          y: 614.2288378497078
        positionAbsolute:
          x: 5142.505374898874
          y: 614.2288378497078
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # Tool node: yslg/pdf `pdf_to_markdown`, fed the uploaded file.
      - data:
          is_team_authorization: true
          paramSchemas:
            - auto_generate: null
              default: null
              form: llm
              human_description:
                en_US: PDF file to convert
                ja_JP: 変換するPDFファイル
                pt_BR: Arquivo PDF para converter
                zh_Hans: 要转换的 PDF 文件
              label:
                en_US: PDF File
                ja_JP: PDFファイル
                pt_BR: Arquivo PDF
                zh_Hans: PDF 文件
              llm_description: PDF file to convert to Markdown
              max: null
              min: null
              name: file
              options: []
              placeholder: null
              precision: null
              required: true
              scope: null
              template: null
              type: file
            - auto_generate: null
              default: true
              form: form
              human_description:
                en_US: Whether to embed images as base64 (default true)
                ja_JP: 画像をbase64として埋め込むか
                pt_BR: Se deve incorporar imagens como base64
                zh_Hans: 是否将图片以base64嵌入(默认是)
              label:
                en_US: Include Images
                ja_JP: 画像を含める
                pt_BR: Incluir Imagens
                zh_Hans: 包含图片
              llm_description: Set to true to embed images as base64
              max: null
              min: null
              name: include_images
              options: []
              placeholder: null
              precision: null
              required: false
              scope: null
              template: null
              type: boolean
            - auto_generate: null
              default: 150
              form: form
              human_description:
                en_US: DPI for rendering vector drawings (72-300)
                ja_JP: ベクター描画のDPI
                pt_BR: DPI para renderizar desenhos vetoriais
                zh_Hans: 矢量图渲染DPI(72-300,默认150)
              label:
                en_US: Image DPI
                ja_JP: 画像DPI
                pt_BR: DPI da Imagem
                zh_Hans: 图片DPI
              llm_description: Resolution for rendering vector drawings
              max: null
              min: null
              name: image_dpi
              options: []
              placeholder: null
              precision: null
              required: false
              scope: null
              template: null
              type: number
          params:
            file: ''
            image_dpi: ''
            include_images: ''
          plugin_id: yslg/pdf
          # NOTE(review): hash cc5f… differs from the yslg/pdf:0.0.1@5e83… entry
          # under top-level `dependencies` — confirm which package build is intended.
          plugin_unique_identifier: yslg/pdf:0.0.1@cc5f6665002ca7c06855ef6703ee9f6e051ddbfb3d00d2aa899f9f280f45dd61
          provider_icon: https://dify.org.xyzh.yslg/console/api/workspaces/current/plugin/icon?tenant_id=fe3bcf55-9a04-4850-8473-7f97e1c09b97&filename=f1441c071a96f87326f5eb2ae2bfc5a570e9260e7d2b74c2ac15df4037231c64.svg
          provider_id: yslg/pdf/pdf
          provider_name: yslg/pdf/pdf
          provider_type: builtin
          selected: true
          title: PDF转Markdown
          tool_configurations:
            image_dpi:
              type: constant
              value: 150
            include_images:
              type: constant
              value: true
            model:
              type: constant
              value:
                completion_params: {}
                mode: chat
                model: Qwen/Qwen3-32B
                model_type: llm
                provider: langgenius/siliconflow/siliconflow
          tool_description: 将PDF转换为Markdown,图片base64嵌入,无需大模型
          tool_label: PDF转Markdown
          tool_name: pdf_to_markdown
          tool_node_version: '2'
          tool_parameters:
            file:
              type: variable
              value:
                - '1770703294598'
                - file
          type: tool
        height: 140
        id: '1772527425324'
        position:
          x: 1881.4558888576478
          y: 697.8632689662784
        positionAbsolute:
          x: 1881.4558888576478
          y: 697.8632689662784
        selected: true
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
      # End node for the PDF path; exposes the PDF tool's `files`.
      - data:
          outputs:
            - value_selector:
                - '1772527425324'
                - files
              value_type: array[file]
              variable: files
          selected: false
          title: 输出 2
          type: end
        height: 88
        id: '1772779766541'
        position:
          x: 2183.4558888576476
          y: 697.8632689662784
        positionAbsolute:
          x: 2183.4558888576476
          y: 697.8632689662784
        selected: false
        sourcePosition: right
        targetPosition: left
        type: custom
        width: 242
    viewport:
      x: -675.5777822239224
      y: 9.568461206490326
      zoom: 0.7578582832552
  rag_pipeline_variables: []