dify
This commit is contained in:
@@ -0,0 +1,709 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/dify_extractor:0.0.1@50103421d4e002f059b662d21ad2d7a1cf34869abdbe320299d7e382516ebb1c
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: ''
|
||||
icon_type: emoji
|
||||
name: file-general-economy
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: if-else
|
||||
id: 1752479895761-source-1752481129417-target
|
||||
source: '1752479895761'
|
||||
sourceHandle: source
|
||||
target: '1752481129417'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: tool
|
||||
id: 1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
target: '1752480460682'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: document-extractor
|
||||
id: 1752481129417-false-1752481112180-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 'false'
|
||||
target: '1752481112180'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: variable-aggregator
|
||||
id: 1752480460682-source-1752482022496-target
|
||||
source: '1752480460682'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: document-extractor
|
||||
targetType: variable-aggregator
|
||||
id: 1752481112180-source-1752482022496-target
|
||||
source: '1752481112180'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752482022496-source-1752482151668-target
|
||||
source: '1752482022496'
|
||||
sourceHandle: source
|
||||
target: '1752482151668'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752482151668-source-1752477924228-target
|
||||
source: '1752482151668'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752482151668'
|
||||
- result
|
||||
indexing_technique: economy
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: keyword_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: true
|
||||
title: Knowledge Base
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 1076.4656678451215
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1076.4656678451215
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: File
|
||||
datasource_name: upload-file
|
||||
datasource_parameters: {}
|
||||
fileExtensions:
|
||||
- txt
|
||||
- markdown
|
||||
- mdx
|
||||
- pdf
|
||||
- html
|
||||
- xlsx
|
||||
- xls
|
||||
- vtt
|
||||
- properties
|
||||
- doc
|
||||
- docx
|
||||
- csv
|
||||
- eml
|
||||
- msg
|
||||
- pptx
|
||||
- xml
|
||||
- epub
|
||||
- ppt
|
||||
- md
|
||||
plugin_id: langgenius/file
|
||||
provider_name: file
|
||||
provider_type: local_file
|
||||
selected: false
|
||||
title: File
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752479895761'
|
||||
position:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
documents:
|
||||
description: the documents extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
images:
|
||||
description: The images extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg,
|
||||
jpeg)
|
||||
ja_JP: 解析するファイル(pdf, ppt, pptx, doc, docx, png, jpg, jpegをサポート)
|
||||
pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,
|
||||
jpg, jpeg)
|
||||
zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)
|
||||
label:
|
||||
en_US: file
|
||||
ja_JP: ファイル
|
||||
pt_BR: arquivo
|
||||
zh_Hans: file
|
||||
llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,
|
||||
png, jpg, jpeg)
|
||||
max: null
|
||||
min: null
|
||||
name: file
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: file
|
||||
params:
|
||||
file: ''
|
||||
provider_id: langgenius/dify_extractor/dify_extractor
|
||||
provider_name: langgenius/dify_extractor/dify_extractor
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: Dify Extractor
|
||||
tool_configurations: {}
|
||||
tool_description: Dify Extractor
|
||||
tool_label: Dify Extractor
|
||||
tool_name: dify_extractor
|
||||
tool_parameters:
|
||||
file:
|
||||
type: variable
|
||||
value:
|
||||
- '1752479895761'
|
||||
- file
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752480460682'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_array_file: false
|
||||
selected: false
|
||||
title: 文档提取器
|
||||
type: document-extractor
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
height: 90
|
||||
id: '1752481112180'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
cases:
|
||||
- case_id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
conditions:
|
||||
- comparison_operator: is
|
||||
id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
|
||||
value: .xlsx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
|
||||
value: .xls
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
|
||||
value: .md
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
|
||||
value: .markdown
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
|
||||
value: .mdx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
|
||||
value: .html
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
|
||||
value: .htm
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
|
||||
value: .docx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
|
||||
value: .csv
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
|
||||
value: .txt
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
logical_operator: or
|
||||
selected: false
|
||||
title: IF/ELSE
|
||||
type: if-else
|
||||
height: 358
|
||||
id: '1752481129417'
|
||||
position:
|
||||
x: -489.57009543377865
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -489.57009543377865
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
advanced_settings:
|
||||
group_enabled: false
|
||||
groups:
|
||||
- groupId: f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7
|
||||
group_name: Group1
|
||||
output_type: string
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
output_type: string
|
||||
selected: false
|
||||
title: Variable Aggregator
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
height: 129
|
||||
id: '1752482022496'
|
||||
position:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: チャンク化したいテキスト。
|
||||
pt_BR: O texto que você deseja dividir.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: 入力変数
|
||||
pt_BR: Variável de entrada
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: チャンクの区切り記号。
|
||||
pt_BR: O delimitador dos blocos.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: 区切り記号
|
||||
pt_BR: Delimitador
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: 最大長のチャンク。
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: チャンク最大長
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: チャンクの重複長
|
||||
pt_BR: O comprimento de sobreposição dos fragmentos
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: チャンク重複長
|
||||
pt_BR: Comprimento de sobreposição do bloco
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Substituir espaços consecutivos, novas linhas e tabulações
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Substituir espaços consecutivos, novas linhas e tabulações
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Excluir todos os URLs e endereços de e-mail
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Excluir todos os URLs e endereços de e-mail
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunker/general_chunker
|
||||
provider_name: langgenius/general_chunker/general_chunker
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: General Chunker
|
||||
tool_configurations: {}
|
||||
tool_description: A tool for general text chunking mode, the chunks retrieved and recalled are the same.
|
||||
tool_label: General Chunker
|
||||
tool_name: general_chunker
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752482022496.output#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752482151668'
|
||||
position:
|
||||
x: 693.5300771507484
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 693.5300771507484
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: 701.4999626224237
|
||||
y: 128.33739021504016
|
||||
zoom: 0.48941689643726966
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Chunk overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
||||
@@ -0,0 +1,709 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/dify_extractor:0.0.1@50103421d4e002f059b662d21ad2d7a1cf34869abdbe320299d7e382516ebb1c
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: '#FFF4ED'
|
||||
icon_type: emoji
|
||||
name: file-general-high-quality
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: if-else
|
||||
id: 1752479895761-source-1752481129417-target
|
||||
source: '1752479895761'
|
||||
sourceHandle: source
|
||||
target: '1752481129417'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: tool
|
||||
id: 1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
target: '1752480460682'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: document-extractor
|
||||
id: 1752481129417-false-1752481112180-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 'false'
|
||||
target: '1752481112180'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: variable-aggregator
|
||||
id: 1752480460682-source-1752482022496-target
|
||||
source: '1752480460682'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: document-extractor
|
||||
targetType: variable-aggregator
|
||||
id: 1752481112180-source-1752482022496-target
|
||||
source: '1752481112180'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752482022496-source-1752482151668-target
|
||||
source: '1752482022496'
|
||||
sourceHandle: source
|
||||
target: '1752482151668'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752482151668-source-1752477924228-target
|
||||
source: '1752482151668'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752482151668'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: false
|
||||
title: Knowledge Base
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 1076.4656678451215
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1076.4656678451215
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: File
|
||||
datasource_name: upload-file
|
||||
datasource_parameters: {}
|
||||
fileExtensions:
|
||||
- txt
|
||||
- markdown
|
||||
- mdx
|
||||
- pdf
|
||||
- html
|
||||
- xlsx
|
||||
- xls
|
||||
- vtt
|
||||
- properties
|
||||
- doc
|
||||
- docx
|
||||
- csv
|
||||
- eml
|
||||
- msg
|
||||
- pptx
|
||||
- xml
|
||||
- epub
|
||||
- ppt
|
||||
- md
|
||||
plugin_id: langgenius/file
|
||||
provider_name: file
|
||||
provider_type: local_file
|
||||
selected: false
|
||||
title: File
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752479895761'
|
||||
position:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
documents:
|
||||
description: the documents extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
images:
|
||||
description: The images extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg,
|
||||
jpeg)
|
||||
ja_JP: 解析するファイル(pdf, ppt, pptx, doc, docx, png, jpg, jpegをサポート)
|
||||
pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,
|
||||
jpg, jpeg)
|
||||
zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)
|
||||
label:
|
||||
en_US: file
|
||||
ja_JP: ファイル
|
||||
pt_BR: arquivo
|
||||
zh_Hans: file
|
||||
llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,
|
||||
png, jpg, jpeg)
|
||||
max: null
|
||||
min: null
|
||||
name: file
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: file
|
||||
params:
|
||||
file: ''
|
||||
provider_id: langgenius/dify_extractor/dify_extractor
|
||||
provider_name: langgenius/dify_extractor/dify_extractor
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: Dify Extractor
|
||||
tool_configurations: {}
|
||||
tool_description: Dify Extractor
|
||||
tool_label: Dify Extractor
|
||||
tool_name: dify_extractor
|
||||
tool_parameters:
|
||||
file:
|
||||
type: variable
|
||||
value:
|
||||
- '1752479895761'
|
||||
- file
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752480460682'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_array_file: false
|
||||
selected: false
|
||||
title: 文档提取器
|
||||
type: document-extractor
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
height: 90
|
||||
id: '1752481112180'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
cases:
|
||||
- case_id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
conditions:
|
||||
- comparison_operator: is
|
||||
id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
|
||||
value: .xlsx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
|
||||
value: .xls
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
|
||||
value: .md
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
|
||||
value: .markdown
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
|
||||
value: .mdx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
|
||||
value: .html
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
|
||||
value: .htm
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
|
||||
value: .docx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
|
||||
value: .csv
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
|
||||
value: .txt
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
logical_operator: or
|
||||
selected: false
|
||||
title: IF/ELSE
|
||||
type: if-else
|
||||
height: 358
|
||||
id: '1752481129417'
|
||||
position:
|
||||
x: -489.57009543377865
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -489.57009543377865
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
advanced_settings:
|
||||
group_enabled: false
|
||||
groups:
|
||||
- groupId: f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7
|
||||
group_name: Group1
|
||||
output_type: string
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
output_type: string
|
||||
selected: false
|
||||
title: Variable Aggregator
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
height: 129
|
||||
id: '1752482022496'
|
||||
position:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: チャンク化したいテキスト。
|
||||
pt_BR: O texto que você deseja dividir.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: 入力変数
|
||||
pt_BR: Variável de entrada
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: チャンクの区切り記号。
|
||||
pt_BR: O delimitador dos pedaços.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: 区切り記号
|
||||
pt_BR: Delimitador
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: チャンクの最大長。
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: チャンク最大長
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: チャンクの重複長
|
||||
pt_BR: O comprimento de sobreposição do bloco.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: チャンク重複長
|
||||
pt_BR: Comprimento de sobreposição do bloco
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Substituir espaços, quebras de linha e tabulações consecutivos
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Substituir espaços, quebras de linha e tabulações consecutivos
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Excluir todos os URLs e endereços de e-mail
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Excluir todos os URLs e endereços de e-mail
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunker/general_chunker
|
||||
provider_name: langgenius/general_chunker/general_chunker
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: General Chunker
|
||||
tool_configurations: {}
|
||||
tool_description: A tool for general text chunking mode, the chunks retrieved and recalled are the same.
|
||||
tool_label: General Chunker
|
||||
tool_name: general_chunker
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752482022496.output#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752482151668'
|
||||
position:
|
||||
x: 693.5300771507484
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 693.5300771507484
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: 701.4999626224237
|
||||
y: 128.33739021504016
|
||||
zoom: 0.48941689643726966
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into chunks. You can also use special
|
||||
delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Chunk overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
||||
814
dify/api/services/rag_pipeline/transform/file-parentchild.yml
Normal file
814
dify/api/services/rag_pipeline/transform/file-parentchild.yml
Normal file
@@ -0,0 +1,814 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/parentchild_chunker:0.0.1@b1a28a27e33fec442ce494da2a7814edd7eb9d646c81f38bccfcf1133d486e40
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/dify_extractor:0.0.1@50103421d4e002f059b662d21ad2d7a1cf34869abdbe320299d7e382516ebb1c
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: '#FFF4ED'
|
||||
icon_type: emoji
|
||||
name: file-parentchild
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: if-else
|
||||
id: 1752479895761-source-1752481129417-target
|
||||
source: '1752479895761'
|
||||
sourceHandle: source
|
||||
target: '1752481129417'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: tool
|
||||
id: 1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
target: '1752480460682'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: if-else
|
||||
targetType: document-extractor
|
||||
id: 1752481129417-false-1752481112180-target
|
||||
source: '1752481129417'
|
||||
sourceHandle: 'false'
|
||||
target: '1752481112180'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: variable-aggregator
|
||||
id: 1752480460682-source-1752482022496-target
|
||||
source: '1752480460682'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: document-extractor
|
||||
targetType: variable-aggregator
|
||||
id: 1752481112180-source-1752482022496-target
|
||||
source: '1752481112180'
|
||||
sourceHandle: source
|
||||
target: '1752482022496'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752482022496-source-1752575473519-target
|
||||
source: '1752482022496'
|
||||
sourceHandle: source
|
||||
target: '1752575473519'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752575473519-source-1752477924228-target
|
||||
source: '1752575473519'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: hierarchical_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752575473519'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: false
|
||||
title: Knowledge Base
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 994.3774545394483
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 994.3774545394483
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: File
|
||||
datasource_name: upload-file
|
||||
datasource_parameters: {}
|
||||
fileExtensions:
|
||||
- txt
|
||||
- markdown
|
||||
- mdx
|
||||
- pdf
|
||||
- html
|
||||
- xlsx
|
||||
- xls
|
||||
- vtt
|
||||
- properties
|
||||
- doc
|
||||
- docx
|
||||
- csv
|
||||
- eml
|
||||
- msg
|
||||
- pptx
|
||||
- xml
|
||||
- epub
|
||||
- ppt
|
||||
- md
|
||||
plugin_id: langgenius/file
|
||||
provider_name: file
|
||||
provider_type: local_file
|
||||
selected: false
|
||||
title: File
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752479895761'
|
||||
position:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -839.8603427660498
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
documents:
|
||||
description: the documents extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
images:
|
||||
description: The images extracted from the file
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: the file to be parsed (supports pdf, ppt, pptx, doc, docx, png, jpg,
|
||||
jpeg)
|
||||
ja_JP: 解析するファイル(pdf, ppt, pptx, doc, docx, png, jpg, jpegをサポート)
|
||||
pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,
|
||||
jpg, jpeg)
|
||||
zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)
|
||||
label:
|
||||
en_US: file
|
||||
ja_JP: ファイル
|
||||
pt_BR: arquivo
|
||||
zh_Hans: file
|
||||
llm_description: the file to be parsed (supports pdf, ppt, pptx, doc, docx,
|
||||
png, jpg, jpeg)
|
||||
max: null
|
||||
min: null
|
||||
name: file
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: file
|
||||
params:
|
||||
file: ''
|
||||
provider_id: langgenius/dify_extractor/dify_extractor
|
||||
provider_name: langgenius/dify_extractor/dify_extractor
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: Dify Extractor
|
||||
tool_configurations: {}
|
||||
tool_description: Dify Extractor
|
||||
tool_label: Dify Extractor
|
||||
tool_name: dify_extractor
|
||||
tool_parameters:
|
||||
file:
|
||||
type: variable
|
||||
value:
|
||||
- '1752479895761'
|
||||
- file
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752480460682'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_array_file: false
|
||||
selected: false
|
||||
title: Doc Extractor
|
||||
type: document-extractor
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
height: 90
|
||||
id: '1752481112180'
|
||||
position:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
positionAbsolute:
|
||||
x: -108.28652292656551
|
||||
y: 390.6576481692478
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
cases:
|
||||
- case_id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
conditions:
|
||||
- comparison_operator: is
|
||||
id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
|
||||
value: .xlsx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
|
||||
value: .xls
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
|
||||
value: .md
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
|
||||
value: .markdown
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
|
||||
value: .mdx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
|
||||
value: .html
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
|
||||
value: .htm
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
|
||||
value: .docx
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
|
||||
value: .csv
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
- comparison_operator: is
|
||||
id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
|
||||
value: .txt
|
||||
varType: file
|
||||
variable_selector:
|
||||
- '1752479895761'
|
||||
- file
|
||||
- extension
|
||||
id: 24e47cad-f1e2-4f74-9884-3f49d5bb37b7
|
||||
logical_operator: or
|
||||
selected: false
|
||||
title: IF/ELSE
|
||||
type: if-else
|
||||
height: 358
|
||||
id: '1752481129417'
|
||||
position:
|
||||
x: -512.2335487893622
|
||||
y: 251.3910724383104
|
||||
positionAbsolute:
|
||||
x: -512.2335487893622
|
||||
y: 251.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
advanced_settings:
|
||||
group_enabled: false
|
||||
groups:
|
||||
- groupId: f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7
|
||||
group_name: Group1
|
||||
output_type: string
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
output_type: string
|
||||
selected: false
|
||||
title: Variable Aggregator
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752481112180'
|
||||
- text
|
||||
- - '1752480460682'
|
||||
- text
|
||||
height: 129
|
||||
id: '1752482022496'
|
||||
position:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 319.441649575055
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: Parent child chunks result
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: チャンク化したいテキスト。
|
||||
pt_BR: O texto que você deseja dividir.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input text
|
||||
ja_JP: 入力テキスト
|
||||
pt_BR: Texto de entrada
|
||||
zh_Hans: 输入文本
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_text
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 1024
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for chunking
|
||||
ja_JP: チャンク分割の最大長
|
||||
pt_BR: Comprimento máximo para divisão
|
||||
zh_Hans: 用于分块的最大长度
|
||||
label:
|
||||
en_US: Maximum Length
|
||||
ja_JP: 最大長
|
||||
pt_BR: Comprimento Máximo
|
||||
zh_Hans: 最大长度
|
||||
llm_description: Maximum length allowed per chunk
|
||||
max: null
|
||||
min: null
|
||||
name: max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '
|
||||
|
||||
|
||||
'
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for chunking
|
||||
ja_JP: チャンク分割に使用する区切り文字
|
||||
pt_BR: Separador usado para divisão
|
||||
zh_Hans: 用于分块的分隔符
|
||||
label:
|
||||
en_US: Chunk Separator
|
||||
ja_JP: チャンク区切り文字
|
||||
pt_BR: Separador de Divisão
|
||||
zh_Hans: 分块分隔符
|
||||
llm_description: The separator used to split chunks
|
||||
max: null
|
||||
min: null
|
||||
name: separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 512
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for subchunking
|
||||
ja_JP: サブチャンク分割の最大長
|
||||
pt_BR: Comprimento máximo para subdivisão
|
||||
zh_Hans: 用于子分块的最大长度
|
||||
label:
|
||||
en_US: Subchunk Maximum Length
|
||||
ja_JP: サブチャンク最大長
|
||||
pt_BR: Comprimento Máximo de Subdivisão
|
||||
zh_Hans: 子分块最大长度
|
||||
llm_description: Maximum length allowed per subchunk
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '. '
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for subchunking
|
||||
ja_JP: サブチャンク分割に使用する区切り文字
|
||||
pt_BR: Separador usado para subdivisão
|
||||
zh_Hans: 用于子分块的分隔符
|
||||
label:
|
||||
en_US: Subchunk Separator
|
||||
ja_JP: サブチャンク区切り文字
|
||||
pt_BR: Separador de Subdivisão
|
||||
zh_Hans: 子分块分隔符
|
||||
llm_description: The separator used to split subchunks
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: paragraph
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Split text into paragraphs based on separator and maximum chunk
|
||||
length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
ja_JP: セパレーターと最大チャンク長に基づいてテキストを段落に分割し、分割されたテキスト
|
||||
を親ブロックとして使用するか、文書全体を親ブロックとして使用して直接取得します。
|
||||
pt_BR: Dividir texto em parágrafos com base no separador e no comprimento
|
||||
máximo do bloco, usando o texto dividido como bloco pai ou documento
|
||||
completo como bloco pai e diretamente recuperá-lo.
|
||||
zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。
|
||||
label:
|
||||
en_US: Parent Mode
|
||||
ja_JP: 親モード
|
||||
pt_BR: Modo Pai
|
||||
zh_Hans: 父块模式
|
||||
llm_description: Split text into paragraphs based on separator and maximum
|
||||
chunk length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
max: null
|
||||
min: null
|
||||
name: parent_mode
|
||||
options:
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Paragraph
|
||||
ja_JP: 段落
|
||||
pt_BR: Parágrafo
|
||||
zh_Hans: 段落
|
||||
value: paragraph
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Full Document
|
||||
ja_JP: 全文
|
||||
pt_BR: Documento Completo
|
||||
zh_Hans: 全文
|
||||
value: full_doc
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: select
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove extra spaces in the text
|
||||
ja_JP: テキスト内の余分なスペースを削除するかどうか
|
||||
pt_BR: Se deve remover espaços extras no texto
|
||||
zh_Hans: 是否移除文本中的多余空格
|
||||
label:
|
||||
en_US: Remove Extra Spaces
|
||||
ja_JP: 余分なスペースを削除
|
||||
pt_BR: Remover Espaços Extras
|
||||
zh_Hans: 移除多余空格
|
||||
llm_description: Whether to remove extra spaces in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_extra_spaces
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove URLs and emails in the text
|
||||
ja_JP: テキスト内のURLやメールアドレスを削除するかどうか
|
||||
pt_BR: Se deve remover URLs e e-mails no texto
|
||||
zh_Hans: 是否移除文本中的URL和电子邮件地址
|
||||
label:
|
||||
en_US: Remove URLs and Emails
|
||||
ja_JP: URLとメールアドレスを削除
|
||||
pt_BR: Remover URLs e E-mails
|
||||
zh_Hans: 移除URL和电子邮件地址
|
||||
llm_description: Whether to remove URLs and emails in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_urls_emails
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
input_text: ''
|
||||
max_length: ''
|
||||
parent_mode: ''
|
||||
remove_extra_spaces: ''
|
||||
remove_urls_emails: ''
|
||||
separator: ''
|
||||
subchunk_max_length: ''
|
||||
subchunk_separator: ''
|
||||
provider_id: langgenius/parentchild_chunker/parentchild_chunker
|
||||
provider_name: langgenius/parentchild_chunker/parentchild_chunker
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: Parent-child Chunker
|
||||
tool_configurations: {}
|
||||
tool_description: Parent-child Chunk Structure
|
||||
tool_label: Parent-child Chunker
|
||||
tool_name: parentchild_chunker
|
||||
tool_parameters:
|
||||
input_text:
|
||||
type: mixed
|
||||
value: '{{#1752482022496.output#}}'
|
||||
max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
parent_mode:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- parent_mode
|
||||
remove_extra_spaces:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
remove_urls_emails:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
subchunk_max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- child_max_chunk_length
|
||||
subchunk_separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.child_delimiter#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752575473519'
|
||||
position:
|
||||
x: 637.9241611063885
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 637.9241611063885
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: 948.6766333808323
|
||||
y: -102.06757184183238
|
||||
zoom: 0.8375774577380971
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 1024
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n
|
||||
label: Child delimiter
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n is recommended
|
||||
for splitting parent chunks into small child chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: child_delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 512
|
||||
label: Child max chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: child_max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: paragraph
|
||||
label: Parent mode
|
||||
max_length: 48
|
||||
options:
|
||||
- full_doc
|
||||
- paragraph
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: select
|
||||
unit: null
|
||||
variable: parent_mode
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
||||
@@ -0,0 +1,400 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/notion_datasource:0.0.1@2dd49c2c3ffff976be8d22efb1ac0f63522a8d0f24ef8c44729d0a50a94ec039
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: ''
|
||||
icon_type: emoji
|
||||
name: notion-general-economy
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752482151668-source-1752477924228-target
|
||||
source: '1752482151668'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: tool
|
||||
id: 1752489759475-source-1752482151668-target
|
||||
source: '1752489759475'
|
||||
sourceHandle: source
|
||||
target: '1752482151668'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752482151668'
|
||||
- result
|
||||
indexing_technique: economy
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: keyword_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: true
|
||||
title: Knowledge Base
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 1444.5503479271906
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1444.5503479271906
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: チャンク化したいテキスト。
|
||||
pt_BR: O texto que você deseja dividir.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: 入力変数
|
||||
pt_BR: Variável de entrada
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: チャンクの区切り記号。
|
||||
pt_BR: O delimitador dos pedaços.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: 区切り記号
|
||||
pt_BR: Delimitador
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: チャンクの最大長。
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: チャンク最大長
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: チャンクの重複長
|
||||
pt_BR: The chunk overlap length.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: チャンク重複長
|
||||
pt_BR: Chunk Overlap Length
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Replace consecutive spaces, newlines and tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Replace Consecutive Spaces, Newlines and Tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Delete all URLs and email addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Delete All URLs and Email Addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunker/general_chunker
|
||||
provider_name: langgenius/general_chunker/general_chunker
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: General Chunker
|
||||
tool_configurations: {}
|
||||
tool_description: A tool for general text chunking mode, the chunks retrieved and recalled are the same.
|
||||
tool_label: General Chunker
|
||||
tool_name: general_chunker
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752489759475.content#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752482151668'
|
||||
position:
|
||||
x: 1063.6922916384628
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1063.6922916384628
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Notion数据源
|
||||
datasource_name: notion_datasource
|
||||
datasource_parameters: {}
|
||||
plugin_id: langgenius/notion_datasource
|
||||
provider_name: notion_datasource
|
||||
provider_type: online_document
|
||||
selected: false
|
||||
title: Notion数据源
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752489759475'
|
||||
position:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -838.569649323166
|
||||
y: -168.94656489167426
|
||||
zoom: 1.286925643857699
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Chunk overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
||||
@@ -0,0 +1,400 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/notion_datasource:0.0.1@2dd49c2c3ffff976be8d22efb1ac0f63522a8d0f24ef8c44729d0a50a94ec039
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: '#FFF4ED'
|
||||
icon_type: emoji
|
||||
name: notion-general-high-quality
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752482151668-source-1752477924228-target
|
||||
source: '1752482151668'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: tool
|
||||
id: 1752489759475-source-1752482151668-target
|
||||
source: '1752489759475'
|
||||
sourceHandle: source
|
||||
target: '1752482151668'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752482151668'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: true
|
||||
title: Knowledge Base
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 1444.5503479271906
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1444.5503479271906
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: チャンク化したいテキスト。
|
||||
pt_BR: O texto que você deseja dividir.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: 入力変数
|
||||
pt_BR: Variável de entrada
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: チャンクの区切り記号。
|
||||
pt_BR: O delimitador dos pedaços.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: 区切り記号
|
||||
pt_BR: Delimitador
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: チャンクの最大長。
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: チャンク最大長
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: チャンクの重複長
|
||||
pt_BR: The chunk overlap length.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: チャンク重複長
|
||||
pt_BR: Chunk Overlap Length
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Replace consecutive spaces, newlines and tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Replace Consecutive Spaces, Newlines and Tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Delete all URLs and email addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Delete All URLs and Email Addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunker/general_chunker
|
||||
provider_name: langgenius/general_chunker/general_chunker
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: General Chunker
|
||||
tool_configurations: {}
|
||||
tool_description: A tool for general text chunking mode, the chunks retrieved and recalled are the same.
|
||||
tool_label: General Chunker
|
||||
tool_name: general_chunker
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752489759475.content#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752482151668'
|
||||
position:
|
||||
x: 1063.6922916384628
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1063.6922916384628
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Notion数据源
|
||||
datasource_name: notion_datasource
|
||||
datasource_parameters: {}
|
||||
plugin_id: langgenius/notion_datasource
|
||||
provider_name: notion_datasource
|
||||
provider_type: online_document
|
||||
selected: false
|
||||
title: Notion数据源
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752489759475'
|
||||
position:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -838.569649323166
|
||||
y: -168.94656489167426
|
||||
zoom: 1.286925643857699
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Chunk overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
||||
506
dify/api/services/rag_pipeline/transform/notion-parentchild.yml
Normal file
506
dify/api/services/rag_pipeline/transform/notion-parentchild.yml
Normal file
@@ -0,0 +1,506 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/parentchild_chunker:0.0.1@b1a28a27e33fec442ce494da2a7814edd7eb9d646c81f38bccfcf1133d486e40
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/notion_datasource:0.0.1@2dd49c2c3ffff976be8d22efb1ac0f63522a8d0f24ef8c44729d0a50a94ec039
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: ''
|
||||
icon_type: emoji
|
||||
name: notion-parentchild
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: tool
|
||||
id: 1752489759475-source-1752490343805-target
|
||||
source: '1752489759475'
|
||||
sourceHandle: source
|
||||
target: '1752490343805'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752490343805-source-1752477924228-target
|
||||
source: '1752490343805'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: hierarchical_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752490343805'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: false
|
||||
title: Knowledge Base
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 1486.2052698032674
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1486.2052698032674
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Notion数据源
|
||||
datasource_name: notion_datasource
|
||||
datasource_parameters: {}
|
||||
plugin_id: langgenius/notion_datasource
|
||||
provider_name: notion_datasource
|
||||
provider_type: online_document
|
||||
selected: false
|
||||
title: Notion数据源
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752489759475'
|
||||
position:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 736.9082104000458
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: Parent child chunks result
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: チャンク化したいテキスト。
|
||||
pt_BR: O texto que você deseja dividir.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input text
|
||||
ja_JP: 入力テキスト
|
||||
pt_BR: Texto de entrada
|
||||
zh_Hans: 输入文本
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_text
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 1024
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for chunking
|
||||
ja_JP: チャンク分割の最大長
|
||||
pt_BR: Comprimento máximo para divisão
|
||||
zh_Hans: 用于分块的最大长度
|
||||
label:
|
||||
en_US: Maximum Length
|
||||
ja_JP: 最大長
|
||||
pt_BR: Comprimento Máximo
|
||||
zh_Hans: 最大长度
|
||||
llm_description: Maximum length allowed per chunk
|
||||
max: null
|
||||
min: null
|
||||
name: max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '
|
||||
|
||||
|
||||
'
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for chunking
|
||||
ja_JP: チャンク分割に使用する区切り文字
|
||||
pt_BR: Separador usado para divisão
|
||||
zh_Hans: 用于分块的分隔符
|
||||
label:
|
||||
en_US: Chunk Separator
|
||||
ja_JP: チャンク区切り文字
|
||||
pt_BR: Separador de Divisão
|
||||
zh_Hans: 分块分隔符
|
||||
llm_description: The separator used to split chunks
|
||||
max: null
|
||||
min: null
|
||||
name: separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 512
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for subchunking
|
||||
ja_JP: サブチャンク分割の最大長
|
||||
pt_BR: Comprimento máximo para subdivisão
|
||||
zh_Hans: 用于子分块的最大长度
|
||||
label:
|
||||
en_US: Subchunk Maximum Length
|
||||
ja_JP: サブチャンク最大長
|
||||
pt_BR: Comprimento Máximo de Subdivisão
|
||||
zh_Hans: 子分块最大长度
|
||||
llm_description: Maximum length allowed per subchunk
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '. '
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for subchunking
|
||||
ja_JP: サブチャンク分割に使用する区切り文字
|
||||
pt_BR: Separador usado para subdivisão
|
||||
zh_Hans: 用于子分块的分隔符
|
||||
label:
|
||||
en_US: Subchunk Separator
|
||||
ja_JP: サブチャンキング用セパレーター
|
||||
pt_BR: Separador de Subdivisão
|
||||
zh_Hans: 子分块分隔符
|
||||
llm_description: The separator used to split subchunks
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: paragraph
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Split text into paragraphs based on separator and maximum chunk
|
||||
length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
ja_JP: セパレーターと最大チャンク長に基づいてテキストを段落に分割し、分割されたテキスト
|
||||
を親ブロックとして使用するか、文書全体を親ブロックとして使用して直接取得します。
|
||||
pt_BR: Dividir texto em parágrafos com base no separador e no comprimento
|
||||
máximo do bloco, usando o texto dividido como bloco pai ou documento
|
||||
completo como bloco pai e diretamente recuperá-lo.
|
||||
zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。
|
||||
label:
|
||||
en_US: Parent Mode
|
||||
ja_JP: 親子モード
|
||||
pt_BR: Modo Pai
|
||||
zh_Hans: 父块模式
|
||||
llm_description: Split text into paragraphs based on separator and maximum
|
||||
chunk length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
max: null
|
||||
min: null
|
||||
name: parent_mode
|
||||
options:
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Paragraph
|
||||
ja_JP: 段落
|
||||
pt_BR: Parágrafo
|
||||
zh_Hans: 段落
|
||||
value: paragraph
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Full Document
|
||||
ja_JP: 全文
|
||||
pt_BR: Documento Completo
|
||||
zh_Hans: 全文
|
||||
value: full_doc
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: select
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove extra spaces in the text
|
||||
ja_JP: テキスト内の余分なスペースを削除するかどうか
|
||||
pt_BR: Se deve remover espaços extras no texto
|
||||
zh_Hans: 是否移除文本中的多余空格
|
||||
label:
|
||||
en_US: Remove Extra Spaces
|
||||
ja_JP: 余分なスペースを削除
|
||||
pt_BR: Remover Espaços Extras
|
||||
zh_Hans: 移除多余空格
|
||||
llm_description: Whether to remove extra spaces in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_extra_spaces
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove URLs and emails in the text
|
||||
ja_JP: テキスト内のURLやメールアドレスを削除するかどうか
|
||||
pt_BR: Se deve remover URLs e e-mails no texto
|
||||
zh_Hans: 是否移除文本中的URL和电子邮件地址
|
||||
label:
|
||||
en_US: Remove URLs and Emails
|
||||
ja_JP: URLとメールアドレスを削除
|
||||
pt_BR: Remover URLs e E-mails
|
||||
zh_Hans: 移除URL和电子邮件地址
|
||||
llm_description: Whether to remove URLs and emails in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_urls_emails
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
input_text: ''
|
||||
max_length: ''
|
||||
parent_mode: ''
|
||||
remove_extra_spaces: ''
|
||||
remove_urls_emails: ''
|
||||
separator: ''
|
||||
subchunk_max_length: ''
|
||||
subchunk_separator: ''
|
||||
provider_id: langgenius/parentchild_chunker/parentchild_chunker
|
||||
provider_name: langgenius/parentchild_chunker/parentchild_chunker
|
||||
provider_type: builtin
|
||||
selected: true
|
||||
title: Parent-child Chunker
|
||||
tool_configurations: {}
|
||||
tool_description: Parent-child Chunk Structure
|
||||
tool_label: Parent-child Chunker
|
||||
tool_name: parentchild_chunker
|
||||
tool_parameters:
|
||||
input_text:
|
||||
type: mixed
|
||||
value: '{{#1752489759475.content#}}'
|
||||
max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
parent_mode:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- parent_mode
|
||||
remove_extra_spaces:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
remove_urls_emails:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
subchunk_max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- child_max_chunk_length
|
||||
subchunk_separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.child_delimiter#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752490343805'
|
||||
position:
|
||||
x: 1077.0240183162543
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1077.0240183162543
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -487.2912544090391
|
||||
y: -54.7029301848807
|
||||
zoom: 0.9994011715768695
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 1024
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n
|
||||
label: Child delimiter
|
||||
max_length: 199
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n is recommended
|
||||
for splitting parent chunks into smaller child chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: child_delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 512
|
||||
label: Child max chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: child_max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: paragraph
|
||||
label: Parent mode
|
||||
max_length: 48
|
||||
options:
|
||||
- full_doc
|
||||
- paragraph
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: select
|
||||
unit: null
|
||||
variable: parent_mode
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
||||
@@ -0,0 +1,674 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/firecrawl_datasource:0.0.1@f7aed0a26df0e5f4b9555371b5c9fa6db3c7dcf6a46dd1583245697bd90a539a
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/jina_datasource:0.0.1@cf23afb2c3eeccc5a187763a1947f583f0bb10aa56461e512ac4141bf930d608
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: ''
|
||||
icon_type: emoji
|
||||
name: website-crawl-general-economy
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752491761974-source-1752565435219-target
|
||||
source: '1752491761974'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752565402678-source-1752565435219-target
|
||||
source: '1752565402678'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752565435219-source-1752569675978-target
|
||||
source: '1752565435219'
|
||||
sourceHandle: source
|
||||
target: '1752569675978'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752569675978-source-1752477924228-target
|
||||
source: '1752569675978'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752569675978'
|
||||
- result
|
||||
indexing_technique: economy
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: keyword_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: true
|
||||
title: Knowledge Base
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 2140.4053851189346
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 2140.4053851189346
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Jina Reader
|
||||
datasource_name: jina_reader
|
||||
datasource_parameters:
|
||||
crawl_sub_pages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_crawl_sub_pages#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752491761974'
|
||||
- jina_limit
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_url#}}'
|
||||
use_sitemap:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_use_sitemap#}}'
|
||||
plugin_id: langgenius/jina_datasource
|
||||
provider_name: jinareader
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Jina Reader
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752491761974'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Firecrawl
|
||||
datasource_name: crawl
|
||||
datasource_parameters:
|
||||
crawl_subpages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_crawl_sub_pages#}}'
|
||||
exclude_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_exclude_paths#}}'
|
||||
include_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_include_only_paths#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_limit
|
||||
max_depth:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_max_depth
|
||||
only_main_content:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_extract_main_content#}}'
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_url#}}'
|
||||
plugin_id: langgenius/firecrawl_datasource
|
||||
provider_name: firecrawl
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Firecrawl
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752565402678'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
output_type: string
|
||||
selected: false
|
||||
title: Variable Aggregator
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752491761974'
|
||||
- content
|
||||
- - '1752565402678'
|
||||
- content
|
||||
height: 129
|
||||
id: '1752565435219'
|
||||
position:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: チャンク化したいテキスト。
|
||||
pt_BR: O texto que você deseja dividir.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: 入力変数
|
||||
pt_BR: Variável de entrada
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: チャンクの区切り記号。
|
||||
pt_BR: O delimitador dos pedaços.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: 区切り記号
|
||||
pt_BR: Delimitador
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: 最大長のチャンク。
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: チャンク最大長
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: チャンクの重複長。
|
||||
pt_BR: The chunk overlap length.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: チャンク重複長
|
||||
pt_BR: Chunk Overlap Length
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Replace consecutive spaces, newlines and tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Replace Consecutive Spaces, Newlines and Tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Delete all URLs and email addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Delete All URLs and Email Addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunker/general_chunker
|
||||
provider_name: langgenius/general_chunker/general_chunker
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: General Chunker
|
||||
tool_configurations: {}
|
||||
tool_description: A tool for general text chunking mode, the chunks retrieved and recalled are the same.
|
||||
tool_label: General Chunker
|
||||
tool_name: general_chunker
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752565435219.output#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752569675978'
|
||||
position:
|
||||
x: 1807.4306671642219
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1807.4306671642219
|
||||
y: 281.3910724383104
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -707.721097109337
|
||||
y: -93.07807382100896
|
||||
zoom: 0.9350632198875476
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: jina_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: jina_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Use sitemap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl
|
||||
iteratively based on page relevance, yielding fewer but higher-quality pages.
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_use_sitemap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: true
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Max depth
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: ''
|
||||
required: false
|
||||
tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 scrapes only
|
||||
the page at the entered URL, depth 1 scrapes that page plus everything one path level
|
||||
(entered URL + one /) below it, and so on.
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_max_depth
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Exclude paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: blog/*, /about/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_exclude_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Include only paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: articles/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_include_only_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Extract main content
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_extract_main_content
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character sequence used to split text. \n\n is recommended
|
||||
for splitting the original document into chunks at paragraph breaks. You can also use
|
||||
custom delimiters that you define yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 1024
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 50
|
||||
label: Chunk overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: Setting a chunk overlap preserves the semantic continuity between adjacent
|
||||
chunks, which improves retrieval. It is recommended to set it to 10%–25% of the
|
||||
maximum chunk length.
|
||||
type: number
|
||||
unit: characters
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
||||
@@ -0,0 +1,674 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/firecrawl_datasource:0.0.1@f7aed0a26df0e5f4b9555371b5c9fa6db3c7dcf6a46dd1583245697bd90a539a
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/jina_datasource:0.0.1@cf23afb2c3eeccc5a187763a1947f583f0bb10aa56461e512ac4141bf930d608
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: '#FFF4ED'
|
||||
icon_type: emoji
|
||||
name: website-crawl-general-high-quality
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752491761974-source-1752565435219-target
|
||||
source: '1752491761974'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752565402678-source-1752565435219-target
|
||||
source: '1752565402678'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752565435219-source-1752569675978-target
|
||||
source: '1752565435219'
|
||||
sourceHandle: source
|
||||
target: '1752569675978'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752569675978-source-1752477924228-target
|
||||
source: '1752569675978'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: text_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752569675978'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: false
|
||||
title: Knowledge Base
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 2140.4053851189346
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 2140.4053851189346
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Jina Reader
|
||||
datasource_name: jina_reader
|
||||
datasource_parameters:
|
||||
crawl_sub_pages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_crawl_sub_pages#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752491761974'
|
||||
- jina_limit
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_url#}}'
|
||||
use_sitemap:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_use_sitemap#}}'
|
||||
plugin_id: langgenius/jina_datasource
|
||||
provider_name: jinareader
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Jina Reader
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752491761974'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Firecrawl
|
||||
datasource_name: crawl
|
||||
datasource_parameters:
|
||||
crawl_subpages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_crawl_sub_pages#}}'
|
||||
exclude_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_exclude_paths#}}'
|
||||
include_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_include_only_paths#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_limit
|
||||
max_depth:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_max_depth
|
||||
only_main_content:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_extract_main_content#}}'
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_url#}}'
|
||||
plugin_id: langgenius/firecrawl_datasource
|
||||
provider_name: firecrawl
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Firecrawl
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752565402678'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
output_type: string
|
||||
selected: false
|
||||
title: Variable Aggregator
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752491761974'
|
||||
- content
|
||||
- - '1752565402678'
|
||||
- content
|
||||
height: 129
|
||||
id: '1752565435219'
|
||||
position:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: The result of the general chunk tool.
|
||||
properties:
|
||||
general_chunks:
|
||||
items:
|
||||
description: The chunk of the text.
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: チャンク化したいテキスト。
|
||||
pt_BR: O texto que você deseja dividir.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input Variable
|
||||
ja_JP: 入力変数
|
||||
pt_BR: Variável de entrada
|
||||
zh_Hans: 输入变量
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_variable
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The delimiter of the chunks.
|
||||
ja_JP: チャンクの区切り記号。
|
||||
pt_BR: O delimitador dos pedaços.
|
||||
zh_Hans: 块的分隔符。
|
||||
label:
|
||||
en_US: Delimiter
|
||||
ja_JP: 区切り記号
|
||||
pt_BR: Delimitador
|
||||
zh_Hans: 分隔符
|
||||
llm_description: The delimiter of the chunks, the format of the delimiter
|
||||
must be a string.
|
||||
max: null
|
||||
min: null
|
||||
name: delimiter
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The maximum chunk length.
|
||||
ja_JP: 最大長のチャンク。
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度。
|
||||
label:
|
||||
en_US: Maximum Chunk Length
|
||||
ja_JP: チャンク最大長
|
||||
pt_BR: O comprimento máximo do bloco
|
||||
zh_Hans: 最大块的长度
|
||||
llm_description: The maximum chunk length, the format of the chunk size
|
||||
must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: max_chunk_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The chunk overlap length.
|
||||
ja_JP: チャンクの重複長。
|
||||
pt_BR: The chunk overlap length.
|
||||
zh_Hans: 块的重叠长度。
|
||||
label:
|
||||
en_US: Chunk Overlap Length
|
||||
ja_JP: チャンク重複長
|
||||
pt_BR: Chunk Overlap Length
|
||||
zh_Hans: 块的重叠长度
|
||||
llm_description: The chunk overlap length, the format of the chunk overlap
|
||||
length must be an integer.
|
||||
max: null
|
||||
min: null
|
||||
name: chunk_overlap_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Replace consecutive spaces, newlines and tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Replace consecutive spaces, newlines and tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
label:
|
||||
en_US: Replace Consecutive Spaces, Newlines and Tabs
|
||||
ja_JP: 連続のスペース、改行、またはタブを置換する
|
||||
pt_BR: Replace Consecutive Spaces, Newlines and Tabs
|
||||
zh_Hans: 替换连续的空格、换行符和制表符
|
||||
llm_description: Replace consecutive spaces, newlines and tabs, the format
|
||||
of the replace must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: replace_consecutive_spaces_newlines_tabs
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Delete all URLs and email addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Delete all URLs and email addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
label:
|
||||
en_US: Delete All URLs and Email Addresses
|
||||
ja_JP: すべてのURLとメールアドレスを削除する
|
||||
pt_BR: Delete All URLs and Email Addresses
|
||||
zh_Hans: 删除所有URL和电子邮件地址
|
||||
llm_description: Delete all URLs and email addresses, the format of the
|
||||
delete must be a boolean.
|
||||
max: null
|
||||
min: null
|
||||
name: delete_all_urls_and_email_addresses
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
chunk_overlap_length: ''
|
||||
delete_all_urls_and_email_addresses: ''
|
||||
delimiter: ''
|
||||
input_variable: ''
|
||||
max_chunk_length: ''
|
||||
replace_consecutive_spaces_newlines_tabs: ''
|
||||
provider_id: langgenius/general_chunker/general_chunker
|
||||
provider_name: langgenius/general_chunker/general_chunker
|
||||
provider_type: builtin
|
||||
selected: false
|
||||
title: General Chunker
|
||||
tool_configurations: {}
|
||||
tool_description: A tool for general text chunking mode, the chunks retrieved and recalled are the same.
|
||||
tool_label: General Chunker
|
||||
tool_name: general_chunker
|
||||
tool_parameters:
|
||||
chunk_overlap_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- chunk_overlap
|
||||
delete_all_urls_and_email_addresses:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
delimiter:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
input_variable:
|
||||
type: mixed
|
||||
value: '{{#1752565435219.output#}}'
|
||||
max_chunk_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
replace_consecutive_spaces_newlines_tabs:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752569675978'
|
||||
position:
|
||||
x: 1807.4306671642219
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1807.4306671642219
|
||||
y: 281.3910724383104
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -707.721097109337
|
||||
y: -93.07807382100896
|
||||
zoom: 0.9350632198875476
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: jina_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: jina_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Use sitemap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl
|
||||
iteratively based on page relevance, yielding fewer but higher-quality pages.
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_use_sitemap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: true
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Max depth
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: ''
|
||||
required: false
|
||||
tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 scrapes only
|
||||
the page at the entered URL, depth 1 scrapes that page and every page one path
|
||||
segment (/) below it, and so on.
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_max_depth
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Exclude paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: blog/*, /about/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_exclude_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Include only paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: articles/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_include_only_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Extract main content
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_extract_main_content
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 1024
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 50
|
||||
label: Chunk overlap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: Setting the chunk overlap can maintain the semantic relevance between
|
||||
adjacent chunks, enhancing retrieval quality. It is recommended to set 10%–25% of the
|
||||
maximum chunk size.
|
||||
type: number
|
||||
unit: characters
|
||||
variable: chunk_overlap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
||||
@@ -0,0 +1,779 @@
|
||||
dependencies:
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/parentchild_chunker:0.0.1@b1a28a27e33fec442ce494da2a7814edd7eb9d646c81f38bccfcf1133d486e40
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/firecrawl_datasource:0.0.1@f7aed0a26df0e5f4b9555371b5c9fa6db3c7dcf6a46dd1583245697bd90a539a
|
||||
- current_identifier: null
|
||||
type: marketplace
|
||||
value:
|
||||
plugin_unique_identifier: langgenius/jina_datasource:0.0.1@cf23afb2c3eeccc5a187763a1947f583f0bb10aa56461e512ac4141bf930d608
|
||||
kind: rag_pipeline
|
||||
rag_pipeline:
|
||||
description: ''
|
||||
icon: 📙
|
||||
icon_background: ''
|
||||
icon_type: emoji
|
||||
name: website-crawl-parentchild
|
||||
version: 0.1.0
|
||||
workflow:
|
||||
conversation_variables: []
|
||||
environment_variables: []
|
||||
features: {}
|
||||
graph:
|
||||
edges:
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: tool
|
||||
targetType: knowledge-index
|
||||
id: 1752490343805-source-1752477924228-target
|
||||
source: '1752490343805'
|
||||
sourceHandle: source
|
||||
target: '1752477924228'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752491761974-source-1752565435219-target
|
||||
source: '1752491761974'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInIteration: false
|
||||
isInLoop: false
|
||||
sourceType: variable-aggregator
|
||||
targetType: tool
|
||||
id: 1752565435219-source-1752490343805-target
|
||||
source: '1752565435219'
|
||||
sourceHandle: source
|
||||
target: '1752490343805'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
- data:
|
||||
isInLoop: false
|
||||
sourceType: datasource
|
||||
targetType: variable-aggregator
|
||||
id: 1752565402678-source-1752565435219-target
|
||||
source: '1752565402678'
|
||||
sourceHandle: source
|
||||
target: '1752565435219'
|
||||
targetHandle: target
|
||||
type: custom
|
||||
zIndex: 0
|
||||
nodes:
|
||||
- data:
|
||||
chunk_structure: hierarchical_model
|
||||
embedding_model: text-embedding-ada-002
|
||||
embedding_model_provider: langgenius/openai/openai
|
||||
index_chunk_variable_selector:
|
||||
- '1752490343805'
|
||||
- result
|
||||
indexing_technique: high_quality
|
||||
keyword_number: 10
|
||||
retrieval_model:
|
||||
score_threshold: 0.5
|
||||
score_threshold_enabled: false
|
||||
search_method: semantic_search
|
||||
top_k: 3
|
||||
vector_setting:
|
||||
embedding_model_name: text-embedding-ada-002
|
||||
embedding_provider_name: langgenius/openai/openai
|
||||
selected: false
|
||||
title: Knowledge Base
|
||||
type: knowledge-index
|
||||
height: 114
|
||||
id: '1752477924228'
|
||||
position:
|
||||
x: 2215.5544306817387
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 2215.5544306817387
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
is_team_authorization: true
|
||||
output_schema:
|
||||
properties:
|
||||
result:
|
||||
description: Parent child chunks result
|
||||
items:
|
||||
type: object
|
||||
type: array
|
||||
type: object
|
||||
paramSchemas:
|
||||
- auto_generate: null
|
||||
default: null
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: The text you want to chunk.
|
||||
ja_JP: チャンク化したいテキスト。
|
||||
pt_BR: O texto que você deseja dividir.
|
||||
zh_Hans: 你想要分块的文本。
|
||||
label:
|
||||
en_US: Input text
|
||||
ja_JP: 入力テキスト
|
||||
pt_BR: Texto de entrada
|
||||
zh_Hans: 输入文本
|
||||
llm_description: The text you want to chunk.
|
||||
max: null
|
||||
min: null
|
||||
name: input_text
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 1024
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for chunking
|
||||
ja_JP: チャンク分割の最大長
|
||||
pt_BR: Comprimento máximo para divisão
|
||||
zh_Hans: 用于分块的最大长度
|
||||
label:
|
||||
en_US: Maximum Length
|
||||
ja_JP: 最大長
|
||||
pt_BR: Comprimento Máximo
|
||||
zh_Hans: 最大长度
|
||||
llm_description: Maximum length allowed per chunk
|
||||
max: null
|
||||
min: null
|
||||
name: max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '
|
||||
|
||||
|
||||
'
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for chunking
|
||||
ja_JP: チャンク分割に使用する区切り文字
|
||||
pt_BR: Separador usado para divisão
|
||||
zh_Hans: 用于分块的分隔符
|
||||
label:
|
||||
en_US: Chunk Separator
|
||||
ja_JP: チャンク区切り文字
|
||||
pt_BR: Separador de Divisão
|
||||
zh_Hans: 分块分隔符
|
||||
llm_description: The separator used to split chunks
|
||||
max: null
|
||||
min: null
|
||||
name: separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: 512
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Maximum length for subchunking
|
||||
ja_JP: サブチャンク分割の最大長
|
||||
pt_BR: Comprimento máximo para subdivisão
|
||||
zh_Hans: 用于子分块的最大长度
|
||||
label:
|
||||
en_US: Subchunk Maximum Length
|
||||
ja_JP: サブチャンク最大長
|
||||
pt_BR: Comprimento Máximo de Subdivisão
|
||||
zh_Hans: 子分块最大长度
|
||||
llm_description: Maximum length allowed per subchunk
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_max_length
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: number
|
||||
- auto_generate: null
|
||||
default: '. '
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Separator used for subchunking
|
||||
ja_JP: サブチャンク分割に使用する区切り文字
|
||||
pt_BR: Separador usado para subdivisão
|
||||
zh_Hans: 用于子分块的分隔符
|
||||
label:
|
||||
en_US: Subchunk Separator
|
||||
ja_JP: サブチャンキング用セパレーター
|
||||
pt_BR: Separador de Subdivisão
|
||||
zh_Hans: 子分块分隔符
|
||||
llm_description: The separator used to split subchunks
|
||||
max: null
|
||||
min: null
|
||||
name: subchunk_separator
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: string
|
||||
- auto_generate: null
|
||||
default: paragraph
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Split text into paragraphs based on separator and maximum chunk
|
||||
length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
ja_JP: セパレーターと最大チャンク長に基づいてテキストを段落に分割し、分割されたテキスト
|
||||
を親ブロックとして使用するか、文書全体を親ブロックとして使用して直接取得します。
|
||||
pt_BR: Dividir texto em parágrafos com base no separador e no comprimento
|
||||
máximo do bloco, usando o texto dividido como bloco pai ou documento
|
||||
completo como bloco pai e diretamente recuperá-lo.
|
||||
zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。
|
||||
label:
|
||||
en_US: Parent Mode
|
||||
ja_JP: 親モード
|
||||
pt_BR: Modo Pai
|
||||
zh_Hans: 父块模式
|
||||
llm_description: Split text into paragraphs based on separator and maximum
|
||||
chunk length, using split text as parent block or entire document as parent
|
||||
block and directly retrieve.
|
||||
max: null
|
||||
min: null
|
||||
name: parent_mode
|
||||
options:
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Paragraph
|
||||
ja_JP: 段落
|
||||
pt_BR: Parágrafo
|
||||
zh_Hans: 段落
|
||||
value: paragraph
|
||||
- icon: ''
|
||||
label:
|
||||
en_US: Full Document
|
||||
ja_JP: 全文
|
||||
pt_BR: Documento Completo
|
||||
zh_Hans: 全文
|
||||
value: full_doc
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: true
|
||||
scope: null
|
||||
template: null
|
||||
type: select
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove extra spaces in the text
|
||||
ja_JP: テキスト内の余分なスペースを削除するかどうか
|
||||
pt_BR: Se deve remover espaços extras no texto
|
||||
zh_Hans: 是否移除文本中的多余空格
|
||||
label:
|
||||
en_US: Remove Extra Spaces
|
||||
ja_JP: 余分なスペースを削除
|
||||
pt_BR: Remover Espaços Extras
|
||||
zh_Hans: 移除多余空格
|
||||
llm_description: Whether to remove extra spaces in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_extra_spaces
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
- auto_generate: null
|
||||
default: 0
|
||||
form: llm
|
||||
human_description:
|
||||
en_US: Whether to remove URLs and emails in the text
|
||||
ja_JP: テキスト内のURLやメールアドレスを削除するかどうか
|
||||
pt_BR: Se deve remover URLs e e-mails no texto
|
||||
zh_Hans: 是否移除文本中的URL和电子邮件地址
|
||||
label:
|
||||
en_US: Remove URLs and Emails
|
||||
ja_JP: URLとメールアドレスを削除
|
||||
pt_BR: Remover URLs e E-mails
|
||||
zh_Hans: 移除URL和电子邮件地址
|
||||
llm_description: Whether to remove URLs and emails in the text
|
||||
max: null
|
||||
min: null
|
||||
name: remove_urls_emails
|
||||
options: []
|
||||
placeholder: null
|
||||
precision: null
|
||||
required: false
|
||||
scope: null
|
||||
template: null
|
||||
type: boolean
|
||||
params:
|
||||
input_text: ''
|
||||
max_length: ''
|
||||
parent_mode: ''
|
||||
remove_extra_spaces: ''
|
||||
remove_urls_emails: ''
|
||||
separator: ''
|
||||
subchunk_max_length: ''
|
||||
subchunk_separator: ''
|
||||
provider_id: langgenius/parentchild_chunker/parentchild_chunker
|
||||
provider_name: langgenius/parentchild_chunker/parentchild_chunker
|
||||
provider_type: builtin
|
||||
selected: true
|
||||
title: Parent-child Chunker
|
||||
tool_configurations: {}
|
||||
tool_description: Parent-child Chunk Structure
|
||||
tool_label: Parent-child Chunker
|
||||
tool_name: parentchild_chunker
|
||||
tool_parameters:
|
||||
input_text:
|
||||
type: mixed
|
||||
value: '{{#1752565435219.output#}}'
|
||||
max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- max_chunk_length
|
||||
parent_mode:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- parent_mode
|
||||
remove_extra_spaces:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.replace_consecutive_spaces#}}'
|
||||
remove_urls_emails:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delete_urls_email#}}'
|
||||
separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.delimiter#}}'
|
||||
subchunk_max_length:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- shared
|
||||
- child_max_chunk_length
|
||||
subchunk_separator:
|
||||
type: mixed
|
||||
value: '{{#rag.shared.child_delimiter#}}'
|
||||
type: tool
|
||||
height: 52
|
||||
id: '1752490343805'
|
||||
position:
|
||||
x: 1853.5260563244174
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1853.5260563244174
|
||||
y: 281.3910724383104
|
||||
selected: true
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Jina Reader
|
||||
datasource_name: jina_reader
|
||||
datasource_parameters:
|
||||
crawl_sub_pages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_crawl_sub_pages#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752491761974'
|
||||
- jina_limit
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_url#}}'
|
||||
use_sitemap:
|
||||
type: mixed
|
||||
value: '{{#rag.1752491761974.jina_use_sitemap#}}'
|
||||
plugin_id: langgenius/jina_datasource
|
||||
provider_name: jinareader
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Jina Reader
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752491761974'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
datasource_configurations: {}
|
||||
datasource_label: Firecrawl
|
||||
datasource_name: crawl
|
||||
datasource_parameters:
|
||||
crawl_subpages:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_crawl_sub_pages#}}'
|
||||
exclude_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_exclude_paths#}}'
|
||||
include_paths:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_include_only_paths#}}'
|
||||
limit:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_limit
|
||||
max_depth:
|
||||
type: variable
|
||||
value:
|
||||
- rag
|
||||
- '1752565402678'
|
||||
- firecrawl_max_depth
|
||||
only_main_content:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_extract_main_content#}}'
|
||||
url:
|
||||
type: mixed
|
||||
value: '{{#rag.1752565402678.firecrawl_url#}}'
|
||||
plugin_id: langgenius/firecrawl_datasource
|
||||
provider_name: firecrawl
|
||||
provider_type: website_crawl
|
||||
selected: false
|
||||
title: Firecrawl
|
||||
type: datasource
|
||||
height: 52
|
||||
id: '1752565402678'
|
||||
position:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
positionAbsolute:
|
||||
x: 1067.7526055798794
|
||||
y: 417.32608398342404
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
- data:
|
||||
output_type: string
|
||||
selected: false
|
||||
title: Variable Aggregator
|
||||
type: variable-aggregator
|
||||
variables:
|
||||
- - '1752491761974'
|
||||
- content
|
||||
- - '1752565402678'
|
||||
- content
|
||||
height: 129
|
||||
id: '1752565435219'
|
||||
position:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
positionAbsolute:
|
||||
x: 1505.4306671642219
|
||||
y: 281.3910724383104
|
||||
selected: false
|
||||
sourcePosition: right
|
||||
targetPosition: left
|
||||
type: custom
|
||||
width: 242
|
||||
viewport:
|
||||
x: -826.1791044466438
|
||||
y: -71.91725474841303
|
||||
zoom: 0.9980166672552107
|
||||
rag_pipeline_variables:
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: jina_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: jina_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752491761974'
|
||||
default_value: null
|
||||
label: Use sitemap
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl
|
||||
iteratively based on page relevance, yielding fewer but higher-quality pages.
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: jina_use_sitemap
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: URL
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: https://docs.dify.ai/en/
|
||||
required: true
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_url
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: true
|
||||
label: Crawl sub-pages
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_crawl_sub_pages
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: 10
|
||||
label: Limit
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_limit
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Max depth
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: ''
|
||||
required: false
|
||||
tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 scrapes only
|
||||
the page at the entered URL, depth 1 scrapes that page and every page one path
|
||||
segment (/) below it, and so on.
|
||||
type: number
|
||||
unit: null
|
||||
variable: firecrawl_max_depth
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Exclude paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: blog/*, /about/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_exclude_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Include only paths
|
||||
max_length: 256
|
||||
options: []
|
||||
placeholder: articles/*
|
||||
required: false
|
||||
tooltips: null
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: firecrawl_include_only_paths
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: '1752565402678'
|
||||
default_value: null
|
||||
label: Extract main content
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: firecrawl_extract_main_content
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n\n
|
||||
label: Delimiter
|
||||
max_length: 100
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n\n is recommended
|
||||
for splitting the original document into large parent chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 1024
|
||||
label: Maximum chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: \n
|
||||
label: Child delimiter
|
||||
max_length: 199
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: A delimiter is the character used to separate text. \n is recommended
|
||||
for splitting parent chunks into small child chunks. You can also use
|
||||
special delimiters defined by yourself.
|
||||
type: text-input
|
||||
unit: null
|
||||
variable: child_delimiter
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: 512
|
||||
label: Child max chunk length
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: number
|
||||
unit: characters
|
||||
variable: child_max_chunk_length
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: paragraph
|
||||
label: Parent mode
|
||||
max_length: 48
|
||||
options:
|
||||
- full_doc
|
||||
- paragraph
|
||||
placeholder: null
|
||||
required: true
|
||||
tooltips: null
|
||||
type: select
|
||||
unit: null
|
||||
variable: parent_mode
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Replace consecutive spaces, newlines and tabs
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: replace_consecutive_spaces
|
||||
- allow_file_extension: null
|
||||
allow_file_upload_methods: null
|
||||
allowed_file_types: null
|
||||
belong_to_node_id: shared
|
||||
default_value: null
|
||||
label: Delete all URLs and email addresses
|
||||
max_length: 48
|
||||
options: []
|
||||
placeholder: null
|
||||
required: false
|
||||
tooltips: null
|
||||
type: checkbox
|
||||
unit: null
|
||||
variable: delete_urls_email
|
||||
Reference in New Issue
Block a user