Files
urbanLifeline/dify/api/constants/pipeline_templates.json
2025-12-01 17:21:38 +08:00

7343 lines
822 KiB
JSON
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"pipeline_templates": {
"en-US": {
"pipeline_templates": [
{
"id": "9f5ea5a7-7796-49f3-9e9a-ae2d8e84cfa3",
"name": "General Mode-ECO",
"description": "In this template, the document content is divided into smaller paragraphs, known as general chunks, which are directly used for matching user queries and retrieval in Economical indexing mode.",
"icon": {
"icon_type": "image",
"icon": "52064ff0-26b6-47d0-902f-e331f94d959b",
"icon_background": null,
"icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAT1klEQVR4Ae1dzXPcRBbvlsZ2xo6dcbwXinyMC+IDW5WY08IJh2NyIFRxJLvhHyDxaWv3kuS0e4v5CwjLHqmCHMgxMbVbBZxIOEAVCWXnq7hsMiaJPf4aad9Pmh5rNBqPPmdamtdVdkutVuv1r396\/fX0RgpNwspvterurqjatqiatlWxhKgYUhyHeLaQFYrwh5OqE3v+SSkqtrruSS\/yoRRijbBa89bRSZN7aVLYq7hu2eKBgfzSWLXpeqkkVmdfmXau4fogA8nc37CyUqs0TLEghfUOEatKhJoXspNU\/ZVqOJ8mbXGHCLlq2\/ZdKY07ZkMsz85Ot5E6a2T6QsB7j2oL9Aa+QxVdoArhryMYhiEMUnmmaQpJKg1\/SEMgcJxzHJumm4ZjFVR+dT4MMWEp8OcNOLdI3algWQ3KQ52GbTl5LcuNGw2L8lEfExBASiHt5YZhfDZ3ZPpOQJZUkzIjIDSdZVgXbCnfI4kXlNQgS6lkOkQD2UZGRlqEU3k47g8CjUZDgIy7uzsUN8TOzm7bg4kcq0Tpq68f+8P1tgspnqROQId4JXGRXrlLalwG0o2NjRLZRh3y4ZyDngiAhNvbWw4ZlZYEEUlLXH\/t6PTVtKVOlQn3H\/7vnLSNazSuqELQkZGSOHCg7MRpC87lZY\/A1tZ2i4x4GoiYtkZMhYCk9aoN0\/6UZFyAoEw8oFCcAK24vr7uHTd+ZY7IxTRm0okJuPKodtGy7SvobtG1lstl0npjxUGfa9JCABqxXq8rItJs2VpMOj6MTUBnrGeKyzQXuwQJR0dHxMTERGu22pKaDwqFAMaFICHIiEDtv3Ti2Mxi3ErGIiC6XMuwv6Sx3jxrvbjQ5\/u+zc0th4hY+sHSjTEq34\/TJUcmYJN8tzHRwDrd1NRka70u35Cy9FERgDZ8\/vyF0yUTkVaNEXk6KgkjEdBLPqzhTU4eZPJFbbWC5QcJX7x46awjxiFhaAL6yQfNx+t5BWNTzOqgG4YmxGJ2VBKGIiCTL2bLDNFtcUnYubEaAFpzwlFFt8uaLwAgTnJ6Q3ADHKEluaq1bX9JiqvSC5qeBPz1YQ07G\/OYcGDMx91uL0iH9zq4oeYF4MyuaV3uhca+XTBtrV0QwvgUBR86NMUTjl5o8nUHAUxMfv\/9uWOBQ13z4onjM0vdoOlKQGfcZ9o\/YIdjfHycdze6IcjpgQhgnXBjYwPX1mjb7s1uyzNdu2Da270G8sGKhbfWAjHmxH0QAGewO0ah0thx7AQCcwcS0O16xTmM+7C3y4ERiIOAZ2t24f7D2rmgMgIJSCZVzuAR5FNWyUE3cxojsB8CmDsoBUbfp1wLmhV3EPDXR7XLapsN3S8HRiAJAuiKYZ5Hw7nqrmE5hive8joISJ9QXUAGqE8OjEAaCMAoGYE04kW\/FmwjIMZ+0H5gLP44MAJpIODhU4W04AVvmW0EVGO\/0VE2KPWCxMfJEfBoQXyk1gotAq48rs3z2K+FCx+kjAC0ICYlFBbwma4qvkVA+jzvAhK561XQcJw2Aq1JrWUtqLJbBJSGfAeJ3P0qaDhOGwF8lotAmtDhGo4dAmJmQiZd80hgDQgUOGSBABwSqG5YzYYdAjbMxgIeyOTLAnYuUyEA8oGECPAPhNghoG1LR\/sZhnsRFzgwAlkgAHtBJ9juONAhIDHzFBLhp4UDI5AlAoqAjmc0elCTgKKKhwZ5nkI6B0YgLQSUkqPe2FF6zS7YnYAodqb1MC6HEfAj0JyEILmKfyWajVTJixxbvQCNnISNDUvcvl0X9+7tiKfPGuLp04Yj+fi4IY68WhKnTo2KkyfHxMyMfmN6EBAWVrCahldciVVpadu3MQOenJzMSRMMp5gg2uefvxC\/3HPdYvRC4a23DoizZya0IyLM9fEJJ\/mOPF2SdqOCoaBHNfaqV9+v443\/\/vtN8csvO+Lxk93WG3\/kSEnMHDbpjR8TADvrMEg5bt3eEDdvbpCZe7Bn06C6f\/fdprh7d8sh4bvvjgdlGUgalmKcb4jtRlX++uDpJWLitbGxMTLB0kdIhQwA\/PzfL3oCj+4Gb3tWRBykHF\/fXBdff72uIIkVA5uzZ\/UwscO3IvhmBB8sleCNHlvE8M+sW\/jii5cCb36YgO7pX58\/d7Rj2kAPUg7UP4h8cydonEdjvVOesd7jx7viEf3dvPmScGjXlCBxuSyFDprQ09tWSrBUBfU8iWHaO\/M8ACws+bzC4L563RIffJDOeHaQcuClQrfrDePjUpwhbfbu6c7eCkMS\/L1Nw5FbNEm5SVpzg7BQAXXBcGXQkxP1mYchjePOMgwE1ImAGLsEvfUKyF4xwEeXmTQMWg4QxjvmA\/kuXZwOJJ+\/ru+eLotLlypivNxqYnoxbZrEPPdnHeg59bzyOCTQaRsOwCcN6I69b3+c8gYpB7QfXgBvgOaDhgsbkPeMb9z3Cy3dJMUl7PO75VPKjjzrTu+9Ht1y9zkdoAP8pAFv+3fftjdglDIHLcfdH9s1+MyMEUrz+esITTh3on2L9fatuj9bX8\/xuy8ItCR4SDsC3kmh61Rohl0vU\/m98aDl+PFu+1rfmTMHveJFOj5J4z5vuBdyHdF7T1bH1AO7v8Gmyyy4Riv7aYUnT+KXNWg5MKP1BuxwxA2YKXvD02d7ExNver+OPTYHVYN+xYkWovWZhGAZIa2QpCsftBz+cdrRo\/EJ6J\/1JsElrbZR5WjXBSvBOB4OBLQjoP9tTdIMRyPMGP3PGbQc\/ucn0Vp+bY4FaV2CdgR8NcFYxw\/q9OH41Ru0HDM+2ZOsaz7xDWuOHmmfFftx6+d5axKi1mb6+fCgZ83NpQfOqVPxDRQGLceJuXa\/PD\/6lmWCsOuW5l\/PPHmyvexu92WV7uFaxaCtOK0mIW+\/VW5bvY8LAtbNsCUVNwxaDv9WGxaQb91q35YLUzdsZ\/q7b2zHDTK0EXCQggQ9G+OT839Ovo+bZN0Mcg1aDjzfv4AMTeYfzwVhqNKwlOPfS4a1kH98qfIPIo4\/SMpQWqxbJbHagOlREu2nqjZoOc6fn2rrDbC7s7RUC6UJofmWPlnr2EsGNjoF8+PFv16BQMqRoC7CvfEGjVNosgaz8yjhNFmJnDsXf9fA\/6xBygET+9KIFD\/9tLcrskvLpD\/9vC2+IwNdZWgwNeXqEXS1MNy9cWNd\/Oe\/dfrRaRpgecJ77x0Uf3xjsN2vEqded7dJ5f2HzxwpDx+eVte0ir+lveEg+za\/kLAU+fDDKTGf0fhmkHKg601iHQSsdDJIhTzPntUQCe0J6EhJ\/0CAH2mf+Blt1alxEMYy2KI6QTPnt\/50QEBjZB0GJUeQfV+Yuu5nPxjm\/qzy5I6AWQGRp3LRxUIb+s20utUBVtPnz09qNelQsjIBFRI5jEFEmGvBYubxE7Lv23DHeugR8JEWeoTTC7Sc1YceIS58TMC4yPF9qSCgCJj9oCkVcbmQoiLABCxqy+akXkzAnDRUUcVkAha1ZXNSLyZgThqqqGIyAYvasjmpFxMwJw1VVDGZgEVt2ZzUiwmYk4Yqqpjxv\/UrKiL71At+WnTwTKqLHPtAFfpSbqxhQtcog4zYe9XBM6kucqQBsdqKywUB8cYHeUhV5lhZekiFZXFUz6RoIJjUwwYviWW3t6F1kcMrU5Lj3BCQPZMKxwSrqAapWo8B2TOpcJx0BpEvzx5SvZpT2y44iRk6XJIl8ZCKsdY\/\/lnr+KCnm2dSL6BBlsvojv\/+t8ORDUN1kcNbv7SOVRes5TIMLH6D3vqwlU\/qIRXk18EzqS5yhMU9Tj4tCQjgk4a4HlKhdfwm74PwTKqLHEnbodf92hGQPZO6TVZkD6leUmpHQPZM6jbP0HhI9bJRh2P2TOq2QpE9pHp5pp0GVN\/8eoWMe4xxVNSgi2dSXeSIil\/U\/NoRMGoFOH++EdCOgGl6borjIdX\/\/DhaVFHCr82xHhg26CJHWHnj5tOOgOyZ1G3KofGQGpe5Wd3HnkldZIvsIdXLHe00IHsmdZunyB5StSYgxkmD9JCK5+vgmVQXObxkyeJYOw2ISrJnUrep2UNqFpQPWSZ7JhWOdyv2kBqSMFllY8+kxTZI1dYe0E\/oYfdMGmRn6Mco6Jw9pAahkrM0LEbDRMxvptWtGll5JtVFjm71jpKuDFJzowGjVC6rvCCADp5JdZEjCc5MwCTo8b2JEVAE1HIZJnHtuIDcIMAEzE1TFVNQJmAx2zU3tWIC5qapiikoE7CY7ZqbWjEBc9NUxRSUCVjMds1NrZiAuWmqYgrKBCxmu+amVlp7x1Io6uIRlOVQLZJerPVeMPY82TPpXmPrgseeRPGP1FactgTUxSMoyxGfZPvdqQhofrz41yvIWC6X98vf12swfbpxY13s7Li\/gxvl4bu7Qvz087Zzy9zcaJRbO\/KyHB2QpJZQr286ZWk3BoTGCfIN2G+PoCxHalzbtyCtumCMcdgz6V576YLHnkTpHakuWKtlGHR57Jl0r5F1wWNPovSPtCEg3na\/yfsweybVBY\/0KddeokHuctaQZNvRB\/ztRSU708UjKMuRrB3D3O3h2ppBvNOCgLp4BGU5wlAoWZ42AiYrKr27dfEIynKk16ZhStJmDKiLR1CWIwxt0sujDQHTqxKXlCcEtCGgLh5BWY7s6WtZ7oRX0vzDEFKs4pGNhpX9k\/d5gi4eQVmOfRoppUtqEmJLEFCToItHUJajv4QAAbVYhtHFIyjL0WcCWrb9Ox5p24PtgnXxCMpyZE9Ay3J\/v0UKuapNF4xq6+IRlOXIloTeTTfYA85LKRdKJVOMjIxk++QepY+PG0IHj6AsR4+GSnh5Z2dH7JLhJk1GbshfHzy9ZEt5bWxsTExMjCcsOp3bYQUSZBMYpfSzZybE2bMTUW7pyMtydECSSsLGxobY3NwCARdLDWk7azE0Ckyl8DQKAXnKZUPc\/JrMs+rRxqZpegRlOdJozc4yLMttUymNVXnvUW1B2vZt0zTFoUNTnbkHmAKTJGghv5lWN5GK7plUFzy64R82\/cWLF\/S5BXXBUp6WKyu1asO0VwzDEJXKobBl9DUfgGfPpHuQ64LHnkTRjtbWfhfQguaInHV+Pe\/+w2dO\/zs9XRE0IYlWGudmBCIioMzxXz92WLrLMLa7Hae2SCKWx9kZgdAI7O421wBtcQc3uQSU7gmmxxwYgSwRUIvQNA15gOc0NaDtnCh2ZikAlz3cCGD9zw22VwPay0hU7HQz8H9GIH0EGo1mFyyNPQKaDXMZj4IG5HFg+qBziXsIYPkFwWyIZcROFzw7Ow2LmGWQj7thwMIhCwQU+cgQ9U6Tc80xID2NyPcNHrq97fpVyUIALnO4Edje3nIAsIXLNZy4kxDnyFhGxAQEChyyQEBpQMsyrqvyWwQ8cXR6mRKdblhlVJk4ZgSSIrC1teXsftA2x+rc7LQzAUGZLQLihPaEbyDe3Kwj4sAIpIaA6lltIa96C20joEGqkRi6Bg3IWtALEx8nQUDxCdrv9WPT171ltREQMxMy0f8EGVgLemHi4yQIrK+vO7cTtz7zl0OkbA9kHVOxDPsH+mSuOj5eFgcOHGjPwGeMQAQEMPZbX9+gr3\/F6mvHDs\/6b23TgLgILUh2Wos4hhtVXpgGEhziIIBvzZUrXv\/YT5XXQUBcoH76K4qcGfHLl676VDdwzAiERQDDuKb181f+sZ8qI5CAuGg25EekNmlCskPjQdehtLqJY0agFwL45mNraxtd7xoZnjo9atA9XQlIXfEq2UxfxU1Qo4N23REkPKfpiYDb9bpLedT1Ls6+QlzqEroSEPlfOz69RIPATzAOhB0\/k7ALipzcQgAcAVecuQNxp1vXq24gDbl\/aM6Kb9OseB4fLk1NTbLZ\/v6QDe1VkO75cyiqBm1qiDuvHT\/8Zi8w9tWAuBmzYsOS71OBqygYD+CZcS9Yh+96G\/loycUYle+HQaGnBlSF4Os5Wh+EJqyyJlSocAwEOsg3Ik\/vN+7zohaagLjJT8KDBw8K0+ypRL3P4+OCIYAx38uXL91uF5ovAvkARSQC4gYvCfEt8eTkJJMQwAxhUBMOrPURkSKTD5BFJiBuapLwS0xM8B1xuXyAt+wAzBAFrPPV63Wn+8WEA2O+sN2uF6ZYBFQF3H\/wdImmxBdxPjY2SiQsszZU4BQ0xngPxgXb281PeGmpxbSMK5isxqlyIgLigfcf1i5IYV8j1woVdMnQhvC0xaF4CLRpPdrhIOuWqyeOzywlqWliAuLh6JIbprhG86FzOAcRJyYmyN+gdr8GC\/E4REQA9nzY1\/XYiC7T9tpHcbpc\/6NTIaAq1NGGtn0ZSzVIAwFHR0dZIyqAchb7iUdkWcXWWtNYJZXapEpAJdG9B0+v0O8\/\/EURERrRJeMYa0UFkoYxxnf4LHdnZ9sxJMA5ApHEMVQuWcZS3LFet+pmQkD1ML9GVOkgIxazS6USddeITXWJ4z4hAHLhD9ZO2OHCX4BjgmVpyxuGJa6nTTxVzUwJqB6y8rg2T2tGNFmR72DpRqV7Y2hJLGpjWQfHiNUfSKqCe71dbJVP5RmGWBHIX1eszSHgVw+UBsM6ncqvSNa00\/PfjvNlyvsNNcJy80vJoDyppbW3ZGrFdi+IJiwVmrAsEEBYQzxFa0jVbqTsXgpfSQUBuOWDZzSbnFNJYxnuMrLSdN3k7TsBuwmy8lutSo6TqkTICkhpCatCv6Z9HPlp4FulyAm4jiUfdY6YlGVHmvd6EY+p4daoB13rqFvzp9cofY2Wx5zr9NNsDwxhrDXop7EIq1Ua+aymMYPteHaMhP8DKleEJHlBQFwAAAAASUVORK5CYII="
},
"copyright": "Copyright 2023 Dify",
"privacy_policy": "https:\/\/dify.ai\n",
"position": 1,
"chunk_structure": "text_model",
"language": "en-US"
},
{
"id": "9553b1e0-0c26-445b-9e18-063ad7eca0b4",
"name": "Parent-child-HQ",
"description": "This template uses an advanced chunking strategy that organizes document text into a hierarchical structure of larger \"parent\" chunks and smaller \"child\" chunks to balance retrieval precision and contextual richness.",
"icon": {
"icon_type": "image",
"icon": "ab8da246-37ba-4bbb-9b24-e7bda0778005",
"icon_background": null,
"icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAYkklEQVR4Ae2dz28cx5XHq2f4m5JIyo4R2+t46B+H1S5gGUiwa1\/EAFmvkUtsIHGOq6y9Z1vJHyDpD0iknG2vneMmBmxfFo5twPTFzmIDRAYS7cFKSMU\/FCS2RVKiSIpk975PNWtYU9M9nB\/dM8PueoLY3TXVVV2vv\/N+1auaQA0JLV27XpNHqe3K\/yAIZ1WkZitK3c\/jhUEwG8g150I1\/df+E8hn+5\/bnxT3PFArMuaVhgFyTfkeBSpa5jRU6irlUVhZrsafL8\/fPac\/4\/NBUtDvzpeWrs\/ujquFqgpPhZWgJsA6Kc9Q6\/dz+P6EA5G6FFXUsoqij6Kocqm6pRbn5+fqAO4Hj\/oCQJFuCzKYU5GKOPK\/iSqViqoEgaqOVFUgR\/5TBgVy5Bqq7pXpi70\/pr5dVvTzKBJuyn+buA6tsnB3V+oIzqJQ1w1DOYaR2pUj54kkoBTJuahGKr+Yv2vuUmKdDAtzAyCSLpwMTwdR8D153gXzzIBlpFrVQKvKcXR0tA44U8cf+8OBXQEoYNzZ3la7O7tqe2fH7XhZoHr+obvvfNX9IKvrzAEI8NSEej4KoheMXQboxsfH1OjYmAafkWZZDcK3kx0HAOHtrS21vb1jS8ll0Umvit14Prue4pYyBeCVz794qhJULkjTNZofHRlRE1OT+si1p8PFga2t2zEY9yVj5hIxEwDiwYpF8oqwdwEWe+DBheIQUnH95npdIkaBeqMSBWey8KR7BuDVv1x\/Xkzdc6hbVOvk5KSamBgvDvf9SOocQCJubGzEQJRwThiFZ3q1D7sGoLb1JtVZ8bxe4AnHxkbV9PR03VutP7U\/KRQH8J4BIWCExNa\/+ODX7zjT7SC7AqBWuVH0ugQ3T3qp1y3rD\/d9m5tbGog6FEToJgie7kYldwzAPXvvPWFfjTjdsWNH6\/G6w81S\/\/SdcgBpuLZ2w9iGeMrf7hSEHQHQBh8xvKNHj3jwdfrWClYfEN64cVMRUxTqGIRtA9AFH5LPx\/MKhqYuh4MaRhJ2A8K2AOjB1+WbKdFt3YIwnmw9gFHS+OtSpYba9ZLvAGaV9GO0IdgAI2AFzOhIyQH8OBCAS3+5fkGJt4vDgc3n1e4BHC3xx2Cj7hcIZiQX4OxB7Gipgq9c++K05Ki8QsMzM8e8w3EQN\/3nmgM4JqurazoDRyThmQfvueNiGmtSAajtviD6HTMcU1NTfnYjjYO+PJEDxAlv3boluXRqRTKiHk0Lz6Sr4CC6APjIYvFTa4k89oUtOABmmB0DQ3t5Aom1EwGI6hXP+insPuZ2PXkOdMMBa2p24crn159KaiMRgGL3aeMR8Jms5KSbfZnnQCsO4DsYAVYRjZrkFTcBUGw\/wFcDeKhfT54DvXAAVUx6nlAtnAh14ordXhMARV+fpsL0kWm7nj\/3HOiaAyQlQyIRn3elYAMAsf2kXg3E7qGW+zx5DvTEgTqexCEJx8PTdmMNADS239i4Tyi1meTPe+eAJQVZpFanOgCXPr1+Ukq97VdnjT\/JkgNIQZwSoQXxMxZM23UAhpVYNI6OaoPRfO6PngOZccA4tbLUc8E0WgegJBOeotCrX8Maf8yaAyzLhQzWONcA1J6JTB5T4J0PuOApDw6wIUFdDbN+XEgDcHd8d4ELDz644CkvDgA+QKhpSi1w1ACUD7T0q8i+LJ48B\/LkAHv\/QOFubAdqAMraukcoHB2RyWNPngM5cmAvYRU7sEY32uUV51hfVKsxHvnA0z4H1rYj9dZnW+ry6q7683qoLq\/sqFUpo9zQfVMV9XfTVfWPs1V1YmZEPXbXqKLMUyMH2IxKU6C00ItjLnsOiEFn4y3lvAJcL368qT7827b+fxAXPrkVKv5T39A\/CBife2jSg9EwRI57TgglNf4EewuOlkg+mJ2doazUZID30scbDRKuV6Y8UxtXPz4x5aWiMHJlZVWvJRY1PI8ErMHcpI0fKS8T\/fTyhsoaeIZ\/v1zeUvwHhD85Ue4cS1sKVnajXR2PCSpiCZaUUJ1PvLuifnb5VqrUe\/xro+o\/Hp5Q\/\/n4UYU0S6L7pqoaXNRNI\/r45\/++rtV1Wp2il4\/secKyPWZtpFoJZAmd6GJRwWUkpNLZj9YTgXdsNNCge+7hScU59FMBEPe49OQ9Y+rcyem6itX24F+3E9vWgH9nRV381hH1r3Jf2chIQFkrMjsiWwbPwlr2Zy4bAaafidp1CbChJgGeIUDz7Ac31B\/EA3bpJ6JWf5ygVl+6spkIbO7H1vx3aa+MKtkAUGIxsyMCuxoMqRdyUQJKAx9qFlAYiQcrfv35bXX20nqT2kTlPvfweANQW9WnTTt0Q11UMlQmu9As85D0v\/vrqS9lAiCASpJ85x+ZagJTGlAB368WjtVVrkaR\/Dmo\/q8\/EzCLyrcJEBIzTLMt7bpFOxfXI7ifQVXMHF3RRuiMB1X6wv\/ebChFMr126lgD+Kh39qNkFY2954Kv3frPiYR9+zuzDRKWhwGUtFEGMsJOFq3P1SVgGQbOGH+wuNqkBl87NaMIGhsCCNRLAkSSvddp\/WNjstOEo45Rzc9+sKbBaZ6jqMe6wytsKBUAUY8uqFC7Nvio85LMgLi2Gir35cePSN1GlmVVH7D9YWVXmwZJDk1RwViREEycl1VwLxjguXYfNpft6Rr7LQl8qNwk8NFmr\/VtcL2oZ2CKrYqtSY+aJOrHADR62WZGkc6Nt2nGhETD24UAZ6sQC3ab7RVnWR+v+78krmhAzPGlj5kx2Q8BmWcu4rEU0WcA4waPecF4nnyGvdcqvueCL8v65x6ZlhBM\/EUwACuDFDRjbTRoTGnBjh\/KjIRNSD\/Ub1b2W6\/2IRKWZymjFCyFBHz5SuNsxzO1sXqIxbx0A1ATYrHtPaSkCcnkVd\/uj2f5wErrMs9WxGNsAzIXLP+KSIDn9+Jd2kTWSxJlEWIxKp2jS520T17h2nYotmfxZETd3xD\/o8L+bTCqqNkwrvp1QcE1KpRwjGv4M2OSFA\/Mu755xrdk1qSIVAegYK\/wNuDl1ebkAfulAiZ3VoPPTUjGrst53vXt\/lgCUHQqPABd9Wu\/UFRiUoiFQDSJqS7lXf8xySO0U\/pZf1J0KjwAP11PliKd2GOAoB\/1fyCeOcmqhlj8VHQqPABdZwAVmueUWi\/tux42K++KToUHoPsCh8nec+1JO+DNc7uAdMdShOvSAdBeq4t0HNQUXJo9WQRQdTKGwgMQqWJLEhNbyyrLGSnWSVb0QfU7eXlFqFt4ALp5d6syK\/fix8mJpq5KNC94UCEZW1qbZynasfAAZIrrk1v7Ad0zkg1thzrMC3VXtVGOik4LyeRdn\/7vk60+ik6FB+B9041TWUng60eIxZ1lAdxJsyw24OxEWbu8SOeFB+CJmXQpgspNCsm0sg\/zrO8Ci02Oik6FH+GT946rM79tXIXGSx02ey8JaOywVXQqPADxgt0pLnYjYFcCO+426JAMz2Iv18R29U5IQb5+j39tpMHxwA50wZdmj\/XLPrSn4GD7cw9NFIT7rYdReAmoX6ZsscFefyYeyJFr1mMMQ1Y0ywWQwDaVQf0y3lIAEGkXg20\/w4VFSp\/qMMt+mQFA3iEWu32A5y6YYrlAGdRvaQDIQFl+6UrBtJSrTkImvapowOdKP7Naz3whinxsDJIVeKRGCqYNEa+431nRfCHc1XoAuizSj3dRChVsQIdkeevz7aYlmIMIybALwjlnkyKew5W+5tmLeiyNBDQv8GXZ4dT2gClflcU\/a7f3nQBUolkFZ+4zR+w3N6Wr0\/p44d9\/f9U0qY88E+2WjUolAXm5qLfzshj8zG\/3d8jCK37i3VXFIvEn7x1LnSLr1d6jf9SuK\/kop98yqV7GDAV\/uvaVTrs9fnwuLinJXwDo2l8MHUlkwjWGFajGpCm4TkI4tGk2QTftukdMhLJsVPnVV\/HSg9JJQF46KjNtuWYS+FyVSxudpGgh9fB23bZpxybqHOQs2fWLcF46AAK+tFkP94UCBpJNbeL+drKoARvAS\/vZBwM06tjARD2Tw1iW3VJLpYLTwEeQ+q3PtkUyJq+gA4DMJzOllzRrAZgADD\/PgIPBUtCktC8DZOZ5cYaw+WKHZM18VD9e+OaRQoPQqOBDA0CkBL\/X9uEXOzqM8omsmTWSAwCQ98eLfezOUW3QU2YTdfE8CX\/YZDsWqMC0bTvse7o9N1LPDTQDatspMu3bIOx1\/KbNYTkeGgAitV6WReL2HnrtMBGJxIs2nuX3319rkkrU4SXbRH8AMclBset1cm6AZ\/\/eiHt\/GggZww0JE\/U6fre\/QV8PPQD5xh\/kNbbDRHY+oC0XUEjLt7+T\/tt4ABFH5WX5rY\/fd7lAHJX8mKjtVsCzx5AGQrtOp+eMH8962DY5GmoAptlqnTI\/rT7gY1d8V02n1TdgZJ8ZVPgnstsCZYZoB8eBdjEFyMImEbbd9k07HPMAIVrgVwszdW1g9zeocwPAofOCecHsFm+\/YMMko8pwCPhtXqNekXDscEoq\/UHORBzTa54NMX0kHennPlHXSu17xPe+9mW9Kv3\/3\/eO1697OQHEjJM2Xep2\/OYLjeND+8NEQ+WEGEa54AM0F741rT3RdpiHFGHz8CSvFskHgHslG4C09dn37+i1Sf2lSwoRZTX+YZKERgIOzVww3\/gk5hMieftfZjoCDc4F93CvSyzLZHH6sFE\/xm++4MM0\/qEBIA6HK\/kIkTA\/240txT3xBuCNu83TR56hlm6BXdbxDwUAAYWbHIr0yiI1iTCGKwlZbO6CvVvgZHFfmcc\/FAAk7mYTNo8brLU\/7\/Q8jgc2rg8mtjgsVObxDxyA2D5ujA7J143aTQMUbeHE2BQHdgdvC5Z9\/AMHoLsRN9IPJyJrwvO1Qc2Ld\/vOus922nOfoWzjHzgAP\/yi8Udknry39xBJ2ot3bUHmlQdNZR\/\/wAHo7oPMrgV5kRv\/cxMT8uq3VbtlH\/\/AAejuBJ\/njlDMntjElNqgqezjHzgAscVsynPS3Ezdmf7cvk15P4\/uM5Rt\/AMHYD9ftu9r+DgwcADaninsyTNA3CxtGpNWB\/F6yj7+gQPwG84Opmk\/LJMFONzfBB6GLXDLPv6BA\/CEkx704d\/yC42QrmVTng6P3U+r87KPf+AAfOzOxvw0fi08L3KDvqwfaZdQ379c3tRrN554d6XpNsrMWmNX1TdVtgoOy\/itR870dOAAdDOHeXmtVpR1O3qm+1z7sp2gN\/ewVPKf5Dfc2OqXdpLih5TxGSD8+ze\/0ke3v6RnH\/bxJz1zlmUDByBG+A+dqbesc\/YAtTvhz3Rfq5AH97A\/DDuXumt323kBgJF72Xa3Vf7dsI6\/nTFmUWfgAGQQz8refTYhObLM2UvKtWuVbUP\/T7yz0pQiZj9ju+ekfj3xzmqT9LXvH7bx28+W93mjAZZ3byntEyBmnhZJY4gXh4Tqda+UeP+WRruSvtygtOk3jzUpAJps77Q1GcM0fsOHfh2HZk0IKi+WFI3TY90uK6Q9JJ+b6Eq2Cen6bvwNhhugcLSJe7JYkwLQ0lanDcP47THnfW7WhAwNABlwDABWxDWCkBeHymw3TQsnBjsyCUhJGw3RdwyAlaZ7kJb0nQRY7ksj2sPutKU6dRlL\/AVotn4GOf60ceRVPpQAZLCxCrzRBEI+4+Wxjx4ZM2b5IuW8OALYH0gMMW0zIKRYrAIbExK4H8LhcKWlvW1HXKvzv4DQtWeR6uxRmESDGn\/Ss+RZNrQAZNBpkqBbhgC+NMln+nN\/pwPJx6KmLIgwjisJf\/PduVQ7tN\/jz2KMnbZhANisBzptKYf6Rk0Bgl6JNlB5tJlGbogGwLbyktPaSSunLdq0qdWalH6P336ufp8PlQ2YNHikAQAhrtYumdga4Y1WwKM9bDUCxzbZu1LZ5b2cu9uw8Yz\/893ZlrFI+st7\/L2MqZd7jQQcegCaQQIUptJIYb8ssw5\/FpuPMoiX+Q1JNj0xW5Xt2UY62pfFzF6YfpBUvxFg5EEA3Twz7V\/45rQ4Vu1J+bzGn8c422nTAHAo4oDtPDAgwwtu1xNup03q9HtNhu2QsCblmVp7T5rX+NvrPb9a6YZRfn0OVctlX5Mx6JdRUYHSqR1R2JgaP+gH61f\/ZV+T0S8+2\/1E0R7WBHsVFe0BUE7KSLZNxvhbJSj0yh\/XIXL77rX9w3J\/HYCCvdKr4MPy0or6nKUHIMa9TYQ98iJX4rl959XvMLdbegCWfU3GoMFZegCWfU3GIAAY2k6IKKBlHmI3zE\/1DGKQ7fZZ9jUZ7fIpy3reCbG4WfY1GRYrBnJakfBfqeOAOALDuCZlIGgYQKeVIIj0LydHUTlVMDwv85qMAWBOhbtxwnGgguXSOyG8AALEbuoXa1LsedtuX1Sna1K67ecw3Wd8EJ65IvMfy5yEJXVCGDuUlLNHGthByyrju5v\/EvMjy5rfK7Ep61xDu+3Dcm60bajCq5XK3lxw3TU+LKPI+DmxBeOs6cbEUbOsspN8RHL\/kpZ1Aj76KHsA2vaCgyvXvjhdUZVXxsfH1PR0NinoGWOjr82VZU1GX5nqdHbzxk11e3tbBZXg6WDp2vWFSEXvVatVNTNzzKlazssyrMkY5Ju9sXZDbe\/sSCJW8G2ckGUepi4WuSg5lWlNxiBetTXpsaxn4v907SudizU3O4tYHMQzDW2fRV2TMUiGm3T8B+4+HhgALskD1WZnZ1Sl4iMzSS8HrzaPNSlJfRW5bEdigGura0r076UHvn78Ub0mROIylwSKtW0xDMfHs\/+RmCIwFM81jzUpReBNJ2MwQWgVqqvctyfuIn0BOj15DuTJgR1xPqAoiC5x1AAUL3iRi3DHAxA+eMqPA7t7GBNTbx+A1a3qIl0iAcu6OCk\/lvuWbQ4QftF0Sy1y1BJwfn5uRbyRRUIxO6GXgppB\/k\/mHKiDTxwQMEcHdZc3VNH7FNy+3biTPGWePAey4MDtzXh7FdGyGmu0WQegTMctUnB7ywMQPnjKngNGAlZGKq+a1usAnL97btGoYVPRVPJHz4FeObC1tWUyrpbn75rTDght1gGoOwiiNzlu3mpMIdKf+T+eAz1wwGhWmf89bzfTCMANEY2SnoUE9FLQZpM\/74UDFp6WRdO+arfVAEA8E\/GEf04FLwVtNvnzXjiwfnNd3y7x5l+47YjZ10hLS9dno4nod1Jam5qaVBMT7e1f19iKv\/IciDmA7be+fouLZUk+mHf50iAB+VDHBKPgDOcbG5s+MA0jPHXFAdKuwBDk2n6mwSYA8sH8PXNviGjUgemb67H4NDf4o+dAuxzAjGOtURSoN1zbz7SRCMD4w+BH2iGRDJnNzf1fMDI3+qPnQCsObErQeYtJDfYA3NOoSfVTASiIXQ7C2GVGjFpZrEnt+DLPgToHYtUbh\/ICAR9Yqn\/onKQCkHqiii\/iFTNHTB6\/B6HDPX\/ZxAEwAlbADNhJU73mxiYv2HxgjtorHo\/eE1F6koVLx44e9Wn7hjn+2MABQLeGoCKvVJKcH7jn+KMNFRIuWkpA6muvOAieltNlGl67Iegu6X7SCfzzRXscaACfYCWIMXMgfw6UgKYFWb5ZY\/mmXNe8JDRc8Uc40AQ+WW7Zyu6zudY2ALnJBeGRo0dU1S9isvlZunNsPhaaa7WL5OsAfDCrIwBygw1CVtAdPXbUgxDGlJCMw7G3r1DH4INlHQOQmzQIo+h1ufuk6Ho1OTnhp+xgTImION\/GxoZWvzgc2Hztql2bTV0B0DTwx8+\/vCgdP8\/1+NiYmpC5Y6+SDXeKecTeI7mAvV0guf55ZatyzqTYdzrqngBIZyINT8sSuwvyLZhFJSMN\/driTl\/D4ajfIPVkhkOiIecfvOeOi708fc8ApHNUsqjjC\/JteIprgDh9ZFqNjhya30LksT2lcIB8PuZ1rRzRRXE2ftSNynW7yASAplEtDVV0Vq5rlAHAMdn2zUtEuHH4KAF4y3pqTZJVshpNpgA0D\/XHa1+ek2\/Iv8l1jTIkogbjxLiXijBkSAn7jrXh25JEsCWL07jWhLrF1tusXOzW1ksbci4ANJ25EtGUA8bqSFWNyLEi03sj8t9TfzkAuPjPfkDE8NixQG9MYEAXP86iOJlvqg31atbAM6PNFYCmk6W\/Xj8Z7oSnRSqeUhK6MeX2ESmJB01Yp1KNj5zH1\/sA1ddSbpOpZ5cV\/dwAyB2nSRiJyMPbA5POydsD3I4AjfIWe4IvCjTfZ5mu2HiLbvtZXze+yaxbT2iP5AY1rhbCIDwpvxHxiPw6BA5MIigTbvdF2XJA5mzVpTCMrup14VtqMS9Jl\/bYfQdg2oNoTxqbUcI5sli0FkbhrGRK3B\/XD2rmPvnyyi6a8t8mrikvE4ldJmNecYcsL3RZl+nPI\/25\/ALM1UpQWdmV+qJL+JzVaXE9XXlwf\/4f1AC7LPmFaqYAAAAASUVORK5CYII="
},
"copyright": "Copyright 2023 Dify",
"privacy_policy": "https:\/\/dify.ai\n",
"position": 2,
"chunk_structure": "hierarchical_model",
"language": "en-US"
},
{
"id": "9ef3e66a-11c7-4227-897c-3b0f9a42da1a",
"name": "Simple Q&A",
"description": "This template generates structured Q&A pairs by extracting selected columns from a table. These pairs are indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.",
"icon": {
"icon_type": "image",
"icon": "ae0993dc-ff90-48ac-9e35-c31ebae5124b",
"icon_background": null,
"icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAUPklEQVR4Ae1dW4wcxRWt6pl92rseQ7xgYocdIALFeRglkSBEYkkkwF\/YEoT8RDiKwkd+wEryG+P8JpHNTySEQuwkHzEgYX6C4AM2UghISYTzMMrDySzYeION4\/Wu7X3NdOWe6qnempru3Znpefbca427uroe3afP3lv3Vk2NFF0ihdnZSZEVkyUpJqWSOSFUzlPezbg9X6qcFILySOi6Plb8R+WVCq5X5Kf4RMo5wog+liiB8zCPcJzBVV\/67xFwc0r6MxlF9YpiJr99u76G650Ueq\/tlcKlQq5UGprKKO9eXxDZpNgtVBSp2ntffdrbSSXEDBH5z0qqk5nM8nR+az4kcDswaQsBCxdmp4Tw7lVC0VHgUyWe5wmP2JjJZoSkIz7Ig0g64hySKefpk\/J\/prydl\/a0UoQmfWzBuW\/l+aUSlSF6KV+X9X06+kqU6Ih0jJwkpKeF8o7lJyZOxpRpWnbLCAhN5xdH9lMHD9HdTpk7BlmymYwmWoaOAwMDIeFMGT62B4ESERRkLK6uilKxJFaLxcqOpZjxfXXotontRysvNO+s6QQE8URx9AklxZP0Z5fDrYJ0Q0ODYmBwUJPPaLPmPQa31CwEQMKV5WWxulpc05JERBpPHs1vu+FQs\/ox7TSVgKc\/PLfXy3iHzZhuIJsVw6MjAkeW3kNgeXklIKPRjC3QiE0hYOHS7KQqyp8TxFOAmYkHFNIj0IpXr1wNNSINK094WXUgvzW5J52YgO9dPP9ESamnYG5hWkdGRsTw8FB60OcnCRGARlxcXDREnCOH50DS8WHDBAzGeiMH6a\/hSdzh4OCA2LRpU+ithnfNiVQhAO8ZJAQZIUp4R27dNnGg0YdsiIBlk\/sSdbqbtV6j0Pd2vaWlZU3EcijopMyqfY2Y5LoJqMlXkm\/A0UCcbnx8LIzX9TakfPf1IgBtOD+\/EJhkeMoZdV+9JKyLgDb5EMMbG9vM5Kv3raWsPEi4sHBFIKZI06R1k7BmArrkg+bjeF7K2NTg48AMQxM2QsKaCMjka\/DN9FG1RkkYTLZuABTF+F7CmA9mlzXfBmD16WVYQ3ADHAFXwBkdKdkAjw0JWLjw38PUxm44HBjzsdndANE+vgxuWH7Bbr+46eBGcKxrgk+fn91PK1R+joa3bBlnh2MjNPm6RgCOyeXL83oFjiqJA7feeOOROGhiCRiM+7x3MMMxOjrKsxtxCHJ+JAKIE167dg3X5ihGeGdceCbeBBexqEDlsIqFp9YiMebMdRAAZzA7RpIrrxOILB1JQJheWu64F+M+zO2yMAKNIGBNzU6d\/ujc3qg2IgnoeVIPHkE+syo5qjLnMQLrIQDfwSgwWu9+OMorriJg4eKHB800G8wvCyOQBAGYYr0elEIz\/sqwXrhit1dFQAoo7keBTZs32eU4zQg0jAAWJUOkJ59wtWAFATH2g\/YDY3kVc8N4c0UHAYtP+ntC9uUKApqx3+AQLyi1QeJ0cgRCLRh8SS1sMCRg4fxZ\/f1cOB089gvx4USTEIAWLM+iTQVf0w0aDgnoe95+ZA0M8BeIAmj4\/2YjYBQbTZRMmbZDAkqVuReZbH4NNHxsNgL4Wi6EnBHNNaQ1AQuXLuVoCcNuZLDzARRYWoEANiQIzTC+P06iCVgqrUzhhMkHFFhahQDIBxJqKY1O4agJKJWvtZ9H+7KwMAKtRAB7\/0B8vzSFY3kMKD+Hk4GsnjxGkoURaAkCesEqtSwp3owOAg0o5CSlaTVrmY84YWEEWoAANqPSkvG00iszLnBADDtb0C83yQhoBMpOiF62jwxP70yKBAWgWRiBViMAAhqugXsetsVFp1EbP7b6Zrj9\/kQg1ILEPa8kPR2PoeBgf6LBT912BLJlTxj7gXsZpSZxB9gGl4URaAcCRgNiM3qPdg0OItJkm1kYgXYgYAhInkjOM\/GYtcx23AL30c8IGCfEk97Nod1lAvYzJTr37PS9c3kzuvfMHF3n7oV77hMEjLJTpdLWUAP2ybPzY3YBAqHD63lbmIBd8EL6+RaySujfZdO\/UtQNQHzipz\/qhttI7T28\/53vd\/zZwkkPxAFpWUIQiOYwTMdfTD\/eAJvgfnzrXfTMTMAuehn9eCtMwH586130zJ7QPw5Nc8H0j4URaAcCJg5Iu3DkSAOWnRBeDdMO7LkPQiAkIO0dyCaYKdFRBJiAHYWfO2cCMgc6igATsKPwc+dMQOZARxFgAnYUfu6cCcgc6CgCTMCOws+dMwGZAx1FgAnYUfi5cyYgc6CjCDABOwo\/d84EZA50FIGu3xK\/G77D0NE3lLDzbv+ODWvAhC+YqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgS6fi64kcd769z74t2PLoiz85fF\/Mqy2DE2LsaHhsVdN+0Uuz420UiTus788rJ4tfBPcXZhPmzjro\/vFHff9InwPEkC9+3Krusn9L27+Wk5Tw0BQY6f\/eWP9PmTQDpOdoxtEQe++CXxyB2fjisSmY92D\/\/hzeq2\/yCI4FvE8Ye+LnaOj0fWrSUT5Hv0xPGqorjXA1+8pyo\/LRmpMMGnPjov9jx\/jAjy+2qCOG\/q7MJl8d3XX6GX\/WtxZn5NkznFKk5BvEO\/ez22bbT56Mu1t1fRePnkxb+fisoWrxVOR+anJbPnCQjy6ZdPJKhH3jp3pibSwNyC2LaMDw2JnWTWbQEJv\/f6b+ysutKvFv4VWR7P99YHZyKvpSGzp00wyPH4KyeqNBNMIkzsp2i8B7JAXvz738Tb9CLPWEQ1pDm+9+ux7xLaz5Zvffbz2oRjTKk1H5lN0yZIPb+8VPeY7dX\/nK56BrvPt8k8301jzTRKT2tAkMO8fPNyQJDff+NxTZIH8reRgwAnYaf4yVf2iON7HxUP5D9piuojSIOxY5zAkTECMh\/88ldCgoHoT9IYzRbbQbHz10u\/+I+\/VVx2HSWMP9MqPUtAvOgXSKvZAvKBIHECwjy7Z2+VJxyMHZfiqoX544PDYdokovLMtVqOgWddaX4Pfvm+UHOjDZRJqxnuWQK6phHkgsdYi\/zgnkqSBiSIHuzD1BqByXUdlx+++bq5rL1hmP16xB374TnuorAOtLctr8WMEe0yvZjuWQJicG4Lxkg2WexrbhplYZZteZtMcZQgzmeLcTSggbUnbY0p6w3toF2MTW0xxHv49s\/Y2eIFMtMYX6ZNepKA0FjvOgR8uM643v23OGPBGE\/zkds\/TR7vlvC9Y8z47VdeEg8+f1QgbQQB41o1sKkDEtttIN+QOPiDChwo5OOZT1FwPW3SkwQ8dfHDqvew6\/ptVXnrZezYvEYqlIN5jRI4Hj8mB8aWVyk2B0IYgTaFg1OvvPXB+xVVYH5tEw7y2\/LcX+OdJbtcL6V7koBRANdqfk3dXduqCXvG8nhNORyhjVzv2VyH04MwTr39o36c+TVt3+967KSl02aGU0NA89JaccQsiOssoB9ox\/snK015rf2vZ35NG1FmGNo3TdK3BLy8vFL1HreUg9bmAszsnuPH9PyyybOPuP44jQdtrQRTji+Dm48bKjL1XUK75teUc82wqzVNuV499iQBbafAAB9nPs1192gHmM0114weohDLqYuV3jYWBtj94\/qh371hmqgKjJuZmLBAOfHcnyuDy9B2CKq7H3tMiKpwWmzCu+322nlPTsVFBX\/fJSLsHK90LNZ7Ge86jow7+4DpMVd7YawHh+ORO3aRF3wsdEQQItlBK2FATiwDs8UlNa7Bm3VncNCX25\/djp1Gf9\/67BfsrJ5N96QGhFapiuNFhFG+S4sD7vnlM\/oDU2oHkd3VJ66mcafHEB4xfcJcYvmVLZhNwZSeq9mivPPn1pn6s9uMS79GfxxpkZ4kIMB3A8TQCjbBUAYa6TItSD1D8TaYSozXINA0rgZy44iumXOvQ2NiftkWmGK73QduuS3SO8aiiCSSJjPckyYYLw8myF58ahwCxOOM2YOmevbBfXrZFeqAhFgL6BIA5Yx2Q7ko0WNGZ\/YEWhHerDstaOpHechYeGqTFGWf3bNPe9SmXtQRwW879ohnT8NC1Z7VgDDDWHxgCwiGVcW2JsTg3n5RUdovagbDNckwra5WRN+oGxUjxJSamdWw79E1\/dCk9qod\/CFEfVxv2P0jsvvopXTPEhAgg1iu8wAS3vOrZ\/Q8LTQTPiBOnDcKEkcRxQ0Co90Hn\/8FeaHva00EbYQ0NKobUsG9naXV1lGEdYnzMDk0tYh7PzDDaVgh07Mm2Lw0LK\/SWs+ZStMvyJqrNeXtIzRX3PItaM7AzK9Nf5kFqHYbcWkQFmPCn3x1bZwIz9o1v1FmOqpNE5S2zXAaFqr2tAbEi8L47ZWvPRapxaJepJ0XFQu0r2NdXj3hDmhTO0YIx8geH742U7nuD9q7ntCRa4bTsFC15wkIwsC8wiPFSmiY0zhzi3x7vBZoqbX1fDb5TBokRNuuqTfX0SbGbIgRBvPCcILWVrEgPINxJzSXG+er1fyavlwzrIcBCT1q03anjvI\/F\/6r0Pl1123t1D1U9OvuadzoHtEgF14QtNwOClBDU5ovEmEdH0y0kVo1HcZ0py4G3zdG3U9tIw22OfjOsWmr247NwrPZz\/W\/\/13STfb8GDAOGKzP0+KETpCHsAe+xmnGY9BSWIUcp+WChqBph4NwTUSbpgwf60MgtQRcDwaYyDfJXLN0HoFUjAE7DyPfQaMIMAEbRY7rNQUBJmBTYORGGkWACdgoclyvKQgwAZsCIzfSKAJMwEaR43pNQYAJ2BQYuZFGESACyjlUVr6eEGm0Ha7HCNSMgFIh1+Y8IVVAQBFm1twQF2QEGkEgJKAUc10\/E+LOZTbywFynexHgMWD3vpu+uDMmYF+85u59SCZg976b1N6Zb5wQJeeyUokZcj8mS74vPK\/zfGx0\/V9q31YKHyx0QoQiL5iFEeggAp4vBMcBO\/gC+r1rTyqld2ZUiqjIwgi0AQG\/VNK9SCln2AS3AXDuohIB44Mg11NSzCDhkxPCwgi0AwFjbX3lv0d+bzAXHLrG7bgD7qOvEVjzguWcVyrPBQtbL\/Y1PPzwrUbALwXW1sMYMENxQHRYLAYDw1Z3zu0zAqEGVD7FAYsBAcNMxocRaDECmPTQQtzz8tu3z+AETgivCdSw8H8tRsA4vOBeEIYpe8KK1wS2GHpuvliOAdJC6JNAQxOQ\/A99srq6yggxAi1FwAShhV96Dx2VNaCvT9bY2dJ74Mb7GIFisaifXnm2BhSZaeT67AlrcPi\/1iFQKnPMk96aCc5kBqfRJTQgOyJAgqVVCKyWNaDIXJtGH9oE57dunZNCTCMUU\/Q5Htgq8Pu93ZB85IDkt+bnQgIiQUGY3+K4slL9G2rIZ2EEkiKwshT8xK1SJc01tBc4IUFiGhkrET\/ih3wWRiApAkYDeiJ71LQVEjC\/bfu0McOmoCnER0YgKQLLtF2yDkDT1G9+YkI7IGgzJGC5g5dxXLq2WD7lAyPQHASMZZVCHbJbrCRgdugotuqABmQtaMPE6SQIhHzS2m87cWxNKggIb1gJ\/2lcZi24BhKnkiFw9cpV3QBFWY65LdGwr1IKly7l1OryO0KKydHRETE8PFxZgM8YgToQwNjv6tVrtPuVmLll4sa8W7VCA+Kijgl68gDSi4tLHJgGECwNIYBlV+AQxB37mQarCIgL+Y\/dcIJUow5MX7kaqE9TgY+MQK0IYBinl\/kJcSI\/UTn2M21EElBfLKpvaoeEVsgsLQUsNpX4yAhshMASBZ2X9aQGfe+jqLRFjaoTS0AsFpSidAiVoEbDVaxRrXAeI2AhEJjeIJQnlX\/ALHq2ioTJWAKiRH7bTUeU9J\/GHPHC\/AKTMISNE3EIgHzgCjhDX798Os70mvo01FtftFdcXHmD3JjdmUxGjI+NCeltWG39RvlqKhEA6eahqLDqmRY5k9d750YPuq4GRGXtFRf9fXCj0fD8ArGb95PeCNe+u+6Qb0aW1L5aQKhZlRVmZydVRr6B+CBrwlqg7Z8yEeS7b71xn41MzQREJZeEm8c2i0wX7CloPxCn24sAxnxXFq4YswvNVzP5cKd1ERAVbBJiQ8ux8TEmIYDpQzEOh1nlUi\/5AFndBEQlTcKs9xIcE9piS4yMDPOUHYDpI0Gcb3FxUXu7cDgw5qvV7NowNURA08C\/Pzp3RCrvCZwPDQ6KYZo7ZpNs0EnnEeM9LC5YKX+FF6EW7+ryU\/l8sMS+3qdOREB0Vjg\/u19J7zBpwxxMMrThEP0iOUv6EKjQerRsjyJ9h27dduORJE+amIDoHCZZZOVh2ux8L85BxE2bN4mBbNf\/Dg5ul2UDBLCeD\/O61hrRaUlTtY2YXLerphDQNKq1oZAHEapBHgg4ODTIGtEA1GPHKuJRLFhPrd1w04lmPUpTCWhu6t8XZp+SSj5miAiNqMk4PMRa0YDUhUeM7\/Dd8FVaRLBMX07DeSAwtzTWu7J0pNGxXtzjtoSApjNXI5p8kDGTzYgsHT2a3svSh6W9CIBc+GA\/IMxwYccCvTFBSDp9P9NEkJfFlcWjzSaeedqWEtB0Ujh\/frcvivulzNyL0I3Jt4\/QkvCgEdbxMsER6eB8jaD6nPJtMeXsvLSnDYHc50RsDqLoaDSYXpNXJhw2IkW+jt25lYPzaaLmb2mOdhrflIwu0rzcyjfZvHZjWyoUCjkxNjpFG1Tv9oT3OVLyk3GkjG2ELzQHAdqWj4ZKJ31Vos3CaX+ghWvTrdJ0cTfcdgLG3UjgSRMZpZejP9FJ+vvNecq7WZeXatLUU0LmhFQ5c66PivKofEVe6k9oc3mzv7f1rPjpteCUrqvgR4h8SbvRU9gE+4HrLZlpZ9JmeLBWtw0n\/w+IOsoy1qfzJgAAAABJRU5ErkJggg=="
},
"copyright": "Copyright 2023 Dify",
"privacy_policy": "https:\/\/dify.ai\n",
"position": 3,
"chunk_structure": "qa_model",
"language": "en-US"
},
{
"id": "982d1788-837a-40c8-b7de-d37b09a9b2bc",
"name": "Convert to Markdown",
"description": "This template is designed for converting native Office files such as DOCX, XLSX, and PPTX into Markdown to facilitate better information processing. PDF files are not recommended.",
"icon": {
"icon_type": "image",
"icon": "9d658c3a-b22f-487d-8223-db51e9012505",
"icon_background": null,
"icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQfElEQVR4Ae2dT4wbVx3H35vxrjd\/dmMnIZA0UrxtilQuTYUEB5CySD2CSJE4Vl0uHIpQk1sFh7YHqt7aCsGBS7fqEQlSwRGpi8QFJMRyQoKEdaR2U9qkdva\/vfYMv+8b\/7zjsZ2xPTP22PN70u6bP2\/en+\/7+Pf+zMwbrVLiNu9XSpSVUpP+tOsUlKsKtH\/l4Z6rXNrW2uyrc6cthAs6hMVfllyVCou\/Y+eq6sM9x3+sfO6Uxvl7Squqq6yyTT7tl5cvFss4MWmXG3cGNjcrhWZerWjlXFdKlyj9a\/RXcogyOCMX\/nsbBJ93vOWZMPLPKFCg\/\/g7dqRZl070y2Wn6VfteHKqu1tfUGC1QTqX6aJ\/utrasGtqfXm5CEDH5o5zl2CSZN1WKPrrBNMKlR\/bXc6yLKUtrXK2rTSJhj8c+3zboeN0riXkVwrdvxkO3xXpDB\/AD5N\/nFxM7P\/vEbUhLec0m+r8okXhHBPWcRwCkCBskk\/bPZ2B0l23ctb7yxeKGz3DxHgwMQBh6Zy8s0oofd8PHWCxc7YBzSbY5ubm2sD1KtdnBKDfXViy\/LuyHVBgGL2aBChgPGocqQZtN44agdhU2XWcN65ePr8WPBHXfuwAAjy1oF6hX9pNyqRpIgBdPj+v5ufmDXxszQYpxDCCDhLfrIeJqhcgrNVr6oh8n5UsW1qvUb\/xjbj1ixXAO1sPblDD+TZlsoSM5uZy6uTCCeNjfxQXVdBR0pzma+LUq1arGxh9ljF2ixgLgBjBUv\/jPW5q4wCPIYhTUI5zlv0k9AKAu3t7fot4myzirThG0pE7VJufVtDc\/gPwoWk9efKkWlpcjGT1ZhmQaSwbDEqhcEadOnXKDAypDDdQ53c+frAatTwjA4i+3uZW5W3Hcd+hTBTm5+dMJhcW8lHzJNenVAH045eWFk1\/HnVOsxPv3d16iC7XyG6kJhhNLoH3e5pDugard+LECZUUeEk0KSOrNQUXjkuvw8OaOjg48KaCaOrGsvQLozTJQ1tAA5\/rfgT4ME935sxSYvBNQX1nNoswOKh7MAAWqEn+CGwMK8hQALbho1Eu5vBgjk0Ghk1Vws+EAqh7MAAWyOFu1tAQDgygwDcTzMReiKgQDgRgL\/iGmUyOvdQSYaoUAAujWsKBADQDDl+zK\/Clqv5TkZkuCGmQau6KheQuFEBMtaCTCVO7uHi6\/VBASLxyOoMKAEIwYsYFGJjkndfCZHgsgHfuP1il5yhuMt0m4rAY5XymFeA+oddK6ps0T4hnAvq6vgCi36ddc1\/XzPMJfH01lBMBBcAK5oY9p18DS4Eg7d2+ANKQGjPcBcx+JzXJ3M6FbMycAmAGd8fIFfCcQL8C9gQQTS9dcKOT5H5RyHFRoLcCuHeMphjPCdzZqtzoFaongNT0ms4jzKg0vb1kk2ODKAD4uCkmDN\/uNSruAvDu\/QrgKwE8NL\/iRIEoCqApxtM05ErOvNM1IOkCkO4uryL0aTKf4kSBOBTAQ8nGaf1K0Ap2ANjq+5VAbIvaONKXODKugI8n856QX44OALnvl5+XZ\/r8Isl2dAXYCuIlNX9sbQA3P65coxPS9\/OrI9uxKQAryCNimhdc4YjbANKboqs4OOd1GPm8+KJAbArwoJbetlvhSNsAKktfx0Fpflka8eNWAK\/lwpElNKyZbfzDyMTJuxVsnz1bhJcaF3zEPDUZm5KMpOlFfqzcUK0+Mo\/xWzVdxDIgxgI2880V6Ckj3ymhakqziT4gVsWAw\/pA8A2A2tUYgKic5Z3EtjhRIAkFsPaPca1+oNcH1PpZHMzROi3iRIEkFWi9P4KOYAnp8FJTZse2PR5xIi0uTX2YtGgyzfnAYlRw1Bobo8fEmSa4Tec0l1DynmoF0A9suRJ8ix8WlKdeWrKIl6gCAJBZA3sWrQhXQopWCpvfRJWQyCemgN8KWtptFpATWu1oYhmShLOlQI6nYprNEi2Kq0sovqW5O4g9caJAcgqwBaQlmQu0gHBrFVNCUZwoMA4FGECwZ7na6wO2D44jB5JGphXgQYilrCvtdlcAzDQTEys8AaivIHVbbsNNrBKyljAbu6Zyi20LmDURpLyTU4AHvDTsOCMATq4eJGVSAGNfMw+IrxSJEwXGoQDf9HDxCggl6AEoE9Hj0F7SCCggTXBAENkdrwIC4Hj1ltQCCuQ+33EVlo+pWw49pRA4G8Nu1Of5vvpqNYZcZDeKf79lelgjC5DEOzn4Bt32jvcRShp6uNIHHLl65MJRFOB5QLqW7gXLIGQUDeWaCAoEAYwQlVwqCkRTIIcvasOdjelD0En0GaIVUa6OU4GofXrOS67hcZfAsIOTEF8UCFdAAAzXSEIkqIAAmKC4EnW4AgJguEYSIkEFBMAExZWowxUQAMM1khAJKiAAJiiuRB2ugAAYrpGESFABATBBcSXqcAUEwHCNJESCCgiACYorUYcrIACGayQhElRAAExQXIk6XAEBMFwjCZGgAgJgguJK1OEK8BrR4SGnNETwnYhXf7uvfvf3+kilWf12Xv3su\/wpei+KqO+sBPMXNb6RCjbBizJnAd\/64Un1zMXhP0fxzCW7C74J1tvMJJ05AFFzH\/z4tLo8xLI4CPvrF+X7yUlQn0kAl05oA+HSQvhyJIAPwD4xBLBJVNSsxplJAFGZAApghblfkeUT+MJUGv18ZgGEZOjXoU\/Yz\/38eydMmH7n5Xh0BTIH4F\/\/Sx+m8LkffH1e\/fT5Bd8RbxPHXvpW55fj\/7XV7AonB6IpkDkAf\/LBnvq44i0LwdIFYcN0SxBKXPMyXSsuXgUyB+D2gate\/M1uF4Robr\/5ZM40ucG5PsCHaz4JgBtvVWQztswBiGoGSLCE24e0RKLPYcARnG5BGIQV+HxCxbiZSQChH\/pzb\/7hoENKTM8ER7wII32\/Dpli3cksgFARt+R++afDvoLi3Ki37fyRYqCDv1Hd81+bi3T9qOmO47qZvxccJiIgg+ULjnjX\/lJ7LJxh8fJ5gOef6hkW6KjXcz7S6mfaAnKl\/IKaWf\/0zN9oqubNP3Y2zxx2GD8ID0AcxhL2uh4DpVlys1WaCDWDUe44HFvDMEsYhI\/z9g0C0P9j4ePT6osFTLDmABke\/wq6MEvYDz50Fx7XZw2mMw37YgETriW2dGz5OLngPh\/PEnwos1hArvkE\/cdZwmCyvcCcRcvH5RYLyEok7PezhGHJRnmCOyzuNJwXCzjGWuhnCftlYdbhQ7kFwH61n9DxQSHMAnwCYEKQhUUbBmFW4BMAw0hJ8Hw\/CLMEnwCYIGCDRB2EMGvwQaOZHwXH\/Z5t3PEBQnb+bT426\/7MAzgNFZhF8LheZBTMSog\/EQUEwInILomyAgIgKyH+RBQQACciuyTKCgiArIT4E1FAAJyI7JIoKyAAshLiT0QBAXAiskuirIAAyEqIPxEFBMCJyC6JsgICICsh\/kQUEAAnIrskygoIgKyE+BNRQACciOySKCuQe7DjLdbYyHUu2sgBxBcF\/Ap8th0PJ9UWd2IB\/erK9tgVAIBVpOq6nYs1jj0nkmBmFPCxVrVcpQXAzFR9OgrqB1Df3fpik7JVKhTOKMuSFjkdVTTbuXAcR1Wrj1DIshA323Wd+tIJgKmvotnOoAA42\/WbytK5TnvAi0GIKiOXTjOe+Z1UllgylSoFeBBCn4qsigVMVdVkLzMWKESxHZkHzF7tp6DE1AS7ZjzsutIEp6A+MpGFpuN99FG7WqZhMlHjKSukv7G1tNsahNDkoDhRYBwKcGvrKOeepXTrXvDx0HgceZA0MqwAj4LBnuVq17sXrNpzMxmWRoo+DgWardbWVVaZBiF2GYk2GvI18HGIL2kcP3llwwLSAoFliNI2i6KQKJCwAr6bHmVr+WKxjPTwhILMBSasvERvFABrcGCP74SUzRH\/+NgckH+iQLwKNI+7ehuImZfoxU7p6OhI5fP5eFOMGFtc7yBEzMbUXn5hiW1MOorAk9Bk6+4hR17uHNfs+OhMR24lFzOnQKPRMGXSyjUW0ADoWu46jjZat0hMCPknCiSgQKPpzba42joG0K7Z60gLFlAGIgmoLlG2FWgceRbQrql1HDR9wOXlYvXO1hfrNBez4hCE1hx3DdvXpWYjbX2a1AjTykia+8wMH2V1A8why+0eKs0D\/hkH6vXjD6dgX5woEJcCh\/WaiYqeiDasYacNIL0St44DNQEQMohLQAG2gPa8tcbRtwF8+mJxne4Gr+OOCAfkQOKLAlEVqNVq5mYHxVNevlA0AxDE2QYQOzQ0\/hD+\/uEBPHGiQGwKcMvqOvoNf6QdAFo1YxqrsIBiBf0yyXYUBXw8la9eLq754+oAECMTmoZ5FwHECvplku0oCuzu7XmXu+77wXg6AMTJXN16h7wyqD08PAyGl31RYCgF\/H2\/p54493rw4i4AYQVpwaJbCHhwcCgT00HFZH9gBfDYFRiCC\/b9OJIuAHHi6qXibR4R7+22zCdfIb4oMKAC6Ma1Hr26Hez7cRQ9AcRJW+sfkVfFEzLSFLNc4g+qwOFhTdVr5qZG1dJei9rr2r4Aeg+qekNm0xTL0h299JNjPRTwml5vKo+a3lv80HOPoJ3zgMEAT10qvkO3Td7F5PT2zo6sHxMUSPa7FAB8YAXMgJ1+TS9f2NcCcgD7yHpd081jtOU7u7syKGFhxO9SANAZRvDIvas2rl4+d7MrUOBAKIAYFWutX6Dryk16lmtnmywhJSROFPArYFpJYgOMkCtblmHGH6TndiiAuMq8PKL1d2hTIOwpY7YPdsFHrDyu3+dXayAAcUFPCGVg4tcyk9umz+e3fEPAB8EGBhCBgxDKwASqZNfxgKPd7A4JH5QbCkBcwBDywOTR9rbME0KYjDnM86HuzUQzDThorm\/gZtcv1dAA4mJA+OSls8\/xFM3+\/oHCDWf8IsTNtgI80t3f329PtVj10eCDUiMByBJjmO227phg1htNMm4+i5tNBWD18H2Po\/oRClh1lHsLDPD7HaOUOhKASPDqxeIamd\/n6HHW2zDHe3v7JpPyPOEo1ZHOa1CXMC5s9aj7tY46f\/rSOTw5FclRXPG5O\/crq9p1X6MYS4g1R2\/X5efnI622EHzLS96Kg7L9XZx6ATw8UOAzJmU8KYWHVfrnYLgzsQLISf\/nk4ev0y\/kJdov4Rg+AQYYF+bzxsexQV2cgg6a5jSHi6IX+nd4N7x+VKeuVN308VpamAeV8axolOa2l66JAMgJBS0iHweMOdtWuVxO2Zat7JzNp7r8KIJ2RZaBA4PqBdjwh6edMI2CFQsAH46xIzjoRTX9oVVTa3GD50uDN5PzNz+rXGvWnVW6PXOdinetV0qwkpZNKwZrTVB6PrYf7NA6mgQpuy+fsZXGxyV8DuHwlyXHAAXL\/GnFW3kA6zAjzJdocSL0zTk8FiLFtpk+CV5M+4CuiXfE6TVdvCnZI0ish8Zea5ublUIzr1a061wjap6lDJT6QYmS8hfdudTnFyOPmziqmfSH1KtMImzQdNo9AIflMpKydP3EHjuA\/TKyeb9Sot9uiVbtLwBKepanQGGvPNwzTUKJrzt\/2irQEZzzO+wHj\/nPz+J2lQqFvw73cNcp4wAZOXqIRFXPnTJVfI+ajapL+6RdmRZeKWMuF+Em7f4PpXL0Ed9VCt8AAAAASUVORK5CYII="
},
"copyright": "Copyright 2023 Dify",
"privacy_policy": "https:\/\/dify.ai\n",
"position": 5,
"chunk_structure": "hierarchical_model",
"language": "en-US"
},
{
"id": "98374ab6-9dcd-434d-983e-268bec156b43",
"name": "LLM Generated Q&A",
"description": "This template is designed to use LLM to extract key information from the input document and generate Q&A pairs indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.",
"icon": {
"icon_type": "image",
"icon": "e4ea16ed-9690-4de9-ab80-5b622ecbcc04",
"icon_background": null,
"icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQjUlEQVR4Ae1dTYwcxRWuqpnd2R\/veqzgxXaw2YEgRSDBEkJEwsFLDkE5xRwicogUR0g55GJWKGfjXBPJyyU3hLkFKRLmkohD4uVgHIVEOCggRTGZNTbesDbysj\/end3prryveqq3Z6bnv3t2tvu91Uz9dHVV99ffvqpX9bpGigGR4tLStMiKaUeKaallXgidV1o9iMtzpc5LISiPhI6bsOqLymvtHa\/KT3BCyhXCiD4B0QJpP49wXMRRV7rXCbgVLd3FjKbzymKxcPSoOYbjeyn0XPsrxbvFvOPkZjNanXQFkU2KGaHDSNXf60ppa1e1EItE5H9qqa9mMqWFwqGCT+B+YNIXAhZvL80KoU5qoSkU+NSJUkooYmMmmxGSQnyQB5EUIg3JVPJMovJlywfzkh7XmtCkT1CQdgN5ruNQGaKXdk1Z16XQ1cKhEPEGcpWQXhBavVmYmrraoExk2bEREJrOLY+epgZ+RFc7a68YZMlmMoZoGQqHhoZ8wtkyHPYHAYcICjKWd3aEU3bETrlc3bAUi66rz31j6uiF6gPRpSInIIgnymNntBQv079dHpcK0uVyw2JoeNiQz2qz6G6Da4oKAZBwu1QSOzvlXS1JRKTx5IXC4fvPRdWOrSdSAl774tYplVHn7ZhuKJsVI2OjAiHL\/kOgVNr2yGg1YwwaMRICFu8uTeuyfIMgngXMTDygkByBVtxY3\/A1Ig0rL6qsnisc6t2S7pmA179cPuNo\/Sq6W3Sto6OjYmQklxz0+U58BKARNzc3LRFXyOCZ63V82DUBvbHe6Fn6b3gZVzg8PCTGx8d9a9W\/ao4kCgFYzyAhyAjRQs0\/fHhqrtub7IqAlS73bWp0hrVet9Dv7\/O2tkqGiJWpoKsyq1\/opkvumICGfI68BEMD83STkxP+fN3+hpSvvlMEoA1XV9e8LhmWckY\/1ykJOyJgkHyYw5uYOMDk6\/SpJaw8SLi2ti4wp0jLpB2TsG0C1pIPmo\/n8xLGpi5vB90wNGE3JGyLgEy+Lp9Mik7rloTeYmsLoGiO722M+dDtsuZrAVZKD6M3BDfAEXAFnDEzJS3waEnA4u3\/nac6ZmBwYMzH3W4LRFN8GNwI2AUzbnn8bCs4mnbB15aXTpOHyhuo+ODBSTY4WqHJxw0CMEy++mrVeOBoR8w9fOTIfCNoGhLQG\/epD7HCMTY2xqsbjRDk\/FAEME947949HFuhOcInG03PNO6Cy3Aq0Hl4sfDSWijGnNkEAXAGq2Mk+YqfQGjpUAKi6yV3x1MY92Ftl4UR6AaBwNLs7LU7t06F1RFKQKWkGTyCfNYrOexkzmMEmiEA28EqMPJ3Px9mFdcRsPjlF2ftMhu6XxZGoBcE0BUbf1CamnG3R4zjSrC+OgLShOJpFBg\/MB4sx3FGoGsE4JQMkUqeqdWCVQTE2A\/aD4xlL+au8eYTaxAI8Mm8JxQ8XEVAO\/YbzrFDaRAkjveOgK8FvZfU\/Ap9AhaXb5r3c2F08NjPx4cjESEALVhZRZv1XtP1KvYJ6Cp1GllDQ\/wCkQcNf0eNgFVstFAya+v2CSh15iQyufu10HAYNQJ4LRdCxojhGuKGgMW7d\/PkwjCDDDY+gAJLHAhgQwK\/G8b74ySGgI6zPYsEkw8osMSFAMgHEhpxxmYRGgJK7Rrtp2hfFhZGIE4EsPcPxHWdWYSVMaB8AomhrFk8RpSFEYgFAeOwSjVLmm9GA54GFHKa4uTNWuEjEiyMQAwIYDMqIxlllF6FcZ4BYtkZQ7tcJSNgEKgYIcZtHxnK7EyKCE1AszACcSMAAlqugXsK2+Ki0bCNH+O+GK4\/nQj4WpC4pxypzHwMTQ6mEw2+674jkK1YwtgPXGW0nsYVYBtcFkagHwhYDYjN6BXtGuzNSFPfzMII9AMBS0CyRPLKzsfsZvbjEriNNCNgjRAl1YN+v8sETDMl9u7e6b1z+SCaV3aNbu+uhVtOCQJW2WnHOeRrwJTcO9\/mACDgG7xKHWQCDsADSfMlKC3wu2zUBbMVnGYe9PXe\/UUPzAOSW4I3Ec0E7OtD4MY8BFL7AsiJ3\/0m0Rz47Je\/2hf3x2PAffGYknuRTMDkPtt9cWdKmB+HprVg+mNhBPqBgJ0HpF048qQBK0YIe8P0A3tugxDwCUh7B3IXzJTYUwSYgHsKPzfOBGQO7CkCTMA9hZ8bZwIyB\/YUASbgnsLPjTMBmQN7isDArgUnfa12T5\/6ADXOGnCAHkYaL4UJmManPkD3zAQcoIeRxksZ2DFg7cPYL\/5ttdfdbjqtY17WgO0yhMvFggATMBZYudJ2EWACtosUl4sFASZgLLBype0iwARsFykuFwsC+8YKjuXuG1R65dZn4sWLb1UdfevUT8R3jx2vyuNE7wiwBgzBcHVruy735upXdXmc0TsCTMAQDFe3t0JyOSsOBJiAIajeXKvXdmF5IadyVocIMAFDAPvkzu263Jtrq3V5nNE7AkzAEAxvhGjAK5\/fCCnJWb0iwASsQRCa7pM7yzW5QqALvsGGSB0uvWYwAWsQvPL5ZzU5u8k\/\/PtfuwmORYIAE7AGxvkP3q\/J2U2+\/tE\/xGqJLeRdRHqPMQEDGJ7\/4LIIG\/\/ZIqulkjjfhKC2HIftI8AErGAF8rVDLmhBlGWJBoHUL8V5Wu2yALHaFRAV5809\/T0xmRtp9zQuF4JAagkIAr3+0d8N8RDvVEDYd4vXDAmfOXZCHJ+c7LQKLk8IJJ6AcCyw67iYYsHnr2Tp3ohgYhlTM6\/85U+GSI99bUo8QCR89D4KJyaNZpzM5ciB4QQTrQkCiSdgrVdLEyx6OvTxl8sCH2jFoCT9XZbgvXYTZyOkG9T4nMgQYAJGBiVX1A0CTMBuUONzIkMg8WNAeDLDysUKBowGeLog\/DhkvbcXVI+T4fHM108YA+SBiYOmqgcmvbCXepN+buIJ2MiNHiSEhwuW3pqtfjQjAKzclx7\/Nn2+xfOBzYBqcizxBGx079BSP\/7mQfF84REzF9jp6sZLjz8V60R0Wqzn1BLQEhNaDCsakHZJOPf0s\/45th4Ou0OAjZAKbiAhutNWYjVfq3J8vD0EmIABnLy13VwgpzqKbttqy+ojnOoWASZgADnPqHgqkFMdfekJNjaqEek9xQSswbBZN\/yD6UdqSnOyVwSYgDUIQguGebY8Rk4Gx3lerwat3pNMwBAMnwnZggOeLizRI8AEDMHUrmQEDz1K7lYs0SPABAzBNIyAYXkhp3JWhwgwAUMAmxyud7PH2JAlegSYgCGYTo4M1+Xyux91kESSkfqluDAU4UaflrXYsPvvZx5rwH6izW3VIbBvNGC3v6PRjSbr9Y25OpQ5oyEC+4aADe8g4gPv\/vc\/4teXL3XtIxjx5SS+OiZg5RHj9c35v70vrtzibdj6yfrUExDvCb\/y5z8y8frJukBbA0vAbsZuuK92x4p2nNdsPxg4nrK7fYAtMUQHloAx3Kup0hLP22otfEsOvEfy2+\/\/kJ0P4noIgXpTRcBWBgaI9\/J3nuXfAwkQJO5oKgjYysDAOu\/ZZ58Tzz\/E\/n5xE662fiKgXBFC57WrhVSy9vi+T7948fcNDQzPA5pfq+z3Q9Za2yZXskLqFaFFXtOXpL+kSaNpFTYw9u5J+wSUggiYMmEDY7AeeGoIyAbGYBHPXk3iCcgGhn3UgxkmloBsYAwm4XBVrjVCtFzJSi0WySaZdlxXKJUM7yw2MAaXfLgy3wgROnlGyOWf\/oJXMAabf1VXp1whaB6QWEnzgEkQfnd3fz1FJbU2P46rNVGRhRHoAwKu45hWpJSLyRj09QE0biI6BKwNghqVlmIREZeMEBZGoB8I2N7W1e51snuxFhwwjftxBdxGqhHYtYLlinKwFgwJ6sVUw8M3HzcCruP1tgpjwAzNA6LBctkbGMbdONfPCPgaULsrSpQ9AvqZjA8jEDMCWPQwQtxThaNHF5GAEZKUuUBzc\/w1sAhYgxfc86ZhKpYwfAJZGIE4EShX5gDJEfoq2jEEJPvDJHZ2duJsm+tmBISdhKbIdcBR0YCuSeyyk5FiBOJBoFwum4q1CmpAkVlArsuWsAGHv+JDwKlwTEm12wVnMsMLaBIakA0RIMESFwI7FQ0oMvcW0IbpgguHDq3Q60gLmIopuzwfGBf4aa\/XJx8ZIIVDhRWfgIjQJMx7CLe3txGwMAKRI7C95e1EobVjuIYGPCPEiywgY7vEBAQOLNEjYDWgEtkLtnafgIXDRxdsN2wL2kIcMgK9IlCiHw03E9C09FuYmjIGCOr0CVhp4B2EW\/c2K0kOGIFoELA9qxT6XLDGagJmcxewVQc0IGvBIEwc7wUBn09G+x0lju1KFQFhDWvhvobDrAV3QeJYbwhsrG+YCmiW5c3ammjYVy3Fu3fzeqf0IW0TMz02NipGRup\/tKX6DE4xAo0RwNhvY+Me+ZuKxYemjhRqS1ZpQBw0c4JKziG+ubnFE9MAgqUrBOB2BQ5Basd+tsI6AuJA4b77L5JqNBPT6xue+rQncMgItIsAhnHGzU+Ii4Wp6rGfrSOUgOZgWf\/cGCTkIbO15bHYnsQhI9AKgS2adC6ZRQ1676OsTY8adk5DAsJZUArnHE6CGvW9WMNq4TxGIICA1\/V6U3lSu3PW6TlQxI82JCBKFA4fm9fSfQ1rxGura0xCHzaONEIA5ANXwBl6\/fK1Rl2vPZ+Ges3FWMXl7UtkxsxkMhkxOTGRyK18m6PAR9tBAKRbhaKC1zM5OZPV+2Sr85pqQJxsrOKy+wLMaFS8ukbsTsg+Mq3A4ePtI1BDvkXp6BfaObulBrSVFJeWpnVGXsL8IGtCiwqHQCCEfM81G\/cFUWubgDiploQHJg6ITEL2FAyCwvH2EcCYb31t3Xa70Hxtkw+tdERAnBAkITa0nJicYBICmBSKNTisl0un5ANkHRMQJxkSZtXbMExoiy0xOjrCS3YAJkWCeb7NzU3T\/cLgwJiv3W43CFNXBLQVfHrn1rzU6gzSueFhMUJrx9wlW3SSGWK8B+eC7corvJhqURulVwsFz8W+07vuiYBorLi8dFpLdZ60YR5dMrRhLpfr9Dq4\/D5AoErrkdsezfSde\/jwkfleLr1nAqJxdMkiK8\/TvgqnkAYRxw+Mi6FsYjfhx22mRuDPh3XdgI\/ogqSl2m663FrQIiGgrdRoQyHPYqoGeSDgcG6YNaIFaJ+FdcSjuWCztHb\/sYtR3UqkBLQX9entpVellj+zRIRGNGQcybFWtCANYIjxHd4N3yEnghK9nIa0J+huaay3vjXf7Viv0e3GQkDbWK1GtPkgYyabEVkKFS3vZenD0l8EQC58sB8QVriwY4HZmMAnnbmeBSLIO2J980LUxLN3GysBbSPF5eUZV5RPS5k5iakbmx8MoSVhQWNaR2W8EHEvvUtQk6b8oNhywbykxy2Bau8Tc3MQTaHVYMYnr0I4bESKfDN3V3uyl14gar5Ha7QLeFMyvEh0udVPMrp6G9ZULBbzYmJsljaonlFCPUFKfroRKRtWwgeiQYC25aOh0lVXO7RZOO0PtHZvIS5N1+iC+07ARhfiWdJERqny9C86Tf+\/eaXVg6a81NP2PC1kXkidt2kTasqj8lV5iU\/Q5vJ2f+\/AveKn17wkHdfejxC5knajp2kT7AdutmSmnUmjsGADzXYd\/T+j7cbUE7Qx3wAAAABJRU5ErkJggg=="
},
"copyright": "Copyright 2023 Dify",
"privacy_policy": "https:\/\/dify.ai\n",
"position": 6,
"chunk_structure": "qa_model",
"language": "en-US"
}
]
},
"9f5ea5a7-7796-49f3-9e9a-ae2d8e84cfa3": {
"chunk_structure": "text_model",
"description": "In this template, the document content is divided into smaller paragraphs, known as general chunks, which are directly used for matching user queries and retrieval in Economical indexing mode.",
"export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/dify_extractor:0.0.5@ba7e2fd9165eda73bfcc68e31a108855197e88706e5556c058e0777ab08409b3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/general_chunker:0.0.7@a685cc66820d0471545499d2ff5c87ed7e51525470155dbc2f82e1114cd2a9d6\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/notion_datasource:0.1.12@2855c4a7cffd3311118ebe70f095e546f99935e47f12c841123146f728534f55\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina_datasource:0.0.5@75942f5bbde870ad28e0345ff5ebf54ebd3aec63f0e66344ef76b88cf06b85c3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/google_drive:0.1.6@4bc0cf8f8979ebd7321b91506b4bc8f090b05b769b5d214f2da4ce4c04ce30bd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/firecrawl_datasource:0.2.4@37b490ebc52ac30d1c6cbfa538edcddddcfed7d5f5de58982edbd4e2094eb6e2\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: d86a91f4-9a03-4680-a040-e5210e5595e6\n icon_background: '#FFEAD5'\n icon_type: image\n icon_url: \n name: General Mode-ECO\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751337124089-source-1750836372241-target\n selected: false\n source: '1751337124089'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: variable-aggregator\n targetType: tool\n id: 1753346901505-source-1751337124089-target\n selected: false\n source: '1753346901505'\n sourceHandle: source\n target: '1751337124089'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: variable-aggregator\n id: 1750836391776-source-1753346901505-target\n selected: false\n source: '1750836391776'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: document-extractor\n targetType: variable-aggregator\n id: 1753349228522-source-1753346901505-target\n selected: false\n source: '1753349228522'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1754023419266-source-1753346901505-target\n selected: false\n source: '1754023419266'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756442998557-source-1756442986174-target\n selected: false\n source: '1756442998557'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: if-else\n id: 1756442986174-source-1756443014860-target\n selected: false\n source: '1756442986174'\n sourceHandle: source\n target: '1756443014860'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1750836380067-source-1756442986174-target\n selected: false\n source: '1750836380067'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: tool\n id: 1756443014860-true-1750836391776-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'true'\n target: '1750836391776'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: document-extractor\n id: 1756443014860-false-1753349228522-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'false'\n target: '1753349228522'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756896212061-source-1753346901505-target\n source: '1756896212061'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756907397615-source-1753346901505-target\n source: '1756907397615'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: text_model\n index_chunk_variable_selector:\n - '1751337124089'\n - result\n indexing_technique: economy\n keyword_number: 10\n retrieval_model:\n score_threshold: 0.5\n score_threshold_enabled: false\n search_method: keyword_search\n top_k: 3\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 479.7628208876065\n y: 326\n positionAbsolute:\n x: 479.7628208876065\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -1371.6520723158733\n y: 224.87938381325645\n positionAbsolute:\n x: -1371.6520723158733\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n documents:\n description: the documents extracted from the file\n items:\n type: object\n type: array\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,\n jpg, jpeg)\n zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n params:\n file: ''\n provider_id: langgenius/dify_extractor/dify_extractor\n provider_name: langgenius/dify_extractor/dify_extractor\n provider_type: builtin\n selected: false\n title: Dify Extractor\n tool_configurations: {}\n tool_description: Dify Extractor\n tool_label: Dify Extractor\n tool_name: dify_extractor\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756442986174'\n - output\n type: tool\n height: 52\n id: '1750836391776'\n position:\n x: -417.5334221022782\n y: 268.1692071834485\n positionAbsolute:\n x: -417.5334221022782\n y: 268.1692071834485\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 252\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1124\n height: 252\n id: '1751252161631'\n position:\n x: -1371.6520723158733\n y: -123.758428116601\n positionAbsolute:\n x: -1371.6520723158733\n y: -123.758428116601\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1124\n - data:\n author: TenTen\n desc: ''\n height: 388\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 285\n height: 388\n id: '1751252440357'\n position:\n x: -1723.9942193415582\n y: 224.87938381325645\n positionAbsolute:\n x: -1723.9942193415582\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 285\n - data:\n author: TenTen\n desc: ''\n height: 430\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify\n Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n a built-in document parser developed by Dify. It supports a wide range of\n common file formats and offers specialized handling for certain formats,\n such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\n In addition to text extraction, it can extract images embedded within documents,\n store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 430\n id: '1751253091602'\n position:\n x: -417.5334221022782\n y: 532.832924599999\n positionAbsolute:\n x: -417.5334221022782\n y: 532.832924599999\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 265\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"General\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" divides\n content into chunks and retrieves the most relevant ones based on the users\n query for LLM processing. You can customize chunking rules—such as delimiter,\n maximum length, and overlap—to fit different document formats or scenarios.\n Preprocessing options are also available to clean up the text by removing\n excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 265\n id: '1751253953926'\n position:\n x: 184.46657789772178\n y: 407.42301051148354\n positionAbsolute:\n x: 184.46657789772178\n y: 407.42301051148354\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 344\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 344\n id: '1751254117904'\n position:\n x: 479.7628208876065\n y: 472.46585541244207\n positionAbsolute:\n x: 479.7628208876065\n y: 472.46585541244207\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: The result of the general chunk tool.\n properties:\n general_chunks:\n items:\n description: The chunk of the text.\n type: string\n type: array\n type: object\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The text you want to chunk.\n ja_JP: The text you want to chunk.\n pt_BR: The text you want to chunk.\n zh_Hans: 你想要分块的文本。\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Input Content\n zh_Hans: 输入变量\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_variable\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The delimiter of the chunks.\n ja_JP: The delimiter of the chunks.\n pt_BR: The delimiter of the chunks.\n zh_Hans: 块的分隔符。\n label:\n en_US: Delimiter\n ja_JP: Delimiter\n pt_BR: Delimiter\n zh_Hans: 分隔符\n llm_description: The delimiter of the chunks, the format of the delimiter\n must be a string.\n max: null\n min: null\n name: delimiter\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The maximum chunk length.\n ja_JP: The maximum chunk length.\n pt_BR: The maximum chunk length.\n zh_Hans: 最大块的长度。\n label:\n en_US: Maximum Chunk Length\n ja_JP: Maximum Chunk Length\n pt_BR: Maximum Chunk Length\n zh_Hans: 最大块的长度\n llm_description: The maximum chunk length, the format of the chunk size\n must be an integer.\n max: null\n min: null\n name: max_chunk_length\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The chunk overlap length.\n ja_JP: The chunk overlap length.\n pt_BR: The chunk overlap length.\n zh_Hans: 块的重叠长度。\n label:\n en_US: Chunk Overlap Length\n ja_JP: Chunk Overlap Length\n pt_BR: Chunk Overlap Length\n zh_Hans: 块的重叠长度\n llm_description: The chunk overlap length, the format of the chunk overlap\n length must be an integer.\n max: null\n min: null\n name: chunk_overlap_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Replace consecutive spaces, newlines and tabs\n zh_Hans: 替换连续的空格、换行符和制表符\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Replace consecutive spaces, newlines and tabs\n zh_Hans: 替换连续的空格、换行符和制表符\n llm_description: Replace consecutive spaces, newlines and tabs, the format\n of the replace must be a boolean.\n max: null\n min: null\n name: replace_consecutive_spaces_newlines_tabs\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Delete all URLs and email addresses\n zh_Hans: 删除所有URL和电子邮件地址\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Delete all URLs and email addresses\n zh_Hans: 删除所有URL和电子邮件地址\n llm_description: Delete all URLs and email addresses, the format of the\n delete must be a boolean.\n max: null\n min: null\n name: delete_all_urls_and_email_addresses\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n chunk_overlap_length: ''\n delete_all_urls_and_email_addresses: ''\n delimiter: ''\n input_variable: ''\n max_chunk_length: ''\n replace_consecutive_spaces_newlines_tabs: ''\n provider_id: langgenius/general_chunker/general_chunker\n provider_name: langgenius/general_chunker/general_chunker\n provider_type: builtin\n selected: false\n title: General Chunker\n tool_configurations: {}\n tool_description: A tool for general text chunking mode, the chunks retrieved\n and recalled are the same.\n tool_label: General Chunker\n tool_name: general_chunker\n tool_node_version: '2'\n tool_parameters:\n chunk_overlap_length:\n type: variable\n value:\n - rag\n - shared\n - Chunk_Overlap_Length\n delete_all_urls_and_email_addresses:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n delimiter:\n type: mixed\n value: '{{#rag.shared.Dilmiter#}}'\n input_variable:\n type: mixed\n value: '{{#1753346901505.output#}}'\n max_chunk_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Chunk_Length\n replace_consecutive_spaces_newlines_tabs:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n type: tool\n height: 52\n id: '1751337124089'\n position:\n x: 184.46657789772178\n y: 326\n positionAbsolute:\n x: 184.46657789772178\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: string\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836391776'\n - text\n - - '1753349228522'\n - text\n - - '1754023419266'\n - content\n - - '1756896212061'\n - content\n height: 187\n id: '1753346901505'\n position:\n x: -117.24452412456148\n y: 326\n positionAbsolute:\n x: -117.24452412456148\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_array_file: false\n selected: false\n title: Doc Extractor\n type: document-extractor\n variable_selector:\n - '1756442986174'\n - output\n height: 92\n id: '1753349228522'\n position:\n x: -417.5334221022782\n y: 417.25474169825833\n positionAbsolute:\n x: -417.5334221022782\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Notion\n datasource_name: notion_datasource\n datasource_parameters: {}\n plugin_id: langgenius/notion_datasource\n provider_name: notion_datasource\n provider_type: online_document\n selected: false\n title: Notion\n type: datasource\n height: 52\n id: '1754023419266'\n position:\n x: -1369.6904698303242\n y: 440.01452302398053\n positionAbsolute:\n x: -1369.6904698303242\n y: 440.01452302398053\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: file\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836380067'\n - file\n - - '1756442998557'\n - file\n height: 135\n id: '1756442986174'\n position:\n x: -1067.06980963949\n y: 236.10252072775984\n positionAbsolute:\n x: -1067.06980963949\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Google Drive\n datasource_name: google_drive\n datasource_parameters: {}\n plugin_id: langgenius/google_drive\n provider_name: google_drive\n provider_type: online_drive\n selected: false\n title: Google Drive\n type: datasource\n height: 52\n id: '1756442998557'\n position:\n x: -1371.6520723158733\n y: 326\n positionAbsolute:\n x: -1371.6520723158733\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n cases:\n - case_id: 'true'\n conditions:\n - comparison_operator: is\n id: 1581dd11-7898-41f4-962f-937283ba7e01\n value: .xlsx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 92abb46d-d7e4-46e7-a5e1-8a29bb45d528\n value: .xls\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1dde5ae7-754d-4e83-96b2-fe1f02995d8b\n value: .md\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 7e1a80e5-c32a-46a4-8f92-8912c64972aa\n value: .markdown\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 53abfe95-c7d0-4f63-ad37-17d425d25106\n value: .mdx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 436877b8-8c0a-4cc6-9565-92754db08571\n value: .html\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 5e3e375e-750b-4204-8ac3-9a1174a5ab7c\n value: .htm\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1a84a784-a797-4f96-98a0-33a9b48ceb2b\n value: .docx\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 62d11445-876a-493f-85d3-8fc020146bdd\n value: .csv\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 02c4bce8-7668-4ccd-b750-4281f314b231\n value: .txt\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n id: 'true'\n logical_operator: or\n selected: false\n title: IF/ELSE\n type: if-else\n height: 358\n id: '1756443014860'\n position:\n x: -733.5977815139424\n y: 236.10252072775984\n positionAbsolute:\n x: -733.5977815139424\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Jina Reader\n datasource_name: jina_reader\n datasource_parameters:\n crawl_sub_pages:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_subpages\n limit:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_limit\n url:\n type: mixed\n value: '{{#rag.1756896212061.jina_url#}}'\n use_sitemap:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jian_sitemap\n plugin_id: langgenius/jina_datasource\n provider_name: jinareader\n provider_type: website_crawl\n selected: false\n title: Jina Reader\n type: datasource\n height: 52\n id: '1756896212061'\n position:\n x: -1371.6520723158733\n y: 538.9988445953813\n positionAbsolute:\n x: -1371.6520723158733\n y: 538.9988445953813\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Firecrawl\n datasource_name: crawl\n datasource_parameters:\n crawl_subpages:\n type: variable\n value:\n - rag\n - '1756907397615'\n - firecrawl_subpages\n exclude_paths:\n type: mixed\n value: '{{#rag.1756907397615.exclude_paths#}}'\n include_paths:\n type: mixed\n value: '{{#rag.1756907397615.include_paths#}}'\n limit:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_pages\n max_depth:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_depth\n only_main_content:\n type: variable\n value:\n - rag\n - '1756907397615'\n - main_content\n url:\n type: mixed\n value: '{{#rag.1756907397615.firecrawl_url1#}}'\n plugin_id: langgenius/firecrawl_datasource\n provider_name: firecrawl\n provider_type: website_crawl\n selected: false\n title: Firecrawl\n type: datasource\n height: 52\n id: '1756907397615'\n position:\n x: -1371.6520723158733\n y: 644.3296146102903\n positionAbsolute:\n x: -1371.6520723158733\n y: 644.3296146102903\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 1463.3408543698197\n y: 224.29398382646679\n zoom: 0.6387381963193622\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_reader_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_reader_imit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Crawl sub-pages\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: Crawl_sub_pages_2\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: Use_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_limit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl\n iteratively based on page relevance, yielding fewer but higher-quality pages.\n type: checkbox\n unit: null\n variable: jian_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Crawl subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: jina_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: firecrawl_url1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: firecrawl_subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: firecrawl_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: Exclude paths\n max_length: 256\n options: []\n placeholder: blog/*,/about/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: exclude_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: include_paths\n max_length: 256\n options: []\n placeholder: articles/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: include_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 0\n label: Max depth\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 just scrapes\n the page of the entered url, depth 1 scrapes the url and everything after enteredURL\n + one /, and so on.\n type: number\n unit: null\n variable: max_depth\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: max_pages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: Extract only main content (no headers, navs, footers, etc.)\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: main_content\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Dilmiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n\\n and \\n are\n commonly used delimiters for separating paragraphs and lines. Combined with\n commas (\\n\\n,\\n), paragraphs will be segmented by lines when exceeding the maximum\n chunk length. You can also use special delimiters defined by yourself (e.g.\n ***).\n type: text-input\n unit: null\n variable: Dilmiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Chunk Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Chunk_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 128\n label: Chunk Overlap Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: Setting the chunk overlap can maintain the semantic relevance between\n them, enhancing the retrieve effect. It is recommended to set 10%-25% of the\n maximum chunk size.\n type: number\n unit: tokens\n variable: Chunk_Overlap_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_2\n",
"graph": {
"edges": [
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "knowledge-index"
},
"id": "1751337124089-source-1750836372241-target",
"selected": false,
"source": "1751337124089",
"sourceHandle": "source",
"target": "1750836372241",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "variable-aggregator",
"targetType": "tool"
},
"id": "1753346901505-source-1751337124089-target",
"selected": false,
"source": "1753346901505",
"sourceHandle": "source",
"target": "1751337124089",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "variable-aggregator"
},
"id": "1750836391776-source-1753346901505-target",
"selected": false,
"source": "1750836391776",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "document-extractor",
"targetType": "variable-aggregator"
},
"id": "1753349228522-source-1753346901505-target",
"selected": false,
"source": "1753349228522",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1754023419266-source-1753346901505-target",
"selected": false,
"source": "1754023419266",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1756442998557-source-1756442986174-target",
"selected": false,
"source": "1756442998557",
"sourceHandle": "source",
"target": "1756442986174",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInIteration": false,
"isInLoop": false,
"sourceType": "variable-aggregator",
"targetType": "if-else"
},
"id": "1756442986174-source-1756443014860-target",
"selected": false,
"source": "1756442986174",
"sourceHandle": "source",
"target": "1756443014860",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1750836380067-source-1756442986174-target",
"selected": false,
"source": "1750836380067",
"sourceHandle": "source",
"target": "1756442986174",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "if-else",
"targetType": "tool"
},
"id": "1756443014860-true-1750836391776-target",
"selected": false,
"source": "1756443014860",
"sourceHandle": "true",
"target": "1750836391776",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "if-else",
"targetType": "document-extractor"
},
"id": "1756443014860-false-1753349228522-target",
"selected": false,
"source": "1756443014860",
"sourceHandle": "false",
"target": "1753349228522",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1756896212061-source-1753346901505-target",
"source": "1756896212061",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1756907397615-source-1753346901505-target",
"source": "1756907397615",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
}
],
"nodes": [
{
"data": {
"chunk_structure": "text_model",
"index_chunk_variable_selector": [
"1751337124089",
"result"
],
"indexing_technique": "economy",
"keyword_number": 10,
"retrieval_model": {
"score_threshold": 0.5,
"score_threshold_enabled": false,
"search_method": "keyword_search",
"top_k": 3
},
"selected": false,
"title": "Knowledge Base",
"type": "knowledge-index"
},
"height": 114,
"id": "1750836372241",
"position": {
"x": 479.7628208876065,
"y": 326
},
"positionAbsolute": {
"x": 479.7628208876065,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "File",
"datasource_name": "upload-file",
"datasource_parameters": {},
"fileExtensions": [
"txt",
"markdown",
"mdx",
"pdf",
"html",
"xlsx",
"xls",
"vtt",
"properties",
"doc",
"docx",
"csv",
"eml",
"msg",
"pptx",
"xml",
"epub",
"ppt",
"md"
],
"plugin_id": "langgenius/file",
"provider_name": "file",
"provider_type": "local_file",
"selected": false,
"title": "File",
"type": "datasource"
},
"height": 52,
"id": "1750836380067",
"position": {
"x": -1371.6520723158733,
"y": 224.87938381325645
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 224.87938381325645
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_team_authorization": true,
"output_schema": {
"properties": {
"documents": {
"description": "the documents extracted from the file",
"items": {
"type": "object"
},
"type": "array"
},
"images": {
"description": "The images extracted from the file",
"items": {
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"zh_Hans": "用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"
},
"label": {
"en_US": "file",
"ja_JP": "file",
"pt_BR": "file",
"zh_Hans": "file"
},
"llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"max": null,
"min": null,
"name": "file",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "file"
}
],
"params": {
"file": ""
},
"provider_id": "langgenius/dify_extractor/dify_extractor",
"provider_name": "langgenius/dify_extractor/dify_extractor",
"provider_type": "builtin",
"selected": false,
"title": "Dify Extractor",
"tool_configurations": {},
"tool_description": "Dify Extractor",
"tool_label": "Dify Extractor",
"tool_name": "dify_extractor",
"tool_node_version": "2",
"tool_parameters": {
"file": {
"type": "variable",
"value": [
"1756442986174",
"output"
]
}
},
"type": "tool"
},
"height": 52,
"id": "1750836391776",
"position": {
"x": -417.5334221022782,
"y": 268.1692071834485
},
"positionAbsolute": {
"x": -417.5334221022782,
"y": 268.1692071834485
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 252,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 1124
},
"height": 252,
"id": "1751252161631",
"position": {
"x": -1371.6520723158733,
"y": -123.758428116601
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": -123.758428116601
},
"selected": true,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 1124
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 388,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 285
},
"height": 388,
"id": "1751252440357",
"position": {
"x": -1723.9942193415582,
"y": 224.87938381325645
},
"positionAbsolute": {
"x": -1723.9942193415582,
"y": 224.87938381325645
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 285
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 430,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is a built-in document parser developed by Dify. It supports a wide range of common file formats and offers specialized handling for certain formats, such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\". In addition to text extraction, it can extract images embedded within documents, store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 430,
"id": "1751253091602",
"position": {
"x": -417.5334221022782,
"y": 532.832924599999
},
"positionAbsolute": {
"x": -417.5334221022782,
"y": 532.832924599999
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 265,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"General Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" divides content into chunks and retrieves the most relevant ones based on the users query for LLM processing. You can customize chunking rules—such as delimiter, maximum length, and overlap—to fit different document formats or scenarios. Preprocessing options are also available to clean up the text by removing excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 265,
"id": "1751253953926",
"position": {
"x": 184.46657789772178,
"y": 407.42301051148354
},
"positionAbsolute": {
"x": 184.46657789772178,
"y": 407.42301051148354
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 344,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 344,
"id": "1751254117904",
"position": {
"x": 479.7628208876065,
"y": 472.46585541244207
},
"positionAbsolute": {
"x": 479.7628208876065,
"y": 472.46585541244207
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"is_team_authorization": true,
"output_schema": {
"properties": {
"result": {
"description": "The result of the general chunk tool.",
"properties": {
"general_chunks": {
"items": {
"description": "The chunk of the text.",
"type": "string"
},
"type": "array"
}
},
"type": "object"
}
},
"type": "object"
},
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "The text you want to chunk.",
"ja_JP": "The text you want to chunk.",
"pt_BR": "The text you want to chunk.",
"zh_Hans": "你想要分块的文本。"
},
"label": {
"en_US": "Input Content",
"ja_JP": "Input Content",
"pt_BR": "Input Content",
"zh_Hans": "输入变量"
},
"llm_description": "The text you want to chunk.",
"max": null,
"min": null,
"name": "input_variable",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "The delimiter of the chunks.",
"ja_JP": "The delimiter of the chunks.",
"pt_BR": "The delimiter of the chunks.",
"zh_Hans": "块的分隔符。"
},
"label": {
"en_US": "Delimiter",
"ja_JP": "Delimiter",
"pt_BR": "Delimiter",
"zh_Hans": "分隔符"
},
"llm_description": "The delimiter of the chunks, the format of the delimiter must be a string.",
"max": null,
"min": null,
"name": "delimiter",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "The maximum chunk length.",
"ja_JP": "The maximum chunk length.",
"pt_BR": "The maximum chunk length.",
"zh_Hans": "最大块的长度。"
},
"label": {
"en_US": "Maximum Chunk Length",
"ja_JP": "Maximum Chunk Length",
"pt_BR": "Maximum Chunk Length",
"zh_Hans": "最大块的长度"
},
"llm_description": "The maximum chunk length, the format of the chunk size must be an integer.",
"max": null,
"min": null,
"name": "max_chunk_length",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "The chunk overlap length.",
"ja_JP": "The chunk overlap length.",
"pt_BR": "The chunk overlap length.",
"zh_Hans": "块的重叠长度。"
},
"label": {
"en_US": "Chunk Overlap Length",
"ja_JP": "Chunk Overlap Length",
"pt_BR": "Chunk Overlap Length",
"zh_Hans": "块的重叠长度"
},
"llm_description": "The chunk overlap length, the format of the chunk overlap length must be an integer.",
"max": null,
"min": null,
"name": "chunk_overlap_length",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "Replace consecutive spaces, newlines and tabs",
"ja_JP": "Replace consecutive spaces, newlines and tabs",
"pt_BR": "Replace consecutive spaces, newlines and tabs",
"zh_Hans": "替换连续的空格、换行符和制表符"
},
"label": {
"en_US": "Replace consecutive spaces, newlines and tabs",
"ja_JP": "Replace consecutive spaces, newlines and tabs",
"pt_BR": "Replace consecutive spaces, newlines and tabs",
"zh_Hans": "替换连续的空格、换行符和制表符"
},
"llm_description": "Replace consecutive spaces, newlines and tabs, the format of the replace must be a boolean.",
"max": null,
"min": null,
"name": "replace_consecutive_spaces_newlines_tabs",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "Delete all URLs and email addresses",
"ja_JP": "Delete all URLs and email addresses",
"pt_BR": "Delete all URLs and email addresses",
"zh_Hans": "删除所有URL和电子邮件地址"
},
"label": {
"en_US": "Delete all URLs and email addresses",
"ja_JP": "Delete all URLs and email addresses",
"pt_BR": "Delete all URLs and email addresses",
"zh_Hans": "删除所有URL和电子邮件地址"
},
"llm_description": "Delete all URLs and email addresses, the format of the delete must be a boolean.",
"max": null,
"min": null,
"name": "delete_all_urls_and_email_addresses",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
}
],
"params": {
"chunk_overlap_length": "",
"delete_all_urls_and_email_addresses": "",
"delimiter": "",
"input_variable": "",
"max_chunk_length": "",
"replace_consecutive_spaces_newlines_tabs": ""
},
"provider_id": "langgenius/general_chunker/general_chunker",
"provider_name": "langgenius/general_chunker/general_chunker",
"provider_type": "builtin",
"selected": false,
"title": "General Chunker",
"tool_configurations": {},
"tool_description": "A tool for general text chunking mode, the chunks retrieved and recalled are the same.",
"tool_label": "General Chunker",
"tool_name": "general_chunker",
"tool_node_version": "2",
"tool_parameters": {
"chunk_overlap_length": {
"type": "variable",
"value": [
"rag",
"shared",
"Chunk_Overlap_Length"
]
},
"delete_all_urls_and_email_addresses": {
"type": "variable",
"value": [
"rag",
"shared",
"clean_2"
]
},
"delimiter": {
"type": "mixed",
"value": "{{#rag.shared.Dilmiter#}}"
},
"input_variable": {
"type": "mixed",
"value": "{{#1753346901505.output#}}"
},
"max_chunk_length": {
"type": "variable",
"value": [
"rag",
"shared",
"Maximum_Chunk_Length"
]
},
"replace_consecutive_spaces_newlines_tabs": {
"type": "variable",
"value": [
"rag",
"shared",
"clean_1"
]
}
},
"type": "tool"
},
"height": 52,
"id": "1751337124089",
"position": {
"x": 184.46657789772178,
"y": 326
},
"positionAbsolute": {
"x": 184.46657789772178,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"output_type": "string",
"selected": false,
"title": "Variable Aggregator",
"type": "variable-aggregator",
"variables": [
[
"1750836391776",
"text"
],
[
"1753349228522",
"text"
],
[
"1754023419266",
"content"
],
[
"1756896212061",
"content"
]
]
},
"height": 187,
"id": "1753346901505",
"position": {
"x": -117.24452412456148,
"y": 326
},
"positionAbsolute": {
"x": -117.24452412456148,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_array_file": false,
"selected": false,
"title": "Doc Extractor",
"type": "document-extractor",
"variable_selector": [
"1756442986174",
"output"
]
},
"height": 92,
"id": "1753349228522",
"position": {
"x": -417.5334221022782,
"y": 417.25474169825833
},
"positionAbsolute": {
"x": -417.5334221022782,
"y": 417.25474169825833
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Notion",
"datasource_name": "notion_datasource",
"datasource_parameters": {},
"plugin_id": "langgenius/notion_datasource",
"provider_name": "notion_datasource",
"provider_type": "online_document",
"selected": false,
"title": "Notion",
"type": "datasource"
},
"height": 52,
"id": "1754023419266",
"position": {
"x": -1369.6904698303242,
"y": 440.01452302398053
},
"positionAbsolute": {
"x": -1369.6904698303242,
"y": 440.01452302398053
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"output_type": "file",
"selected": false,
"title": "Variable Aggregator",
"type": "variable-aggregator",
"variables": [
[
"1750836380067",
"file"
],
[
"1756442998557",
"file"
]
]
},
"height": 135,
"id": "1756442986174",
"position": {
"x": -1067.06980963949,
"y": 236.10252072775984
},
"positionAbsolute": {
"x": -1067.06980963949,
"y": 236.10252072775984
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Google Drive",
"datasource_name": "google_drive",
"datasource_parameters": {},
"plugin_id": "langgenius/google_drive",
"provider_name": "google_drive",
"provider_type": "online_drive",
"selected": false,
"title": "Google Drive",
"type": "datasource"
},
"height": 52,
"id": "1756442998557",
"position": {
"x": -1371.6520723158733,
"y": 326
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"cases": [
{
"case_id": "true",
"conditions": [
{
"comparison_operator": "is",
"id": "1581dd11-7898-41f4-962f-937283ba7e01",
"value": ".xlsx",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "92abb46d-d7e4-46e7-a5e1-8a29bb45d528",
"value": ".xls",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "1dde5ae7-754d-4e83-96b2-fe1f02995d8b",
"value": ".md",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "7e1a80e5-c32a-46a4-8f92-8912c64972aa",
"value": ".markdown",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "53abfe95-c7d0-4f63-ad37-17d425d25106",
"value": ".mdx",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "436877b8-8c0a-4cc6-9565-92754db08571",
"value": ".html",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "5e3e375e-750b-4204-8ac3-9a1174a5ab7c",
"value": ".htm",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "1a84a784-a797-4f96-98a0-33a9b48ceb2b",
"value": ".docx",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "62d11445-876a-493f-85d3-8fc020146bdd",
"value": ".csv",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "02c4bce8-7668-4ccd-b750-4281f314b231",
"value": ".txt",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
}
],
"id": "true",
"logical_operator": "or"
}
],
"selected": false,
"title": "IF/ELSE",
"type": "if-else"
},
"height": 358,
"id": "1756443014860",
"position": {
"x": -733.5977815139424,
"y": 236.10252072775984
},
"positionAbsolute": {
"x": -733.5977815139424,
"y": 236.10252072775984
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Jina Reader",
"datasource_name": "jina_reader",
"datasource_parameters": {
"crawl_sub_pages": {
"type": "variable",
"value": [
"rag",
"1756896212061",
"jina_subpages"
]
},
"limit": {
"type": "variable",
"value": [
"rag",
"1756896212061",
"jina_limit"
]
},
"url": {
"type": "mixed",
"value": "{{#rag.1756896212061.jina_url#}}"
},
"use_sitemap": {
"type": "variable",
"value": [
"rag",
"1756896212061",
"jian_sitemap"
]
}
},
"plugin_id": "langgenius/jina_datasource",
"provider_name": "jinareader",
"provider_type": "website_crawl",
"selected": false,
"title": "Jina Reader",
"type": "datasource"
},
"height": 52,
"id": "1756896212061",
"position": {
"x": -1371.6520723158733,
"y": 538.9988445953813
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 538.9988445953813
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Firecrawl",
"datasource_name": "crawl",
"datasource_parameters": {
"crawl_subpages": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"firecrawl_subpages"
]
},
"exclude_paths": {
"type": "mixed",
"value": "{{#rag.1756907397615.exclude_paths#}}"
},
"include_paths": {
"type": "mixed",
"value": "{{#rag.1756907397615.include_paths#}}"
},
"limit": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"max_pages"
]
},
"max_depth": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"max_depth"
]
},
"only_main_content": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"main_content"
]
},
"url": {
"type": "mixed",
"value": "{{#rag.1756907397615.firecrawl_url1#}}"
}
},
"plugin_id": "langgenius/firecrawl_datasource",
"provider_name": "firecrawl",
"provider_type": "website_crawl",
"selected": false,
"title": "Firecrawl",
"type": "datasource"
},
"height": 52,
"id": "1756907397615",
"position": {
"x": -1371.6520723158733,
"y": 644.3296146102903
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 644.3296146102903
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
}
],
"viewport": {
"x": 1463.3408543698197,
"y": 224.29398382646679,
"zoom": 0.6387381963193622
}
},
"icon_info": {
"icon": "52064ff0-26b6-47d0-902f-e331f94d959b",
"icon_background": null,
"icon_type": "image",
"icon_url": ""
},
"id": "9f5ea5a7-7796-49f3-9e9a-ae2d8e84cfa3",
"name": "General Mode-ECO",
"icon": {
"icon": "52064ff0-26b6-47d0-902f-e331f94d959b",
"icon_background": null,
"icon_type": "image",
"icon_url": ""
},
"language": "zh-Hans",
"position": 1
},
"9553b1e0-0c26-445b-9e18-063ad7eca0b4": {
"chunk_structure": "hierarchical_model",
"description": "This template uses an advanced chunking strategy that organizes document text into a hierarchical structure of larger \"parent\" chunks and smaller \"child\" chunks to balance retrieval precision and contextual richness.",
"export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/notion_datasource:0.1.12@2855c4a7cffd3311118ebe70f095e546f99935e47f12c841123146f728534f55\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/dify_extractor:0.0.5@ba7e2fd9165eda73bfcc68e31a108855197e88706e5556c058e0777ab08409b3\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/google_drive:0.1.6@4bc0cf8f8979ebd7321b91506b4bc8f090b05b769b5d214f2da4ce4c04ce30bd\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina_datasource:0.0.5@75942f5bbde870ad28e0345ff5ebf54ebd3aec63f0e66344ef76b88cf06b85c3\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/firecrawl_datasource:0.2.4@37b490ebc52ac30d1c6cbfa538edcddddcfed7d5f5de58982edbd4e2094eb6e2\n version: null\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 6509176c-def5-421c-b966-5122ad6bf658\n icon_background: '#FFEAD5'\n icon_type: image\n icon_url: \n name: Parent-child-HQ\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: variable-aggregator\n id: 1750836391776-source-1753346901505-target\n selected: false\n source: '1750836391776'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: document-extractor\n targetType: variable-aggregator\n id: 1753349228522-source-1753346901505-target\n selected: false\n source: '1753349228522'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1754023419266-source-1753346901505-target\n selected: false\n source: '1754023419266'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756442998557-source-1756442986174-target\n selected: false\n source: '1756442998557'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: if-else\n id: 1756442986174-source-1756443014860-target\n selected: false\n source: '1756442986174'\n sourceHandle: source\n target: '1756443014860'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1750836380067-source-1756442986174-target\n selected: false\n source: '1750836380067'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: tool\n id: 1756443014860-true-1750836391776-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'true'\n target: '1750836391776'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: document-extractor\n id: 1756443014860-false-1753349228522-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'false'\n target: '1753349228522'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756896212061-source-1753346901505-target\n source: '1756896212061'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756907397615-source-1753346901505-target\n source: '1756907397615'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: tool\n id: 1753346901505-source-1756972161593-target\n source: '1753346901505'\n sourceHandle: source\n target: '1756972161593'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1756972161593-source-1750836372241-target\n source: '1756972161593'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1756972161593'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius/jina/jina\n score_threshold: 0\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n weights: null\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 479.7628208876065\n y: 326\n positionAbsolute:\n x: 479.7628208876065\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -1371.6520723158733\n y: 224.87938381325645\n positionAbsolute:\n x: -1371.6520723158733\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n documents:\n description: the documents extracted from the file\n items:\n type: object\n type: array\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,\n jpg, jpeg)\n zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n params:\n file: ''\n provider_id: langgenius/dify_extractor/dify_extractor\n provider_name: langgenius/dify_extractor/dify_extractor\n provider_type: builtin\n selected: false\n title: Dify Extractor\n tool_configurations: {}\n tool_description: Dify Extractor\n tool_label: Dify Extractor\n tool_name: dify_extractor\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756442986174'\n - output\n type: tool\n height: 52\n id: '1750836391776'\n position:\n x: -417.5334221022782\n y: 268.1692071834485\n positionAbsolute:\n x: -417.5334221022782\n y: 268.1692071834485\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 252\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1124\n height: 252\n id: '1751252161631'\n position:\n x: -1371.6520723158733\n y: -123.758428116601\n positionAbsolute:\n x: -1371.6520723158733\n y: -123.758428116601\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1124\n - data:\n author: TenTen\n desc: ''\n height: 388\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 285\n height: 388\n id: '1751252440357'\n position:\n x: -1723.9942193415582\n y: 224.87938381325645\n positionAbsolute:\n x: -1723.9942193415582\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 285\n - data:\n author: TenTen\n desc: ''\n height: 430\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify\n Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n a built-in document parser developed by Dify. It supports a wide range of\n common file formats and offers specialized handling for certain formats,\n such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\n In addition to text extraction, it can extract images embedded within documents,\n store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 430\n id: '1751253091602'\n position:\n x: -417.5334221022782\n y: 547.4103414077279\n positionAbsolute:\n x: -417.5334221022782\n y: 547.4103414077279\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 638\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such\n as a paragraph, a section, or even an entire document—that include the matched\n child chunks are then retrieved. These parent chunks provide comprehensive\n context for the Language Model (LLM). length, and overlap—to fit different\n document formats or scenarios. Preprocessing options are also available\n to clean up the text by removing excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 638\n id: '1751253953926'\n position:\n x: 184.46657789772178\n y: 407.42301051148354\n positionAbsolute:\n x: 184.46657789772178\n y: 407.42301051148354\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 410\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 410\n id: '1751254117904'\n position:\n x: 479.7628208876065\n y: 472.46585541244207\n positionAbsolute:\n x: 479.7628208876065\n y: 472.46585541244207\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n output_type: string\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836391776'\n - text\n - - '1753349228522'\n - text\n - - '1754023419266'\n - content\n - - '1756896212061'\n - content\n - - '1756907397615'\n - content\n height: 213\n id: '1753346901505'\n position:\n x: -117.24452412456148\n y: 326\n positionAbsolute:\n x: -117.24452412456148\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_array_file: false\n selected: false\n title: Doc Extractor\n type: document-extractor\n variable_selector:\n - '1756442986174'\n - output\n height: 92\n id: '1753349228522'\n position:\n x: -417.5334221022782\n y: 417.25474169825833\n positionAbsolute:\n x: -417.5334221022782\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Notion\n datasource_name: notion_datasource\n datasource_parameters: {}\n plugin_id: langgenius/notion_datasource\n provider_name: notion_datasource\n provider_type: online_document\n selected: false\n title: Notion\n type: datasource\n height: 52\n id: '1754023419266'\n position:\n x: -1369.6904698303242\n y: 440.01452302398053\n positionAbsolute:\n x: -1369.6904698303242\n y: 440.01452302398053\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: file\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836380067'\n - file\n - - '1756442998557'\n - file\n height: 135\n id: '1756442986174'\n position:\n x: -1054.415447856335\n y: 236.10252072775984\n positionAbsolute:\n x: -1054.415447856335\n y: 236.10252072775984\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Google Drive\n datasource_name: google_drive\n datasource_parameters: {}\n plugin_id: langgenius/google_drive\n provider_name: google_drive\n provider_type: online_drive\n selected: false\n title: Google Drive\n type: datasource\n height: 52\n id: '1756442998557'\n position:\n x: -1371.6520723158733\n y: 326\n positionAbsolute:\n x: -1371.6520723158733\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n cases:\n - case_id: 'true'\n conditions:\n - comparison_operator: is\n id: 1581dd11-7898-41f4-962f-937283ba7e01\n value: .xlsx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 92abb46d-d7e4-46e7-a5e1-8a29bb45d528\n value: .xls\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1dde5ae7-754d-4e83-96b2-fe1f02995d8b\n value: .md\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 7e1a80e5-c32a-46a4-8f92-8912c64972aa\n value: .markdown\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 53abfe95-c7d0-4f63-ad37-17d425d25106\n value: .mdx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 436877b8-8c0a-4cc6-9565-92754db08571\n value: .html\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 5e3e375e-750b-4204-8ac3-9a1174a5ab7c\n value: .htm\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1a84a784-a797-4f96-98a0-33a9b48ceb2b\n value: .docx\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 62d11445-876a-493f-85d3-8fc020146bdd\n value: .csv\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 02c4bce8-7668-4ccd-b750-4281f314b231\n value: .txt\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n id: 'true'\n logical_operator: or\n selected: false\n title: IF/ELSE\n type: if-else\n height: 358\n id: '1756443014860'\n position:\n x: -733.5977815139424\n y: 236.10252072775984\n positionAbsolute:\n x: -733.5977815139424\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Jina Reader\n datasource_name: jina_reader\n datasource_parameters:\n crawl_sub_pages:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_subpages\n limit:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_limit\n url:\n type: mixed\n value: '{{#rag.1756896212061.jina_url#}}'\n use_sitemap:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jian_sitemap\n plugin_id: langgenius/jina_datasource\n provider_name: jinareader\n provider_type: website_crawl\n selected: false\n title: Jina Reader\n type: datasource\n height: 52\n id: '1756896212061'\n position:\n x: -1371.6520723158733\n y: 538.9988445953813\n positionAbsolute:\n x: -1371.6520723158733\n y: 538.9988445953813\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Firecrawl\n datasource_name: crawl\n datasource_parameters:\n crawl_subpages:\n type: variable\n value:\n - rag\n - '1756907397615'\n - firecrawl_subpages\n exclude_paths:\n type: mixed\n value: '{{#rag.1756907397615.exclude_paths#}}'\n include_paths:\n type: mixed\n value: '{{#rag.1756907397615.include_paths#}}'\n limit:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_pages\n max_depth:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_depth\n only_main_content:\n type: variable\n value:\n - rag\n - '1756907397615'\n - main_content\n url:\n type: mixed\n value: '{{#rag.1756907397615.firecrawl_url1#}}'\n plugin_id: langgenius/firecrawl_datasource\n provider_name: firecrawl\n provider_type: website_crawl\n selected: false\n title: Firecrawl\n type: datasource\n height: 52\n id: '1756907397615'\n position:\n x: -1371.6520723158733\n y: 644.3296146102903\n positionAbsolute:\n x: -1371.6520723158733\n y: 644.3296146102903\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The text you want to chunk.\n ja_JP: The text you want to chunk.\n pt_BR: Conteúdo de Entrada\n zh_Hans: 输入文本\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conteúdo de Entrada\n zh_Hans: 输入文本\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em parágrafos com base no separador e no comprimento\n máximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuperá-lo.\n zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: 父块模式\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - icon: ''\n label:\n en_US: paragraph\n ja_JP: paragraph\n pt_BR: paragraph\n zh_Hans: paragraph\n value: paragraph\n - icon: ''\n label:\n en_US: full_doc\n ja_JP: full_doc\n pt_BR: full_doc\n zh_Hans: full_doc\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divisão\n zh_Hans: 用于分块的分隔符\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: 父块分隔符\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento máximo para divisão\n zh_Hans: 用于分块的最大长度\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento Máximo do Bloco Pai\n zh_Hans: 最大父块长度\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. '\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivisão\n zh_Hans: 用于子分块的分隔符\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivisão\n zh_Hans: 子分块分隔符\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento máximo para subdivisão\n zh_Hans: 用于子分块的最大长度\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento Máximo de Subdivisão\n zh_Hans: 子分块最大长度\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espaços extras no texto\n zh_Hans: 是否移除文本中的连续空格、换行符和制表符\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espaços consecutivos, novas linhas e guias\n zh_Hans: 替换连续空格、换行符和制表符\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: 是否移除文本中的URL和电子邮件地址\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: 删除所有URL和电子邮件地址\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius/parentchild_chunker/parentchild_chunker\n provider_name: langgenius/parentchild_chunker/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1753346901505.output#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - parent_length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - parent_mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n type: mixed\n value: '{{#rag.shared.parent_dilmiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - child_length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.child_delimiter#}}'\n type: tool\n height: 52\n id: '1756972161593'\n position:\n x: 184.46657789772178\n y: 326\n positionAbsolute:\n x: 184.46657789772178\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 947.2141381290828\n y: 179.30600859363653\n zoom: 0.47414481289660987\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_reader_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_reader_imit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Crawl sub-pages\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: Crawl_sub_pages_2\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: Use_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_limit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl\n iteratively based on page relevance, yielding fewer but higher-quality pages.\n type: checkbox\n unit: null\n variable: jian_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Crawl subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: jina_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: firecrawl_url1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: firecrawl_subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: firecrawl_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: Exclude paths\n max_length: 256\n options: []\n placeholder: blog/*,/about/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: exclude_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: include_paths\n max_length: 256\n options: []\n placeholder: articles/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: include_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 0\n label: Max depth\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 just scrapes\n the page of the entered url, depth 1 scrapes the url and everything after enteredURL\n + one /, and so on.\n type: number\n unit: null\n variable: max_depth\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: max_pages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: Extract only main content (no headers, navs, footers, etc.)\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: main_content\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: parent_mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: parent_dilmiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: parent_length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: child_delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: child_length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_2\n",
"graph": {
"edges": [
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "variable-aggregator"
},
"id": "1750836391776-source-1753346901505-target",
"selected": false,
"source": "1750836391776",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "document-extractor",
"targetType": "variable-aggregator"
},
"id": "1753349228522-source-1753346901505-target",
"selected": false,
"source": "1753349228522",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1754023419266-source-1753346901505-target",
"selected": false,
"source": "1754023419266",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1756442998557-source-1756442986174-target",
"selected": false,
"source": "1756442998557",
"sourceHandle": "source",
"target": "1756442986174",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInIteration": false,
"isInLoop": false,
"sourceType": "variable-aggregator",
"targetType": "if-else"
},
"id": "1756442986174-source-1756443014860-target",
"selected": false,
"source": "1756442986174",
"sourceHandle": "source",
"target": "1756443014860",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1750836380067-source-1756442986174-target",
"selected": false,
"source": "1750836380067",
"sourceHandle": "source",
"target": "1756442986174",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "if-else",
"targetType": "tool"
},
"id": "1756443014860-true-1750836391776-target",
"selected": false,
"source": "1756443014860",
"sourceHandle": "true",
"target": "1750836391776",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "if-else",
"targetType": "document-extractor"
},
"id": "1756443014860-false-1753349228522-target",
"selected": false,
"source": "1756443014860",
"sourceHandle": "false",
"target": "1753349228522",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1756896212061-source-1753346901505-target",
"source": "1756896212061",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1756907397615-source-1753346901505-target",
"source": "1756907397615",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInIteration": false,
"isInLoop": false,
"sourceType": "variable-aggregator",
"targetType": "tool"
},
"id": "1753346901505-source-1756972161593-target",
"source": "1753346901505",
"sourceHandle": "source",
"target": "1756972161593",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "knowledge-index"
},
"id": "1756972161593-source-1750836372241-target",
"source": "1756972161593",
"sourceHandle": "source",
"target": "1750836372241",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
}
],
"nodes": [
{
"data": {
"chunk_structure": "hierarchical_model",
"embedding_model": "jina-embeddings-v2-base-en",
"embedding_model_provider": "langgenius/jina/jina",
"index_chunk_variable_selector": [
"1756972161593",
"result"
],
"indexing_technique": "high_quality",
"keyword_number": 10,
"retrieval_model": {
"reranking_enable": true,
"reranking_mode": "reranking_model",
"reranking_model": {
"reranking_model_name": "jina-reranker-v1-base-en",
"reranking_provider_name": "langgenius/jina/jina"
},
"score_threshold": 0,
"score_threshold_enabled": false,
"search_method": "hybrid_search",
"top_k": 3,
"weights": null
},
"selected": false,
"title": "Knowledge Base",
"type": "knowledge-index"
},
"height": 114,
"id": "1750836372241",
"position": {
"x": 479.7628208876065,
"y": 326
},
"positionAbsolute": {
"x": 479.7628208876065,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "File",
"datasource_name": "upload-file",
"datasource_parameters": {},
"fileExtensions": [
"txt",
"markdown",
"mdx",
"pdf",
"html",
"xlsx",
"xls",
"vtt",
"properties",
"doc",
"docx",
"csv",
"eml",
"msg",
"pptx",
"xml",
"epub",
"ppt",
"md"
],
"plugin_id": "langgenius/file",
"provider_name": "file",
"provider_type": "local_file",
"selected": false,
"title": "File",
"type": "datasource"
},
"height": 52,
"id": "1750836380067",
"position": {
"x": -1371.6520723158733,
"y": 224.87938381325645
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 224.87938381325645
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_team_authorization": true,
"output_schema": {
"properties": {
"documents": {
"description": "the documents extracted from the file",
"items": {
"type": "object"
},
"type": "array"
},
"images": {
"description": "The images extracted from the file",
"items": {
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"zh_Hans": "用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"
},
"label": {
"en_US": "file",
"ja_JP": "file",
"pt_BR": "file",
"zh_Hans": "file"
},
"llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"max": null,
"min": null,
"name": "file",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "file"
}
],
"params": {
"file": ""
},
"provider_id": "langgenius/dify_extractor/dify_extractor",
"provider_name": "langgenius/dify_extractor/dify_extractor",
"provider_type": "builtin",
"selected": false,
"title": "Dify Extractor",
"tool_configurations": {},
"tool_description": "Dify Extractor",
"tool_label": "Dify Extractor",
"tool_name": "dify_extractor",
"tool_node_version": "2",
"tool_parameters": {
"file": {
"type": "variable",
"value": [
"1756442986174",
"output"
]
}
},
"type": "tool"
},
"height": 52,
"id": "1750836391776",
"position": {
"x": -417.5334221022782,
"y": 268.1692071834485
},
"positionAbsolute": {
"x": -417.5334221022782,
"y": 268.1692071834485
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 252,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 1124
},
"height": 252,
"id": "1751252161631",
"position": {
"x": -1371.6520723158733,
"y": -123.758428116601
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": -123.758428116601
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 1124
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 388,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 285
},
"height": 388,
"id": "1751252440357",
"position": {
"x": -1723.9942193415582,
"y": 224.87938381325645
},
"positionAbsolute": {
"x": -1723.9942193415582,
"y": 224.87938381325645
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 285
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 430,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is a built-in document parser developed by Dify. It supports a wide range of common file formats and offers specialized handling for certain formats, such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\". In addition to text extraction, it can extract images embedded within documents, store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 430,
"id": "1751253091602",
"position": {
"x": -417.5334221022782,
"y": 547.4103414077279
},
"positionAbsolute": {
"x": -417.5334221022782,
"y": 547.4103414077279
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 638,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such as a paragraph, a section, or even an entire document—that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM). length, and overlap—to fit different document formats or scenarios. Preprocessing options are also available to clean up the text by removing excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 638,
"id": "1751253953926",
"position": {
"x": 184.46657789772178,
"y": 407.42301051148354
},
"positionAbsolute": {
"x": 184.46657789772178,
"y": 407.42301051148354
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 410,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 410,
"id": "1751254117904",
"position": {
"x": 479.7628208876065,
"y": 472.46585541244207
},
"positionAbsolute": {
"x": 479.7628208876065,
"y": 472.46585541244207
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"output_type": "string",
"selected": false,
"title": "Variable Aggregator",
"type": "variable-aggregator",
"variables": [
[
"1750836391776",
"text"
],
[
"1753349228522",
"text"
],
[
"1754023419266",
"content"
],
[
"1756896212061",
"content"
],
[
"1756907397615",
"content"
]
]
},
"height": 213,
"id": "1753346901505",
"position": {
"x": -117.24452412456148,
"y": 326
},
"positionAbsolute": {
"x": -117.24452412456148,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_array_file": false,
"selected": false,
"title": "Doc Extractor",
"type": "document-extractor",
"variable_selector": [
"1756442986174",
"output"
]
},
"height": 92,
"id": "1753349228522",
"position": {
"x": -417.5334221022782,
"y": 417.25474169825833
},
"positionAbsolute": {
"x": -417.5334221022782,
"y": 417.25474169825833
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Notion",
"datasource_name": "notion_datasource",
"datasource_parameters": {},
"plugin_id": "langgenius/notion_datasource",
"provider_name": "notion_datasource",
"provider_type": "online_document",
"selected": false,
"title": "Notion",
"type": "datasource"
},
"height": 52,
"id": "1754023419266",
"position": {
"x": -1369.6904698303242,
"y": 440.01452302398053
},
"positionAbsolute": {
"x": -1369.6904698303242,
"y": 440.01452302398053
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"output_type": "file",
"selected": false,
"title": "Variable Aggregator",
"type": "variable-aggregator",
"variables": [
[
"1750836380067",
"file"
],
[
"1756442998557",
"file"
]
]
},
"height": 135,
"id": "1756442986174",
"position": {
"x": -1054.415447856335,
"y": 236.10252072775984
},
"positionAbsolute": {
"x": -1054.415447856335,
"y": 236.10252072775984
},
"selected": true,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Google Drive",
"datasource_name": "google_drive",
"datasource_parameters": {},
"plugin_id": "langgenius/google_drive",
"provider_name": "google_drive",
"provider_type": "online_drive",
"selected": false,
"title": "Google Drive",
"type": "datasource"
},
"height": 52,
"id": "1756442998557",
"position": {
"x": -1371.6520723158733,
"y": 326
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"cases": [
{
"case_id": "true",
"conditions": [
{
"comparison_operator": "is",
"id": "1581dd11-7898-41f4-962f-937283ba7e01",
"value": ".xlsx",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "92abb46d-d7e4-46e7-a5e1-8a29bb45d528",
"value": ".xls",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "1dde5ae7-754d-4e83-96b2-fe1f02995d8b",
"value": ".md",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "7e1a80e5-c32a-46a4-8f92-8912c64972aa",
"value": ".markdown",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "53abfe95-c7d0-4f63-ad37-17d425d25106",
"value": ".mdx",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "436877b8-8c0a-4cc6-9565-92754db08571",
"value": ".html",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "5e3e375e-750b-4204-8ac3-9a1174a5ab7c",
"value": ".htm",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "1a84a784-a797-4f96-98a0-33a9b48ceb2b",
"value": ".docx",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "62d11445-876a-493f-85d3-8fc020146bdd",
"value": ".csv",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "02c4bce8-7668-4ccd-b750-4281f314b231",
"value": ".txt",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
}
],
"id": "true",
"logical_operator": "or"
}
],
"selected": false,
"title": "IF/ELSE",
"type": "if-else"
},
"height": 358,
"id": "1756443014860",
"position": {
"x": -733.5977815139424,
"y": 236.10252072775984
},
"positionAbsolute": {
"x": -733.5977815139424,
"y": 236.10252072775984
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Jina Reader",
"datasource_name": "jina_reader",
"datasource_parameters": {
"crawl_sub_pages": {
"type": "variable",
"value": [
"rag",
"1756896212061",
"jina_subpages"
]
},
"limit": {
"type": "variable",
"value": [
"rag",
"1756896212061",
"jina_limit"
]
},
"url": {
"type": "mixed",
"value": "{{#rag.1756896212061.jina_url#}}"
},
"use_sitemap": {
"type": "variable",
"value": [
"rag",
"1756896212061",
"jian_sitemap"
]
}
},
"plugin_id": "langgenius/jina_datasource",
"provider_name": "jinareader",
"provider_type": "website_crawl",
"selected": false,
"title": "Jina Reader",
"type": "datasource"
},
"height": 52,
"id": "1756896212061",
"position": {
"x": -1371.6520723158733,
"y": 538.9988445953813
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 538.9988445953813
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Firecrawl",
"datasource_name": "crawl",
"datasource_parameters": {
"crawl_subpages": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"firecrawl_subpages"
]
},
"exclude_paths": {
"type": "mixed",
"value": "{{#rag.1756907397615.exclude_paths#}}"
},
"include_paths": {
"type": "mixed",
"value": "{{#rag.1756907397615.include_paths#}}"
},
"limit": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"max_pages"
]
},
"max_depth": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"max_depth"
]
},
"only_main_content": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"main_content"
]
},
"url": {
"type": "mixed",
"value": "{{#rag.1756907397615.firecrawl_url1#}}"
}
},
"plugin_id": "langgenius/firecrawl_datasource",
"provider_name": "firecrawl",
"provider_type": "website_crawl",
"selected": false,
"title": "Firecrawl",
"type": "datasource"
},
"height": 52,
"id": "1756907397615",
"position": {
"x": -1371.6520723158733,
"y": 644.3296146102903
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 644.3296146102903
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_team_authorization": true,
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "The text you want to chunk.",
"ja_JP": "The text you want to chunk.",
"pt_BR": "Conteúdo de Entrada",
"zh_Hans": "输入文本"
},
"label": {
"en_US": "Input Content",
"ja_JP": "Input Content",
"pt_BR": "Conteúdo de Entrada",
"zh_Hans": "输入文本"
},
"llm_description": "The text you want to chunk.",
"max": null,
"min": null,
"name": "input_text",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": "paragraph",
"form": "llm",
"human_description": {
"en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"pt_BR": "Dividir texto em parágrafos com base no separador e no comprimento máximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuperá-lo.",
"zh_Hans": "根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。"
},
"label": {
"en_US": "Parent Mode",
"ja_JP": "Parent Mode",
"pt_BR": "Modo Pai",
"zh_Hans": "父块模式"
},
"llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"max": null,
"min": null,
"name": "parent_mode",
"options": [
{
"icon": "",
"label": {
"en_US": "paragraph",
"ja_JP": "paragraph",
"pt_BR": "paragraph",
"zh_Hans": "paragraph"
},
"value": "paragraph"
},
{
"icon": "",
"label": {
"en_US": "full_doc",
"ja_JP": "full_doc",
"pt_BR": "full_doc",
"zh_Hans": "full_doc"
},
"value": "full_doc"
}
],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "select"
},
{
"auto_generate": null,
"default": "\n\n",
"form": "llm",
"human_description": {
"en_US": "Separator used for chunking",
"ja_JP": "Separator used for chunking",
"pt_BR": "Separador usado para divisão",
"zh_Hans": "用于分块的分隔符"
},
"label": {
"en_US": "Parent Delimiter",
"ja_JP": "Parent Delimiter",
"pt_BR": "Separador de Pai",
"zh_Hans": "父块分隔符"
},
"llm_description": "The separator used to split chunks",
"max": null,
"min": null,
"name": "separator",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": 1024,
"form": "llm",
"human_description": {
"en_US": "Maximum length for chunking",
"ja_JP": "Maximum length for chunking",
"pt_BR": "Comprimento máximo para divisão",
"zh_Hans": "用于分块的最大长度"
},
"label": {
"en_US": "Maximum Parent Chunk Length",
"ja_JP": "Maximum Parent Chunk Length",
"pt_BR": "Comprimento Máximo do Bloco Pai",
"zh_Hans": "最大父块长度"
},
"llm_description": "Maximum length allowed per chunk",
"max": null,
"min": null,
"name": "max_length",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": ". ",
"form": "llm",
"human_description": {
"en_US": "Separator used for subchunking",
"ja_JP": "Separator used for subchunking",
"pt_BR": "Separador usado para subdivisão",
"zh_Hans": "用于子分块的分隔符"
},
"label": {
"en_US": "Child Delimiter",
"ja_JP": "Child Delimiter",
"pt_BR": "Separador de Subdivisão",
"zh_Hans": "子分块分隔符"
},
"llm_description": "The separator used to split subchunks",
"max": null,
"min": null,
"name": "subchunk_separator",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": 512,
"form": "llm",
"human_description": {
"en_US": "Maximum length for subchunking",
"ja_JP": "Maximum length for subchunking",
"pt_BR": "Comprimento máximo para subdivisão",
"zh_Hans": "用于子分块的最大长度"
},
"label": {
"en_US": "Maximum Child Chunk Length",
"ja_JP": "Maximum Child Chunk Length",
"pt_BR": "Comprimento Máximo de Subdivisão",
"zh_Hans": "子分块最大长度"
},
"llm_description": "Maximum length allowed per subchunk",
"max": null,
"min": null,
"name": "subchunk_max_length",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": 0,
"form": "llm",
"human_description": {
"en_US": "Whether to remove consecutive spaces, newlines and tabs",
"ja_JP": "Whether to remove consecutive spaces, newlines and tabs",
"pt_BR": "Se deve remover espaços extras no texto",
"zh_Hans": "是否移除文本中的连续空格、换行符和制表符"
},
"label": {
"en_US": "Replace consecutive spaces, newlines and tabs",
"ja_JP": "Replace consecutive spaces, newlines and tabs",
"pt_BR": "Substituir espaços consecutivos, novas linhas e guias",
"zh_Hans": "替换连续空格、换行符和制表符"
},
"llm_description": "Whether to remove consecutive spaces, newlines and tabs",
"max": null,
"min": null,
"name": "remove_extra_spaces",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": 0,
"form": "llm",
"human_description": {
"en_US": "Whether to remove URLs and emails in the text",
"ja_JP": "Whether to remove URLs and emails in the text",
"pt_BR": "Se deve remover URLs e e-mails no texto",
"zh_Hans": "是否移除文本中的URL和电子邮件地址"
},
"label": {
"en_US": "Delete all URLs and email addresses",
"ja_JP": "Delete all URLs and email addresses",
"pt_BR": "Remover todas as URLs e e-mails",
"zh_Hans": "删除所有URL和电子邮件地址"
},
"llm_description": "Whether to remove URLs and emails in the text",
"max": null,
"min": null,
"name": "remove_urls_emails",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
}
],
"params": {
"input_text": "",
"max_length": "",
"parent_mode": "",
"remove_extra_spaces": "",
"remove_urls_emails": "",
"separator": "",
"subchunk_max_length": "",
"subchunk_separator": ""
},
"provider_id": "langgenius/parentchild_chunker/parentchild_chunker",
"provider_name": "langgenius/parentchild_chunker/parentchild_chunker",
"provider_type": "builtin",
"selected": false,
"title": "Parent-child Chunker",
"tool_configurations": {},
"tool_description": "Process documents into parent-child chunk structures",
"tool_label": "Parent-child Chunker",
"tool_name": "parentchild_chunker",
"tool_node_version": "2",
"tool_parameters": {
"input_text": {
"type": "mixed",
"value": "{{#1753346901505.output#}}"
},
"max_length": {
"type": "variable",
"value": [
"rag",
"shared",
"parent_length"
]
},
"parent_mode": {
"type": "variable",
"value": [
"rag",
"shared",
"parent_mode"
]
},
"remove_extra_spaces": {
"type": "variable",
"value": [
"rag",
"shared",
"clean_1"
]
},
"remove_urls_emails": {
"type": "variable",
"value": [
"rag",
"shared",
"clean_2"
]
},
"separator": {
"type": "mixed",
"value": "{{#rag.shared.parent_dilmiter#}}"
},
"subchunk_max_length": {
"type": "variable",
"value": [
"rag",
"shared",
"child_length"
]
},
"subchunk_separator": {
"type": "mixed",
"value": "{{#rag.shared.child_delimiter#}}"
}
},
"type": "tool"
},
"height": 52,
"id": "1756972161593",
"position": {
"x": 184.46657789772178,
"y": 326
},
"positionAbsolute": {
"x": 184.46657789772178,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
}
],
"viewport": {
"x": 947.2141381290828,
"y": 179.30600859363653,
"zoom": 0.47414481289660987
}
},
"icon_info": {
"icon": "ab8da246-37ba-4bbb-9b24-e7bda0778005",
"icon_background": null,
"icon_type": "image",
"icon_url": ""
},
"id": "9553b1e0-0c26-445b-9e18-063ad7eca0b4",
"name": "Parent-child-HQ",
"icon": {
"icon": "ab8da246-37ba-4bbb-9b24-e7bda0778005",
"icon_background": null,
"icon_type": "image",
"icon_url": ""
},
"language": "zh-Hans",
"position": 2
},
"9ef3e66a-11c7-4227-897c-3b0f9a42da1a": {
"chunk_structure": "qa_model",
"description": "This template generates structured Q&A pairs by extracting selected columns from a table. These pairs are indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.",
"export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/qa_chunk:0.0.8@1fed9644646bdd48792cdf5a1d559a3df336bd3a8edb0807227499fb56dce3af\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n version: null\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 769900fc-8a31-4584-94f6-f227357c00c8\n icon_background: null\n icon_type: image\n icon_url: \n name: Simple Q&A\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1750836380067-source-1753253430271-target\n source: '1750836380067'\n sourceHandle: source\n target: '1753253430271'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1753253430271-source-1750836372241-target\n source: '1753253430271'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: qa_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1753253430271'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: false\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: null\n reranking_provider_name: null\n score_threshold: 0\n score_threshold_enabled: false\n search_method: semantic_search\n top_k: 3\n weights: null\n selected: true\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 160\n y: 326\n positionAbsolute:\n x: 160\n y: 326\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - csv\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -714.4192784522008\n y: 326\n positionAbsolute:\n x: -714.4192784522008\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 249\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1115\n height: 249\n id: '1751252161631'\n position:\n x: -714.4192784522008\n y: -19.94142868660783\n positionAbsolute:\n x: -714.4192784522008\n y: -19.94142868660783\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1115\n - data:\n author: TenTen\n desc: ''\n height: 281\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 415\n height: 281\n id: '1751252440357'\n position:\n x: -1206.996048993409\n y: 311.5998178583933\n positionAbsolute:\n x: -1206.996048993409\n y: 311.5998178583933\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 415\n - data:\n author: TenTen\n desc: ''\n height: 403\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 403\n id: '1751254117904'\n position:\n x: 160\n y: 471.1516409864865\n positionAbsolute:\n x: 160\n y: 471.1516409864865\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 341\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts\n specified columns from tables to generate structured Q&A pairs. Users can\n independently designate which columns to use for questions and which for\n answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These\n pairs are indexed by the question field, so user queries are matched directly\n against the questions to retrieve the corresponding answers. This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching\n strategy improves clarity and precision, especially in scenarios involving\n high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 341\n id: '1751356019653'\n position:\n x: -282.74494795239\n y: 411.6979750489463\n positionAbsolute:\n x: -282.74494795239\n y: 411.6979750489463\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: The result of the general chunk tool.\n properties:\n qa_chunks:\n items:\n description: The QA chunk.\n properties:\n answer:\n description: The answer of the QA chunk.\n type: string\n question:\n description: The question of the QA chunk.\n type: string\n type: object\n type: array\n type: object\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The file you want to extract QA from.\n ja_JP: The file you want to extract QA from.\n pt_BR: The file you want to extract QA from.\n zh_Hans: 你想要提取 QA 的文件。\n label:\n en_US: Input File\n ja_JP: Input File\n pt_BR: Input File\n zh_Hans: 输入文件\n llm_description: The file you want to extract QA from.\n max: null\n min: null\n name: input_file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Column number for question.\n ja_JP: Column number for question.\n pt_BR: Column number for question.\n zh_Hans: 问题所在的列。\n label:\n en_US: Column number for question\n ja_JP: Column number for question\n pt_BR: Column number for question\n zh_Hans: 问题所在的列\n llm_description: The column number for question, the format of the column\n number must be an integer.\n max: null\n min: null\n name: question_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 1\n form: llm\n human_description:\n en_US: Column number for answer.\n ja_JP: Column number for answer.\n pt_BR: Column number for answer.\n zh_Hans: 答案所在的列。\n label:\n en_US: Column number for answer\n ja_JP: Column number for answer\n pt_BR: Column number for answer\n zh_Hans: 答案所在的列\n llm_description: The column number for answer, the format of the column\n number must be an integer.\n max: null\n min: null\n name: answer_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n params:\n answer_column: ''\n input_file: ''\n question_column: ''\n provider_id: langgenius/qa_chunk/qa_chunk\n provider_name: langgenius/qa_chunk/qa_chunk\n provider_type: builtin\n selected: false\n title: Q&A PROCESSOR\n tool_configurations: {}\n tool_description: A tool for QA chunking mode.\n tool_label: QA Chunk\n tool_name: qa_chunk\n tool_node_version: '2'\n tool_parameters:\n answer_column:\n type: variable\n value:\n - rag\n - shared\n - Column_Number_for_Answers\n input_file:\n type: variable\n value:\n - '1750836380067'\n - file\n question_column:\n type: variable\n value:\n - rag\n - shared\n - Column_Number_for_Questions\n type: tool\n height: 52\n id: '1753253430271'\n position:\n x: -282.74494795239\n y: 326\n positionAbsolute:\n x: -282.74494795239\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 173\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Simple\n Q&A Template\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" requires\n a pre-prepared table of question-answer pairs. As a result, it only supports\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"File\n Upload\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" data\n source, accepting \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\"csv\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" file\n formats.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 173\n id: '1753411065636'\n position:\n x: -714.4192784522008\n y: 411.6979750489463\n positionAbsolute:\n x: -714.4192784522008\n y: 411.6979750489463\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n viewport:\n x: 698.8920691163195\n y: 311.46417000656925\n zoom: 0.41853867943092266\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1\n label: Column Number for Questions\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: Specify a column in the table as Questions. The number of first column is\n 0.\n type: number\n unit: ''\n variable: Column_Number_for_Questions\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 2\n label: Column Number for Answers\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: Specify a column in the table as Answers. The number of first column is\n 0.\n type: number\n unit: null\n variable: Column_Number_for_Answers\n",
"graph": {
"edges": [
{
"data": {
"isInIteration": false,
"isInLoop": false,
"sourceType": "datasource",
"targetType": "tool"
},
"id": "1750836380067-source-1753253430271-target",
"source": "1750836380067",
"sourceHandle": "source",
"target": "1753253430271",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "knowledge-index"
},
"id": "1753253430271-source-1750836372241-target",
"source": "1753253430271",
"sourceHandle": "source",
"target": "1750836372241",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
}
],
"nodes": [
{
"data": {
"chunk_structure": "qa_model",
"embedding_model": "jina-embeddings-v2-base-en",
"embedding_model_provider": "langgenius/jina/jina",
"index_chunk_variable_selector": [
"1753253430271",
"result"
],
"indexing_technique": "high_quality",
"keyword_number": 10,
"retrieval_model": {
"reranking_enable": false,
"reranking_mode": "reranking_model",
"reranking_model": {
"reranking_model_name": null,
"reranking_provider_name": null
},
"score_threshold": 0,
"score_threshold_enabled": false,
"search_method": "semantic_search",
"top_k": 3,
"weights": null
},
"selected": true,
"title": "Knowledge Base",
"type": "knowledge-index"
},
"height": 114,
"id": "1750836372241",
"position": {
"x": 160,
"y": 326
},
"positionAbsolute": {
"x": 160,
"y": 326
},
"selected": true,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "File",
"datasource_name": "upload-file",
"datasource_parameters": {},
"fileExtensions": [
"csv"
],
"plugin_id": "langgenius/file",
"provider_name": "file",
"provider_type": "local_file",
"selected": false,
"title": "File",
"type": "datasource"
},
"height": 52,
"id": "1750836380067",
"position": {
"x": -714.4192784522008,
"y": 326
},
"positionAbsolute": {
"x": -714.4192784522008,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 249,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 1115
},
"height": 249,
"id": "1751252161631",
"position": {
"x": -714.4192784522008,
"y": -19.94142868660783
},
"positionAbsolute": {
"x": -714.4192784522008,
"y": -19.94142868660783
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 1115
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 281,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 415
},
"height": 281,
"id": "1751252440357",
"position": {
"x": -1206.996048993409,
"y": 311.5998178583933
},
"positionAbsolute": {
"x": -1206.996048993409,
"y": 311.5998178583933
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 415
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 403,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 403,
"id": "1751254117904",
"position": {
"x": 160,
"y": 471.1516409864865
},
"positionAbsolute": {
"x": 160,
"y": 471.1516409864865
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 341,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts specified columns from tables to generate structured Q&A pairs. Users can independently designate which columns to use for questions and which for answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These pairs are indexed by the question field, so user queries are matched directly against the questions to retrieve the corresponding answers. This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching strategy improves clarity and precision, especially in scenarios involving high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 341,
"id": "1751356019653",
"position": {
"x": -282.74494795239,
"y": 411.6979750489463
},
"positionAbsolute": {
"x": -282.74494795239,
"y": 411.6979750489463
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"is_team_authorization": true,
"output_schema": {
"properties": {
"result": {
"description": "The result of the general chunk tool.",
"properties": {
"qa_chunks": {
"items": {
"description": "The QA chunk.",
"properties": {
"answer": {
"description": "The answer of the QA chunk.",
"type": "string"
},
"question": {
"description": "The question of the QA chunk.",
"type": "string"
}
},
"type": "object"
},
"type": "array"
}
},
"type": "object"
}
},
"type": "object"
},
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "The file you want to extract QA from.",
"ja_JP": "The file you want to extract QA from.",
"pt_BR": "The file you want to extract QA from.",
"zh_Hans": "你想要提取 QA 的文件。"
},
"label": {
"en_US": "Input File",
"ja_JP": "Input File",
"pt_BR": "Input File",
"zh_Hans": "输入文件"
},
"llm_description": "The file you want to extract QA from.",
"max": null,
"min": null,
"name": "input_file",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "file"
},
{
"auto_generate": null,
"default": 0,
"form": "llm",
"human_description": {
"en_US": "Column number for question.",
"ja_JP": "Column number for question.",
"pt_BR": "Column number for question.",
"zh_Hans": "问题所在的列。"
},
"label": {
"en_US": "Column number for question",
"ja_JP": "Column number for question",
"pt_BR": "Column number for question",
"zh_Hans": "问题所在的列"
},
"llm_description": "The column number for question, the format of the column number must be an integer.",
"max": null,
"min": null,
"name": "question_column",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": 1,
"form": "llm",
"human_description": {
"en_US": "Column number for answer.",
"ja_JP": "Column number for answer.",
"pt_BR": "Column number for answer.",
"zh_Hans": "答案所在的列。"
},
"label": {
"en_US": "Column number for answer",
"ja_JP": "Column number for answer",
"pt_BR": "Column number for answer",
"zh_Hans": "答案所在的列"
},
"llm_description": "The column number for answer, the format of the column number must be an integer.",
"max": null,
"min": null,
"name": "answer_column",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "number"
}
],
"params": {
"answer_column": "",
"input_file": "",
"question_column": ""
},
"provider_id": "langgenius/qa_chunk/qa_chunk",
"provider_name": "langgenius/qa_chunk/qa_chunk",
"provider_type": "builtin",
"selected": false,
"title": "Q&A PROCESSOR",
"tool_configurations": {},
"tool_description": "A tool for QA chunking mode.",
"tool_label": "QA Chunk",
"tool_name": "qa_chunk",
"tool_node_version": "2",
"tool_parameters": {
"answer_column": {
"type": "variable",
"value": [
"rag",
"shared",
"Column_Number_for_Answers"
]
},
"input_file": {
"type": "variable",
"value": [
"1750836380067",
"file"
]
},
"question_column": {
"type": "variable",
"value": [
"rag",
"shared",
"Column_Number_for_Questions"
]
}
},
"type": "tool"
},
"height": 52,
"id": "1753253430271",
"position": {
"x": -282.74494795239,
"y": 326
},
"positionAbsolute": {
"x": -282.74494795239,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 173,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Simple Q&A Template\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" requires a pre-prepared table of question-answer pairs. As a result, it only supports \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"File Upload\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" data source, accepting \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\"csv\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" file formats.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 173,
"id": "1753411065636",
"position": {
"x": -714.4192784522008,
"y": 411.6979750489463
},
"positionAbsolute": {
"x": -714.4192784522008,
"y": 411.6979750489463
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
}
],
"viewport": {
"x": 698.8920691163195,
"y": 311.46417000656925,
"zoom": 0.41853867943092266
}
},
"icon_info": {
"icon": "ae0993dc-ff90-48ac-9e35-c31ebae5124b",
"icon_background": null,
"icon_type": "image",
"icon_url": ""
},
"id": "9ef3e66a-11c7-4227-897c-3b0f9a42da1a",
"name": "Simple Q&A",
"icon": {
"icon": "ae0993dc-ff90-48ac-9e35-c31ebae5124b",
"icon_background": null,
"icon_type": "image",
"icon_url": ""
},
"language": "zh-Hans",
"position": 3
},
"982d1788-837a-40c8-b7de-d37b09a9b2bc": {
"chunk_structure": "hierarchical_model",
"description": "This template is designed for converting native Office files such as DOCX, XLSX, and PPTX into Markdown to facilitate better information processing. PDF files are not recommended.",
"export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: yevanchen/markitdown:0.0.4@776b3e2e930e2ffd28a75bb20fecbe7a020849cf754f86e604acacf1258877f6\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 9d658c3a-b22f-487d-8223-db51e9012505\n icon_background: null\n icon_type: image\n icon_url: \n name: Convert to Markdown\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751336942081-source-1750400198569-target\n selected: false\n source: '1751336942081'\n sourceHandle: source\n target: '1750400198569'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1750400203722-source-1751359716720-target\n selected: false\n source: '1750400203722'\n sourceHandle: source\n target: '1751359716720'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: tool\n id: 1751359716720-source-1751336942081-target\n source: '1751359716720'\n sourceHandle: source\n target: '1751336942081'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1751336942081'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n hybridSearchMode: weighted_score\n score_threshold: 0.5\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n vector_setting:\n embedding_model_name: jina-embeddings-v2-base-en\n embedding_provider_name: langgenius/jina/jina\n selected: true\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 357.7591396590142\n y: 282\n positionAbsolute:\n x: 357.7591396590142\n y: 282\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - html\n - xlsx\n - xls\n - doc\n - docx\n - csv\n - pptx\n - xml\n - ppt\n - txt\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750400203722'\n position:\n x: -580.684520226929\n y: 282\n positionAbsolute:\n x: -580.684520226929\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 316\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 374\n height: 316\n id: '1751264451381'\n position:\n x: -1034.2054006208518\n y: 282\n positionAbsolute:\n x: -1034.2054006208518\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 374\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -580.684520226929\n y: -21.891401375096322\n positionAbsolute:\n x: -580.684520226929\n y: -21.891401375096322\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 417\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Markitdown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n serves as an excellent alternative to traditional document extraction nodes,\n offering robust file conversion capabilities within the Dify ecosystem.\n It leverages MarkItDown''s plugin-based architecture to provide seamless\n conversion of multiple file formats to Markdown.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 241\n height: 417\n id: '1751266402561'\n position:\n x: -266.96080929383595\n y: 372.64040589639495\n positionAbsolute:\n x: -266.96080929383595\n y: 372.64040589639495\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 241\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such\n as a paragraph, a section, or even an entire document—that include the matched\n child chunks are then retrieved. These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 37.74090119950054\n y: 372.64040589639495\n positionAbsolute:\n x: 37.74090119950054\n y: 372.64040589639495\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 357.7591396590142\n y: 434.3959856026883\n positionAbsolute:\n x: 357.7591396590142\n y: 434.3959856026883\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks result\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conteúdo de Entrada\n zh_Hans: 输入文本\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em parágrafos com base no separador e no comprimento\n máximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuperá-lo.\n zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: 父块模式\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Parágrafo\n zh_Hans: 段落\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: 全文\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divisão\n zh_Hans: 用于分块的分隔符\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: 父块分隔符\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento máximo para divisão\n zh_Hans: 用于分块的最大长度\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento Máximo do Bloco Pai\n zh_Hans: 最大父块长度\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. '\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivisão\n zh_Hans: 用于子分块的分隔符\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivisão\n zh_Hans: 子分块分隔符\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento máximo para subdivisão\n zh_Hans: 用于子分块的最大长度\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento Máximo de Subdivisão\n zh_Hans: 子分块最大长度\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espaços extras no texto\n zh_Hans: 是否移除文本中的连续空格、换行符和制表符\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espaços consecutivos, novas linhas e guias\n zh_Hans: 替换连续空格、换行符和制表符\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: 是否移除文本中的URL和电子邮件地址\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: 删除所有URL和电子邮件地址\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius/parentchild_chunker/parentchild_chunker\n provider_name: langgenius/parentchild_chunker/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1751359716720.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n separator:\n type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n type: tool\n height: 52\n id: '1751336942081'\n position:\n x: 37.74090119950054\n y: 282\n positionAbsolute:\n x: 37.74090119950054\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema: null\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Upload files for processing\n ja_JP: Upload files for processing\n pt_BR: Carregar arquivos para processamento\n zh_Hans: 上传文件进行处理\n label:\n en_US: Files\n ja_JP: Files\n pt_BR: Arquivos\n zh_Hans: 文件\n llm_description: ''\n max: null\n min: null\n name: files\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: files\n params:\n files: ''\n provider_id: yevanchen/markitdown/markitdown\n provider_name: yevanchen/markitdown/markitdown\n provider_type: builtin\n selected: false\n title: markitdown\n tool_configurations: {}\n tool_description: Python tool for converting files and office documents to\n Markdown.\n tool_label: markitdown\n tool_name: markitdown\n tool_node_version: '2'\n tool_parameters:\n files:\n type: variable\n value:\n - '1750400203722'\n - file\n type: tool\n height: 52\n id: '1751359716720'\n position:\n x: -266.96080929383595\n y: 282\n positionAbsolute:\n x: -266.96080929383595\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 301\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MarkItDown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n recommended for converting and handling a wide range of file formats, particularly\n for transforming content into Markdown. It works especially well for converting\n native Office files—such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"DOCX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"XLSX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"PPTX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"—into\n Markdown to facilitate better information processing. However, as some users\n have noted its suboptimal performance in extracting content from PDF files,\n using it for PDFs is not recommended.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 301\n id: '1753425718313'\n position:\n x: -580.684520226929\n y: 372.64040589639495\n positionAbsolute:\n x: -580.684520226929\n y: 372.64040589639495\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n viewport:\n x: 747.6785299994758\n y: 94.6209873206409\n zoom: 0.8152773235379324\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n",
"graph": {
"edges": [
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "knowledge-index"
},
"id": "1751336942081-source-1750400198569-target",
"selected": false,
"source": "1751336942081",
"sourceHandle": "source",
"target": "1750400198569",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "tool"
},
"id": "1750400203722-source-1751359716720-target",
"selected": false,
"source": "1750400203722",
"sourceHandle": "source",
"target": "1751359716720",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "tool"
},
"id": "1751359716720-source-1751336942081-target",
"source": "1751359716720",
"sourceHandle": "source",
"target": "1751336942081",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
}
],
"nodes": [
{
"data": {
"chunk_structure": "hierarchical_model",
"embedding_model": "jina-embeddings-v2-base-en",
"embedding_model_provider": "langgenius/jina/jina",
"index_chunk_variable_selector": [
"1751336942081",
"result"
],
"indexing_technique": "high_quality",
"keyword_number": 10,
"retrieval_model": {
"hybridSearchMode": "weighted_score",
"score_threshold": 0.5,
"score_threshold_enabled": false,
"search_method": "hybrid_search",
"top_k": 3,
"vector_setting": {
"embedding_model_name": "jina-embeddings-v2-base-en",
"embedding_provider_name": "langgenius/jina/jina"
}
},
"selected": true,
"title": "Knowledge Base",
"type": "knowledge-index"
},
"height": 114,
"id": "1750400198569",
"position": {
"x": 357.7591396590142,
"y": 282
},
"positionAbsolute": {
"x": 357.7591396590142,
"y": 282
},
"selected": true,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "File",
"datasource_name": "upload-file",
"datasource_parameters": {},
"fileExtensions": [
"html",
"xlsx",
"xls",
"doc",
"docx",
"csv",
"pptx",
"xml",
"ppt",
"txt"
],
"plugin_id": "langgenius/file",
"provider_name": "file",
"provider_type": "local_file",
"selected": false,
"title": "File",
"type": "datasource"
},
"height": 52,
"id": "1750400203722",
"position": {
"x": -580.684520226929,
"y": 282
},
"positionAbsolute": {
"x": -580.684520226929,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 316,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 374
},
"height": 316,
"id": "1751264451381",
"position": {
"x": -1034.2054006208518,
"y": 282
},
"positionAbsolute": {
"x": -1034.2054006208518,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 374
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 260,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 1182
},
"height": 260,
"id": "1751266376760",
"position": {
"x": -580.684520226929,
"y": -21.891401375096322
},
"positionAbsolute": {
"x": -580.684520226929,
"y": -21.891401375096322
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 1182
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 417,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Markitdown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" serves as an excellent alternative to traditional document extraction nodes, offering robust file conversion capabilities within the Dify ecosystem. It leverages MarkItDown's plugin-based architecture to provide seamless conversion of multiple file formats to Markdown.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 241
},
"height": 417,
"id": "1751266402561",
"position": {
"x": -266.96080929383595,
"y": 372.64040589639495
},
"positionAbsolute": {
"x": -266.96080929383595,
"y": 372.64040589639495
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 241
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 554,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such as a paragraph, a section, or even an entire document—that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 554,
"id": "1751266447821",
"position": {
"x": 37.74090119950054,
"y": 372.64040589639495
},
"positionAbsolute": {
"x": 37.74090119950054,
"y": 372.64040589639495
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 411,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 411,
"id": "1751266580099",
"position": {
"x": 357.7591396590142,
"y": 434.3959856026883
},
"positionAbsolute": {
"x": 357.7591396590142,
"y": 434.3959856026883
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"is_team_authorization": true,
"output_schema": {
"properties": {
"result": {
"description": "Parent child chunks result",
"items": {
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "",
"ja_JP": "",
"pt_BR": "",
"zh_Hans": ""
},
"label": {
"en_US": "Input Content",
"ja_JP": "Input Content",
"pt_BR": "Conteúdo de Entrada",
"zh_Hans": "输入文本"
},
"llm_description": "The text you want to chunk.",
"max": null,
"min": null,
"name": "input_text",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": "paragraph",
"form": "llm",
"human_description": {
"en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"pt_BR": "Dividir texto em parágrafos com base no separador e no comprimento máximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuperá-lo.",
"zh_Hans": "根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。"
},
"label": {
"en_US": "Parent Mode",
"ja_JP": "Parent Mode",
"pt_BR": "Modo Pai",
"zh_Hans": "父块模式"
},
"llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"max": null,
"min": null,
"name": "parent_mode",
"options": [
{
"label": {
"en_US": "Paragraph",
"ja_JP": "Paragraph",
"pt_BR": "Parágrafo",
"zh_Hans": "段落"
},
"value": "paragraph"
},
{
"label": {
"en_US": "Full Document",
"ja_JP": "Full Document",
"pt_BR": "Documento Completo",
"zh_Hans": "全文"
},
"value": "full_doc"
}
],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "select"
},
{
"auto_generate": null,
"default": "\n\n",
"form": "llm",
"human_description": {
"en_US": "Separator used for chunking",
"ja_JP": "Separator used for chunking",
"pt_BR": "Separador usado para divisão",
"zh_Hans": "用于分块的分隔符"
},
"label": {
"en_US": "Parent Delimiter",
"ja_JP": "Parent Delimiter",
"pt_BR": "Separador de Pai",
"zh_Hans": "父块分隔符"
},
"llm_description": "The separator used to split chunks",
"max": null,
"min": null,
"name": "separator",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": 1024,
"form": "llm",
"human_description": {
"en_US": "Maximum length for chunking",
"ja_JP": "Maximum length for chunking",
"pt_BR": "Comprimento máximo para divisão",
"zh_Hans": "用于分块的最大长度"
},
"label": {
"en_US": "Maximum Parent Chunk Length",
"ja_JP": "Maximum Parent Chunk Length",
"pt_BR": "Comprimento Máximo do Bloco Pai",
"zh_Hans": "最大父块长度"
},
"llm_description": "Maximum length allowed per chunk",
"max": null,
"min": null,
"name": "max_length",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": ". ",
"form": "llm",
"human_description": {
"en_US": "Separator used for subchunking",
"ja_JP": "Separator used for subchunking",
"pt_BR": "Separador usado para subdivisão",
"zh_Hans": "用于子分块的分隔符"
},
"label": {
"en_US": "Child Delimiter",
"ja_JP": "Child Delimiter",
"pt_BR": "Separador de Subdivisão",
"zh_Hans": "子分块分隔符"
},
"llm_description": "The separator used to split subchunks",
"max": null,
"min": null,
"name": "subchunk_separator",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": 512,
"form": "llm",
"human_description": {
"en_US": "Maximum length for subchunking",
"ja_JP": "Maximum length for subchunking",
"pt_BR": "Comprimento máximo para subdivisão",
"zh_Hans": "用于子分块的最大长度"
},
"label": {
"en_US": "Maximum Child Chunk Length",
"ja_JP": "Maximum Child Chunk Length",
"pt_BR": "Comprimento Máximo de Subdivisão",
"zh_Hans": "子分块最大长度"
},
"llm_description": "Maximum length allowed per subchunk",
"max": null,
"min": null,
"name": "subchunk_max_length",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": 0,
"form": "llm",
"human_description": {
"en_US": "Whether to remove consecutive spaces, newlines and tabs",
"ja_JP": "Whether to remove consecutive spaces, newlines and tabs",
"pt_BR": "Se deve remover espaços extras no texto",
"zh_Hans": "是否移除文本中的连续空格、换行符和制表符"
},
"label": {
"en_US": "Replace consecutive spaces, newlines and tabs",
"ja_JP": "Replace consecutive spaces, newlines and tabs",
"pt_BR": "Substituir espaços consecutivos, novas linhas e guias",
"zh_Hans": "替换连续空格、换行符和制表符"
},
"llm_description": "Whether to remove consecutive spaces, newlines and tabs",
"max": null,
"min": null,
"name": "remove_extra_spaces",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": 0,
"form": "llm",
"human_description": {
"en_US": "Whether to remove URLs and emails in the text",
"ja_JP": "Whether to remove URLs and emails in the text",
"pt_BR": "Se deve remover URLs e e-mails no texto",
"zh_Hans": "是否移除文本中的URL和电子邮件地址"
},
"label": {
"en_US": "Delete all URLs and email addresses",
"ja_JP": "Delete all URLs and email addresses",
"pt_BR": "Remover todas as URLs e e-mails",
"zh_Hans": "删除所有URL和电子邮件地址"
},
"llm_description": "Whether to remove URLs and emails in the text",
"max": null,
"min": null,
"name": "remove_urls_emails",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
}
],
"params": {
"input_text": "",
"max_length": "",
"parent_mode": "",
"remove_extra_spaces": "",
"remove_urls_emails": "",
"separator": "",
"subchunk_max_length": "",
"subchunk_separator": ""
},
"provider_id": "langgenius/parentchild_chunker/parentchild_chunker",
"provider_name": "langgenius/parentchild_chunker/parentchild_chunker",
"provider_type": "builtin",
"selected": false,
"title": "Parent-child Chunker",
"tool_configurations": {},
"tool_description": "Process documents into parent-child chunk structures",
"tool_label": "Parent-child Chunker",
"tool_name": "parentchild_chunker",
"tool_node_version": "2",
"tool_parameters": {
"input_text": {
"type": "mixed",
"value": "{{#1751359716720.text#}}"
},
"max_length": {
"type": "variable",
"value": [
"rag",
"shared",
"Maximum_Parent_Length"
]
},
"parent_mode": {
"type": "variable",
"value": [
"rag",
"shared",
"Parent_Mode"
]
},
"separator": {
"type": "mixed",
"value": "{{#rag.shared.Parent_Delimiter#}}"
},
"subchunk_max_length": {
"type": "variable",
"value": [
"rag",
"shared",
"Maximum_Child_Length"
]
},
"subchunk_separator": {
"type": "mixed",
"value": "{{#rag.shared.Child_Delimiter#}}"
}
},
"type": "tool"
},
"height": 52,
"id": "1751336942081",
"position": {
"x": 37.74090119950054,
"y": 282
},
"positionAbsolute": {
"x": 37.74090119950054,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_team_authorization": true,
"output_schema": null,
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "Upload files for processing",
"ja_JP": "Upload files for processing",
"pt_BR": "Carregar arquivos para processamento",
"zh_Hans": "上传文件进行处理"
},
"label": {
"en_US": "Files",
"ja_JP": "Files",
"pt_BR": "Arquivos",
"zh_Hans": "文件"
},
"llm_description": "",
"max": null,
"min": null,
"name": "files",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "files"
}
],
"params": {
"files": ""
},
"provider_id": "yevanchen/markitdown/markitdown",
"provider_name": "yevanchen/markitdown/markitdown",
"provider_type": "builtin",
"selected": false,
"title": "markitdown",
"tool_configurations": {},
"tool_description": "Python tool for converting files and office documents to Markdown.",
"tool_label": "markitdown",
"tool_name": "markitdown",
"tool_node_version": "2",
"tool_parameters": {
"files": {
"type": "variable",
"value": [
"1750400203722",
"file"
]
}
},
"type": "tool"
},
"height": 52,
"id": "1751359716720",
"position": {
"x": -266.96080929383595,
"y": 282
},
"positionAbsolute": {
"x": -266.96080929383595,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 301,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MarkItDown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is recommended for converting and handling a wide range of file formats, particularly for transforming content into Markdown. It works especially well for converting native Office files—such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"DOCX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"XLSX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"PPTX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"—into Markdown to facilitate better information processing. However, as some users have noted its suboptimal performance in extracting content from PDF files, using it for PDFs is not recommended.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 301,
"id": "1753425718313",
"position": {
"x": -580.684520226929,
"y": 372.64040589639495
},
"positionAbsolute": {
"x": -580.684520226929,
"y": 372.64040589639495
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
}
],
"viewport": {
"x": 747.6785299994758,
"y": 94.6209873206409,
"zoom": 0.8152773235379324
}
},
"icon_info": {
"icon": "9d658c3a-b22f-487d-8223-db51e9012505",
"icon_background": null,
"icon_type": "image",
"icon_url": ""
},
"id": "982d1788-837a-40c8-b7de-d37b09a9b2bc",
"name": "Convert to Markdown",
"icon": {
"icon": "9d658c3a-b22f-487d-8223-db51e9012505",
"icon_background": null,
"icon_type": "image",
"icon_url": ""
},
"language": "zh-Hans",
"position": 4
},
"98374ab6-9dcd-434d-983e-268bec156b43": {
"chunk_structure": "qa_model",
"description": "This template is designed to use LLM to extract key information from the input document and generate Q&A pairs indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.",
"export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/dify_extractor:0.0.5@ba7e2fd9165eda73bfcc68e31a108855197e88706e5556c058e0777ab08409b3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/notion_datasource:0.1.12@2855c4a7cffd3311118ebe70f095e546f99935e47f12c841123146f728534f55\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina_datasource:0.0.5@75942f5bbde870ad28e0345ff5ebf54ebd3aec63f0e66344ef76b88cf06b85c3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/google_drive:0.1.6@4bc0cf8f8979ebd7321b91506b4bc8f090b05b769b5d214f2da4ce4c04ce30bd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/qa_chunk:0.0.8@1fed9644646bdd48792cdf5a1d559a3df336bd3a8edb0807227499fb56dce3af\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: bowenliang123/md_exporter:2.0.0@13e1aca1995328e41c080ff9f7f6d898df60ff74a3f4d98d6de4b18ab5b92c2e\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/firecrawl_datasource:0.2.4@37b490ebc52ac30d1c6cbfa538edcddddcfed7d5f5de58982edbd4e2094eb6e2\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/anthropic:0.2.0@a776815b091c81662b2b54295ef4b8a54b5533c2ec1c66c7c8f2feea724f3248\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 2b887f89-b6c9-4288-be43-635fee45216b\n icon_background: '#FFEAD5'\n icon_type: image\n icon_url: \n name: LLM Generated Q&A\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: variable-aggregator\n id: 1750836391776-source-1753346901505-target\n selected: false\n source: '1750836391776'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: document-extractor\n targetType: variable-aggregator\n id: 1753349228522-source-1753346901505-target\n selected: false\n source: '1753349228522'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1754023419266-source-1753346901505-target\n selected: false\n source: '1754023419266'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756442998557-source-1756442986174-target\n selected: false\n source: '1756442998557'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: if-else\n id: 1756442986174-source-1756443014860-target\n selected: false\n source: '1756442986174'\n sourceHandle: source\n target: '1756443014860'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1750836380067-source-1756442986174-target\n selected: false\n source: '1750836380067'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: tool\n id: 1756443014860-true-1750836391776-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'true'\n target: '1750836391776'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: document-extractor\n id: 1756443014860-false-1753349228522-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'false'\n target: '1753349228522'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756896212061-source-1753346901505-target\n source: '1756896212061'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756907397615-source-1753346901505-target\n source: '1756907397615'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: llm\n id: 1753346901505-source-1756912504019-target\n source: '1753346901505'\n sourceHandle: source\n target: '1756912504019'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: llm\n targetType: tool\n id: 1756912504019-source-1756912537172-target\n source: '1756912504019'\n sourceHandle: source\n target: '1756912537172'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: tool\n id: 1756912537172-source-1756912274158-target\n source: '1756912537172'\n sourceHandle: source\n target: '1756912274158'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1756912274158-source-1750836372241-target\n source: '1756912274158'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: qa_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1756912274158'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n hybridSearchMode: weighted_score\n reranking_enable: false\n score_threshold: 0.5\n score_threshold_enabled: false\n search_method: semantic_search\n top_k: 3\n vector_setting:\n embedding_model_name: jina-embeddings-v2-base-en\n embedding_provider_name: langgenius/jina/jina\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 1150.8369138826617\n y: 326\n positionAbsolute:\n x: 1150.8369138826617\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -1371.6520723158733\n y: 224.87938381325645\n positionAbsolute:\n x: -1371.6520723158733\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n documents:\n description: the documents extracted from the file\n items:\n type: object\n type: array\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,\n jpg, jpeg)\n zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n params:\n file: ''\n provider_id: langgenius/dify_extractor/dify_extractor\n provider_name: langgenius/dify_extractor/dify_extractor\n provider_type: builtin\n selected: false\n title: Dify Extractor\n tool_configurations: {}\n tool_description: Dify Extractor\n tool_label: Dify Extractor\n tool_name: dify_extractor\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756442986174'\n - output\n type: tool\n height: 52\n id: '1750836391776'\n position:\n x: -417.5334221022782\n y: 268.1692071834485\n positionAbsolute:\n x: -417.5334221022782\n y: 268.1692071834485\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 252\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1124\n height: 252\n id: '1751252161631'\n position:\n x: -1371.6520723158733\n y: -123.758428116601\n positionAbsolute:\n x: -1371.6520723158733\n y: -123.758428116601\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1124\n - data:\n author: TenTen\n desc: ''\n height: 388\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 285\n height: 388\n id: '1751252440357'\n position:\n x: -1723.9942193415582\n y: 224.87938381325645\n positionAbsolute:\n x: -1723.9942193415582\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 285\n - data:\n author: TenTen\n desc: ''\n height: 430\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify\n Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n a built-in document parser developed by Dify. It supports a wide range of\n common file formats and offers specialized handling for certain formats,\n such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\n In addition to text extraction, it can extract images embedded within documents,\n store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 430\n id: '1751253091602'\n position:\n x: -417.5334221022782\n y: 546.5283142529594\n positionAbsolute:\n x: -417.5334221022782\n y: 546.5283142529594\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 336\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts\n specified columns from tables to generate structured Q&A pairs. Users can\n independently designate which columns to use for questions and which for\n answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These\n pairs are indexed by the question field, so user queries are matched directly\n against the questions to retrieve the corresponding answers. This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching\n strategy improves clarity and precision, especially in scenarios involving\n high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 336\n id: '1751253953926'\n position:\n x: 794.2003154321724\n y: 417.25474169825833\n positionAbsolute:\n x: 794.2003154321724\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 410\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 410\n id: '1751254117904'\n position:\n x: 1150.8369138826617\n y: 475.88970282568215\n positionAbsolute:\n x: 1150.8369138826617\n y: 475.88970282568215\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n output_type: string\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836391776'\n - text\n - - '1753349228522'\n - text\n - - '1754023419266'\n - content\n - - '1756896212061'\n - content\n height: 187\n id: '1753346901505'\n position:\n x: -117.24452412456148\n y: 326\n positionAbsolute:\n x: -117.24452412456148\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_array_file: false\n selected: false\n title: Doc Extractor\n type: document-extractor\n variable_selector:\n - '1756442986174'\n - output\n height: 92\n id: '1753349228522'\n position:\n x: -417.5334221022782\n y: 417.25474169825833\n positionAbsolute:\n x: -417.5334221022782\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Notion\n datasource_name: notion_datasource\n datasource_parameters: {}\n plugin_id: langgenius/notion_datasource\n provider_name: notion_datasource\n provider_type: online_document\n selected: false\n title: Notion\n type: datasource\n height: 52\n id: '1754023419266'\n position:\n x: -1369.6904698303242\n y: 440.01452302398053\n positionAbsolute:\n x: -1369.6904698303242\n y: 440.01452302398053\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: file\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836380067'\n - file\n - - '1756442998557'\n - file\n height: 135\n id: '1756442986174'\n position:\n x: -1067.06980963949\n y: 236.10252072775984\n positionAbsolute:\n x: -1067.06980963949\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Google Drive\n datasource_name: google_drive\n datasource_parameters: {}\n plugin_id: langgenius/google_drive\n provider_name: google_drive\n provider_type: online_drive\n selected: false\n title: Google Drive\n type: datasource\n height: 52\n id: '1756442998557'\n position:\n x: -1371.6520723158733\n y: 326\n positionAbsolute:\n x: -1371.6520723158733\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n cases:\n - case_id: 'true'\n conditions:\n - comparison_operator: is\n id: 1581dd11-7898-41f4-962f-937283ba7e01\n value: .xlsx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 92abb46d-d7e4-46e7-a5e1-8a29bb45d528\n value: .xls\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1dde5ae7-754d-4e83-96b2-fe1f02995d8b\n value: .md\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 7e1a80e5-c32a-46a4-8f92-8912c64972aa\n value: .markdown\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 53abfe95-c7d0-4f63-ad37-17d425d25106\n value: .mdx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 436877b8-8c0a-4cc6-9565-92754db08571\n value: .html\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 5e3e375e-750b-4204-8ac3-9a1174a5ab7c\n value: .htm\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1a84a784-a797-4f96-98a0-33a9b48ceb2b\n value: .docx\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 62d11445-876a-493f-85d3-8fc020146bdd\n value: .csv\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 02c4bce8-7668-4ccd-b750-4281f314b231\n value: .txt\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n id: 'true'\n logical_operator: or\n selected: false\n title: IF/ELSE\n type: if-else\n height: 358\n id: '1756443014860'\n position:\n x: -733.5977815139424\n y: 236.10252072775984\n positionAbsolute:\n x: -733.5977815139424\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Jina Reader\n datasource_name: jina_reader\n datasource_parameters:\n crawl_sub_pages:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_subpages\n limit:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_limit\n url:\n type: mixed\n value: '{{#rag.1756896212061.jina_url#}}'\n use_sitemap:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jian_sitemap\n plugin_id: langgenius/jina_datasource\n provider_name: jinareader\n provider_type: website_crawl\n selected: false\n title: Jina Reader\n type: datasource\n height: 52\n id: '1756896212061'\n position:\n x: -1371.6520723158733\n y: 538.9988445953813\n positionAbsolute:\n x: -1371.6520723158733\n y: 538.9988445953813\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Firecrawl\n datasource_name: crawl\n datasource_parameters:\n crawl_subpages:\n type: variable\n value:\n - rag\n - '1756907397615'\n - firecrawl_subpages\n exclude_paths:\n type: mixed\n value: '{{#rag.1756907397615.exclude_paths#}}'\n include_paths:\n type: mixed\n value: '{{#rag.1756907397615.include_paths#}}'\n limit:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_pages\n max_depth:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_depth\n only_main_content:\n type: variable\n value:\n - rag\n - '1756907397615'\n - main_content\n url:\n type: mixed\n value: '{{#rag.1756907397615.firecrawl_url1#}}'\n plugin_id: langgenius/firecrawl_datasource\n provider_name: firecrawl\n provider_type: website_crawl\n selected: false\n title: Firecrawl\n type: datasource\n height: 52\n id: '1756907397615'\n position:\n x: -1371.6520723158733\n y: 644.3296146102903\n positionAbsolute:\n x: -1371.6520723158733\n y: 644.3296146102903\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The file you want to extract QA from.\n ja_JP: The file you want to extract QA from.\n pt_BR: The file you want to extract QA from.\n zh_Hans: 你想要提取 QA 的文件。\n label:\n en_US: Input File\n ja_JP: Input File\n pt_BR: Input File\n zh_Hans: 输入文件\n llm_description: The file you want to extract QA from.\n max: null\n min: null\n name: input_file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Column number for question.\n ja_JP: Column number for question.\n pt_BR: Column number for question.\n zh_Hans: 问题所在的列。\n label:\n en_US: Column number for question\n ja_JP: Column number for question\n pt_BR: Column number for question\n zh_Hans: 问题所在的列\n llm_description: The column number for question, the format of the column\n number must be an integer.\n max: null\n min: null\n name: question_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 1\n form: llm\n human_description:\n en_US: Column number for answer.\n ja_JP: Column number for answer.\n pt_BR: Column number for answer.\n zh_Hans: 答案所在的列。\n label:\n en_US: Column number for answer\n ja_JP: Column number for answer\n pt_BR: Column number for answer\n zh_Hans: 答案所在的列\n llm_description: The column number for answer, the format of the column\n number must be an integer.\n max: null\n min: null\n name: answer_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n params:\n answer_column: ''\n input_file: ''\n question_column: ''\n provider_id: langgenius/qa_chunk/qa_chunk\n provider_name: langgenius/qa_chunk/qa_chunk\n provider_type: builtin\n selected: false\n title: Q&A Processor\n tool_configurations: {}\n tool_description: A tool for QA chunking mode.\n tool_label: QA Chunk\n tool_name: qa_chunk\n tool_node_version: '2'\n tool_parameters:\n answer_column:\n type: constant\n value: 2\n input_file:\n type: variable\n value:\n - '1756912537172'\n - files\n question_column:\n type: constant\n value: 1\n type: tool\n height: 52\n id: '1756912274158'\n position:\n x: 794.2003154321724\n y: 326\n positionAbsolute:\n x: 794.2003154321724\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n context:\n enabled: false\n variable_selector: []\n model:\n completion_params:\n temperature: 0.7\n mode: chat\n name: claude-3-5-sonnet-20240620\n provider: langgenius/anthropic/anthropic\n prompt_template:\n - id: 7f8105aa-a37d-4f5a-b581-babeeb31e833\n role: system\n text: '\n\n Generate a list of Q&A pairs based on {{#1753346901505.output#}}. Present\n the output as a Markdown table, where the first column is serial number,\n the second column is Question, and the third column is Question. Ensure\n that the table format can be easily converted into a CSV file.\n\n Example Output Format:\n\n | Index | Question | Answer |\n\n |-------|-----------|--------|\n\n | 1 | What is the main purpose of the document? | The document explains\n the company''s new product launch strategy. ![image](https://cloud.dify.ai/files/xxxxxxx)\n |\n\n | 2 || When will the product be launched? | The product will be launched\n in Q3 of this year. |\n\n\n Instructions:\n\n Read and understand the input text.\n\n Extract key information and generate meaningful questions and answers.\n\n Preserve any ![image] URLs from the input text in the answers.\n\n Keep questions concise and specific.\n\n Ensure answers are accurate, self-contained, and clear.\n\n Output only the Markdown table without any extra explanation.'\n selected: false\n title: LLM\n type: llm\n vision:\n enabled: false\n height: 88\n id: '1756912504019'\n position:\n x: 184.46657789772178\n y: 326\n positionAbsolute:\n x: 184.46657789772178\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Markdown text\n ja_JP: Markdown text\n pt_BR: Markdown text\n zh_Hans: Markdown格式文本必须为Markdown表格格式\n label:\n en_US: Markdown text\n ja_JP: Markdown text\n pt_BR: Markdown text\n zh_Hans: Markdown格式文本\n llm_description: ''\n max: null\n min: null\n name: md_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Filename of the output file\n ja_JP: Filename of the output file\n pt_BR: Filename of the output file\n zh_Hans: 输出文件名\n label:\n en_US: Filename of the output file\n ja_JP: Filename of the output file\n pt_BR: Filename of the output file\n zh_Hans: 输出文件名\n llm_description: ''\n max: null\n min: null\n name: output_filename\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n md_text: ''\n output_filename: ''\n provider_id: bowenliang123/md_exporter/md_exporter\n provider_name: bowenliang123/md_exporter/md_exporter\n provider_type: builtin\n selected: false\n title: Markdown to CSV file\n tool_configurations: {}\n tool_description: Generate CSV file from Markdown text\n tool_label: Markdown to CSV file\n tool_name: md_to_csv\n tool_node_version: '2'\n tool_parameters:\n md_text:\n type: mixed\n value: '{{#1756912504019.text#}}'\n output_filename:\n type: mixed\n value: LLM Generated Q&A\n type: tool\n height: 52\n id: '1756912537172'\n position:\n x: 484.75465419110174\n y: 326\n positionAbsolute:\n x: 484.75465419110174\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 174\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n LLM-generated Q&A pairs are designed to extract key information from the\n input text and present it in a structured, easy-to-use format. Each pair\n consists of a concise question that captures an important point or detail,\n and a clear, self-contained answer that provides the relevant information\n without requiring additional context. The output is formatted as a Markdown\n table with three columns—Index, Question, and Answer—so that it can be easily\n converted into a CSV file for further processing. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 528\n height: 174\n id: '1756912556940'\n position:\n x: 184.46657789772178\n y: 462.64405262857747\n positionAbsolute:\n x: 184.46657789772178\n y: 462.64405262857747\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 528\n viewport:\n x: 1149.1394490177502\n y: 317.2338302699771\n zoom: 0.4911032886685182\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_reader_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_reader_imit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Crawl sub-pages\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: Crawl_sub_pages_2\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: Use_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_limit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl\n iteratively based on page relevance, yielding fewer but higher-quality pages.\n type: checkbox\n unit: null\n variable: jian_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Crawl subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: jina_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: firecrawl_url1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: firecrawl_subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: firecrawl_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: Exclude paths\n max_length: 256\n options: []\n placeholder: blog/*,/about/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: exclude_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: include_paths\n max_length: 256\n options: []\n placeholder: articles/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: include_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 0\n label: Max depth\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 just scrapes\n the page of the entered url, depth 1 scrapes the url and everything after enteredURL\n + one /, and so on.\n type: number\n unit: null\n variable: max_depth\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: max_pages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: Extract only main content (no headers, navs, footers, etc.)\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: main_content\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: depthtest\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: depthtest\n",
"graph": {
"edges": [
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "variable-aggregator"
},
"id": "1750836391776-source-1753346901505-target",
"selected": false,
"source": "1750836391776",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "document-extractor",
"targetType": "variable-aggregator"
},
"id": "1753349228522-source-1753346901505-target",
"selected": false,
"source": "1753349228522",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1754023419266-source-1753346901505-target",
"selected": false,
"source": "1754023419266",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1756442998557-source-1756442986174-target",
"selected": false,
"source": "1756442998557",
"sourceHandle": "source",
"target": "1756442986174",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInIteration": false,
"isInLoop": false,
"sourceType": "variable-aggregator",
"targetType": "if-else"
},
"id": "1756442986174-source-1756443014860-target",
"selected": false,
"source": "1756442986174",
"sourceHandle": "source",
"target": "1756443014860",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1750836380067-source-1756442986174-target",
"selected": false,
"source": "1750836380067",
"sourceHandle": "source",
"target": "1756442986174",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "if-else",
"targetType": "tool"
},
"id": "1756443014860-true-1750836391776-target",
"selected": false,
"source": "1756443014860",
"sourceHandle": "true",
"target": "1750836391776",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "if-else",
"targetType": "document-extractor"
},
"id": "1756443014860-false-1753349228522-target",
"selected": false,
"source": "1756443014860",
"sourceHandle": "false",
"target": "1753349228522",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1756896212061-source-1753346901505-target",
"source": "1756896212061",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "variable-aggregator"
},
"id": "1756907397615-source-1753346901505-target",
"source": "1756907397615",
"sourceHandle": "source",
"target": "1753346901505",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInIteration": false,
"isInLoop": false,
"sourceType": "variable-aggregator",
"targetType": "llm"
},
"id": "1753346901505-source-1756912504019-target",
"source": "1753346901505",
"sourceHandle": "source",
"target": "1756912504019",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInIteration": false,
"isInLoop": false,
"sourceType": "llm",
"targetType": "tool"
},
"id": "1756912504019-source-1756912537172-target",
"source": "1756912504019",
"sourceHandle": "source",
"target": "1756912537172",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "tool"
},
"id": "1756912537172-source-1756912274158-target",
"source": "1756912537172",
"sourceHandle": "source",
"target": "1756912274158",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "knowledge-index"
},
"id": "1756912274158-source-1750836372241-target",
"source": "1756912274158",
"sourceHandle": "source",
"target": "1750836372241",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
}
],
"nodes": [
{
"data": {
"chunk_structure": "qa_model",
"embedding_model": "jina-embeddings-v2-base-en",
"embedding_model_provider": "langgenius/jina/jina",
"index_chunk_variable_selector": [
"1756912274158",
"result"
],
"indexing_technique": "high_quality",
"keyword_number": 10,
"retrieval_model": {
"hybridSearchMode": "weighted_score",
"reranking_enable": false,
"score_threshold": 0.5,
"score_threshold_enabled": false,
"search_method": "semantic_search",
"top_k": 3,
"vector_setting": {
"embedding_model_name": "jina-embeddings-v2-base-en",
"embedding_provider_name": "langgenius/jina/jina"
}
},
"selected": false,
"title": "Knowledge Base",
"type": "knowledge-index"
},
"height": 114,
"id": "1750836372241",
"position": {
"x": 1150.8369138826617,
"y": 326
},
"positionAbsolute": {
"x": 1150.8369138826617,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "File",
"datasource_name": "upload-file",
"datasource_parameters": {},
"fileExtensions": [
"txt",
"markdown",
"mdx",
"pdf",
"html",
"xlsx",
"xls",
"vtt",
"properties",
"doc",
"docx",
"csv",
"eml",
"msg",
"pptx",
"xml",
"epub",
"ppt",
"md"
],
"plugin_id": "langgenius/file",
"provider_name": "file",
"provider_type": "local_file",
"selected": false,
"title": "File",
"type": "datasource"
},
"height": 52,
"id": "1750836380067",
"position": {
"x": -1371.6520723158733,
"y": 224.87938381325645
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 224.87938381325645
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_team_authorization": true,
"output_schema": {
"properties": {
"documents": {
"description": "the documents extracted from the file",
"items": {
"type": "object"
},
"type": "array"
},
"images": {
"description": "The images extracted from the file",
"items": {
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"zh_Hans": "用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"
},
"label": {
"en_US": "file",
"ja_JP": "file",
"pt_BR": "file",
"zh_Hans": "file"
},
"llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"max": null,
"min": null,
"name": "file",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "file"
}
],
"params": {
"file": ""
},
"provider_id": "langgenius/dify_extractor/dify_extractor",
"provider_name": "langgenius/dify_extractor/dify_extractor",
"provider_type": "builtin",
"selected": false,
"title": "Dify Extractor",
"tool_configurations": {},
"tool_description": "Dify Extractor",
"tool_label": "Dify Extractor",
"tool_name": "dify_extractor",
"tool_node_version": "2",
"tool_parameters": {
"file": {
"type": "variable",
"value": [
"1756442986174",
"output"
]
}
},
"type": "tool"
},
"height": 52,
"id": "1750836391776",
"position": {
"x": -417.5334221022782,
"y": 268.1692071834485
},
"positionAbsolute": {
"x": -417.5334221022782,
"y": 268.1692071834485
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 252,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 1124
},
"height": 252,
"id": "1751252161631",
"position": {
"x": -1371.6520723158733,
"y": -123.758428116601
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": -123.758428116601
},
"selected": true,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 1124
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 388,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 285
},
"height": 388,
"id": "1751252440357",
"position": {
"x": -1723.9942193415582,
"y": 224.87938381325645
},
"positionAbsolute": {
"x": -1723.9942193415582,
"y": 224.87938381325645
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 285
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 430,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is a built-in document parser developed by Dify. It supports a wide range of common file formats and offers specialized handling for certain formats, such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\". In addition to text extraction, it can extract images embedded within documents, store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 430,
"id": "1751253091602",
"position": {
"x": -417.5334221022782,
"y": 546.5283142529594
},
"positionAbsolute": {
"x": -417.5334221022782,
"y": 546.5283142529594
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 336,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts specified columns from tables to generate structured Q&A pairs. Users can independently designate which columns to use for questions and which for answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These pairs are indexed by the question field, so user queries are matched directly against the questions to retrieve the corresponding answers. This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching strategy improves clarity and precision, especially in scenarios involving high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 336,
"id": "1751253953926",
"position": {
"x": 794.2003154321724,
"y": 417.25474169825833
},
"positionAbsolute": {
"x": 794.2003154321724,
"y": 417.25474169825833
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 410,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 410,
"id": "1751254117904",
"position": {
"x": 1150.8369138826617,
"y": 475.88970282568215
},
"positionAbsolute": {
"x": 1150.8369138826617,
"y": 475.88970282568215
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"output_type": "string",
"selected": false,
"title": "Variable Aggregator",
"type": "variable-aggregator",
"variables": [
[
"1750836391776",
"text"
],
[
"1753349228522",
"text"
],
[
"1754023419266",
"content"
],
[
"1756896212061",
"content"
]
]
},
"height": 187,
"id": "1753346901505",
"position": {
"x": -117.24452412456148,
"y": 326
},
"positionAbsolute": {
"x": -117.24452412456148,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_array_file": false,
"selected": false,
"title": "Doc Extractor",
"type": "document-extractor",
"variable_selector": [
"1756442986174",
"output"
]
},
"height": 92,
"id": "1753349228522",
"position": {
"x": -417.5334221022782,
"y": 417.25474169825833
},
"positionAbsolute": {
"x": -417.5334221022782,
"y": 417.25474169825833
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Notion",
"datasource_name": "notion_datasource",
"datasource_parameters": {},
"plugin_id": "langgenius/notion_datasource",
"provider_name": "notion_datasource",
"provider_type": "online_document",
"selected": false,
"title": "Notion",
"type": "datasource"
},
"height": 52,
"id": "1754023419266",
"position": {
"x": -1369.6904698303242,
"y": 440.01452302398053
},
"positionAbsolute": {
"x": -1369.6904698303242,
"y": 440.01452302398053
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"output_type": "file",
"selected": false,
"title": "Variable Aggregator",
"type": "variable-aggregator",
"variables": [
[
"1750836380067",
"file"
],
[
"1756442998557",
"file"
]
]
},
"height": 135,
"id": "1756442986174",
"position": {
"x": -1067.06980963949,
"y": 236.10252072775984
},
"positionAbsolute": {
"x": -1067.06980963949,
"y": 236.10252072775984
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Google Drive",
"datasource_name": "google_drive",
"datasource_parameters": {},
"plugin_id": "langgenius/google_drive",
"provider_name": "google_drive",
"provider_type": "online_drive",
"selected": false,
"title": "Google Drive",
"type": "datasource"
},
"height": 52,
"id": "1756442998557",
"position": {
"x": -1371.6520723158733,
"y": 326
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"cases": [
{
"case_id": "true",
"conditions": [
{
"comparison_operator": "is",
"id": "1581dd11-7898-41f4-962f-937283ba7e01",
"value": ".xlsx",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "92abb46d-d7e4-46e7-a5e1-8a29bb45d528",
"value": ".xls",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "1dde5ae7-754d-4e83-96b2-fe1f02995d8b",
"value": ".md",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "7e1a80e5-c32a-46a4-8f92-8912c64972aa",
"value": ".markdown",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "53abfe95-c7d0-4f63-ad37-17d425d25106",
"value": ".mdx",
"varType": "string",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "436877b8-8c0a-4cc6-9565-92754db08571",
"value": ".html",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "5e3e375e-750b-4204-8ac3-9a1174a5ab7c",
"value": ".htm",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "1a84a784-a797-4f96-98a0-33a9b48ceb2b",
"value": ".docx",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "62d11445-876a-493f-85d3-8fc020146bdd",
"value": ".csv",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
},
{
"comparison_operator": "is",
"id": "02c4bce8-7668-4ccd-b750-4281f314b231",
"value": ".txt",
"varType": "file",
"variable_selector": [
"1756442986174",
"output",
"extension"
]
}
],
"id": "true",
"logical_operator": "or"
}
],
"selected": false,
"title": "IF/ELSE",
"type": "if-else"
},
"height": 358,
"id": "1756443014860",
"position": {
"x": -733.5977815139424,
"y": 236.10252072775984
},
"positionAbsolute": {
"x": -733.5977815139424,
"y": 236.10252072775984
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Jina Reader",
"datasource_name": "jina_reader",
"datasource_parameters": {
"crawl_sub_pages": {
"type": "variable",
"value": [
"rag",
"1756896212061",
"jina_subpages"
]
},
"limit": {
"type": "variable",
"value": [
"rag",
"1756896212061",
"jina_limit"
]
},
"url": {
"type": "mixed",
"value": "{{#rag.1756896212061.jina_url#}}"
},
"use_sitemap": {
"type": "variable",
"value": [
"rag",
"1756896212061",
"jian_sitemap"
]
}
},
"plugin_id": "langgenius/jina_datasource",
"provider_name": "jinareader",
"provider_type": "website_crawl",
"selected": false,
"title": "Jina Reader",
"type": "datasource"
},
"height": 52,
"id": "1756896212061",
"position": {
"x": -1371.6520723158733,
"y": 538.9988445953813
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 538.9988445953813
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "Firecrawl",
"datasource_name": "crawl",
"datasource_parameters": {
"crawl_subpages": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"firecrawl_subpages"
]
},
"exclude_paths": {
"type": "mixed",
"value": "{{#rag.1756907397615.exclude_paths#}}"
},
"include_paths": {
"type": "mixed",
"value": "{{#rag.1756907397615.include_paths#}}"
},
"limit": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"max_pages"
]
},
"max_depth": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"max_depth"
]
},
"only_main_content": {
"type": "variable",
"value": [
"rag",
"1756907397615",
"main_content"
]
},
"url": {
"type": "mixed",
"value": "{{#rag.1756907397615.firecrawl_url1#}}"
}
},
"plugin_id": "langgenius/firecrawl_datasource",
"provider_name": "firecrawl",
"provider_type": "website_crawl",
"selected": false,
"title": "Firecrawl",
"type": "datasource"
},
"height": 52,
"id": "1756907397615",
"position": {
"x": -1371.6520723158733,
"y": 644.3296146102903
},
"positionAbsolute": {
"x": -1371.6520723158733,
"y": 644.3296146102903
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_team_authorization": true,
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "The file you want to extract QA from.",
"ja_JP": "The file you want to extract QA from.",
"pt_BR": "The file you want to extract QA from.",
"zh_Hans": "你想要提取 QA 的文件。"
},
"label": {
"en_US": "Input File",
"ja_JP": "Input File",
"pt_BR": "Input File",
"zh_Hans": "输入文件"
},
"llm_description": "The file you want to extract QA from.",
"max": null,
"min": null,
"name": "input_file",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "file"
},
{
"auto_generate": null,
"default": 0,
"form": "llm",
"human_description": {
"en_US": "Column number for question.",
"ja_JP": "Column number for question.",
"pt_BR": "Column number for question.",
"zh_Hans": "问题所在的列。"
},
"label": {
"en_US": "Column number for question",
"ja_JP": "Column number for question",
"pt_BR": "Column number for question",
"zh_Hans": "问题所在的列"
},
"llm_description": "The column number for question, the format of the column number must be an integer.",
"max": null,
"min": null,
"name": "question_column",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": 1,
"form": "llm",
"human_description": {
"en_US": "Column number for answer.",
"ja_JP": "Column number for answer.",
"pt_BR": "Column number for answer.",
"zh_Hans": "答案所在的列。"
},
"label": {
"en_US": "Column number for answer",
"ja_JP": "Column number for answer",
"pt_BR": "Column number for answer",
"zh_Hans": "答案所在的列"
},
"llm_description": "The column number for answer, the format of the column number must be an integer.",
"max": null,
"min": null,
"name": "answer_column",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "number"
}
],
"params": {
"answer_column": "",
"input_file": "",
"question_column": ""
},
"provider_id": "langgenius/qa_chunk/qa_chunk",
"provider_name": "langgenius/qa_chunk/qa_chunk",
"provider_type": "builtin",
"selected": false,
"title": "Q&A Processor",
"tool_configurations": {},
"tool_description": "A tool for QA chunking mode.",
"tool_label": "QA Chunk",
"tool_name": "qa_chunk",
"tool_node_version": "2",
"tool_parameters": {
"answer_column": {
"type": "constant",
"value": 2
},
"input_file": {
"type": "variable",
"value": [
"1756912537172",
"files"
]
},
"question_column": {
"type": "constant",
"value": 1
}
},
"type": "tool"
},
"height": 52,
"id": "1756912274158",
"position": {
"x": 794.2003154321724,
"y": 326
},
"positionAbsolute": {
"x": 794.2003154321724,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"context": {
"enabled": false,
"variable_selector": []
},
"model": {
"completion_params": {
"temperature": 0.7
},
"mode": "chat",
"name": "claude-3-5-sonnet-20240620",
"provider": "langgenius/anthropic/anthropic"
},
"prompt_template": [
{
"id": "7f8105aa-a37d-4f5a-b581-babeeb31e833",
"role": "system",
"text": "\nGenerate a list of Q&A pairs based on {{#1753346901505.output#}}. Present the output as a Markdown table, where the first column is serial number, the second column is Question, and the third column is Question. Ensure that the table format can be easily converted into a CSV file.\nExample Output Format:\n| Index | Question | Answer |\n|-------|-----------|--------|\n| 1 | What is the main purpose of the document? | The document explains the company's new product launch strategy. ![image](https://cloud.dify.ai/files/xxxxxxx) |\n| 2 || When will the product be launched? | The product will be launched in Q3 of this year. |\n\nInstructions:\nRead and understand the input text.\nExtract key information and generate meaningful questions and answers.\nPreserve any ![image] URLs from the input text in the answers.\nKeep questions concise and specific.\nEnsure answers are accurate, self-contained, and clear.\nOutput only the Markdown table without any extra explanation."
}
],
"selected": false,
"title": "LLM",
"type": "llm",
"vision": {
"enabled": false
}
},
"height": 88,
"id": "1756912504019",
"position": {
"x": 184.46657789772178,
"y": 326
},
"positionAbsolute": {
"x": 184.46657789772178,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_team_authorization": true,
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "Markdown text",
"ja_JP": "Markdown text",
"pt_BR": "Markdown text",
"zh_Hans": "Markdown格式文本必须为Markdown表格格式"
},
"label": {
"en_US": "Markdown text",
"ja_JP": "Markdown text",
"pt_BR": "Markdown text",
"zh_Hans": "Markdown格式文本"
},
"llm_description": "",
"max": null,
"min": null,
"name": "md_text",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "Filename of the output file",
"ja_JP": "Filename of the output file",
"pt_BR": "Filename of the output file",
"zh_Hans": "输出文件名"
},
"label": {
"en_US": "Filename of the output file",
"ja_JP": "Filename of the output file",
"pt_BR": "Filename of the output file",
"zh_Hans": "输出文件名"
},
"llm_description": "",
"max": null,
"min": null,
"name": "output_filename",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
}
],
"params": {
"md_text": "",
"output_filename": ""
},
"provider_id": "bowenliang123/md_exporter/md_exporter",
"provider_name": "bowenliang123/md_exporter/md_exporter",
"provider_type": "builtin",
"selected": false,
"title": "Markdown to CSV file",
"tool_configurations": {},
"tool_description": "Generate CSV file from Markdown text",
"tool_label": "Markdown to CSV file",
"tool_name": "md_to_csv",
"tool_node_version": "2",
"tool_parameters": {
"md_text": {
"type": "mixed",
"value": "{{#1756912504019.text#}}"
},
"output_filename": {
"type": "mixed",
"value": "LLM Generated Q&A"
}
},
"type": "tool"
},
"height": 52,
"id": "1756912537172",
"position": {
"x": 484.75465419110174,
"y": 326
},
"positionAbsolute": {
"x": 484.75465419110174,
"y": 326
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 174,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The LLM-generated Q&A pairs are designed to extract key information from the input text and present it in a structured, easy-to-use format. Each pair consists of a concise question that captures an important point or detail, and a clear, self-contained answer that provides the relevant information without requiring additional context. The output is formatted as a Markdown table with three columns—Index, Question, and Answer—so that it can be easily converted into a CSV file for further processing. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 528
},
"height": 174,
"id": "1756912556940",
"position": {
"x": 184.46657789772178,
"y": 462.64405262857747
},
"positionAbsolute": {
"x": 184.46657789772178,
"y": 462.64405262857747
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 528
}
],
"viewport": {
"x": 1149.1394490177502,
"y": 317.2338302699771,
"zoom": 0.4911032886685182
}
},
"icon_info": {
"icon": "e4ea16ed-9690-4de9-ab80-5b622ecbcc04",
"icon_background": null,
"icon_type": "image",
"icon_url": ""
},
"id": "98374ab6-9dcd-434d-983e-268bec156b43",
"name": "LLM Generated Q&A",
"icon": {
"icon": "e4ea16ed-9690-4de9-ab80-5b622ecbcc04",
"icon_background": null,
"icon_type": "image",
"icon_url": ""
},
"language": "zh-Hans",
"position": 5
},
{
"chunk_structure": "hierarchical_model",
"description": "This knowledge pipeline uses LLMs to extract content from images and tables in documents and automatically generate descriptive annotations for contextual enrichment.",
"export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/mineru:0.5.0@ca04f2dceb4107e3adf24839756954b7c5bcb7045d035dbab5821595541c093d\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/anthropic:0.2.0@a776815b091c81662b2b54295ef4b8a54b5533c2ec1c66c7c8f2feea724f3248\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: e642577f-da15-4c03-81b9-c9dec9189a3c\n icon_background: null\n icon_type: image\n icon_url: data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAP9UlEQVR4Ae2dTXPbxhnHdwFRr5ZN2b1kJraouk57i\/IJrJx6jDPT9Fpnkrvj3DOOv0DsXDvJxLk2nUnSW09hPkGc6aWdOBEtpZNLE9Gy3iiSQJ\/\/gg8DQnyFFiAAPjtDLbAA9uWPn5595VKrjLjtn\/YqrZaq+L6quL5X9pQqO1qtI3u+0mXy8MFJxfihP1qrss\/XQ+FFPtRK1UmreriMJkz\/GqaVX8N1z1dPHdyvnZpP1+fmVG3jhTVzDden6SjP6brt7b1y21VbWnk3CawKAbWp9Fmo0s3VbKamffWYgKz5vv+t1s5jt62qGxtrPVAnrUwqAH63u7dF\/4E3qaBbVCB8zjjHcZRDJs91XaXJpOGDMDgSx5zj2HWDMByz4\/v5fBZ80lLhE3Y498jcsfO8Nt1DlYbvmXs9L\/DbbY\/uozqmjwOUSvvVtuN8+tKLa4\/73GI1KDEAYek8x7vta\/0a5XiLcw1Y5uZcAxpgK5VKXeD4HvHTUaDdbivA2Go1yW+rZrPVkzDBUSOk7\/\/u2m8e9VyweGIdQAPenLpD\/3LvcLsM0C0szBNs8wY+nIvLpgKA8PS0YWBkKwkQyUo8un517b7tXFsl4cnO\/25p33lA7YoKMloqzanFxSXj2864xJe8Ao3GaRdGpAYQbVtEKwCS1au0Xf8TyuMWMirgQYXiOFjFw8PDcLvxC7ek79roSZ8bwO3dvTue77+P6hZV69LSElm9heKoLyXpKgCLeHx8zCBSb9m7e972YWwATVvPVfeoL\/YOcjg\/X1IrKyvd3mo313JQKAXQLgSEgBGO3v\/DG9eu3I1byFgAosr1HP9zauttitWLK32+nzs5aRgQMfSDoRtnXr8ep0qeGMAOfF+ho4FxuosXV7vjdfmWVHI\/qQKwhvv7z02VTCDVnJJ+dVIIJwIwDB\/G8FZXLwh8k761gt0PCJ8\/PzDjiHEgHBvAKHywfDKeVzCaYhYH1TAsIQazJ4VwLAAFvphvZoYeiwvh2YnVPqJ1OhwVVLti+foIJEGmNgQbYISG5Creqf85Ga7yKGlGAvj9zh5mNjbR4UCbT6rdUZLO7nWwwf0CMNNyvXuj1BhaBdPU2m2lnE8Q8aVLF6XDMUpNuW4UQMfk2bN9swKHqua7N9avPBwkzUAATbvP9b\/BDMfy8rLMbgxSUML7KoBxwqOjI1yr07TdK4OGZwZWwTS3+wDwYRWLTK311VgChygAZjA7Rq7cbpp1An3v7gtgUPWqW2j3YW5XnCgQR4HQ1OzWk529W\/3i6AsgLakyjUfAx6uS+z0sYaLAMAXQd2ADRt9PedCvV3wGwO939+7xNBuqX3GiwHkUQFWM5XnUnKu0HM8sXAnHdwZA+grVbdwA8ylOFLChABYlw5FFvBO1gj0Aou0H6wdi8REnCthQIMRTmazg7XCcPQBy229+XhaUhkWS4\/MrELKC+JJa13UB3P5xb1Pafl1d5MCyArCC6JSQ28LXdDn6LoD09bzbCJSql6UR37YC3U6t521x3F0AtaNvIlCqX5ZGfNsK4Gu5cGQJDWs4NgCiZ0JLujYRIBYQKohLQgFsSMDVMPeGDYBtt72FBAW+JGSXOFkBwAcI4bA\/EHwDoO9rY\/0cJ7iIC+JEgSQUwHpB4\/ygHWgAJDJfRiD2aREnCiSpAANodkajhDoAqgoS7bfzFMLFiQK2FGAjR7WxMXqdKjjogDCdthKTeESBqAKdTgiCK\/jjUG8kOOjsxYdAcaJAUgoAQF5hhV1xndacVL9JiS3x9leArSC2ZHa03y7jNg7s\/4iEigL2FOChGGIPAOoKosY2uOJEgTQUYGNHw39lB7vRI1HszyxOFEhDAQaQ0io7fqc3EgpMIw+SxgwrwJ0QRzvr3XpXAJxhIqZYdKp59TrSl2m4Kb6FGUuajR3trLvWtYAzpoEUd4oKcIeXhgQvCYBTfBGStFJzm\/\/EWkDqiiw1qR6W1TC7r11JlIurX\/6caPy5iJx+uUkd7SOrFYfgM8MwNBKYi7xLJoulgFTBxXqfuSuNAJi7V1asDM99+8fLpvYtly91VykUq4jDSzPtNpntNme0PLbjH67meFexf2C9Hmx8QMOAwVQcj82MF4XcJQrEVyDEmpmKk9Uw8bWUJ2Mo0ANgjOflEVHAmgLSCbEmpUQURwEBMI5q8ow1BQRAa1JKRHEUyAWAPx7Rj+I1afpGXOEUyAWAn+2cqI9\/aBROfCkQLT\/Iugiwfp\/tNtRH3x+LFcz6y4qRv8wDCOu3a6pgX6xgjBec9UcyDSBbPxZRrCArURw\/0wCy9WO595tiBVmLoviZBTBq\/VhwsYKsRDH8zAIYtX4st1hBVqIYfiYBHGT9WHKxgqxE\/v1MAjjI+rHcYgVZifz7mfo5pACsE\/XRDycjlYUVhPvT1QV1dTmT\/0cjyyA30LfisiBCFzwz2Ezf0BvD4ZkP\/n2k\/kbjhH++tiggjqFZFm+ZKoBxwIuKiPaigBhVJT\/n+snOL8bkXL68llqubYA3KLMvUnU8iUVM+zsU0fQGlaPw4Yd1U8RULWCS4PELE4vISuTDT7X1DgCxC8OlUvLJ\/pqWfOE+yyimagFRPb77h2VTRaLz8PfdU1po0Laqz8WSVm\/9dlG9fX1J4VhcthVIFUCWIgkQ8wqe7e\/tRtuYtuPnd3he\/5dfglpwKgBy5m2AmFfwWINZ96cKIIsfBfFjGohGG26YE\/CGqZOfa5kAkOViENFy++A\/wUwHX4v6b1Eb793fL0WD5TxnCiTfHY0hCOAa1oF4cdlVb9AUnLj8K3AuAD\/baSh8bDvA9zb1ZAe5N67J\/O8gbfIWHrsKBnjvfnPQLS+gsOlgBbEoIdoWFOtnU+XpxxXLAkbhA4i2LeEgKyjWb\/rQ2MzBxABG4ePMJAFhtC0o1o\/VLo4\/EYCD4GM5bEMYtYJi\/Vjp4vhjAzgKPpbENoRsBcX6scLF8sfqhIwLH0sDCOFsdEzYCvq0lausfGaFi+OPBHBS+FgamxDCCj4bMTPC6YqfLwWGAhgXPpbAFoSwgviIK54CA9uA54WPpbLdJuR4xS+GAn0BtAUfSyQQshLiRxU4A6Bt+DhBgZCVED+sQA+AScHHCQqErIT4rEAXwKTh4wQFQlZCfChgesH\/+G9DvfdDenswA0I4G+OEJiL5k1sFHAPfvw5TL4BYwtQlz2SCzntTgI+VEAhZidn1u23AaUkgEE5L+WykO3UAIYNAmA0YppGLTAAoEE7j1WcjzcwAKBBmA4i0c5EpAAXCtF\/\/9NPLHIAC4fShSDMHmQRQIEwTgemmlVkABcLpgpFW6pkGUCBMC4PppZN5AAXC6cGRRsq5AFAgTAOF6aSRGwAFwukAknSquQJQIEwah\/Tjzx2AAmH6kCSZYi4BFAiTRCLduHMLoECYLihJpUYA6uAna+j3O\/LoZClX\/t4afium4+oEoJ9rAFEQgZDfZz78MIB65a9PtinbFbV0USkn1zWyFfWT\/l2N6O94WMl03iLx6QtwR\/vIdU2Iy9vLK1h+BcCCvdC8FUcAzNsbK0J+u50QXcfvBX9FZdpaXV1VpdLQ3dqKUHQpQwYUaDZb6vnz58hJVSxgBl7ILGcBAJphmFDXeJb1kLKnrIDj+f4zpOmjayxOFEhBAc8LfiNaKy3DMCnoLUlEFOj2QSjcoZ2Xa7jueWIBoYO45BXg2tbzvaeY+zBtQM\/rzs8lnwNJYaYVCPU36k5bd+aClQA401SkWHiubbV2ao7Wbg1pt1pBwzDFfEhSM6oAW0Bfq7oz1wragBw4o5pIsVNUoN0O+htzc7QYYWNjrYa0YRYFwhTfwgwnxVXwxgtrnWEYX6zgDPOQatG5qad99RgJB1NxOjhpNpupZkYSmz0FeBCaKuGnKH0AoO+bE6Zz9mSREqelQKvV6iTlhy2gX0Uo09m5QzxRwLoC7XZnGk47vwLott0qUoIFlI6Idc0lwpACWIoF57ZVFb6pgqknjNmQKuCTahiyiEtCAYYPHZAOc502IKVG8H2NRE9PT5NIW+IUBYithlHBVwFrOAk6IebIqcITAKGCuCQUYAvoec4jjr8L4I2ra1UKNNUw38g3iS8KnFeBRqNhJjuw+uqljTXTAUGcXQBxon3\/S\/gnJ8fwxIkC1hTgmtVX+n440h4AHTKNRGgdFlCsYFgmOT6PAswTrN\/vrq09CsfVAyB6JrRE\/0PcIFYwLJMcn0eBw8Pg11iJrU+j8RCUvW57e6\/sOf43tFSmsry8pBYXF3tvkDNRYAIF0PY7PDxSsH7Xr13eiD7aYwFxEVbQ1\/oujo+PT2RgGkKIi6UAll2BIbho248jPAMgLlA9\/QV5pkd8cJD+j1lz5sTPtwJoxnWWXn0RbftxyfoCiItuW79JZpM6JE1qDwYU80PiiwKjFDg5aahG4xRVb90tBTVqv2cGAkhVcU35QZcZZpRXsfaLRMJEgbACQdUbDOVR1XsXC0\/D18PHAwHETdfX1x5SI\/BDzBFjLw+BMCydHPdTAIyAFbOohdgZVPXys2Qhh7tOr\/gr6hVvuq6rLl5cVVqPfGx4pHK1kAoAuv19GKo2TWqox9fXL78yqqBDLSAeRq\/Y8fTrFGENESMBQ\/eomOX6TCnQAx8NuTjz+vVxBBjblJElrND4ICxhRSzhONLOzj1n4CvpV4e1+8LKjA0gHopCeOHCBeW6I41oOD05LpgCaPMdHBwE1S4s3wTwQYqJAMQDYQgd2tgDG1sKhFBm9hx3ODDWRyBNDB8UmxhAPNSB8HN0TNAhWVpalCk7CDNDDuN8x8fHpj+ADgfafONWu2GZYgHIETx5+vND6hLfwfnCwjxBuCTWkMUpqI\/2HhYXnJ52vsJLQy2u57yPzmqcIp8LQCT4ZGfvtlb+A9raqIwqGdZwYWEhTl7kmYwr0GP1aIaDVrfcv7F+5eF5sn1uAJE4quS2qx7QlPMtnAPElZUV2fQcYhTAYT0f5nVDa0SrNL32ZpwqNyqHFQA5UmMNff8ehmoQhl335+fnxSKyQDnzo+ARLDVMrXUWq1gpjVUAOUffPf35fUfpvzCIsIgBjAtiFVmkDPpo3+Fruc3mqVlIgHM4gsQsVJ7znIdx23qDipsIgJxY1CJyOGDEYPYc7c\/lOPBdviR+SgoALnyw2gkzXPj02Zigqn39peOpR7bB42ImCiAnsv3j3iaNGVFnRd\/E0A2Hh31YSYwnYlgHx\/D5A0jZBdd7s8338T2z4DNA0bJibA4O+zCzBeOt93DOkPEWadHn6bxK931NL6Ha+aZkn1vsBfW+SXvxDoyJOixl6rBskUAYQ3yZxpAqg6AcGIlcsKMAtuXDzmjYnEo7VWyXkZSlG5Th1AEclJHtn\/YqtHFShYAsA0pPeWXawn8d91PDt0KecbiOIR8+h0\/G8kxY+HoRj+nF1cmg1c+UTQd7PVJ4nYbHzHXaf\/6po5x6m7bEJa1q2JnURg\/2TNoxAv4PoGedQHqhulIAAAAASUVORK5CYII=\n name: Contextual Enrichment Using LLM\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751336942081-source-1750400198569-target\n selected: false\n source: '1751336942081'\n sourceHandle: source\n target: '1750400198569'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: llm\n targetType: tool\n id: 1758002850987-source-1751336942081-target\n source: '1758002850987'\n sourceHandle: source\n target: '1751336942081'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1756915693835-source-1758027159239-target\n source: '1756915693835'\n sourceHandle: source\n target: '1758027159239'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: llm\n id: 1758027159239-source-1758002850987-target\n source: '1758027159239'\n sourceHandle: source\n target: '1758002850987'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius\/jina\/jina\n index_chunk_variable_selector:\n - '1751336942081'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius\/jina\/jina\n score_threshold: 0\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n weights: null\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 474.7618603027596\n y: 282\n positionAbsolute:\n x: 474.7618603027596\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 458\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 5 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Text Input, Online Drive, Online Doc, and Web Crawler. Different\n types of Data Sources have different input and output types. The output\n of File Upload and Online Drive are files, while the output of Online Doc\n and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 458\n id: '1751264451381'\n position:\n x: -893.2836123260277\n y: 378.2537898330178\n positionAbsolute:\n x: -893.2836123260277\n y: 378.2537898330178\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n \u2192 use extractor to extract document content \u2192 split and clean content into\n structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -704.0614991386192\n y: -73.30453110517956\n positionAbsolute:\n x: -704.0614991386192\n y: -73.30453110517956\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 304\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n is an advanced open-source document extractor designed specifically to convert\n complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into\n high-quality, machine-readable formats like Markdown and JSON. MinerU addresses\n challenges in document parsing such as layout detection, formula recognition,\n and multi-language support, which are critical for generating high-quality\n training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 304\n id: '1751266402561'\n position:\n x: -555.2228329530462\n y: 592.0458661166498\n positionAbsolute:\n x: -555.2228329530462\n y: 592.0458661166498\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such\n as a paragraph, a section, or even an entire document\u2014that include the matched\n child chunks are then retrieved. These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 153.2996965006646\n y: 378.2537898330178\n positionAbsolute:\n x: 153.2996965006646\n y: 378.2537898330178\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only\n support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 482.3389174180554\n y: 437.9839361130071\n positionAbsolute:\n x: 482.3389174180554\n y: 437.9839361130071\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks result\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conte\u00fado de Entrada\n zh_Hans: \u8f93\u5165\u6587\u672c\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em par\u00e1grafos com base no separador e no comprimento\n m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuper\u00e1-lo.\n zh_Hans: \u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: \u7236\u5757\u6a21\u5f0f\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Par\u00e1grafo\n zh_Hans: \u6bb5\u843d\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: \u5168\u6587\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: \u7236\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento m\u00e1ximo para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento M\u00e1ximo do Bloco Pai\n zh_Hans: \u6700\u5927\u7236\u5757\u957f\u5ea6\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. '\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento m\u00e1ximo para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento M\u00e1ximo de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espa\u00e7os extras no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espa\u00e7os consecutivos, novas linhas e guias\n zh_Hans: \u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: \u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_name: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1758002850987.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n type: tool\n height: 52\n id: '1751336942081'\n position:\n x: 144.55897745117755\n y: 282\n positionAbsolute:\n x: 144.55897745117755\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 446\n selected: true\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"In\n this step, the LLM is responsible for enriching and reorganizing content,\n along with images and tables. The goal is to maintain the integrity of image\n URLs and tables while providing contextual descriptions and summaries to\n enhance understanding. The content should be structured into well-organized\n paragraphs, using double newlines to separate them. The LLM should enrich\n the document by adding relevant descriptions for images and extracting key\n insights from tables, ensuring the content remains easy to retrieve within\n a Retrieval-Augmented Generation (RAG) system. The final output should preserve\n the original structure, making it more accessible for knowledge retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 446\n id: '1753967810859'\n position:\n x: -176.67459682201036\n y: 405.2790698865377\n positionAbsolute:\n x: -176.67459682201036\n y: 405.2790698865377\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - pdf\n - doc\n - docx\n - pptx\n - ppt\n - jpg\n - png\n - jpeg\n plugin_id: langgenius\/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1756915693835'\n position:\n x: -893.2836123260277\n y: 282\n positionAbsolute:\n x: -893.2836123260277\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n context:\n enabled: false\n variable_selector: []\n model:\n completion_params:\n temperature: 0.7\n mode: chat\n name: claude-3-5-sonnet-20240620\n provider: langgenius\/anthropic\/anthropic\n prompt_template:\n - id: beb97761-d30d-4549-9b67-de1b8292e43d\n role: system\n text: \"You are an AI document assistant. \\nYour tasks are:\\nEnrich the content\\\n \\ contextually:\\nAdd meaningful descriptions for each image.\\nSummarize\\\n \\ key information from each table.\\nOutput the enriched content\u00a0with clear\\\n \\ annotations showing the\u00a0corresponding image and table positions, so\\\n \\ the text can later be aligned back into the original document. Preserve\\\n \\ any ![image] URLs from the input text.\\nYou will receive two inputs:\\n\\\n The file and text\u00a0(may contain images url and tables).\\nThe final output\\\n \\ should be a\u00a0single, enriched version of the original document with ![image]\\\n \\ url preserved.\\nGenerate output directly without saying words like:\\\n \\ Here's the enriched version of the original text with the image description\\\n \\ inserted.\"\n - id: f92ef0cd-03a7-48a7-80e8-bcdc965fb399\n role: user\n text: The file is {{#1756915693835.file#}} and the text are\u00a0{{#1758027159239.text#}}.\n selected: false\n title: LLM\n type: llm\n vision:\n configs:\n detail: high\n variable_selector:\n - '1756915693835'\n - file\n enabled: true\n height: 88\n id: '1758002850987'\n position:\n x: -176.67459682201036\n y: 282\n positionAbsolute:\n x: -176.67459682201036\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: \u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)\n pt_BR: The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n zh_Hans: \u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: The file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: (For local deployment v1 and v2) Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v1\u3068v2\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044\n pt_BR: (For local deployment v1 and v2) Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v1\u548cv2\u7248\u672c\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr\n label:\n en_US: parse method\n ja_JP: \u89e3\u6790\u65b9\u6cd5\n pt_BR: parse method\n zh_Hans: \u89e3\u6790\u65b9\u6cd5\n llm_description: (For local deployment v1 and v2) Parsing method, can be\n auto, ocr, or txt. Default is auto. If results are not satisfactory, try\n ocr\n max: null\n min: null\n name: parse_method\n options:\n - icon: ''\n label:\n en_US: auto\n ja_JP: auto\n pt_BR: auto\n zh_Hans: auto\n value: auto\n - icon: ''\n label:\n en_US: ocr\n ja_JP: ocr\n pt_BR: ocr\n zh_Hans: ocr\n value: ocr\n - icon: ''\n label:\n en_US: txt\n ja_JP: txt\n pt_BR: txt\n zh_Hans: txt\n value: txt\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API and local deployment v2) Whether to enable formula\n recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API and local deployment v2) Whether to enable formula\n recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n label:\n en_US: Enable formula recognition\n ja_JP: \u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable formula recognition\n zh_Hans: \u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n llm_description: (For official API and local deployment v2) Whether to enable\n formula recognition\n max: null\n min: null\n name: enable_formula\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API and local deployment v2) Whether to enable table\n recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API and local deployment v2) Whether to enable table\n recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b\n label:\n en_US: Enable table recognition\n ja_JP: \u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable table recognition\n zh_Hans: \u5f00\u542f\u8868\u683c\u8bc6\u522b\n llm_description: (For official API and local deployment v2) Whether to enable\n table recognition\n max: null\n min: null\n name: enable_table\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: '(For official API and local deployment v2) Specify document language,\n default ch, can be set to auto(local deployment need to specify the\n language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u3067\u306f\u8a00\u8a9e\u3092\u6307\u5b9a\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3059\uff09\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5\n pt_BR: '(For official API and local deployment v2) Specify document language,\n default ch, can be set to auto(local deployment need to specify the\n language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n zh_Hans: \uff08\u4ec5\u9650\u5b98\u65b9api\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff08\u672c\u5730\u90e8\u7f72\u9700\u8981\u6307\u5b9a\u660e\u786e\u7684\u8bed\u8a00\uff0c\u9ed8\u8ba4ch\uff09\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5\n label:\n en_US: Document language\n ja_JP: \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\n pt_BR: Document language\n zh_Hans: \u6587\u6863\u8bed\u8a00\n llm_description: '(For official API and local deployment v2) Specify document\n language, default ch, can be set to auto(local deployment need to specify\n the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n max: null\n min: null\n name: language\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 0\n form: form\n human_description:\n en_US: (For official API) Whether to enable OCR recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable OCR recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b\n label:\n en_US: Enable OCR recognition\n ja_JP: OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable OCR recognition\n zh_Hans: \u5f00\u542fOCR\u8bc6\u522b\n llm_description: (For official API) Whether to enable OCR recognition\n max: null\n min: null\n name: enable_ocr\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: '[]'\n form: form\n human_description:\n en_US: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059\n pt_BR: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a\n label:\n en_US: Extra export formats\n ja_JP: \u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\n pt_BR: Extra export formats\n zh_Hans: \u989d\u5916\u5bfc\u51fa\u683c\u5f0f\n llm_description: '(For official API) Example: [\"docx\",\"html\"], markdown,\n json are the default export formats, no need to set, this parameter only\n supports one or more of docx, html, latex'\n max: null\n min: null\n name: extra_formats\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: pipeline\n form: form\n human_description:\n en_US: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306fpipeline\n pt_BR: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u793a\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\uff0c\u9ed8\u8ba4\u503c\u4e3apipeline\n label:\n en_US: Backend type\n ja_JP: \u30d0\u30c3\u30af\u30a8\u30f3\u30c9\u30bf\u30a4\u30d7\n pt_BR: Backend type\n zh_Hans: \u89e3\u6790\u540e\u7aef\n llm_description: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n max: null\n min: null\n name: backend\n options:\n - icon: ''\n label:\n en_US: pipeline\n ja_JP: pipeline\n pt_BR: pipeline\n zh_Hans: pipeline\n value: pipeline\n - icon: ''\n label:\n en_US: vlm-transformers\n ja_JP: vlm-transformers\n pt_BR: vlm-transformers\n zh_Hans: vlm-transformers\n value: vlm-transformers\n - icon: ''\n label:\n en_US: vlm-sglang-engine\n ja_JP: vlm-sglang-engine\n pt_BR: vlm-sglang-engine\n zh_Hans: vlm-sglang-engine\n value: vlm-sglang-engine\n - icon: ''\n label:\n en_US: vlm-sglang-client\n ja_JP: vlm-sglang-client\n pt_BR: vlm-sglang-client\n zh_Hans: vlm-sglang-client\n value: vlm-sglang-client\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: ''\n form: form\n human_description:\n en_US: '(For local deployment v2 when backend is vlm-sglang-client) Example:\n http:\/\/127.0.0.1:8000, default is empty'\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528 \u89e3\u6790\u5f8c\u7aef\u304cvlm-sglang-client\u306e\u5834\u5408\uff09\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306f\u7a7a\n pt_BR: '(For local deployment v2 when backend is vlm-sglang-client) Example:\n http:\/\/127.0.0.1:8000, default is empty'\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c \u89e3\u6790\u540e\u7aef\u4e3avlm-sglang-client\u65f6\uff09\u793a\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\uff0c\u9ed8\u8ba4\u503c\u4e3a\u7a7a\n label:\n en_US: sglang-server url\n ja_JP: sglang-server\u30a2\u30c9\u30ec\u30b9\n pt_BR: sglang-server url\n zh_Hans: sglang-server\u5730\u5740\n llm_description: '(For local deployment v2 when backend is vlm-sglang-client)\n Example: http:\/\/127.0.0.1:8000, default is empty'\n max: null\n min: null\n name: sglang_server_url\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n backend: ''\n enable_formula: ''\n enable_ocr: ''\n enable_table: ''\n extra_formats: ''\n file: ''\n language: ''\n parse_method: ''\n sglang_server_url: ''\n provider_id: langgenius\/mineru\/mineru\n provider_name: langgenius\/mineru\/mineru\n provider_type: builtin\n selected: false\n title: Parse File\n tool_configurations:\n backend:\n type: constant\n value: pipeline\n enable_formula:\n type: constant\n value: 1\n enable_ocr:\n type: constant\n value: true\n enable_table:\n type: constant\n value: 1\n extra_formats:\n type: mixed\n value: '[]'\n language:\n type: mixed\n value: auto\n parse_method:\n type: constant\n value: auto\n sglang_server_url:\n type: mixed\n value: ''\n tool_description: a tool for parsing text, tables, and images, supporting\n multiple formats such as pdf, pptx, docx, etc. supporting multiple languages\n such as English, Chinese, etc.\n tool_label: Parse File\n tool_name: parse-file\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756915693835'\n - file\n type: tool\n height: 270\n id: '1758027159239'\n position:\n x: -544.9739996945534\n y: 282\n positionAbsolute:\n x: -544.9739996945534\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 679.9701291615181\n y: -191.49392257836791\n zoom: 0.8239704766223018\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: ''\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: ''\n type: checkbox\n unit: null\n variable: clean_2\n",
"graph": {
"edges": [
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "knowledge-index"
},
"id": "1751336942081-source-1750400198569-target",
"selected": false,
"source": "1751336942081",
"sourceHandle": "source",
"target": "1750400198569",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "llm",
"targetType": "tool"
},
"id": "1758002850987-source-1751336942081-target",
"source": "1758002850987",
"sourceHandle": "source",
"target": "1751336942081",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInIteration": false,
"isInLoop": false,
"sourceType": "datasource",
"targetType": "tool"
},
"id": "1756915693835-source-1758027159239-target",
"source": "1756915693835",
"sourceHandle": "source",
"target": "1758027159239",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "llm"
},
"id": "1758027159239-source-1758002850987-target",
"source": "1758027159239",
"sourceHandle": "source",
"target": "1758002850987",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
}
],
"nodes": [
{
"data": {
"chunk_structure": "hierarchical_model",
"embedding_model": "jina-embeddings-v2-base-en",
"embedding_model_provider": "langgenius\/jina\/jina",
"index_chunk_variable_selector": [
"1751336942081",
"result"
],
"indexing_technique": "high_quality",
"keyword_number": 10,
"retrieval_model": {
"reranking_enable": true,
"reranking_mode": "reranking_model",
"reranking_model": {
"reranking_model_name": "jina-reranker-v1-base-en",
"reranking_provider_name": "langgenius\/jina\/jina"
},
"score_threshold": 0,
"score_threshold_enabled": false,
"search_method": "hybrid_search",
"top_k": 3,
"weights": null
},
"selected": false,
"title": "Knowledge Base",
"type": "knowledge-index"
},
"height": 114,
"id": "1750400198569",
"position": {
"x": 474.7618603027596,
"y": 282
},
"positionAbsolute": {
"x": 474.7618603027596,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 458,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 5 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Text Input, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 458,
"id": "1751264451381",
"position": {
"x": -893.2836123260277,
"y": 378.2537898330178
},
"positionAbsolute": {
"x": -893.2836123260277,
"y": 378.2537898330178
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 260,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source \u2192 use extractor to extract document content \u2192 split and clean content into structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 1182
},
"height": 260,
"id": "1751266376760",
"position": {
"x": -704.0614991386192,
"y": -73.30453110517956
},
"positionAbsolute": {
"x": -704.0614991386192,
"y": -73.30453110517956
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 1182
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 304,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is an advanced open-source document extractor designed specifically to convert complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into high-quality, machine-readable formats like Markdown and JSON. MinerU addresses challenges in document parsing such as layout detection, formula recognition, and multi-language support, which are critical for generating high-quality training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 304,
"id": "1751266402561",
"position": {
"x": -555.2228329530462,
"y": 592.0458661166498
},
"positionAbsolute": {
"x": -555.2228329530462,
"y": 592.0458661166498
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 554,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such as a paragraph, a section, or even an entire document\u2014that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 554,
"id": "1751266447821",
"position": {
"x": 153.2996965006646,
"y": 378.2537898330178
},
"positionAbsolute": {
"x": 153.2996965006646,
"y": 378.2537898330178
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 411,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 411,
"id": "1751266580099",
"position": {
"x": 482.3389174180554,
"y": 437.9839361130071
},
"positionAbsolute": {
"x": 482.3389174180554,
"y": 437.9839361130071
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"is_team_authorization": true,
"output_schema": {
"properties": {
"result": {
"description": "Parent child chunks result",
"items": {
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "",
"ja_JP": "",
"pt_BR": "",
"zh_Hans": ""
},
"label": {
"en_US": "Input Content",
"ja_JP": "Input Content",
"pt_BR": "Conte\u00fado de Entrada",
"zh_Hans": "\u8f93\u5165\u6587\u672c"
},
"llm_description": "The text you want to chunk.",
"max": null,
"min": null,
"name": "input_text",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": "paragraph",
"form": "llm",
"human_description": {
"en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"pt_BR": "Dividir texto em par\u00e1grafos com base no separador e no comprimento m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuper\u00e1-lo.",
"zh_Hans": "\u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002"
},
"label": {
"en_US": "Parent Mode",
"ja_JP": "Parent Mode",
"pt_BR": "Modo Pai",
"zh_Hans": "\u7236\u5757\u6a21\u5f0f"
},
"llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"max": null,
"min": null,
"name": "parent_mode",
"options": [
{
"label": {
"en_US": "Paragraph",
"ja_JP": "Paragraph",
"pt_BR": "Par\u00e1grafo",
"zh_Hans": "\u6bb5\u843d"
},
"value": "paragraph"
},
{
"label": {
"en_US": "Full Document",
"ja_JP": "Full Document",
"pt_BR": "Documento Completo",
"zh_Hans": "\u5168\u6587"
},
"value": "full_doc"
}
],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "select"
},
{
"auto_generate": null,
"default": "\n\n",
"form": "llm",
"human_description": {
"en_US": "Separator used for chunking",
"ja_JP": "Separator used for chunking",
"pt_BR": "Separador usado para divis\u00e3o",
"zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26"
},
"label": {
"en_US": "Parent Delimiter",
"ja_JP": "Parent Delimiter",
"pt_BR": "Separador de Pai",
"zh_Hans": "\u7236\u5757\u5206\u9694\u7b26"
},
"llm_description": "The separator used to split chunks",
"max": null,
"min": null,
"name": "separator",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": 1024,
"form": "llm",
"human_description": {
"en_US": "Maximum length for chunking",
"ja_JP": "Maximum length for chunking",
"pt_BR": "Comprimento m\u00e1ximo para divis\u00e3o",
"zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6"
},
"label": {
"en_US": "Maximum Parent Chunk Length",
"ja_JP": "Maximum Parent Chunk Length",
"pt_BR": "Comprimento M\u00e1ximo do Bloco Pai",
"zh_Hans": "\u6700\u5927\u7236\u5757\u957f\u5ea6"
},
"llm_description": "Maximum length allowed per chunk",
"max": null,
"min": null,
"name": "max_length",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": ". ",
"form": "llm",
"human_description": {
"en_US": "Separator used for subchunking",
"ja_JP": "Separator used for subchunking",
"pt_BR": "Separador usado para subdivis\u00e3o",
"zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26"
},
"label": {
"en_US": "Child Delimiter",
"ja_JP": "Child Delimiter",
"pt_BR": "Separador de Subdivis\u00e3o",
"zh_Hans": "\u5b50\u5206\u5757\u5206\u9694\u7b26"
},
"llm_description": "The separator used to split subchunks",
"max": null,
"min": null,
"name": "subchunk_separator",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": 512,
"form": "llm",
"human_description": {
"en_US": "Maximum length for subchunking",
"ja_JP": "Maximum length for subchunking",
"pt_BR": "Comprimento m\u00e1ximo para subdivis\u00e3o",
"zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6"
},
"label": {
"en_US": "Maximum Child Chunk Length",
"ja_JP": "Maximum Child Chunk Length",
"pt_BR": "Comprimento M\u00e1ximo de Subdivis\u00e3o",
"zh_Hans": "\u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6"
},
"llm_description": "Maximum length allowed per subchunk",
"max": null,
"min": null,
"name": "subchunk_max_length",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": 0,
"form": "llm",
"human_description": {
"en_US": "Whether to remove consecutive spaces, newlines and tabs",
"ja_JP": "Whether to remove consecutive spaces, newlines and tabs",
"pt_BR": "Se deve remover espa\u00e7os extras no texto",
"zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26"
},
"label": {
"en_US": "Replace consecutive spaces, newlines and tabs",
"ja_JP": "Replace consecutive spaces, newlines and tabs",
"pt_BR": "Substituir espa\u00e7os consecutivos, novas linhas e guias",
"zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26"
},
"llm_description": "Whether to remove consecutive spaces, newlines and tabs",
"max": null,
"min": null,
"name": "remove_extra_spaces",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": 0,
"form": "llm",
"human_description": {
"en_US": "Whether to remove URLs and emails in the text",
"ja_JP": "Whether to remove URLs and emails in the text",
"pt_BR": "Se deve remover URLs e e-mails no texto",
"zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"
},
"label": {
"en_US": "Delete all URLs and email addresses",
"ja_JP": "Delete all URLs and email addresses",
"pt_BR": "Remover todas as URLs e e-mails",
"zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"
},
"llm_description": "Whether to remove URLs and emails in the text",
"max": null,
"min": null,
"name": "remove_urls_emails",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
}
],
"params": {
"input_text": "",
"max_length": "",
"parent_mode": "",
"remove_extra_spaces": "",
"remove_urls_emails": "",
"separator": "",
"subchunk_max_length": "",
"subchunk_separator": ""
},
"provider_id": "langgenius\/parentchild_chunker\/parentchild_chunker",
"provider_name": "langgenius\/parentchild_chunker\/parentchild_chunker",
"provider_type": "builtin",
"selected": false,
"title": "Parent-child Chunker",
"tool_configurations": {},
"tool_description": "Process documents into parent-child chunk structures",
"tool_label": "Parent-child Chunker",
"tool_name": "parentchild_chunker",
"tool_node_version": "2",
"tool_parameters": {
"input_text": {
"type": "mixed",
"value": "{{#1758002850987.text#}}"
},
"max_length": {
"type": "variable",
"value": [
"rag",
"shared",
"Maximum_Parent_Length"
]
},
"parent_mode": {
"type": "variable",
"value": [
"rag",
"shared",
"Parent_Mode"
]
},
"remove_extra_spaces": {
"type": "variable",
"value": [
"rag",
"shared",
"clean_1"
]
},
"remove_urls_emails": {
"type": "variable",
"value": [
"rag",
"shared",
"clean_2"
]
},
"separator": {
"type": "mixed",
"value": "{{#rag.shared.Parent_Delimiter#}}"
},
"subchunk_max_length": {
"type": "variable",
"value": [
"rag",
"shared",
"Maximum_Child_Length"
]
},
"subchunk_separator": {
"type": "mixed",
"value": "{{#rag.shared.Child_Delimiter#}}"
}
},
"type": "tool"
},
"height": 52,
"id": "1751336942081",
"position": {
"x": 144.55897745117755,
"y": 282
},
"positionAbsolute": {
"x": 144.55897745117755,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 446,
"selected": true,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"In this step, the LLM is responsible for enriching and reorganizing content, along with images and tables. The goal is to maintain the integrity of image URLs and tables while providing contextual descriptions and summaries to enhance understanding. The content should be structured into well-organized paragraphs, using double newlines to separate them. The LLM should enrich the document by adding relevant descriptions for images and extracting key insights from tables, ensuring the content remains easy to retrieve within a Retrieval-Augmented Generation (RAG) system. The final output should preserve the original structure, making it more accessible for knowledge retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 446,
"id": "1753967810859",
"position": {
"x": -176.67459682201036,
"y": 405.2790698865377
},
"positionAbsolute": {
"x": -176.67459682201036,
"y": 405.2790698865377
},
"selected": true,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "File",
"datasource_name": "upload-file",
"datasource_parameters": {},
"fileExtensions": [
"pdf",
"doc",
"docx",
"pptx",
"ppt",
"jpg",
"png",
"jpeg"
],
"plugin_id": "langgenius\/file",
"provider_name": "file",
"provider_type": "local_file",
"selected": false,
"title": "File",
"type": "datasource"
},
"height": 52,
"id": "1756915693835",
"position": {
"x": -893.2836123260277,
"y": 282
},
"positionAbsolute": {
"x": -893.2836123260277,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"context": {
"enabled": false,
"variable_selector": []
},
"model": {
"completion_params": {
"temperature": 0.7
},
"mode": "chat",
"name": "claude-3-5-sonnet-20240620",
"provider": "langgenius\/anthropic\/anthropic"
},
"prompt_template": [
{
"id": "beb97761-d30d-4549-9b67-de1b8292e43d",
"role": "system",
"text": "You are an AI document assistant. \nYour tasks are:\nEnrich the content contextually:\nAdd meaningful descriptions for each image.\nSummarize key information from each table.\nOutput the enriched content\u00a0with clear annotations showing the\u00a0corresponding image and table positions, so the text can later be aligned back into the original document. Preserve any ![image] URLs from the input text.\nYou will receive two inputs:\nThe file and text\u00a0(may contain images url and tables).\nThe final output should be a\u00a0single, enriched version of the original document with ![image] url preserved.\nGenerate output directly without saying words like: Here's the enriched version of the original text with the image description inserted."
},
{
"id": "f92ef0cd-03a7-48a7-80e8-bcdc965fb399",
"role": "user",
"text": "The file is {{#1756915693835.file#}} and the text are\u00a0{{#1758027159239.text#}}."
}
],
"selected": false,
"title": "LLM",
"type": "llm",
"vision": {
"configs": {
"detail": "high",
"variable_selector": [
"1756915693835",
"file"
]
},
"enabled": true
}
},
"height": 88,
"id": "1758002850987",
"position": {
"x": -176.67459682201036,
"y": 282
},
"positionAbsolute": {
"x": -176.67459682201036,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_team_authorization": true,
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"ja_JP": "\u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)",
"pt_BR": "The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"
},
"label": {
"en_US": "file",
"ja_JP": "file",
"pt_BR": "file",
"zh_Hans": "file"
},
"llm_description": "The file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"max": null,
"min": null,
"name": "file",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "file"
},
{
"auto_generate": null,
"default": "auto",
"form": "form",
"human_description": {
"en_US": "(For local deployment v1 and v2) Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
"ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v1\u3068v2\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044",
"pt_BR": "(For local deployment v1 and v2) Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
"zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v1\u548cv2\u7248\u672c\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr"
},
"label": {
"en_US": "parse method",
"ja_JP": "\u89e3\u6790\u65b9\u6cd5",
"pt_BR": "parse method",
"zh_Hans": "\u89e3\u6790\u65b9\u6cd5"
},
"llm_description": "(For local deployment v1 and v2) Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
"max": null,
"min": null,
"name": "parse_method",
"options": [
{
"icon": "",
"label": {
"en_US": "auto",
"ja_JP": "auto",
"pt_BR": "auto",
"zh_Hans": "auto"
},
"value": "auto"
},
{
"icon": "",
"label": {
"en_US": "ocr",
"ja_JP": "ocr",
"pt_BR": "ocr",
"zh_Hans": "ocr"
},
"value": "ocr"
},
{
"icon": "",
"label": {
"en_US": "txt",
"ja_JP": "txt",
"pt_BR": "txt",
"zh_Hans": "txt"
},
"value": "txt"
}
],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "select"
},
{
"auto_generate": null,
"default": 1,
"form": "form",
"human_description": {
"en_US": "(For official API and local deployment v2) Whether to enable formula recognition",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
"pt_BR": "(For official API and local deployment v2) Whether to enable formula recognition",
"zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b"
},
"label": {
"en_US": "Enable formula recognition",
"ja_JP": "\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
"pt_BR": "Enable formula recognition",
"zh_Hans": "\u5f00\u542f\u516c\u5f0f\u8bc6\u522b"
},
"llm_description": "(For official API and local deployment v2) Whether to enable formula recognition",
"max": null,
"min": null,
"name": "enable_formula",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": 1,
"form": "form",
"human_description": {
"en_US": "(For official API and local deployment v2) Whether to enable table recognition",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
"pt_BR": "(For official API and local deployment v2) Whether to enable table recognition",
"zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b"
},
"label": {
"en_US": "Enable table recognition",
"ja_JP": "\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
"pt_BR": "Enable table recognition",
"zh_Hans": "\u5f00\u542f\u8868\u683c\u8bc6\u522b"
},
"llm_description": "(For official API and local deployment v2) Whether to enable table recognition",
"max": null,
"min": null,
"name": "enable_table",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": "auto",
"form": "form",
"human_description": {
"en_US": "(For official API and local deployment v2) Specify document language, default ch, can be set to auto(local deployment need to specify the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u3067\u306f\u8a00\u8a9e\u3092\u6307\u5b9a\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3059\uff09\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5",
"pt_BR": "(For official API and local deployment v2) Specify document language, default ch, can be set to auto(local deployment need to specify the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5",
"zh_Hans": "\uff08\u4ec5\u9650\u5b98\u65b9api\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff08\u672c\u5730\u90e8\u7f72\u9700\u8981\u6307\u5b9a\u660e\u786e\u7684\u8bed\u8a00\uff0c\u9ed8\u8ba4ch\uff09\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5"
},
"label": {
"en_US": "Document language",
"ja_JP": "\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e",
"pt_BR": "Document language",
"zh_Hans": "\u6587\u6863\u8bed\u8a00"
},
"llm_description": "(For official API and local deployment v2) Specify document language, default ch, can be set to auto(local deployment need to specify the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5",
"max": null,
"min": null,
"name": "language",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": 0,
"form": "form",
"human_description": {
"en_US": "(For official API) Whether to enable OCR recognition",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
"pt_BR": "(For official API) Whether to enable OCR recognition",
"zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b"
},
"label": {
"en_US": "Enable OCR recognition",
"ja_JP": "OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
"pt_BR": "Enable OCR recognition",
"zh_Hans": "\u5f00\u542fOCR\u8bc6\u522b"
},
"llm_description": "(For official API) Whether to enable OCR recognition",
"max": null,
"min": null,
"name": "enable_ocr",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": "[]",
"form": "form",
"human_description": {
"en_US": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059",
"pt_BR": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
"zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a"
},
"label": {
"en_US": "Extra export formats",
"ja_JP": "\u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f",
"pt_BR": "Extra export formats",
"zh_Hans": "\u989d\u5916\u5bfc\u51fa\u683c\u5f0f"
},
"llm_description": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
"max": null,
"min": null,
"name": "extra_formats",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": "pipeline",
"form": "form",
"human_description": {
"en_US": "(For local deployment v2) Example: pipeline, vlm-transformers, vlm-sglang-engine, vlm-sglang-client, default is pipeline",
"ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306fpipeline",
"pt_BR": "(For local deployment v2) Example: pipeline, vlm-transformers, vlm-sglang-engine, vlm-sglang-client, default is pipeline",
"zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u793a\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\uff0c\u9ed8\u8ba4\u503c\u4e3apipeline"
},
"label": {
"en_US": "Backend type",
"ja_JP": "\u30d0\u30c3\u30af\u30a8\u30f3\u30c9\u30bf\u30a4\u30d7",
"pt_BR": "Backend type",
"zh_Hans": "\u89e3\u6790\u540e\u7aef"
},
"llm_description": "(For local deployment v2) Example: pipeline, vlm-transformers, vlm-sglang-engine, vlm-sglang-client, default is pipeline",
"max": null,
"min": null,
"name": "backend",
"options": [
{
"icon": "",
"label": {
"en_US": "pipeline",
"ja_JP": "pipeline",
"pt_BR": "pipeline",
"zh_Hans": "pipeline"
},
"value": "pipeline"
},
{
"icon": "",
"label": {
"en_US": "vlm-transformers",
"ja_JP": "vlm-transformers",
"pt_BR": "vlm-transformers",
"zh_Hans": "vlm-transformers"
},
"value": "vlm-transformers"
},
{
"icon": "",
"label": {
"en_US": "vlm-sglang-engine",
"ja_JP": "vlm-sglang-engine",
"pt_BR": "vlm-sglang-engine",
"zh_Hans": "vlm-sglang-engine"
},
"value": "vlm-sglang-engine"
},
{
"icon": "",
"label": {
"en_US": "vlm-sglang-client",
"ja_JP": "vlm-sglang-client",
"pt_BR": "vlm-sglang-client",
"zh_Hans": "vlm-sglang-client"
},
"value": "vlm-sglang-client"
}
],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "select"
},
{
"auto_generate": null,
"default": "",
"form": "form",
"human_description": {
"en_US": "(For local deployment v2 when backend is vlm-sglang-client) Example: http:\/\/127.0.0.1:8000, default is empty",
"ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528 \u89e3\u6790\u5f8c\u7aef\u304cvlm-sglang-client\u306e\u5834\u5408\uff09\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306f\u7a7a",
"pt_BR": "(For local deployment v2 when backend is vlm-sglang-client) Example: http:\/\/127.0.0.1:8000, default is empty",
"zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c \u89e3\u6790\u540e\u7aef\u4e3avlm-sglang-client\u65f6\uff09\u793a\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\uff0c\u9ed8\u8ba4\u503c\u4e3a\u7a7a"
},
"label": {
"en_US": "sglang-server url",
"ja_JP": "sglang-server\u30a2\u30c9\u30ec\u30b9",
"pt_BR": "sglang-server url",
"zh_Hans": "sglang-server\u5730\u5740"
},
"llm_description": "(For local deployment v2 when backend is vlm-sglang-client) Example: http:\/\/127.0.0.1:8000, default is empty",
"max": null,
"min": null,
"name": "sglang_server_url",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
}
],
"params": {
"backend": "",
"enable_formula": "",
"enable_ocr": "",
"enable_table": "",
"extra_formats": "",
"file": "",
"language": "",
"parse_method": "",
"sglang_server_url": ""
},
"provider_id": "langgenius\/mineru\/mineru",
"provider_name": "langgenius\/mineru\/mineru",
"provider_type": "builtin",
"selected": false,
"title": "Parse File",
"tool_configurations": {
"backend": {
"type": "constant",
"value": "pipeline"
},
"enable_formula": {
"type": "constant",
"value": 1
},
"enable_ocr": {
"type": "constant",
"value": true
},
"enable_table": {
"type": "constant",
"value": 1
},
"extra_formats": {
"type": "mixed",
"value": "[]"
},
"language": {
"type": "mixed",
"value": "auto"
},
"parse_method": {
"type": "constant",
"value": "auto"
},
"sglang_server_url": {
"type": "mixed",
"value": ""
}
},
"tool_description": "a tool for parsing text, tables, and images, supporting multiple formats such as pdf, pptx, docx, etc. supporting multiple languages such as English, Chinese, etc.",
"tool_label": "Parse File",
"tool_name": "parse-file",
"tool_node_version": "2",
"tool_parameters": {
"file": {
"type": "variable",
"value": [
"1756915693835",
"file"
]
}
},
"type": "tool"
},
"height": 270,
"id": "1758027159239",
"position": {
"x": -544.9739996945534,
"y": 282
},
"positionAbsolute": {
"x": -544.9739996945534,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
}
],
"viewport": {
"x": 679.9701291615181,
"y": -191.49392257836791,
"zoom": 0.8239704766223018
}
},
"icon_info": {
"icon": "e642577f-da15-4c03-81b9-c9dec9189a3c",
"icon_background": null,
"icon_type": "image",
"icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAP9UlEQVR4Ae2dTXPbxhnHdwFRr5ZN2b1kJraouk57i\/IJrJx6jDPT9Fpnkrvj3DOOv0DsXDvJxLk2nUnSW09hPkGc6aWdOBEtpZNLE9Gy3iiSQJ\/\/gg8DQnyFFiAAPjtDLbAA9uWPn5595VKrjLjtn\/YqrZaq+L6quL5X9pQqO1qtI3u+0mXy8MFJxfihP1qrss\/XQ+FFPtRK1UmreriMJkz\/GqaVX8N1z1dPHdyvnZpP1+fmVG3jhTVzDden6SjP6brt7b1y21VbWnk3CawKAbWp9Fmo0s3VbKamffWYgKz5vv+t1s5jt62qGxtrPVAnrUwqAH63u7dF\/4E3qaBbVCB8zjjHcZRDJs91XaXJpOGDMDgSx5zj2HWDMByz4\/v5fBZ80lLhE3Y498jcsfO8Nt1DlYbvmXs9L\/DbbY\/uozqmjwOUSvvVtuN8+tKLa4\/73GI1KDEAYek8x7vta\/0a5XiLcw1Y5uZcAxpgK5VKXeD4HvHTUaDdbivA2Go1yW+rZrPVkzDBUSOk7\/\/u2m8e9VyweGIdQAPenLpD\/3LvcLsM0C0szBNs8wY+nIvLpgKA8PS0YWBkKwkQyUo8un517b7tXFsl4cnO\/25p33lA7YoKMloqzanFxSXj2864xJe8Ao3GaRdGpAYQbVtEKwCS1au0Xf8TyuMWMirgQYXiOFjFw8PDcLvxC7ek79roSZ8bwO3dvTue77+P6hZV69LSElm9heKoLyXpKgCLeHx8zCBSb9m7e972YWwATVvPVfeoL\/YOcjg\/X1IrKyvd3mo313JQKAXQLgSEgBGO3v\/DG9eu3I1byFgAosr1HP9zauttitWLK32+nzs5aRgQMfSDoRtnXr8ep0qeGMAOfF+ho4FxuosXV7vjdfmWVHI\/qQKwhvv7z02VTCDVnJJ+dVIIJwIwDB\/G8FZXLwh8k761gt0PCJ8\/PzDjiHEgHBvAKHywfDKeVzCaYhYH1TAsIQazJ4VwLAAFvphvZoYeiwvh2YnVPqJ1OhwVVLti+foIJEGmNgQbYISG5Creqf85Ga7yKGlGAvj9zh5mNjbR4UCbT6rdUZLO7nWwwf0CMNNyvXuj1BhaBdPU2m2lnE8Q8aVLF6XDMUpNuW4UQMfk2bN9swKHqua7N9avPBwkzUAATbvP9b\/BDMfy8rLMbgxSUML7KoBxwqOjI1yr07TdK4OGZwZWwTS3+wDwYRWLTK311VgChygAZjA7Rq7cbpp1An3v7gtgUPWqW2j3YW5XnCgQR4HQ1OzWk529W\/3i6AsgLakyjUfAx6uS+z0sYaLAMAXQd2ADRt9PedCvV3wGwO939+7xNBuqX3GiwHkUQFWM5XnUnKu0HM8sXAnHdwZA+grVbdwA8ylOFLChABYlw5FFvBO1gj0Aou0H6wdi8REnCthQIMRTmazg7XCcPQBy229+XhaUhkWS4\/MrELKC+JJa13UB3P5xb1Pafl1d5MCyArCC6JSQ28LXdDn6LoD09bzbCJSql6UR37YC3U6t521x3F0AtaNvIlCqX5ZGfNsK4Gu5cGQJDWs4NgCiZ0JLujYRIBYQKohLQgFsSMDVMPeGDYBtt72FBAW+JGSXOFkBwAcI4bA\/EHwDoO9rY\/0cJ7iIC+JEgSQUwHpB4\/ygHWgAJDJfRiD2aREnCiSpAANodkajhDoAqgoS7bfzFMLFiQK2FGAjR7WxMXqdKjjogDCdthKTeESBqAKdTgiCK\/jjUG8kOOjsxYdAcaJAUgoAQF5hhV1xndacVL9JiS3x9leArSC2ZHa03y7jNg7s\/4iEigL2FOChGGIPAOoKosY2uOJEgTQUYGNHw39lB7vRI1HszyxOFEhDAQaQ0io7fqc3EgpMIw+SxgwrwJ0QRzvr3XpXAJxhIqZYdKp59TrSl2m4Kb6FGUuajR3trLvWtYAzpoEUd4oKcIeXhgQvCYBTfBGStFJzm\/\/EWkDqiiw1qR6W1TC7r11JlIurX\/6caPy5iJx+uUkd7SOrFYfgM8MwNBKYi7xLJoulgFTBxXqfuSuNAJi7V1asDM99+8fLpvYtly91VykUq4jDSzPtNpntNme0PLbjH67meFexf2C9Hmx8QMOAwVQcj82MF4XcJQrEVyDEmpmKk9Uw8bWUJ2Mo0ANgjOflEVHAmgLSCbEmpUQURwEBMI5q8ow1BQRAa1JKRHEUyAWAPx7Rj+I1afpGXOEUyAWAn+2cqI9\/aBROfCkQLT\/Iugiwfp\/tNtRH3x+LFcz6y4qRv8wDCOu3a6pgX6xgjBec9UcyDSBbPxZRrCArURw\/0wCy9WO595tiBVmLoviZBTBq\/VhwsYKsRDH8zAIYtX4st1hBVqIYfiYBHGT9WHKxgqxE\/v1MAjjI+rHcYgVZifz7mfo5pACsE\/XRDycjlYUVhPvT1QV1dTmT\/0cjyyA30LfisiBCFzwz2Ezf0BvD4ZkP\/n2k\/kbjhH++tiggjqFZFm+ZKoBxwIuKiPaigBhVJT\/n+snOL8bkXL68llqubYA3KLMvUnU8iUVM+zsU0fQGlaPw4Yd1U8RULWCS4PELE4vISuTDT7X1DgCxC8OlUvLJ\/pqWfOE+yyimagFRPb77h2VTRaLz8PfdU1po0Laqz8WSVm\/9dlG9fX1J4VhcthVIFUCWIgkQ8wqe7e\/tRtuYtuPnd3he\/5dfglpwKgBy5m2AmFfwWINZ96cKIIsfBfFjGohGG26YE\/CGqZOfa5kAkOViENFy++A\/wUwHX4v6b1Eb793fL0WD5TxnCiTfHY0hCOAa1oF4cdlVb9AUnLj8K3AuAD\/baSh8bDvA9zb1ZAe5N67J\/O8gbfIWHrsKBnjvfnPQLS+gsOlgBbEoIdoWFOtnU+XpxxXLAkbhA4i2LeEgKyjWb\/rQ2MzBxABG4ePMJAFhtC0o1o\/VLo4\/EYCD4GM5bEMYtYJi\/Vjp4vhjAzgKPpbENoRsBcX6scLF8sfqhIwLH0sDCOFsdEzYCvq0lausfGaFi+OPBHBS+FgamxDCCj4bMTPC6YqfLwWGAhgXPpbAFoSwgviIK54CA9uA54WPpbLdJuR4xS+GAn0BtAUfSyQQshLiRxU4A6Bt+DhBgZCVED+sQA+AScHHCQqErIT4rEAXwKTh4wQFQlZCfChgesH\/+G9DvfdDenswA0I4G+OEJiL5k1sFHAPfvw5TL4BYwtQlz2SCzntTgI+VEAhZidn1u23AaUkgEE5L+WykO3UAIYNAmA0YppGLTAAoEE7j1WcjzcwAKBBmA4i0c5EpAAXCtF\/\/9NPLHIAC4fShSDMHmQRQIEwTgemmlVkABcLpgpFW6pkGUCBMC4PppZN5AAXC6cGRRsq5AFAgTAOF6aSRGwAFwukAknSquQJQIEwah\/Tjzx2AAmH6kCSZYi4BFAiTRCLduHMLoECYLihJpUYA6uAna+j3O\/LoZClX\/t4afium4+oEoJ9rAFEQgZDfZz78MIB65a9PtinbFbV0USkn1zWyFfWT\/l2N6O94WMl03iLx6QtwR\/vIdU2Iy9vLK1h+BcCCvdC8FUcAzNsbK0J+u50QXcfvBX9FZdpaXV1VpdLQ3dqKUHQpQwYUaDZb6vnz58hJVSxgBl7ILGcBAJphmFDXeJb1kLKnrIDj+f4zpOmjayxOFEhBAc8LfiNaKy3DMCnoLUlEFOj2QSjcoZ2Xa7jueWIBoYO45BXg2tbzvaeY+zBtQM\/rzs8lnwNJYaYVCPU36k5bd+aClQA401SkWHiubbV2ao7Wbg1pt1pBwzDFfEhSM6oAW0Bfq7oz1wragBw4o5pIsVNUoN0O+htzc7QYYWNjrYa0YRYFwhTfwgwnxVXwxgtrnWEYX6zgDPOQatG5qad99RgJB1NxOjhpNpupZkYSmz0FeBCaKuGnKH0AoO+bE6Zz9mSREqelQKvV6iTlhy2gX0Uo09m5QzxRwLoC7XZnGk47vwLott0qUoIFlI6Idc0lwpACWIoF57ZVFb6pgqknjNmQKuCTahiyiEtCAYYPHZAOc502IKVG8H2NRE9PT5NIW+IUBYithlHBVwFrOAk6IebIqcITAKGCuCQUYAvoec4jjr8L4I2ra1UKNNUw38g3iS8KnFeBRqNhJjuw+uqljTXTAUGcXQBxon3\/S\/gnJ8fwxIkC1hTgmtVX+n440h4AHTKNRGgdFlCsYFgmOT6PAswTrN\/vrq09CsfVAyB6JrRE\/0PcIFYwLJMcn0eBw8Pg11iJrU+j8RCUvW57e6\/sOf43tFSmsry8pBYXF3tvkDNRYAIF0PY7PDxSsH7Xr13eiD7aYwFxEVbQ1\/oujo+PT2RgGkKIi6UAll2BIbho248jPAMgLlA9\/QV5pkd8cJD+j1lz5sTPtwJoxnWWXn0RbftxyfoCiItuW79JZpM6JE1qDwYU80PiiwKjFDg5aahG4xRVb90tBTVqv2cGAkhVcU35QZcZZpRXsfaLRMJEgbACQdUbDOVR1XsXC0\/D18PHAwHETdfX1x5SI\/BDzBFjLw+BMCydHPdTAIyAFbOohdgZVPXys2Qhh7tOr\/gr6hVvuq6rLl5cVVqPfGx4pHK1kAoAuv19GKo2TWqox9fXL78yqqBDLSAeRq\/Y8fTrFGENESMBQ\/eomOX6TCnQAx8NuTjz+vVxBBjblJElrND4ICxhRSzhONLOzj1n4CvpV4e1+8LKjA0gHopCeOHCBeW6I41oOD05LpgCaPMdHBwE1S4s3wTwQYqJAMQDYQgd2tgDG1sKhFBm9hx3ODDWRyBNDB8UmxhAPNSB8HN0TNAhWVpalCk7CDNDDuN8x8fHpj+ADgfafONWu2GZYgHIETx5+vND6hLfwfnCwjxBuCTWkMUpqI\/2HhYXnJ52vsJLQy2u57yPzmqcIp8LQCT4ZGfvtlb+A9raqIwqGdZwYWEhTl7kmYwr0GP1aIaDVrfcv7F+5eF5sn1uAJE4quS2qx7QlPMtnAPElZUV2fQcYhTAYT0f5nVDa0SrNL32ZpwqNyqHFQA5UmMNff8ehmoQhl335+fnxSKyQDnzo+ARLDVMrXUWq1gpjVUAOUffPf35fUfpvzCIsIgBjAtiFVmkDPpo3+Fruc3mqVlIgHM4gsQsVJ7znIdx23qDipsIgJxY1CJyOGDEYPYc7c\/lOPBdviR+SgoALnyw2gkzXPj02Zigqn39peOpR7bB42ImCiAnsv3j3iaNGVFnRd\/E0A2Hh31YSYwnYlgHx\/D5A0jZBdd7s8338T2z4DNA0bJibA4O+zCzBeOt93DOkPEWadHn6bxK931NL6Ha+aZkn1vsBfW+SXvxDoyJOixl6rBskUAYQ3yZxpAqg6AcGIlcsKMAtuXDzmjYnEo7VWyXkZSlG5Th1AEclJHtn\/YqtHFShYAsA0pPeWXawn8d91PDt0KecbiOIR8+h0\/G8kxY+HoRj+nF1cmg1c+UTQd7PVJ4nYbHzHXaf\/6po5x6m7bEJa1q2JnURg\/2TNoxAv4PoGedQHqhulIAAAAASUVORK5CYII="
},
"id": "103825d3-7018-43ae-bcf0-f3c001f3eb69",
"name": "Contextual Enrichment Using LLM"
},
{
"chunk_structure": "hierarchical_model",
"description": "This Knowledge Pipeline extracts images and tables from complex PDF documents for downstream processing.",
"export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/mineru:0.5.0@ca04f2dceb4107e3adf24839756954b7c5bcb7045d035dbab5821595541c093d\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 87426868-91d6-4774-a535-5fd4595a77b3\n icon_background: null\n icon_type: image\n icon_url: data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAARwElEQVR4Ae1dvXPcxhVfLMAP0RR1pL7MGVu8G7sXXdszotNYne1x6kgpktZSiiRNIrtMilgqnNZSb4\/lzm4i5i8w1TvDE+UZyZIlnihKOvIAbN5v7\/aIw93xPvBBHPDezBHYBbC7+O2Pb9++\/YAlMiIPHjwoO65btpQqK6VKVKySsqwV9fQpSliy6IcTubhYxrFTrJJqXe+Mz2+I8KgJoeh3IIRBTW1vt+MoXLWWlgRheo\/uqlmWVSVMa67jVJeXl6sHTx7dGb1HurK9uVnybHtNKXFBWAKEW1XCKvcrhb+tCdi+LBeX2ud80o3AaHipDUGkFErdJXJu2J63vliptAncnXr8MakQ8PH9+2tU9Av0omtCCZx3iZSSsLCE49j6iHPE+U+fCEnnCEOmTp\/uehbXzPWuizmNoFaC4CQdFxCE3V9\/bcd4vk8txpLwW\/f6FPZ9RT8c\/fZ9nSdESmGtK1veOvPGG3SerCRGQGg6V8rLxIwPg6QDUWzb1kTDcXrKaROu16v6T550RMuTJzvCHOhEYBS8PM8TIGmj4QrX9ejndiRG5Kj6lvj8zLlzNzsuxBiInYCaeI7zqeWrK8YuA+lmZqbF9PSUcIh0o2irUQCNEZeJTSoqXg0i4d7evial0ZIgopLWzdNvvvl53MDESsBfNrc+sqX6wth0juOIublZMUXHcSUqoOPmO6nPxYkXiFinn9GMIGLcGjEWApLWK7u2\/ZVpauMgniFAnICaNPN8TAIvaMXd3ZcHdqMlbjve1NXFSvSetIxaGU\/u3\/\/Uk\/aPIB+a1rm5Y+LEwnwkrRe1TPx8vAigBVssLYj51+Z0x5Dq+iNXNn58tLV1OWpOYxMQtt7jra0vqFd1HbYe7DsU8tjsTNQy8fMZRQB2PJQLjiQlS4mvwIEoxR2rCdZNrpTfUnd9FVrv2LHZxIiXRJMSBbCsP5sWXvX6nnj1qq5dPOQQ33D86Y\/HaZJH1oAgnyflHZAPfrrSieOJkS\/rlV3k8s1SS3eC6h4cABc82bizvfmgPComIxHQkA+9XPjwoI6bBRg1W74\/Dwig7sEBuNbIDCPFNDoJhyYgky8PlIn\/HUDChQgkHIqAvcg3ijM5\/tfmFLOEALgwLgmHIiANqX0bbHaZfFmq\/myUJUxCV+5\/S4qrNKh0AwnY7GY3OxwLx18baRhtUOZ8PV8IgITHiSOmY0KDE9cGveGhBHy0SY5GJa4gYe5wDIKSrwMB0zHBDCZw5+G9e1cOQ6YvAWH3kX2pnYzw8zVZfVhSfI0RaCIAroAzEJp6cu0w90xfApL6pEkFogSvN49uNIHlv8MjAD8hRsdISq7d+Krfkz0J2Gp6PwKT51pM7pcAxzMC\/RDQY8fNpnjtV5op1eu+ngSUUmnjEeTjprcXbBw3DALoO5imWJA516tX3EVAmt1yDS4XEK816DxMXnwPI9ATATTFmJ5H5lx5X8quDkkXAZXvX0ZK8\/NzPRPkSEZgVAQwKRlCq34+DWvBDgLC9oP2w\/yvKLOYdW78hxFoIQAuQQuSNNcJBZDpIKCx\/bjpDSDEp7EgYLQgjWR8GEywTcBHmz\/r9bls+wXh4fO4EIAWbDmn1x5v3l8z6bYJKKV3GZFTtEyShRFIAoHp5kxq4Ut\/zaTfJqAS8gIiufk10PAxbgRajmloQs01pK+n5KNn4kp7GxEnlwZOYMBtqUl4inlqGeckoywt5MfODbXajp7G7\/jeIrYB0RoQe7UAb+755oR1GX0NOKYlzZ6GGM5pAhIzVxFp074sLIxAkghg7x8I7VezhmPTBrSs8wiwBgQKLEkigLVEEIyM4Njs8iqLAtQNsdt9ElzLhGTJhskEIBNeCGxG9YLegaZpaaXXYlyzCcbqJhZGIEkEYAdCjAaUD2jiKSJ41gtQYEkaAd0RoYkuEOyKK2mMroyA3YrEOQsjkCQCRgs6dbcsaYtc7fizZFM1Jpkxp80IAAHTE7ZsVZbkgikjkptgoMCSBgJGAxL3SmiMmxqwZRymUQDOo9gIGAKCe9L0RgKRxUaH3z5xBExrS5xbaTv+9FSZxLPmDBiBTgSId9YKorLohO4sKofygoBRdp5Si20NmJeX4\/fIPgLG40JEPMEEzH595bqEtF7Ool4wLUWa0F7wr+\/\/JlMVdOrOfzrKY8p3\/C9\/FjMXL3ZcK2rADHrQHtPkiBa+dsOYdrmooCT93s\/\/8U+x9\/33SWczcelzE5xilYGEjY2NFHPMflZMwJTraOdvfxfuTz+lnGt2s3O8bb0URPheA+NxsZeU5\/N1Qqp2d8Wzq38SJ774l3DefrvzYgZDSazJ0V\/r3Hmu3xZTEHgoLuWKNyT0Hj5MOedsZBfo8OqhOCbgEdQLSLhDmrCIJOwg4BFgz1m2EAD5ikpCQwIHX9SGyJjWAydhM5jC5vFoSLhANqH9+uuZf8W4bHppNZd\/xN\/ryDyE2SugIWERm2MmYEb4aEgI27BIwgTMUG2DhDXqmBSJhEzADBEQRfHISV0kEjIBM0ZAQ0KMmBRBmIAZrWWMGWPsOO\/CBMxwDWP2TN5JyATMMAFRNJBw98t\/Z7yU4xePCTg+dqk9Wf\/6a\/Hy1q3U8kszIyZgmmhHyOvlzVu5JCETMAIp0n40jyRkAqbNooj55Y2ETMCIhDiKx0HCV19\/cxRZx54nEzB2SNNJ8MWXX+ZikRMTMB2+JJJLHnyE\/FmkRKhxkGh4nfDBFT4DAqwBmQdHigAT8Ejh58yZgMyBI0WAbcCY4Td7wcScbN\/kJt3GZA3Yt2r5QhoIMAHTQJnz6IsAE7AvNHwhDQSYgGmgzHn0RYAJ2BcavpAGAkzANFDmPPoiwATsCw1fSAOBifcDTrofLI1KznIerAGzXDsFKBsTsACVnOVXZAJmuXYKUDYmYAEqOcuvyATMcu0UoGxMwAJUcpZfkQmY5dopQNkmzg846nw7m77Fge9xzH7wgZhaPT+wSodN35qf1+kibef8eTHz3rsD0+51w7D59Xq2V9yk+UUnjoC9QD8sDhs+4odNfqZWV8U8fTQwjs3AsYsptlDTn96ivVt2iZDT770n5i79Lpb0D3unPF0rVBMMstT+8MdEPpUFQoLkSD8vi8bTIHqhCAhAQRR8KiupHemRPhaN53lLtTiJOfFN8CCbp7FxV9RJM+398EMbN5Bkl3YfxffaBkm\/9P2Hv2gSI2337t0uQmNLNeSD7wSPIv3yGyWNSbp34gk4CGx0PPCD3RfcY8\/Yb7ALxxH5+lmBn+nY7H3\/g04\/qFnRJDtvvSWO\/faTcbIoxDOFaYLnLl\/SnZBgrYI0ccnMxQ9Er68doTnmz7P2R7kwBAQE6KEGpUFNZ5wCLdubhPndYjcqfoUiYPj7vMHmMiqQ5nmQEK6eoKC5hz3I0o1AoQgI53EaArsybFvWY2zu03iHtPIoFAHRIw5KWCMGr0U9n363c2QEznCWbgQKRcB6wBUDKOTZs92IxBRjescmubjtTZPupB9z74YxFQQXDNwiQZm9eDEYjPU8PNznD2kDjjo2POl+w1wTEIa\/+9P\/tH9Oj9kGKAaCTI85gSCQTN\/TsL3JnZDeUE08AUfVGIAB5IC7hOXoESiUDQi4QT4MwYWbyLirIqzxwhox7vwmNb2J14CjAB\/ndKxB+aLpD8qwhJ90my74zsOc556Akmy9GXKJYK5euGc6DEDj3hMefkuyxz1uGbPw3MQTMKsao\/5N54dkZugfgKUbgcLZgN0QxB+DSQ7hYT5niOUA8Zck+yk6\/vZTXUpfedkv7QSUEMQLTvtCkWdoPcqwNmDWX9F\/8iSWIvq1Zzod1oCxwNlMBOTb6THbGlPBWHoj4FhC1JQQJaWUsCwKsYyFwCuy+fARwbD7Ze7Spdxov7GA6fEQuNaSmkOnNQowAQ0kQx4xJb9BEwwwHR\/T8sPEQzJoeln7dQPaQUB7cVGQ7hOytCCk5BY5DNc4Iy2GfMf\/+pdwchMXlidPxl9m3xfSniLWCTHxbpj40YmWIkY80OzyOpDhcGQCDofTwLtAvGOffKKJx8NuA+Fq38AEbEMx2glIBtfKFG3LgVEW5+239DjzaKkU826\/1QlRQtWsx1tbd8gIXFtYmBdTDvOxmJRI960brit2dmiNjCXWudeRLvacWwgBEBBuGKH8tm8mdAsHGYHkEJDkk9FjIgHfTHK5ccqMACHgeb7GgdwwVW6CmRLpI3AwEiIkWIgSeOQcZGEE0kCg3QtW6t6BDRhgZRqF4DyKi0DA3KtJy7eanRAmYHEZkfKb+8YGtKyqVI5VRf6uy\/MBU66HwmbXboI9qyZd160CiYBaLCww\/OLpIOC3+hvurFOVy5VKFdkikn2B6VRA0XMxBFxeXm66YSyhqgCFxuaKjg2\/f8IIuJ4x9dQGstKDv8qyaAM7UW40XDEzM51wEUZLPq41CKPlmp+7E5nPFwEe0wEhp989JKMd0Rb5YxA4YCdCLIxA\/AhgIgKEiKc1YHMkxLLWEelxTxgwsCSIgPG20PqjAwLanreOPKEBuSOSIPqcNLn7mhrQcE7bgIuVSo3mBa6TK2bN9T0xJbM7LzBrNk3WOJVlm9k0v9Td3QDngF2zCcaZUv\/FYX+\/gQMLIxA7Anv1fZ0m+Vo01xA4IKAv1xGxt9e8CecsjECcCLQ1oO\/fNOm2CXi68uY6pkhjRKR9o7mLj4xARASg2PRgB82+OlOp6A4IkmwTUKev1Hc4vnpZ10H+wwjEhUDdtKyW+DyYZgcBnaZqrEEDshYMwsTnURAAl9D7JduveubcuZvBtDoI2OyZqBu4gbVgECY+j4LA7u5L\/Ti5+G6F0+kgIC6SFrxOY8JVsLZe3wvfz2FGYCQEgrbf2crKZ+GHuwgILSh96ypufPmqzo7pMGIcHhoBLPMAh7SEbD+TSBcBceFU5dxt0yPefdFUn+YBPjICwyIAM05PvbLE7bDtZ9LoSUBcpGG539Ohtt9ocFNs0OLj0AjAfNvb1z7lmutN6Ra118N9CagnqvpKd5mhRnnVXC\/4OK4XAsGmV1ni6nJludrrPsT1JSAunq6sXKfJqjfgnMZeHkxCoMJyGALgCLgCzlCv90a\/ptekcSgBcZPt+59h8Bht+fPnL7hTYpDjYxcCIB040hzxUBtnKitXum4KRQwkIHrFru9\/DNeMR9O1nj0ndvM+MiEYOQjyPUMriSl95HD2\/OmPh0FlIAGRCOxBUq3vMwmHgbR493STb+r9w+y+IEJDERAP9CIh24RBKIt5Dg50ar7hyQfEhiYgbg6TkDsmQKW4YjocB83uaOQDciMREA8YEpqOybNnz9lPCGAKJvDzoe5Nh8PzRycfIBuZgHgIJDy9svKOcdG8ePlKYMCZm2Sgk28xPV3UOc7hanlB\/YNhbb4wOmMR0CRyamXlivKFHjGB1xtNMs+oNujk7witt13bERgdI6kJX12Fq6XSWt8xzhtHIiAyPFM5d5MWMr1DY8e3oY4xdoxC8nzCcaojm8+gLqFcjNbDPAHXn3oHAxVRS2xFTSD4\/KPNrctCqmuWsMqIx6772Gkhym4L4VVevCoOyPaXOPEC8TChwCgT+Peoxbt6FpNVYpJYCWjK9Hjz3mdKikuGiPgEmCbj7PTIn4KIE1BTvjwfo+AFmw5rw7EyEqYUwi1Bc3tjV\/jXozS3JrHgMRECmgzCGtHEg4y2Y2sySlsKx7bNpa5jFEC7EitAxLB46Q4EEWyf9gOCGwW7YuiNCQ5Ip7\/jQSz8bpeWasRNPFMViRLQZPJo8+dV2vjjsiXFBXorOu8WaEmbfvhkLEipj3SOD2oj3oh96hRtbN1ZbNyLX5HEECj8zo3Hj3UUrmMjSLl0sukqoXPEYWsMfY3s9Z5C9p3wsEZcruuVkj1vii8y9Vrb3NwsHRf2mpJqlVhzntAo9yMlXtN80d28slxcMqd87IHAKHhhWz7sjKY8bBZurT8X3npSmq5HUXVU6gTsV5AHmw\/KjnDLBEqJyFmm+0oEzop6+pQ6XQJhLdbiYonCJRPGkT43i3BHXPB6Ts9rhFUt\/G7+9nYVcWS94VrNWloSrd3PatgPnLCqusKpjuu3Q9pxyv8BVb3XBNS3Vn0AAAAASUVORK5CYII=\n name: Complex PDF with Images & Tables\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1750400203722-source-1751281136356-target\n selected: false\n source: '1750400203722'\n sourceHandle: source\n target: '1751281136356'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751338398711-source-1750400198569-target\n selected: false\n source: '1751338398711'\n sourceHandle: source\n target: '1750400198569'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: tool\n id: 1751281136356-source-1751338398711-target\n selected: false\n source: '1751281136356'\n sourceHandle: source\n target: '1751338398711'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius\/jina\/jina\n index_chunk_variable_selector:\n - '1751338398711'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius\/jina\/jina\n score_threshold: 0\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n weights: null\n selected: true\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 355.92518399555183\n y: 282\n positionAbsolute:\n x: 355.92518399555183\n y: 282\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius\/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File Upload\n type: datasource\n height: 52\n id: '1750400203722'\n position:\n x: -579\n y: 282\n positionAbsolute:\n x: -579\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 337\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 358\n height: 337\n id: '1751264451381'\n position:\n x: -990.8091030156684\n y: 282\n positionAbsolute:\n x: -990.8091030156684\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 358\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n \u2192 use extractor to extract document content \u2192 split and clean content into\n structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -579\n y: -22.64803881585007\n positionAbsolute:\n x: -579\n y: -22.64803881585007\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 541\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor for large language models (LLMs) like MinerU is a tool\n that preprocesses and converts diverse document types into structured, clean,\n and machine-readable data. This structured data can then be used to train\n or augment LLMs and retrieval-augmented generation (RAG) systems by providing\n them with accurate, well-organized content from varied sources. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n is an advanced open-source document extractor designed specifically to convert\n complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into\n high-quality, machine-readable formats like Markdown and JSON. MinerU addresses\n challenges in document parsing such as layout detection, formula recognition,\n and multi-language support, which are critical for generating high-quality\n training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 541\n id: '1751266402561'\n position:\n x: -263.7680017647218\n y: 558.328085421591\n positionAbsolute:\n x: -263.7680017647218\n y: 558.328085421591\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such\n as a paragraph, a section, or even an entire document\u2014that include the matched\n child chunks are then retrieved. These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 42.95253988413964\n y: 366.1915342509804\n positionAbsolute:\n x: 42.95253988413964\n y: 366.1915342509804\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only\n support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 355.92518399555183\n y: 434.6494699299023\n positionAbsolute:\n x: 355.92518399555183\n y: 434.6494699299023\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n credential_id: fd1cbc33-1481-47ee-9af2-954b53d350e0\n is_team_authorization: false\n output_schema:\n properties:\n full_zip_url:\n description: The zip URL of the complete parsed result\n type: string\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: \u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)\n pt_BR: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n zh_Hans: \u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: (For local deployment service)Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u30b5\u30fc\u30d3\u30b9\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044\n pt_BR: (For local deployment service)Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72\u670d\u52a1\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr\n label:\n en_US: parse method\n ja_JP: \u89e3\u6790\u65b9\u6cd5\n pt_BR: parse method\n zh_Hans: \u89e3\u6790\u65b9\u6cd5\n llm_description: Parsing method, can be auto, ocr, or txt. Default is auto.\n If results are not satisfactory, try ocr\n max: null\n min: null\n name: parse_method\n options:\n - label:\n en_US: auto\n ja_JP: auto\n pt_BR: auto\n zh_Hans: auto\n value: auto\n - label:\n en_US: ocr\n ja_JP: ocr\n pt_BR: ocr\n zh_Hans: ocr\n value: ocr\n - label:\n en_US: txt\n ja_JP: txt\n pt_BR: txt\n zh_Hans: txt\n value: txt\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API) Whether to enable formula recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable formula recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n label:\n en_US: Enable formula recognition\n ja_JP: \u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable formula recognition\n zh_Hans: \u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n llm_description: (For official API) Whether to enable formula recognition\n max: null\n min: null\n name: enable_formula\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API) Whether to enable table recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable table recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b\n label:\n en_US: Enable table recognition\n ja_JP: \u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable table recognition\n zh_Hans: \u5f00\u542f\u8868\u683c\u8bc6\u522b\n llm_description: (For official API) Whether to enable table recognition\n max: null\n min: null\n name: enable_table\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: doclayout_yolo\n form: form\n human_description:\n en_US: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. doclayout_yolo is a self-developed\n model with better effect'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\uff1adoclayout_yolo\u3001layoutlmv3\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u5024\u306f doclayout_yolo\u3002doclayout_yolo\n \u306f\u81ea\u5df1\u958b\u767a\u30e2\u30c7\u30eb\u3067\u3001\u52b9\u679c\u304c\u3088\u308a\u826f\u3044\n pt_BR: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. doclayout_yolo is a self-developed\n model with better effect'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u53ef\u9009\u503c\uff1adoclayout_yolo\u3001layoutlmv3\uff0c\u9ed8\u8ba4\u503c\u4e3a doclayout_yolo\u3002doclayout_yolo\n \u4e3a\u81ea\u7814\u6a21\u578b\uff0c\u6548\u679c\u66f4\u597d\n label:\n en_US: Layout model\n ja_JP: \u30ec\u30a4\u30a2\u30a6\u30c8\u691c\u51fa\u30e2\u30c7\u30eb\n pt_BR: Layout model\n zh_Hans: \u5e03\u5c40\u68c0\u6d4b\u6a21\u578b\n llm_description: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. doclayout_yolo is a self-developed model\n withbetter effect'\n max: null\n min: null\n name: layout_model\n options:\n - label:\n en_US: doclayout_yolo\n ja_JP: doclayout_yolo\n pt_BR: doclayout_yolo\n zh_Hans: doclayout_yolo\n value: doclayout_yolo\n - label:\n en_US: layoutlmv3\n ja_JP: layoutlmv3\n pt_BR: layoutlmv3\n zh_Hans: layoutlmv3\n value: layoutlmv3\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: '(For official API) Specify document language, default ch, can\n be set to auto, when auto, the model will automatically identify document\n language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5\n pt_BR: '(For official API) Specify document language, default ch, can\n be set to auto, when auto, the model will automatically identify document\n language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5\n label:\n en_US: Document language\n ja_JP: \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\n pt_BR: Document language\n zh_Hans: \u6587\u6863\u8bed\u8a00\n llm_description: '(For official API) Specify document language, default\n ch, can be set to auto, when auto, the model will automatically identify\n document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n max: null\n min: null\n name: language\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 0\n form: form\n human_description:\n en_US: (For official API) Whether to enable OCR recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable OCR recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b\n label:\n en_US: Enable OCR recognition\n ja_JP: OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable OCR recognition\n zh_Hans: \u5f00\u542fOCR\u8bc6\u522b\n llm_description: (For official API) Whether to enable OCR recognition\n max: null\n min: null\n name: enable_ocr\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: '[]'\n form: form\n human_description:\n en_US: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059\n pt_BR: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a\n label:\n en_US: Extra export formats\n ja_JP: \u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\n pt_BR: Extra export formats\n zh_Hans: \u989d\u5916\u5bfc\u51fa\u683c\u5f0f\n llm_description: '(For official API) Example: [\"docx\",\"html\"], markdown,\n json are the default export formats, no need to set, this parameter only\n supports one or more of docx, html, latex'\n max: null\n min: null\n name: extra_formats\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n enable_formula: ''\n enable_ocr: ''\n enable_table: ''\n extra_formats: ''\n file: ''\n language: ''\n layout_model: ''\n parse_method: ''\n provider_id: langgenius\/mineru\/mineru\n provider_name: langgenius\/mineru\/mineru\n provider_type: builtin\n selected: false\n title: MinerU\n tool_configurations:\n enable_formula:\n type: constant\n value: 1\n enable_ocr:\n type: constant\n value: 0\n enable_table:\n type: constant\n value: 1\n extra_formats:\n type: constant\n value: '[]'\n language:\n type: constant\n value: auto\n layout_model:\n type: constant\n value: doclayout_yolo\n parse_method:\n type: constant\n value: auto\n tool_description: a tool for parsing text, tables, and images, supporting\n multiple formats such as pdf, pptx, docx, etc. supporting multiple languages\n such as English, Chinese, etc.\n tool_label: Parse File\n tool_name: parse-file\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1750400203722'\n - file\n type: tool\n height: 244\n id: '1751281136356'\n position:\n x: -263.7680017647218\n y: 282\n positionAbsolute:\n x: -263.7680017647218\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks result\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conte\u00fado de Entrada\n zh_Hans: \u8f93\u5165\u6587\u672c\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em par\u00e1grafos com base no separador e no comprimento\n m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuper\u00e1-lo.\n zh_Hans: \u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: \u7236\u5757\u6a21\u5f0f\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Par\u00e1grafo\n zh_Hans: \u6bb5\u843d\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: \u5168\u6587\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: \u7236\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento m\u00e1ximo para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento M\u00e1ximo do Bloco Pai\n zh_Hans: \u6700\u5927\u7236\u5757\u957f\u5ea6\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. '\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento m\u00e1ximo para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento M\u00e1ximo de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espa\u00e7os extras no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espa\u00e7os consecutivos, novas linhas e guias\n zh_Hans: \u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: \u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_name: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1751281136356.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n type: tool\n height: 52\n id: '1751338398711'\n position:\n x: 42.95253988413964\n y: 282\n positionAbsolute:\n x: 42.95253988413964\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 628.3302331655243\n y: 120.08894361588159\n zoom: 0.7027501395646496\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_2\n",
"graph": {
"edges": [
{
"data": {
"isInLoop": false,
"sourceType": "datasource",
"targetType": "tool"
},
"id": "1750400203722-source-1751281136356-target",
"selected": false,
"source": "1750400203722",
"sourceHandle": "source",
"target": "1751281136356",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "knowledge-index"
},
"id": "1751338398711-source-1750400198569-target",
"selected": false,
"source": "1751338398711",
"sourceHandle": "source",
"target": "1750400198569",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
},
{
"data": {
"isInLoop": false,
"sourceType": "tool",
"targetType": "tool"
},
"id": "1751281136356-source-1751338398711-target",
"selected": false,
"source": "1751281136356",
"sourceHandle": "source",
"target": "1751338398711",
"targetHandle": "target",
"type": "custom",
"zIndex": 0
}
],
"nodes": [
{
"data": {
"chunk_structure": "hierarchical_model",
"embedding_model": "jina-embeddings-v2-base-en",
"embedding_model_provider": "langgenius\/jina\/jina",
"index_chunk_variable_selector": [
"1751338398711",
"result"
],
"indexing_technique": "high_quality",
"keyword_number": 10,
"retrieval_model": {
"reranking_enable": true,
"reranking_mode": "reranking_model",
"reranking_model": {
"reranking_model_name": "jina-reranker-v1-base-en",
"reranking_provider_name": "langgenius\/jina\/jina"
},
"score_threshold": 0,
"score_threshold_enabled": false,
"search_method": "hybrid_search",
"top_k": 3,
"weights": null
},
"selected": true,
"title": "Knowledge Base",
"type": "knowledge-index"
},
"height": 114,
"id": "1750400198569",
"position": {
"x": 355.92518399555183,
"y": 282
},
"positionAbsolute": {
"x": 355.92518399555183,
"y": 282
},
"selected": true,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"datasource_configurations": {},
"datasource_label": "File",
"datasource_name": "upload-file",
"datasource_parameters": {},
"fileExtensions": [
"txt",
"markdown",
"mdx",
"pdf",
"html",
"xlsx",
"xls",
"vtt",
"properties",
"doc",
"docx",
"csv",
"eml",
"msg",
"pptx",
"xml",
"epub",
"ppt",
"md"
],
"plugin_id": "langgenius\/file",
"provider_name": "file",
"provider_type": "local_file",
"selected": false,
"title": "File Upload",
"type": "datasource"
},
"height": 52,
"id": "1750400203722",
"position": {
"x": -579,
"y": 282
},
"positionAbsolute": {
"x": -579,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 337,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 358
},
"height": 337,
"id": "1751264451381",
"position": {
"x": -990.8091030156684,
"y": 282
},
"positionAbsolute": {
"x": -990.8091030156684,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 358
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 260,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source \u2192 use extractor to extract document content \u2192 split and clean content into structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 1182
},
"height": 260,
"id": "1751266376760",
"position": {
"x": -579,
"y": -22.64803881585007
},
"positionAbsolute": {
"x": -579,
"y": -22.64803881585007
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 1182
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 541,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor for large language models (LLMs) like MinerU is a tool that preprocesses and converts diverse document types into structured, clean, and machine-readable data. This structured data can then be used to train or augment LLMs and retrieval-augmented generation (RAG) systems by providing them with accurate, well-organized content from varied sources. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is an advanced open-source document extractor designed specifically to convert complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into high-quality, machine-readable formats like Markdown and JSON. MinerU addresses challenges in document parsing such as layout detection, formula recognition, and multi-language support, which are critical for generating high-quality training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 541,
"id": "1751266402561",
"position": {
"x": -263.7680017647218,
"y": 558.328085421591
},
"positionAbsolute": {
"x": -263.7680017647218,
"y": 558.328085421591
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 554,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such as a paragraph, a section, or even an entire document\u2014that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 554,
"id": "1751266447821",
"position": {
"x": 42.95253988413964,
"y": 366.1915342509804
},
"positionAbsolute": {
"x": 42.95253988413964,
"y": 366.1915342509804
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"author": "TenTen",
"desc": "",
"height": 411,
"selected": false,
"showAuthor": true,
"text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
"theme": "blue",
"title": "",
"type": "",
"width": 240
},
"height": 411,
"id": "1751266580099",
"position": {
"x": 355.92518399555183,
"y": 434.6494699299023
},
"positionAbsolute": {
"x": 355.92518399555183,
"y": 434.6494699299023
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom-note",
"width": 240
},
{
"data": {
"credential_id": "fd1cbc33-1481-47ee-9af2-954b53d350e0",
"is_team_authorization": false,
"output_schema": {
"properties": {
"full_zip_url": {
"description": "The zip URL of the complete parsed result",
"type": "string"
},
"images": {
"description": "The images extracted from the file",
"items": {
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"ja_JP": "\u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)",
"pt_BR": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"
},
"label": {
"en_US": "file",
"ja_JP": "file",
"pt_BR": "file",
"zh_Hans": "file"
},
"llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
"max": null,
"min": null,
"name": "file",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "file"
},
{
"auto_generate": null,
"default": "auto",
"form": "form",
"human_description": {
"en_US": "(For local deployment service)Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
"ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u30b5\u30fc\u30d3\u30b9\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044",
"pt_BR": "(For local deployment service)Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
"zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72\u670d\u52a1\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr"
},
"label": {
"en_US": "parse method",
"ja_JP": "\u89e3\u6790\u65b9\u6cd5",
"pt_BR": "parse method",
"zh_Hans": "\u89e3\u6790\u65b9\u6cd5"
},
"llm_description": "Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
"max": null,
"min": null,
"name": "parse_method",
"options": [
{
"label": {
"en_US": "auto",
"ja_JP": "auto",
"pt_BR": "auto",
"zh_Hans": "auto"
},
"value": "auto"
},
{
"label": {
"en_US": "ocr",
"ja_JP": "ocr",
"pt_BR": "ocr",
"zh_Hans": "ocr"
},
"value": "ocr"
},
{
"label": {
"en_US": "txt",
"ja_JP": "txt",
"pt_BR": "txt",
"zh_Hans": "txt"
},
"value": "txt"
}
],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "select"
},
{
"auto_generate": null,
"default": 1,
"form": "form",
"human_description": {
"en_US": "(For official API) Whether to enable formula recognition",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
"pt_BR": "(For official API) Whether to enable formula recognition",
"zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b"
},
"label": {
"en_US": "Enable formula recognition",
"ja_JP": "\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
"pt_BR": "Enable formula recognition",
"zh_Hans": "\u5f00\u542f\u516c\u5f0f\u8bc6\u522b"
},
"llm_description": "(For official API) Whether to enable formula recognition",
"max": null,
"min": null,
"name": "enable_formula",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": 1,
"form": "form",
"human_description": {
"en_US": "(For official API) Whether to enable table recognition",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
"pt_BR": "(For official API) Whether to enable table recognition",
"zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b"
},
"label": {
"en_US": "Enable table recognition",
"ja_JP": "\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
"pt_BR": "Enable table recognition",
"zh_Hans": "\u5f00\u542f\u8868\u683c\u8bc6\u522b"
},
"llm_description": "(For official API) Whether to enable table recognition",
"max": null,
"min": null,
"name": "enable_table",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": "doclayout_yolo",
"form": "form",
"human_description": {
"en_US": "(For official API) Optional values: doclayout_yolo, layoutlmv3, default value is doclayout_yolo. doclayout_yolo is a self-developed model with better effect",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\uff1adoclayout_yolo\u3001layoutlmv3\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u5024\u306f doclayout_yolo\u3002doclayout_yolo \u306f\u81ea\u5df1\u958b\u767a\u30e2\u30c7\u30eb\u3067\u3001\u52b9\u679c\u304c\u3088\u308a\u826f\u3044",
"pt_BR": "(For official API) Optional values: doclayout_yolo, layoutlmv3, default value is doclayout_yolo. doclayout_yolo is a self-developed model with better effect",
"zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u53ef\u9009\u503c\uff1adoclayout_yolo\u3001layoutlmv3\uff0c\u9ed8\u8ba4\u503c\u4e3a doclayout_yolo\u3002doclayout_yolo \u4e3a\u81ea\u7814\u6a21\u578b\uff0c\u6548\u679c\u66f4\u597d"
},
"label": {
"en_US": "Layout model",
"ja_JP": "\u30ec\u30a4\u30a2\u30a6\u30c8\u691c\u51fa\u30e2\u30c7\u30eb",
"pt_BR": "Layout model",
"zh_Hans": "\u5e03\u5c40\u68c0\u6d4b\u6a21\u578b"
},
"llm_description": "(For official API) Optional values: doclayout_yolo, layoutlmv3, default value is doclayout_yolo. doclayout_yolo is a self-developed model withbetter effect",
"max": null,
"min": null,
"name": "layout_model",
"options": [
{
"label": {
"en_US": "doclayout_yolo",
"ja_JP": "doclayout_yolo",
"pt_BR": "doclayout_yolo",
"zh_Hans": "doclayout_yolo"
},
"value": "doclayout_yolo"
},
{
"label": {
"en_US": "layoutlmv3",
"ja_JP": "layoutlmv3",
"pt_BR": "layoutlmv3",
"zh_Hans": "layoutlmv3"
},
"value": "layoutlmv3"
}
],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "select"
},
{
"auto_generate": null,
"default": "auto",
"form": "form",
"human_description": {
"en_US": "(For official API) Specify document language, default ch, can be set to auto, when auto, the model will automatically identify document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5",
"pt_BR": "(For official API) Specify document language, default ch, can be set to auto, when auto, the model will automatically identify document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5",
"zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5"
},
"label": {
"en_US": "Document language",
"ja_JP": "\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e",
"pt_BR": "Document language",
"zh_Hans": "\u6587\u6863\u8bed\u8a00"
},
"llm_description": "(For official API) Specify document language, default ch, can be set to auto, when auto, the model will automatically identify document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5",
"max": null,
"min": null,
"name": "language",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": 0,
"form": "form",
"human_description": {
"en_US": "(For official API) Whether to enable OCR recognition",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
"pt_BR": "(For official API) Whether to enable OCR recognition",
"zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b"
},
"label": {
"en_US": "Enable OCR recognition",
"ja_JP": "OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
"pt_BR": "Enable OCR recognition",
"zh_Hans": "\u5f00\u542fOCR\u8bc6\u522b"
},
"llm_description": "(For official API) Whether to enable OCR recognition",
"max": null,
"min": null,
"name": "enable_ocr",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": "[]",
"form": "form",
"human_description": {
"en_US": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
"ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059",
"pt_BR": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
"zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a"
},
"label": {
"en_US": "Extra export formats",
"ja_JP": "\u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f",
"pt_BR": "Extra export formats",
"zh_Hans": "\u989d\u5916\u5bfc\u51fa\u683c\u5f0f"
},
"llm_description": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
"max": null,
"min": null,
"name": "extra_formats",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
}
],
"params": {
"enable_formula": "",
"enable_ocr": "",
"enable_table": "",
"extra_formats": "",
"file": "",
"language": "",
"layout_model": "",
"parse_method": ""
},
"provider_id": "langgenius\/mineru\/mineru",
"provider_name": "langgenius\/mineru\/mineru",
"provider_type": "builtin",
"selected": false,
"title": "MinerU",
"tool_configurations": {
"enable_formula": {
"type": "constant",
"value": 1
},
"enable_ocr": {
"type": "constant",
"value": 0
},
"enable_table": {
"type": "constant",
"value": 1
},
"extra_formats": {
"type": "constant",
"value": "[]"
},
"language": {
"type": "constant",
"value": "auto"
},
"layout_model": {
"type": "constant",
"value": "doclayout_yolo"
},
"parse_method": {
"type": "constant",
"value": "auto"
}
},
"tool_description": "a tool for parsing text, tables, and images, supporting multiple formats such as pdf, pptx, docx, etc. supporting multiple languages such as English, Chinese, etc.",
"tool_label": "Parse File",
"tool_name": "parse-file",
"tool_node_version": "2",
"tool_parameters": {
"file": {
"type": "variable",
"value": [
"1750400203722",
"file"
]
}
},
"type": "tool"
},
"height": 244,
"id": "1751281136356",
"position": {
"x": -263.7680017647218,
"y": 282
},
"positionAbsolute": {
"x": -263.7680017647218,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
},
{
"data": {
"is_team_authorization": true,
"output_schema": {
"properties": {
"result": {
"description": "Parent child chunks result",
"items": {
"type": "object"
},
"type": "array"
}
},
"type": "object"
},
"paramSchemas": [
{
"auto_generate": null,
"default": null,
"form": "llm",
"human_description": {
"en_US": "",
"ja_JP": "",
"pt_BR": "",
"zh_Hans": ""
},
"label": {
"en_US": "Input Content",
"ja_JP": "Input Content",
"pt_BR": "Conte\u00fado de Entrada",
"zh_Hans": "\u8f93\u5165\u6587\u672c"
},
"llm_description": "The text you want to chunk.",
"max": null,
"min": null,
"name": "input_text",
"options": [],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": "paragraph",
"form": "llm",
"human_description": {
"en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"pt_BR": "Dividir texto em par\u00e1grafos com base no separador e no comprimento m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuper\u00e1-lo.",
"zh_Hans": "\u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002"
},
"label": {
"en_US": "Parent Mode",
"ja_JP": "Parent Mode",
"pt_BR": "Modo Pai",
"zh_Hans": "\u7236\u5757\u6a21\u5f0f"
},
"llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
"max": null,
"min": null,
"name": "parent_mode",
"options": [
{
"label": {
"en_US": "Paragraph",
"ja_JP": "Paragraph",
"pt_BR": "Par\u00e1grafo",
"zh_Hans": "\u6bb5\u843d"
},
"value": "paragraph"
},
{
"label": {
"en_US": "Full Document",
"ja_JP": "Full Document",
"pt_BR": "Documento Completo",
"zh_Hans": "\u5168\u6587"
},
"value": "full_doc"
}
],
"placeholder": null,
"precision": null,
"required": true,
"scope": null,
"template": null,
"type": "select"
},
{
"auto_generate": null,
"default": "\n\n",
"form": "llm",
"human_description": {
"en_US": "Separator used for chunking",
"ja_JP": "Separator used for chunking",
"pt_BR": "Separador usado para divis\u00e3o",
"zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26"
},
"label": {
"en_US": "Parent Delimiter",
"ja_JP": "Parent Delimiter",
"pt_BR": "Separador de Pai",
"zh_Hans": "\u7236\u5757\u5206\u9694\u7b26"
},
"llm_description": "The separator used to split chunks",
"max": null,
"min": null,
"name": "separator",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": 1024,
"form": "llm",
"human_description": {
"en_US": "Maximum length for chunking",
"ja_JP": "Maximum length for chunking",
"pt_BR": "Comprimento m\u00e1ximo para divis\u00e3o",
"zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6"
},
"label": {
"en_US": "Maximum Parent Chunk Length",
"ja_JP": "Maximum Parent Chunk Length",
"pt_BR": "Comprimento M\u00e1ximo do Bloco Pai",
"zh_Hans": "\u6700\u5927\u7236\u5757\u957f\u5ea6"
},
"llm_description": "Maximum length allowed per chunk",
"max": null,
"min": null,
"name": "max_length",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": ". ",
"form": "llm",
"human_description": {
"en_US": "Separator used for subchunking",
"ja_JP": "Separator used for subchunking",
"pt_BR": "Separador usado para subdivis\u00e3o",
"zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26"
},
"label": {
"en_US": "Child Delimiter",
"ja_JP": "Child Delimiter",
"pt_BR": "Separador de Subdivis\u00e3o",
"zh_Hans": "\u5b50\u5206\u5757\u5206\u9694\u7b26"
},
"llm_description": "The separator used to split subchunks",
"max": null,
"min": null,
"name": "subchunk_separator",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "string"
},
{
"auto_generate": null,
"default": 512,
"form": "llm",
"human_description": {
"en_US": "Maximum length for subchunking",
"ja_JP": "Maximum length for subchunking",
"pt_BR": "Comprimento m\u00e1ximo para subdivis\u00e3o",
"zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6"
},
"label": {
"en_US": "Maximum Child Chunk Length",
"ja_JP": "Maximum Child Chunk Length",
"pt_BR": "Comprimento M\u00e1ximo de Subdivis\u00e3o",
"zh_Hans": "\u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6"
},
"llm_description": "Maximum length allowed per subchunk",
"max": null,
"min": null,
"name": "subchunk_max_length",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "number"
},
{
"auto_generate": null,
"default": 0,
"form": "llm",
"human_description": {
"en_US": "Whether to remove consecutive spaces, newlines and tabs",
"ja_JP": "Whether to remove consecutive spaces, newlines and tabs",
"pt_BR": "Se deve remover espa\u00e7os extras no texto",
"zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26"
},
"label": {
"en_US": "Replace consecutive spaces, newlines and tabs",
"ja_JP": "Replace consecutive spaces, newlines and tabs",
"pt_BR": "Substituir espa\u00e7os consecutivos, novas linhas e guias",
"zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26"
},
"llm_description": "Whether to remove consecutive spaces, newlines and tabs",
"max": null,
"min": null,
"name": "remove_extra_spaces",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
},
{
"auto_generate": null,
"default": 0,
"form": "llm",
"human_description": {
"en_US": "Whether to remove URLs and emails in the text",
"ja_JP": "Whether to remove URLs and emails in the text",
"pt_BR": "Se deve remover URLs e e-mails no texto",
"zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"
},
"label": {
"en_US": "Delete all URLs and email addresses",
"ja_JP": "Delete all URLs and email addresses",
"pt_BR": "Remover todas as URLs e e-mails",
"zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"
},
"llm_description": "Whether to remove URLs and emails in the text",
"max": null,
"min": null,
"name": "remove_urls_emails",
"options": [],
"placeholder": null,
"precision": null,
"required": false,
"scope": null,
"template": null,
"type": "boolean"
}
],
"params": {
"input_text": "",
"max_length": "",
"parent_mode": "",
"remove_extra_spaces": "",
"remove_urls_emails": "",
"separator": "",
"subchunk_max_length": "",
"subchunk_separator": ""
},
"provider_id": "langgenius\/parentchild_chunker\/parentchild_chunker",
"provider_name": "langgenius\/parentchild_chunker\/parentchild_chunker",
"provider_type": "builtin",
"selected": false,
"title": "Parent-child Chunker",
"tool_configurations": {},
"tool_description": "Process documents into parent-child chunk structures",
"tool_label": "Parent-child Chunker",
"tool_name": "parentchild_chunker",
"tool_node_version": "2",
"tool_parameters": {
"input_text": {
"type": "mixed",
"value": "{{#1751281136356.text#}}"
},
"max_length": {
"type": "variable",
"value": [
"rag",
"shared",
"Maximum_Parent_Length"
]
},
"parent_mode": {
"type": "variable",
"value": [
"rag",
"shared",
"Parent_Mode"
]
},
"remove_extra_spaces": {
"type": "variable",
"value": [
"rag",
"shared",
"clean_1"
]
},
"remove_urls_emails": {
"type": "variable",
"value": [
"rag",
"shared",
"clean_2"
]
},
"separator": {
"type": "mixed",
"value": "{{#rag.shared.Parent_Delimiter#}}"
},
"subchunk_max_length": {
"type": "variable",
"value": [
"rag",
"shared",
"Maximum_Child_Length"
]
},
"subchunk_separator": {
"type": "mixed",
"value": "{{#rag.shared.Child_Delimiter#}}"
}
},
"type": "tool"
},
"height": 52,
"id": "1751338398711",
"position": {
"x": 42.95253988413964,
"y": 282
},
"positionAbsolute": {
"x": 42.95253988413964,
"y": 282
},
"selected": false,
"sourcePosition": "right",
"targetPosition": "left",
"type": "custom",
"width": 242
}
],
"viewport": {
"x": 628.3302331655243,
"y": 120.08894361588159,
"zoom": 0.7027501395646496
}
},
"icon_info": {
"icon": "87426868-91d6-4774-a535-5fd4595a77b3",
"icon_background": null,
"icon_type": "image",
"icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAARwElEQVR4Ae1dvXPcxhVfLMAP0RR1pL7MGVu8G7sXXdszotNYne1x6kgpktZSiiRNIrtMilgqnNZSb4\/lzm4i5i8w1TvDE+UZyZIlnihKOvIAbN5v7\/aIw93xPvBBHPDezBHYBbC7+O2Pb9++\/YAlMiIPHjwoO65btpQqK6VKVKySsqwV9fQpSliy6IcTubhYxrFTrJJqXe+Mz2+I8KgJoeh3IIRBTW1vt+MoXLWWlgRheo\/uqlmWVSVMa67jVJeXl6sHTx7dGb1HurK9uVnybHtNKXFBWAKEW1XCKvcrhb+tCdi+LBeX2ud80o3AaHipDUGkFErdJXJu2J63vliptAncnXr8MakQ8PH9+2tU9Av0omtCCZx3iZSSsLCE49j6iHPE+U+fCEnnCEOmTp\/uehbXzPWuizmNoFaC4CQdFxCE3V9\/bcd4vk8txpLwW\/f6FPZ9RT8c\/fZ9nSdESmGtK1veOvPGG3SerCRGQGg6V8rLxIwPg6QDUWzb1kTDcXrKaROu16v6T550RMuTJzvCHOhEYBS8PM8TIGmj4QrX9ejndiRG5Kj6lvj8zLlzNzsuxBiInYCaeI7zqeWrK8YuA+lmZqbF9PSUcIh0o2irUQCNEZeJTSoqXg0i4d7evial0ZIgopLWzdNvvvl53MDESsBfNrc+sqX6wth0juOIublZMUXHcSUqoOPmO6nPxYkXiFinn9GMIGLcGjEWApLWK7u2\/ZVpauMgniFAnICaNPN8TAIvaMXd3ZcHdqMlbjve1NXFSvSetIxaGU\/u3\/\/Uk\/aPIB+a1rm5Y+LEwnwkrRe1TPx8vAigBVssLYj51+Z0x5Dq+iNXNn58tLV1OWpOYxMQtt7jra0vqFd1HbYe7DsU8tjsTNQy8fMZRQB2PJQLjiQlS4mvwIEoxR2rCdZNrpTfUnd9FVrv2LHZxIiXRJMSBbCsP5sWXvX6nnj1qq5dPOQQ33D86Y\/HaZJH1oAgnyflHZAPfrrSieOJkS\/rlV3k8s1SS3eC6h4cABc82bizvfmgPComIxHQkA+9XPjwoI6bBRg1W74\/Dwig7sEBuNbIDCPFNDoJhyYgky8PlIn\/HUDChQgkHIqAvcg3ijM5\/tfmFLOEALgwLgmHIiANqX0bbHaZfFmq\/myUJUxCV+5\/S4qrNKh0AwnY7GY3OxwLx18baRhtUOZ8PV8IgITHiSOmY0KDE9cGveGhBHy0SY5GJa4gYe5wDIKSrwMB0zHBDCZw5+G9e1cOQ6YvAWH3kX2pnYzw8zVZfVhSfI0RaCIAroAzEJp6cu0w90xfApL6pEkFogSvN49uNIHlv8MjAD8hRsdISq7d+Krfkz0J2Gp6PwKT51pM7pcAxzMC\/RDQY8fNpnjtV5op1eu+ngSUUmnjEeTjprcXbBw3DALoO5imWJA516tX3EVAmt1yDS4XEK816DxMXnwPI9ATATTFmJ5H5lx5X8quDkkXAZXvX0ZK8\/NzPRPkSEZgVAQwKRlCq34+DWvBDgLC9oP2w\/yvKLOYdW78hxFoIQAuQQuSNNcJBZDpIKCx\/bjpDSDEp7EgYLQgjWR8GEywTcBHmz\/r9bls+wXh4fO4EIAWbDmn1x5v3l8z6bYJKKV3GZFTtEyShRFIAoHp5kxq4Ut\/zaTfJqAS8gIiufk10PAxbgRajmloQs01pK+n5KNn4kp7GxEnlwZOYMBtqUl4inlqGeckoywt5MfODbXajp7G7\/jeIrYB0RoQe7UAb+755oR1GX0NOKYlzZ6GGM5pAhIzVxFp074sLIxAkghg7x8I7VezhmPTBrSs8wiwBgQKLEkigLVEEIyM4Njs8iqLAtQNsdt9ElzLhGTJhskEIBNeCGxG9YLegaZpaaXXYlyzCcbqJhZGIEkEYAdCjAaUD2jiKSJ41gtQYEkaAd0RoYkuEOyKK2mMroyA3YrEOQsjkCQCRgs6dbcsaYtc7fizZFM1Jpkxp80IAAHTE7ZsVZbkgikjkptgoMCSBgJGAxL3SmiMmxqwZRymUQDOo9gIGAKCe9L0RgKRxUaH3z5xBExrS5xbaTv+9FSZxLPmDBiBTgSId9YKorLohO4sKofygoBRdp5Si20NmJeX4\/fIPgLG40JEPMEEzH595bqEtF7Ool4wLUWa0F7wr+\/\/JlMVdOrOfzrKY8p3\/C9\/FjMXL3ZcK2rADHrQHtPkiBa+dsOYdrmooCT93s\/\/8U+x9\/33SWczcelzE5xilYGEjY2NFHPMflZMwJTraOdvfxfuTz+lnGt2s3O8bb0URPheA+NxsZeU5\/N1Qqp2d8Wzq38SJ774l3DefrvzYgZDSazJ0V\/r3Hmu3xZTEHgoLuWKNyT0Hj5MOedsZBfo8OqhOCbgEdQLSLhDmrCIJOwg4BFgz1m2EAD5ikpCQwIHX9SGyJjWAydhM5jC5vFoSLhANqH9+uuZf8W4bHppNZd\/xN\/ryDyE2SugIWERm2MmYEb4aEgI27BIwgTMUG2DhDXqmBSJhEzADBEQRfHISV0kEjIBM0ZAQ0KMmBRBmIAZrWWMGWPsOO\/CBMxwDWP2TN5JyATMMAFRNJBw98t\/Z7yU4xePCTg+dqk9Wf\/6a\/Hy1q3U8kszIyZgmmhHyOvlzVu5JCETMAIp0n40jyRkAqbNooj55Y2ETMCIhDiKx0HCV19\/cxRZx54nEzB2SNNJ8MWXX+ZikRMTMB2+JJJLHnyE\/FmkRKhxkGh4nfDBFT4DAqwBmQdHigAT8Ejh58yZgMyBI0WAbcCY4Td7wcScbN\/kJt3GZA3Yt2r5QhoIMAHTQJnz6IsAE7AvNHwhDQSYgGmgzHn0RYAJ2BcavpAGAkzANFDmPPoiwATsCw1fSAOBifcDTrofLI1KznIerAGzXDsFKBsTsACVnOVXZAJmuXYKUDYmYAEqOcuvyATMcu0UoGxMwAJUcpZfkQmY5dopQNkmzg846nw7m77Fge9xzH7wgZhaPT+wSodN35qf1+kibef8eTHz3rsD0+51w7D59Xq2V9yk+UUnjoC9QD8sDhs+4odNfqZWV8U8fTQwjs3AsYsptlDTn96ivVt2iZDT770n5i79Lpb0D3unPF0rVBMMstT+8MdEPpUFQoLkSD8vi8bTIHqhCAhAQRR8KiupHemRPhaN53lLtTiJOfFN8CCbp7FxV9RJM+398EMbN5Bkl3YfxffaBkm\/9P2Hv2gSI2337t0uQmNLNeSD7wSPIv3yGyWNSbp34gk4CGx0PPCD3RfcY8\/Yb7ALxxH5+lmBn+nY7H3\/g04\/qFnRJDtvvSWO\/faTcbIoxDOFaYLnLl\/SnZBgrYI0ccnMxQ9Er68doTnmz7P2R7kwBAQE6KEGpUFNZ5wCLdubhPndYjcqfoUiYPj7vMHmMiqQ5nmQEK6eoKC5hz3I0o1AoQgI53EaArsybFvWY2zu03iHtPIoFAHRIw5KWCMGr0U9n363c2QEznCWbgQKRcB6wBUDKOTZs92IxBRjescmubjtTZPupB9z74YxFQQXDNwiQZm9eDEYjPU8PNznD2kDjjo2POl+w1wTEIa\/+9P\/tH9Oj9kGKAaCTI85gSCQTN\/TsL3JnZDeUE08AUfVGIAB5IC7hOXoESiUDQi4QT4MwYWbyLirIqzxwhox7vwmNb2J14CjAB\/ndKxB+aLpD8qwhJ90my74zsOc556Akmy9GXKJYK5euGc6DEDj3hMefkuyxz1uGbPw3MQTMKsao\/5N54dkZugfgKUbgcLZgN0QxB+DSQ7hYT5niOUA8Zck+yk6\/vZTXUpfedkv7QSUEMQLTvtCkWdoPcqwNmDWX9F\/8iSWIvq1Zzod1oCxwNlMBOTb6THbGlPBWHoj4FhC1JQQJaWUsCwKsYyFwCuy+fARwbD7Ze7Spdxov7GA6fEQuNaSmkOnNQowAQ0kQx4xJb9BEwwwHR\/T8sPEQzJoeln7dQPaQUB7cVGQ7hOytCCk5BY5DNc4Iy2GfMf\/+pdwchMXlidPxl9m3xfSniLWCTHxbpj40YmWIkY80OzyOpDhcGQCDofTwLtAvGOffKKJx8NuA+Fq38AEbEMx2glIBtfKFG3LgVEW5+239DjzaKkU826\/1QlRQtWsx1tbd8gIXFtYmBdTDvOxmJRI960brit2dmiNjCXWudeRLvacWwgBEBBuGKH8tm8mdAsHGYHkEJDkk9FjIgHfTHK5ccqMACHgeb7GgdwwVW6CmRLpI3AwEiIkWIgSeOQcZGEE0kCg3QtW6t6BDRhgZRqF4DyKi0DA3KtJy7eanRAmYHEZkfKb+8YGtKyqVI5VRf6uy\/MBU66HwmbXboI9qyZd160CiYBaLCww\/OLpIOC3+hvurFOVy5VKFdkikn2B6VRA0XMxBFxeXm66YSyhqgCFxuaKjg2\/f8IIuJ4x9dQGstKDv8qyaAM7UW40XDEzM51wEUZLPq41CKPlmp+7E5nPFwEe0wEhp989JKMd0Rb5YxA4YCdCLIxA\/AhgIgKEiKc1YHMkxLLWEelxTxgwsCSIgPG20PqjAwLanreOPKEBuSOSIPqcNLn7mhrQcE7bgIuVSo3mBa6TK2bN9T0xJbM7LzBrNk3WOJVlm9k0v9Td3QDngF2zCcaZUv\/FYX+\/gQMLIxA7Anv1fZ0m+Vo01xA4IKAv1xGxt9e8CecsjECcCLQ1oO\/fNOm2CXi68uY6pkhjRKR9o7mLj4xARASg2PRgB82+OlOp6A4IkmwTUKev1Hc4vnpZ10H+wwjEhUDdtKyW+DyYZgcBnaZqrEEDshYMwsTnURAAl9D7JduveubcuZvBtDoI2OyZqBu4gbVgECY+j4LA7u5L\/Ti5+G6F0+kgIC6SFrxOY8JVsLZe3wvfz2FGYCQEgrbf2crKZ+GHuwgILSh96ypufPmqzo7pMGIcHhoBLPMAh7SEbD+TSBcBceFU5dxt0yPefdFUn+YBPjICwyIAM05PvbLE7bDtZ9LoSUBcpGG539Ohtt9ocFNs0OLj0AjAfNvb1z7lmutN6Ra118N9CagnqvpKd5mhRnnVXC\/4OK4XAsGmV1ni6nJludrrPsT1JSAunq6sXKfJqjfgnMZeHkxCoMJyGALgCLgCzlCv90a\/ptekcSgBcZPt+59h8Bht+fPnL7hTYpDjYxcCIB040hzxUBtnKitXum4KRQwkIHrFru9\/DNeMR9O1nj0ndvM+MiEYOQjyPUMriSl95HD2\/OmPh0FlIAGRCOxBUq3vMwmHgbR493STb+r9w+y+IEJDERAP9CIh24RBKIt5Dg50ar7hyQfEhiYgbg6TkDsmQKW4YjocB83uaOQDciMREA8YEpqOybNnz9lPCGAKJvDzoe5Nh8PzRycfIBuZgHgIJDy9svKOcdG8ePlKYMCZm2Sgk28xPV3UOc7hanlB\/YNhbb4wOmMR0CRyamXlivKFHjGB1xtNMs+oNujk7witt13bERgdI6kJX12Fq6XSWt8xzhtHIiAyPFM5d5MWMr1DY8e3oY4xdoxC8nzCcaojm8+gLqFcjNbDPAHXn3oHAxVRS2xFTSD4\/KPNrctCqmuWsMqIx6772Gkhym4L4VVevCoOyPaXOPEC8TChwCgT+Peoxbt6FpNVYpJYCWjK9Hjz3mdKikuGiPgEmCbj7PTIn4KIE1BTvjwfo+AFmw5rw7EyEqYUwi1Bc3tjV\/jXozS3JrHgMRECmgzCGtHEg4y2Y2sySlsKx7bNpa5jFEC7EitAxLB46Q4EEWyf9gOCGwW7YuiNCQ5Ip7\/jQSz8bpeWasRNPFMViRLQZPJo8+dV2vjjsiXFBXorOu8WaEmbfvhkLEipj3SOD2oj3oh96hRtbN1ZbNyLX5HEECj8zo3Hj3UUrmMjSLl0sukqoXPEYWsMfY3s9Z5C9p3wsEZcruuVkj1vii8y9Vrb3NwsHRf2mpJqlVhzntAo9yMlXtN80d28slxcMqd87IHAKHhhWz7sjKY8bBZurT8X3npSmq5HUXVU6gTsV5AHmw\/KjnDLBEqJyFmm+0oEzop6+pQ6XQJhLdbiYonCJRPGkT43i3BHXPB6Ts9rhFUt\/G7+9nYVcWS94VrNWloSrd3PatgPnLCqusKpjuu3Q9pxyv8BVb3XBNS3Vn0AAAAASUVORK5CYII="
},
"id": "629cb5b8-490a-48bc-808b-ffc13085cb4f",
"name": "Complex PDF with Images & Tables"
}
}
}