dify
0  dify/api/core/rag/retrieval/__init__.py  Normal file
1224  dify/api/core/rag/retrieval/dataset_retrieval.py  Normal file
File diff suppressed because it is too large
25  dify/api/core/rag/retrieval/output_parser/react_output.py  Normal file
@@ -0,0 +1,25 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import NamedTuple, Union


@dataclass
class ReactAction:
    """A full description of an action for the agent to execute."""

    tool: str
    """The name of the Tool to execute."""
    tool_input: Union[str, dict]
    """The input to pass in to the Tool."""
    log: str
    """Additional information to log about the action."""


class ReactFinish(NamedTuple):
    """The final return value of a ReAct agent run."""

    return_values: dict
    """Dictionary of return values."""
    log: str
    """Additional information to log about the return value."""
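For orientation, here is how the two result types are meant to be populated; all values below are made up for illustration and are not part of this commit:

# Illustrative only: the two shapes a parsed ReAct step can take.
action = ReactAction(tool="dataset-1", tool_input={"query": "pricing"}, log="raw LLM action text")
finish = ReactFinish(return_values={"output": "It costs $10."}, log="raw LLM final answer")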
23  dify/api/core/rag/retrieval/output_parser/structured_chat.py  Normal file
@@ -0,0 +1,23 @@
import json
import re
from typing import Union

from core.rag.retrieval.output_parser.react_output import ReactAction, ReactFinish


class StructuredChatOutputParser:
    def parse(self, text: str) -> Union[ReactAction, ReactFinish]:
        try:
            # Look for a fenced code block containing the action JSON blob.
            action_match = re.search(r"```(\w*)\n?({.*?)```", text, re.DOTALL)
            if action_match is not None:
                response = json.loads(action_match.group(2).strip(), strict=False)
                if isinstance(response, list):
                    # The model emitted multiple actions; keep only the first.
                    response = response[0]
                if response["action"] == "Final Answer":
                    return ReactFinish({"output": response["action_input"]}, text)
                else:
                    return ReactAction(response["action"], response.get("action_input", {}), text)
            else:
                # No JSON blob found: treat the whole text as the final answer.
                return ReactFinish({"output": text}, text)
        except Exception as e:
            raise ValueError(f"Could not parse LLM output: {text}") from e
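A quick sketch of how this parser behaves on typical ReAct output; the sample strings below are illustrative, not taken from dify's tests:

# Illustrative demo of StructuredChatOutputParser (not part of this commit).
parser = StructuredChatOutputParser()

# A tool-selection blob parses to a ReactAction carrying the tool name.
tool_text = 'Action:\n```\n{"action": "dataset-1", "action_input": {"query": "pricing"}}\n```'
step = parser.parse(tool_text)
assert isinstance(step, ReactAction) and step.tool == "dataset-1"

# A "Final Answer" blob parses to a ReactFinish with the output payload.
final_text = 'Action:\n```\n{"action": "Final Answer", "action_input": "Done."}\n```'
done = parser.parse(final_text)
assert isinstance(done, ReactFinish) and done.return_values["output"] == "Done."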
16  dify/api/core/rag/retrieval/retrieval_methods.py  Normal file
@@ -0,0 +1,16 @@
from enum import StrEnum


class RetrievalMethod(StrEnum):
    SEMANTIC_SEARCH = "semantic_search"
    FULL_TEXT_SEARCH = "full_text_search"
    HYBRID_SEARCH = "hybrid_search"
    KEYWORD_SEARCH = "keyword_search"

    @staticmethod
    def is_support_semantic_search(retrieval_method: str) -> bool:
        return retrieval_method in {RetrievalMethod.SEMANTIC_SEARCH, RetrievalMethod.HYBRID_SEARCH}

    @staticmethod
    def is_support_fulltext_search(retrieval_method: str) -> bool:
        return retrieval_method in {RetrievalMethod.FULL_TEXT_SEARCH, RetrievalMethod.HYBRID_SEARCH}
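Because RetrievalMethod subclasses StrEnum, its members hash and compare like their string values, which is what lets the two helpers above accept raw strings (for example, values read from persisted configs). A small illustrative check:

# Illustrative: plain strings match StrEnum members in set-membership tests.
assert RetrievalMethod.HYBRID_SEARCH == "hybrid_search"
assert RetrievalMethod.is_support_semantic_search("hybrid_search")
assert not RetrievalMethod.is_support_fulltext_search("semantic_search")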
43  dify/api/core/rag/retrieval/router/multi_dataset_function_call_router.py  Normal file
@@ -0,0 +1,43 @@
from typing import Union

from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
from core.model_runtime.entities.message_entities import PromptMessageTool, SystemPromptMessage, UserPromptMessage


class FunctionCallMultiDatasetRouter:
    def invoke(
        self,
        query: str,
        dataset_tools: list[PromptMessageTool],
        model_config: ModelConfigWithCredentialsEntity,
        model_instance: ModelInstance,
    ) -> tuple[Union[str, None], LLMUsage]:
        """Given input, decide what to do.

        Returns:
            Action specifying what tool to use.
        """
        if len(dataset_tools) == 0:
            return None, LLMUsage.empty_usage()
        elif len(dataset_tools) == 1:
            # Only one dataset: no routing decision is needed.
            return dataset_tools[0].name, LLMUsage.empty_usage()

        try:
            prompt_messages = [
                SystemPromptMessage(content="You are a helpful AI assistant."),
                UserPromptMessage(content=query),
            ]
            result: LLMResult = model_instance.invoke_llm(
                prompt_messages=prompt_messages,
                tools=dataset_tools,
                stream=False,
                model_parameters={"temperature": 0.2, "top_p": 0.3, "max_tokens": 1500},
            )
            usage = result.usage or LLMUsage.empty_usage()
            if result.message.tool_calls:
                # Route to the dataset named in the first tool call.
                return result.message.tool_calls[0].function.name, usage
            return None, usage
        except Exception:
            return None, LLMUsage.empty_usage()
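Callers describe each dataset as a function-call tool before invoking the router. A hedged sketch of what such a tool definition might look like; the exact parameter schema and the name/description values here are assumptions for illustration, not taken from this commit:

# Hypothetical dataset tool definition (field values are illustrative).
dataset_tool = PromptMessageTool(
    name="dataset-1",  # assumption: the tool name identifies the dataset
    description="Useful for questions about product pricing.",
    parameters={
        "type": "object",
        "properties": {"query": {"type": "string", "description": "Query for the dataset."}},
        "required": ["query"],
    },
)

router = FunctionCallMultiDatasetRouter()
# tool_name, usage = router.invoke(query, [dataset_tool], model_config, model_instance)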
248  dify/api/core/rag/retrieval/router/multi_dataset_react_route.py  Normal file
@@ -0,0 +1,248 @@
from collections.abc import Generator, Sequence
from typing import Union

from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMUsage
from core.model_runtime.entities.message_entities import PromptMessage, PromptMessageRole, PromptMessageTool
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate
from core.rag.retrieval.output_parser.react_output import ReactAction
from core.rag.retrieval.output_parser.structured_chat import StructuredChatOutputParser
from core.workflow.nodes.llm import llm_utils

PREFIX = """Respond to the human as helpfully and accurately as possible. You have access to the following tools:"""

SUFFIX = """Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.
Thought:"""  # noqa: E501

FORMAT_INSTRUCTIONS = """Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
The nouns in the format of "Thought", "Action", "Action Input", "Final Answer" must be expressed in English.
Valid "action" values: "Final Answer" or {tool_names}

Provide only ONE action per $JSON_BLOB, as shown:

```
{{
"action": $TOOL_NAME,
"action_input": $INPUT
}}
```

Follow this format:

Question: input question to answer
Thought: consider previous and subsequent steps
Action:
```
$JSON_BLOB
```
Observation: action result
... (repeat Thought/Action/Observation N times)
Thought: I know what to respond
Action:
```
{{
"action": "Final Answer",
"action_input": "Final response to human"
}}
```"""  # noqa: E501


class ReactMultiDatasetRouter:
    def invoke(
        self,
        query: str,
        dataset_tools: list[PromptMessageTool],
        model_config: ModelConfigWithCredentialsEntity,
        model_instance: ModelInstance,
        user_id: str,
        tenant_id: str,
    ) -> tuple[Union[str, None], LLMUsage]:
        """Given input, decide what to do.

        Returns:
            Action specifying what tool to use.
        """
        if len(dataset_tools) == 0:
            return None, LLMUsage.empty_usage()
        elif len(dataset_tools) == 1:
            # Only one dataset: no routing decision is needed.
            return dataset_tools[0].name, LLMUsage.empty_usage()

        try:
            return self._react_invoke(
                query=query,
                model_config=model_config,
                model_instance=model_instance,
                tools=dataset_tools,
                user_id=user_id,
                tenant_id=tenant_id,
            )
        except Exception:
            return None, LLMUsage.empty_usage()

    def _react_invoke(
        self,
        query: str,
        model_config: ModelConfigWithCredentialsEntity,
        model_instance: ModelInstance,
        tools: Sequence[PromptMessageTool],
        user_id: str,
        tenant_id: str,
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
    ) -> tuple[Union[str, None], LLMUsage]:
        prompt: Union[list[ChatModelMessage], CompletionModelPromptTemplate]
        if model_config.mode == "chat":
            prompt = self.create_chat_prompt(
                query=query,
                tools=tools,
                prefix=prefix,
                suffix=suffix,
                format_instructions=format_instructions,
            )
        else:
            prompt = self.create_completion_prompt(
                tools=tools,
                prefix=prefix,
                format_instructions=format_instructions,
            )
        # Stop generation as soon as the model starts to fabricate an observation.
        stop = ["Observation:"]
        prompt_transform = AdvancedPromptTransform()
        prompt_messages = prompt_transform.get_prompt(
            prompt_template=prompt,
            inputs={},
            query="",
            files=[],
            context="",
            memory_config=None,
            memory=None,
            model_config=model_config,
        )
        result_text, usage = self._invoke_llm(
            completion_param=model_config.parameters,
            model_instance=model_instance,
            prompt_messages=prompt_messages,
            stop=stop,
            user_id=user_id,
            tenant_id=tenant_id,
        )
        output_parser = StructuredChatOutputParser()
        react_decision = output_parser.parse(result_text)
        if isinstance(react_decision, ReactAction):
            return react_decision.tool, usage
        return None, usage

    def _invoke_llm(
        self,
        completion_param: dict,
        model_instance: ModelInstance,
        prompt_messages: list[PromptMessage],
        stop: list[str],
        user_id: str,
        tenant_id: str,
    ) -> tuple[str, LLMUsage]:
        """
        Invoke large language model
        :param model_instance: model instance
        :param prompt_messages: prompt messages
        :param stop: stop words
        :return: full text and usage
        """
        invoke_result: Generator[LLMResult, None, None] = model_instance.invoke_llm(
            prompt_messages=prompt_messages,
            model_parameters=completion_param,
            stop=stop,
            stream=True,
            user=user_id,
        )

        # handle invoke result
        text, usage = self._handle_invoke_result(invoke_result=invoke_result)

        # deduct quota
        llm_utils.deduct_llm_quota(tenant_id=tenant_id, model_instance=model_instance, usage=usage)

        return text, usage

    def _handle_invoke_result(self, invoke_result: Generator) -> tuple[str, LLMUsage]:
        """
        Handle the streamed invoke result
        :param invoke_result: invoke result generator
        :return: full text and usage
        """
        model = None
        prompt_messages: list[PromptMessage] = []
        full_text = ""
        usage = None
        for result in invoke_result:
            text = result.delta.message.content
            full_text += text

            if not model:
                model = result.model

            if not prompt_messages:
                prompt_messages = result.prompt_messages

            if not usage and result.delta.usage:
                usage = result.delta.usage

        if not usage:
            usage = LLMUsage.empty_usage()

        return full_text, usage

    def create_chat_prompt(
        self,
        query: str,
        tools: Sequence[PromptMessageTool],
        prefix: str = PREFIX,
        suffix: str = SUFFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
    ) -> list[ChatModelMessage]:
        tool_strings = []
        for tool in tools:
            tool_strings.append(
                f"{tool.name}: {tool.description}, args: {{'query': {{'title': 'Query',"
                f" 'description': 'Query for the dataset to be used to retrieve the dataset.', 'type': 'string'}}}}"
            )
        formatted_tools = "\n".join(tool_strings)
        unique_tool_names = {tool.name for tool in tools}
        tool_names = ", ".join('"' + name + '"' for name in unique_tool_names)
        format_instructions = format_instructions.format(tool_names=tool_names)
        template = "\n\n".join([prefix, formatted_tools, format_instructions, suffix])
        prompt_messages = []
        system_prompt_messages = ChatModelMessage(role=PromptMessageRole.SYSTEM, text=template)
        prompt_messages.append(system_prompt_messages)
        user_prompt_message = ChatModelMessage(role=PromptMessageRole.USER, text=query)
        prompt_messages.append(user_prompt_message)
        return prompt_messages

    def create_completion_prompt(
        self,
        tools: Sequence[PromptMessageTool],
        prefix: str = PREFIX,
        format_instructions: str = FORMAT_INSTRUCTIONS,
    ) -> CompletionModelPromptTemplate:
        """Create prompt in the style of the zero-shot agent.

        Args:
            tools: List of tools the agent will have access to, used to format the prompt.
            prefix: String to put before the list of tools.
            format_instructions: The format instruction prompt.
        Returns:
            A PromptTemplate with the template assembled from the pieces here.
        """
        suffix = """Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.
Question: {input}
Thought: {agent_scratchpad}
"""  # noqa: E501

        tool_strings = "\n".join([f"{tool.name}: {tool.description}" for tool in tools])
        tool_names = ", ".join([tool.name for tool in tools])
        format_instructions = format_instructions.format(tool_names=tool_names)
        template = "\n\n".join([prefix, tool_strings, format_instructions, suffix])
        return CompletionModelPromptTemplate(text=template)
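One detail worth noting in the prompt constants above: the doubled braces in FORMAT_INSTRUCTIONS are str.format escapes, so only {tool_names} is substituted while the JSON examples keep literal braces. A standalone illustrative check (the tool names are made up):

# Illustrative: str.format substitutes {tool_names} and collapses '{{' to '{'.
names = ", ".join(f'"{n}"' for n in ["dataset-1", "dataset-2"])
rendered = FORMAT_INSTRUCTIONS.format(tool_names=names)
assert 'Valid "action" values: "Final Answer" or "dataset-1", "dataset-2"' in rendered
assert "{{" not in rendered and '"action": $TOOL_NAME' in rendered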
66  dify/api/core/rag/retrieval/template_prompts.py  Normal file
@@ -0,0 +1,66 @@
METADATA_FILTER_SYSTEM_PROMPT = """
### Job Description
You are a text metadata extraction engine that extracts the text's metadata based on user input and sets the metadata values.
### Task
Your task is to ONLY extract the metadata fields that exist in the input text from the provided metadata list and use the following operators ["contains", "not contains", "start with", "end with", "is", "is not", "empty", "not empty", "=", "≠", ">", "<", "≥", "≤", "before", "after"] to express logical relationships, then return the result in JSON format with the key "metadata_fields" and value "metadata_field_value" and comparison operator "comparison_operator".
### Format
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
### Constraint
DO NOT include anything other than the JSON array in your response.
"""  # noqa: E501

METADATA_FILTER_USER_PROMPT_1 = """
{ "input_text": "I want to know which company’s email address test@example.com is?",
"metadata_fields": ["filename", "email", "phone", "address"]
}
"""

METADATA_FILTER_ASSISTANT_PROMPT_1 = """
```json
{"metadata_map": [
{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}
]
}
```
"""

METADATA_FILTER_USER_PROMPT_2 = """
{"input_text": "What are the movies with a score of more than 9 in 2024?",
"metadata_fields": ["name", "year", "rating", "country"]}
"""

METADATA_FILTER_ASSISTANT_PROMPT_2 = """
```json
{"metadata_map": [
{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="},
{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}
]}
```
"""

METADATA_FILTER_USER_PROMPT_3 = """
{{"input_text": "{input_text}",
"metadata_fields": {metadata_fields}}}
"""

METADATA_FILTER_COMPLETION_PROMPT = """
### Job Description
You are a text metadata extraction engine that extracts the text's metadata based on user input and sets the metadata values.
### Task
Your task is to ONLY extract the metadata fields that exist in the input text from the provided metadata list and use the following operators ["=", "!=", ">", "<", ">=", "<="] to express logical relationships, then return the result in JSON format with the key "metadata_fields" and value "metadata_field_value" and comparison operator "comparison_operator".
### Format
The input text is in the variable input_text. Metadata are specified as a list in the variable metadata_fields.
### Constraint
DO NOT include anything other than the JSON array in your response.
### Example
Here is the chat example between human and assistant, inside <example></example> XML tags.
<example>
User:{{"input_text": ["I want to know which company’s email address test@example.com is?"], "metadata_fields": ["filename", "email", "phone", "address"]}}
Assistant:{{"metadata_map": [{{"metadata_field_name": "email", "metadata_field_value": "test@example.com", "comparison_operator": "="}}]}}
User:{{"input_text": "What are the movies with a score of more than 9 in 2024?", "metadata_fields": ["name", "year", "rating", "country"]}}
Assistant:{{"metadata_map": [{{"metadata_field_name": "year", "metadata_field_value": "2024", "comparison_operator": "="}}, {{"metadata_field_name": "rating", "metadata_field_value": "9", "comparison_operator": ">"}}]}}
</example>
### User Input
{{"input_text" : "{input_text}", "metadata_fields" : {metadata_fields}}}
### Assistant Output
"""  # noqa: E501
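These templates are filled with str.format before being sent to the model; the doubled braces keep the embedded JSON examples intact. A minimal illustrative rendering, with made-up values and metadata_fields passed as a pre-serialized JSON string to avoid Python-repr quoting:

# Illustrative rendering of the completion-style metadata filter prompt.
prompt = METADATA_FILTER_COMPLETION_PROMPT.format(
    input_text="What are the movies with a score of more than 9 in 2024?",
    metadata_fields='["name", "year", "rating", "country"]',
)
assert '"input_text" : "What are the movies' in prompt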