dify
This commit is contained in:
0
dify/api/core/llm_generator/__init__.py
Normal file
0
dify/api/core/llm_generator/__init__.py
Normal file
564
dify/api/core/llm_generator/llm_generator.py
Normal file
564
dify/api/core/llm_generator/llm_generator.py
Normal file
@@ -0,0 +1,564 @@
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from collections.abc import Sequence
|
||||
from typing import Protocol, cast
|
||||
|
||||
import json_repair
|
||||
|
||||
from core.llm_generator.output_parser.rule_config_generator import RuleConfigGeneratorOutputParser
|
||||
from core.llm_generator.output_parser.suggested_questions_after_answer import SuggestedQuestionsAfterAnswerOutputParser
|
||||
from core.llm_generator.prompts import (
|
||||
CONVERSATION_TITLE_PROMPT,
|
||||
GENERATOR_QA_PROMPT,
|
||||
JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE,
|
||||
LLM_MODIFY_CODE_SYSTEM,
|
||||
LLM_MODIFY_PROMPT_SYSTEM,
|
||||
PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE,
|
||||
SYSTEM_STRUCTURED_OUTPUT_GENERATE,
|
||||
WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE,
|
||||
)
|
||||
from core.model_manager import ModelManager
|
||||
from core.model_runtime.entities.llm_entities import LLMResult
|
||||
from core.model_runtime.entities.message_entities import PromptMessage, SystemPromptMessage, UserPromptMessage
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
|
||||
from core.ops.entities.trace_entity import TraceTaskName
|
||||
from core.ops.ops_trace_manager import TraceQueueManager, TraceTask
|
||||
from core.ops.utils import measure_time
|
||||
from core.prompt.utils.prompt_template_parser import PromptTemplateParser
|
||||
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_storage import storage
|
||||
from models import App, Message, WorkflowNodeExecutionModel
|
||||
from models.workflow import Workflow
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WorkflowServiceInterface(Protocol):
|
||||
def get_draft_workflow(self, app_model: App, workflow_id: str | None = None) -> Workflow | None:
|
||||
pass
|
||||
|
||||
def get_node_last_run(self, app_model: App, workflow: Workflow, node_id: str) -> WorkflowNodeExecutionModel | None:
|
||||
pass
|
||||
|
||||
|
||||
class LLMGenerator:
|
||||
@classmethod
|
||||
def generate_conversation_name(
|
||||
cls, tenant_id: str, query, conversation_id: str | None = None, app_id: str | None = None
|
||||
):
|
||||
prompt = CONVERSATION_TITLE_PROMPT
|
||||
|
||||
if len(query) > 2000:
|
||||
query = query[:300] + "...[TRUNCATED]..." + query[-300:]
|
||||
|
||||
query = query.replace("\n", " ")
|
||||
|
||||
prompt += query + "\n"
|
||||
|
||||
model_manager = ModelManager()
|
||||
model_instance = model_manager.get_default_model_instance(
|
||||
tenant_id=tenant_id,
|
||||
model_type=ModelType.LLM,
|
||||
)
|
||||
prompts = [UserPromptMessage(content=prompt)]
|
||||
|
||||
with measure_time() as timer:
|
||||
response: LLMResult = model_instance.invoke_llm(
|
||||
prompt_messages=list(prompts), model_parameters={"max_tokens": 500, "temperature": 1}, stream=False
|
||||
)
|
||||
answer = cast(str, response.message.content)
|
||||
cleaned_answer = re.sub(r"^.*(\{.*\}).*$", r"\1", answer, flags=re.DOTALL)
|
||||
if cleaned_answer is None:
|
||||
return ""
|
||||
try:
|
||||
result_dict = json.loads(cleaned_answer)
|
||||
answer = result_dict["Your Output"]
|
||||
except json.JSONDecodeError:
|
||||
logger.exception("Failed to generate name after answer, use query instead")
|
||||
answer = query
|
||||
name = answer.strip()
|
||||
|
||||
if len(name) > 75:
|
||||
name = name[:75] + "..."
|
||||
|
||||
# get tracing instance
|
||||
trace_manager = TraceQueueManager(app_id=app_id)
|
||||
trace_manager.add_trace_task(
|
||||
TraceTask(
|
||||
TraceTaskName.GENERATE_NAME_TRACE,
|
||||
conversation_id=conversation_id,
|
||||
generate_conversation_name=name,
|
||||
inputs=prompt,
|
||||
timer=timer,
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
)
|
||||
|
||||
return name
|
||||
|
||||
@classmethod
|
||||
def generate_suggested_questions_after_answer(cls, tenant_id: str, histories: str) -> Sequence[str]:
|
||||
output_parser = SuggestedQuestionsAfterAnswerOutputParser()
|
||||
format_instructions = output_parser.get_format_instructions()
|
||||
|
||||
prompt_template = PromptTemplateParser(template="{{histories}}\n{{format_instructions}}\nquestions:\n")
|
||||
|
||||
prompt = prompt_template.format({"histories": histories, "format_instructions": format_instructions})
|
||||
|
||||
try:
|
||||
model_manager = ModelManager()
|
||||
model_instance = model_manager.get_default_model_instance(
|
||||
tenant_id=tenant_id,
|
||||
model_type=ModelType.LLM,
|
||||
)
|
||||
except InvokeAuthorizationError:
|
||||
return []
|
||||
|
||||
prompt_messages = [UserPromptMessage(content=prompt)]
|
||||
|
||||
questions: Sequence[str] = []
|
||||
|
||||
try:
|
||||
response: LLMResult = model_instance.invoke_llm(
|
||||
prompt_messages=list(prompt_messages),
|
||||
model_parameters={"max_tokens": 256, "temperature": 0},
|
||||
stream=False,
|
||||
)
|
||||
|
||||
text_content = response.message.get_text_content()
|
||||
questions = output_parser.parse(text_content) if text_content else []
|
||||
except InvokeError:
|
||||
questions = []
|
||||
except Exception:
|
||||
logger.exception("Failed to generate suggested questions after answer")
|
||||
questions = []
|
||||
|
||||
return questions
|
||||
|
||||
@classmethod
|
||||
def generate_rule_config(cls, tenant_id: str, instruction: str, model_config: dict, no_variable: bool):
|
||||
output_parser = RuleConfigGeneratorOutputParser()
|
||||
|
||||
error = ""
|
||||
error_step = ""
|
||||
rule_config = {"prompt": "", "variables": [], "opening_statement": "", "error": ""}
|
||||
model_parameters = model_config.get("completion_params", {})
|
||||
if no_variable:
|
||||
prompt_template = PromptTemplateParser(WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE)
|
||||
|
||||
prompt_generate = prompt_template.format(
|
||||
inputs={
|
||||
"TASK_DESCRIPTION": instruction,
|
||||
},
|
||||
remove_template_variables=False,
|
||||
)
|
||||
|
||||
prompt_messages = [UserPromptMessage(content=prompt_generate)]
|
||||
|
||||
model_manager = ModelManager()
|
||||
|
||||
model_instance = model_manager.get_model_instance(
|
||||
tenant_id=tenant_id,
|
||||
model_type=ModelType.LLM,
|
||||
provider=model_config.get("provider", ""),
|
||||
model=model_config.get("name", ""),
|
||||
)
|
||||
|
||||
try:
|
||||
response: LLMResult = model_instance.invoke_llm(
|
||||
prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
|
||||
)
|
||||
|
||||
rule_config["prompt"] = cast(str, response.message.content)
|
||||
|
||||
except InvokeError as e:
|
||||
error = str(e)
|
||||
error_step = "generate rule config"
|
||||
except Exception as e:
|
||||
logger.exception("Failed to generate rule config, model: %s", model_config.get("name"))
|
||||
rule_config["error"] = str(e)
|
||||
|
||||
rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""
|
||||
|
||||
return rule_config
|
||||
|
||||
# get rule config prompt, parameter and statement
|
||||
prompt_generate, parameter_generate, statement_generate = output_parser.get_format_instructions()
|
||||
|
||||
prompt_template = PromptTemplateParser(prompt_generate)
|
||||
|
||||
parameter_template = PromptTemplateParser(parameter_generate)
|
||||
|
||||
statement_template = PromptTemplateParser(statement_generate)
|
||||
|
||||
# format the prompt_generate_prompt
|
||||
prompt_generate_prompt = prompt_template.format(
|
||||
inputs={
|
||||
"TASK_DESCRIPTION": instruction,
|
||||
},
|
||||
remove_template_variables=False,
|
||||
)
|
||||
prompt_messages = [UserPromptMessage(content=prompt_generate_prompt)]
|
||||
|
||||
# get model instance
|
||||
model_manager = ModelManager()
|
||||
model_instance = model_manager.get_model_instance(
|
||||
tenant_id=tenant_id,
|
||||
model_type=ModelType.LLM,
|
||||
provider=model_config.get("provider", ""),
|
||||
model=model_config.get("name", ""),
|
||||
)
|
||||
|
||||
try:
|
||||
try:
|
||||
# the first step to generate the task prompt
|
||||
prompt_content: LLMResult = model_instance.invoke_llm(
|
||||
prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
|
||||
)
|
||||
except InvokeError as e:
|
||||
error = str(e)
|
||||
error_step = "generate prefix prompt"
|
||||
rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""
|
||||
|
||||
return rule_config
|
||||
|
||||
rule_config["prompt"] = cast(str, prompt_content.message.content)
|
||||
|
||||
if not isinstance(prompt_content.message.content, str):
|
||||
raise NotImplementedError("prompt content is not a string")
|
||||
parameter_generate_prompt = parameter_template.format(
|
||||
inputs={
|
||||
"INPUT_TEXT": prompt_content.message.content,
|
||||
},
|
||||
remove_template_variables=False,
|
||||
)
|
||||
parameter_messages = [UserPromptMessage(content=parameter_generate_prompt)]
|
||||
|
||||
# the second step to generate the task_parameter and task_statement
|
||||
statement_generate_prompt = statement_template.format(
|
||||
inputs={
|
||||
"TASK_DESCRIPTION": instruction,
|
||||
"INPUT_TEXT": prompt_content.message.content,
|
||||
},
|
||||
remove_template_variables=False,
|
||||
)
|
||||
statement_messages = [UserPromptMessage(content=statement_generate_prompt)]
|
||||
|
||||
try:
|
||||
parameter_content: LLMResult = model_instance.invoke_llm(
|
||||
prompt_messages=list(parameter_messages), model_parameters=model_parameters, stream=False
|
||||
)
|
||||
rule_config["variables"] = re.findall(r'"\s*([^"]+)\s*"', cast(str, parameter_content.message.content))
|
||||
except InvokeError as e:
|
||||
error = str(e)
|
||||
error_step = "generate variables"
|
||||
|
||||
try:
|
||||
statement_content: LLMResult = model_instance.invoke_llm(
|
||||
prompt_messages=list(statement_messages), model_parameters=model_parameters, stream=False
|
||||
)
|
||||
rule_config["opening_statement"] = cast(str, statement_content.message.content)
|
||||
except InvokeError as e:
|
||||
error = str(e)
|
||||
error_step = "generate conversation opener"
|
||||
|
||||
except Exception as e:
|
||||
logger.exception("Failed to generate rule config, model: %s", model_config.get("name"))
|
||||
rule_config["error"] = str(e)
|
||||
|
||||
rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""
|
||||
|
||||
return rule_config
|
||||
|
||||
@classmethod
|
||||
def generate_code(cls, tenant_id: str, instruction: str, model_config: dict, code_language: str = "javascript"):
|
||||
if code_language == "python":
|
||||
prompt_template = PromptTemplateParser(PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE)
|
||||
else:
|
||||
prompt_template = PromptTemplateParser(JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE)
|
||||
|
||||
prompt = prompt_template.format(
|
||||
inputs={
|
||||
"INSTRUCTION": instruction,
|
||||
"CODE_LANGUAGE": code_language,
|
||||
},
|
||||
remove_template_variables=False,
|
||||
)
|
||||
|
||||
model_manager = ModelManager()
|
||||
model_instance = model_manager.get_model_instance(
|
||||
tenant_id=tenant_id,
|
||||
model_type=ModelType.LLM,
|
||||
provider=model_config.get("provider", ""),
|
||||
model=model_config.get("name", ""),
|
||||
)
|
||||
|
||||
prompt_messages = [UserPromptMessage(content=prompt)]
|
||||
model_parameters = model_config.get("completion_params", {})
|
||||
try:
|
||||
response: LLMResult = model_instance.invoke_llm(
|
||||
prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
|
||||
)
|
||||
|
||||
generated_code = cast(str, response.message.content)
|
||||
return {"code": generated_code, "language": code_language, "error": ""}
|
||||
|
||||
except InvokeError as e:
|
||||
error = str(e)
|
||||
return {"code": "", "language": code_language, "error": f"Failed to generate code. Error: {error}"}
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
"Failed to invoke LLM model, model: %s, language: %s", model_config.get("name"), code_language
|
||||
)
|
||||
return {"code": "", "language": code_language, "error": f"An unexpected error occurred: {str(e)}"}
|
||||
|
||||
@classmethod
|
||||
def generate_qa_document(cls, tenant_id: str, query, document_language: str):
|
||||
prompt = GENERATOR_QA_PROMPT.format(language=document_language)
|
||||
|
||||
model_manager = ModelManager()
|
||||
model_instance = model_manager.get_default_model_instance(
|
||||
tenant_id=tenant_id,
|
||||
model_type=ModelType.LLM,
|
||||
)
|
||||
|
||||
prompt_messages: list[PromptMessage] = [SystemPromptMessage(content=prompt), UserPromptMessage(content=query)]
|
||||
|
||||
# Explicitly use the non-streaming overload
|
||||
result = model_instance.invoke_llm(
|
||||
prompt_messages=prompt_messages,
|
||||
model_parameters={"temperature": 0.01, "max_tokens": 2000},
|
||||
stream=False,
|
||||
)
|
||||
|
||||
# Runtime type check since pyright has issues with the overload
|
||||
if not isinstance(result, LLMResult):
|
||||
raise TypeError("Expected LLMResult when stream=False")
|
||||
response = result
|
||||
|
||||
answer = cast(str, response.message.content)
|
||||
return answer.strip()
|
||||
|
||||
@classmethod
|
||||
def generate_structured_output(cls, tenant_id: str, instruction: str, model_config: dict):
|
||||
model_manager = ModelManager()
|
||||
model_instance = model_manager.get_model_instance(
|
||||
tenant_id=tenant_id,
|
||||
model_type=ModelType.LLM,
|
||||
provider=model_config.get("provider", ""),
|
||||
model=model_config.get("name", ""),
|
||||
)
|
||||
|
||||
prompt_messages = [
|
||||
SystemPromptMessage(content=SYSTEM_STRUCTURED_OUTPUT_GENERATE),
|
||||
UserPromptMessage(content=instruction),
|
||||
]
|
||||
model_parameters = model_config.get("model_parameters", {})
|
||||
|
||||
try:
|
||||
response: LLMResult = model_instance.invoke_llm(
|
||||
prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
|
||||
)
|
||||
|
||||
raw_content = response.message.content
|
||||
|
||||
if not isinstance(raw_content, str):
|
||||
raise ValueError(f"LLM response content must be a string, got: {type(raw_content)}")
|
||||
|
||||
try:
|
||||
parsed_content = json.loads(raw_content)
|
||||
except json.JSONDecodeError:
|
||||
parsed_content = json_repair.loads(raw_content)
|
||||
|
||||
if not isinstance(parsed_content, dict | list):
|
||||
raise ValueError(f"Failed to parse structured output from llm: {raw_content}")
|
||||
|
||||
generated_json_schema = json.dumps(parsed_content, indent=2, ensure_ascii=False)
|
||||
return {"output": generated_json_schema, "error": ""}
|
||||
|
||||
except InvokeError as e:
|
||||
error = str(e)
|
||||
return {"output": "", "error": f"Failed to generate JSON Schema. Error: {error}"}
|
||||
except Exception as e:
|
||||
logger.exception("Failed to invoke LLM model, model: %s", model_config.get("name"))
|
||||
return {"output": "", "error": f"An unexpected error occurred: {str(e)}"}
|
||||
|
||||
@staticmethod
|
||||
def instruction_modify_legacy(
|
||||
tenant_id: str, flow_id: str, current: str, instruction: str, model_config: dict, ideal_output: str | None
|
||||
):
|
||||
last_run: Message | None = (
|
||||
db.session.query(Message).where(Message.app_id == flow_id).order_by(Message.created_at.desc()).first()
|
||||
)
|
||||
if not last_run:
|
||||
return LLMGenerator.__instruction_modify_common(
|
||||
tenant_id=tenant_id,
|
||||
model_config=model_config,
|
||||
last_run=None,
|
||||
current=current,
|
||||
error_message="",
|
||||
instruction=instruction,
|
||||
node_type="llm",
|
||||
ideal_output=ideal_output,
|
||||
)
|
||||
last_run_dict = {
|
||||
"query": last_run.query,
|
||||
"answer": last_run.answer,
|
||||
"error": last_run.error,
|
||||
}
|
||||
return LLMGenerator.__instruction_modify_common(
|
||||
tenant_id=tenant_id,
|
||||
model_config=model_config,
|
||||
last_run=last_run_dict,
|
||||
current=current,
|
||||
error_message=str(last_run.error),
|
||||
instruction=instruction,
|
||||
node_type="llm",
|
||||
ideal_output=ideal_output,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def instruction_modify_workflow(
|
||||
tenant_id: str,
|
||||
flow_id: str,
|
||||
node_id: str,
|
||||
current: str,
|
||||
instruction: str,
|
||||
model_config: dict,
|
||||
ideal_output: str | None,
|
||||
workflow_service: WorkflowServiceInterface,
|
||||
):
|
||||
session = db.session()
|
||||
|
||||
app: App | None = session.query(App).where(App.id == flow_id).first()
|
||||
if not app:
|
||||
raise ValueError("App not found.")
|
||||
workflow = workflow_service.get_draft_workflow(app_model=app)
|
||||
if not workflow:
|
||||
raise ValueError("Workflow not found for the given app model.")
|
||||
last_run = workflow_service.get_node_last_run(app_model=app, workflow=workflow, node_id=node_id)
|
||||
try:
|
||||
node_type = cast(WorkflowNodeExecutionModel, last_run).node_type
|
||||
except Exception:
|
||||
try:
|
||||
node_type = [it for it in workflow.graph_dict["graph"]["nodes"] if it["id"] == node_id][0]["data"][
|
||||
"type"
|
||||
]
|
||||
except Exception:
|
||||
node_type = "llm"
|
||||
|
||||
if not last_run: # Node is not executed yet
|
||||
return LLMGenerator.__instruction_modify_common(
|
||||
tenant_id=tenant_id,
|
||||
model_config=model_config,
|
||||
last_run=None,
|
||||
current=current,
|
||||
error_message="",
|
||||
instruction=instruction,
|
||||
node_type=node_type,
|
||||
ideal_output=ideal_output,
|
||||
)
|
||||
|
||||
def agent_log_of(node_execution: WorkflowNodeExecutionModel) -> Sequence:
|
||||
raw_agent_log = node_execution.execution_metadata_dict.get(WorkflowNodeExecutionMetadataKey.AGENT_LOG, [])
|
||||
if not raw_agent_log:
|
||||
return []
|
||||
|
||||
return [
|
||||
{
|
||||
"status": event["status"],
|
||||
"error": event["error"],
|
||||
"data": event["data"],
|
||||
}
|
||||
for event in raw_agent_log
|
||||
]
|
||||
|
||||
inputs = last_run.load_full_inputs(session, storage)
|
||||
last_run_dict = {
|
||||
"inputs": inputs,
|
||||
"status": last_run.status,
|
||||
"error": last_run.error,
|
||||
"agent_log": agent_log_of(last_run),
|
||||
}
|
||||
|
||||
return LLMGenerator.__instruction_modify_common(
|
||||
tenant_id=tenant_id,
|
||||
model_config=model_config,
|
||||
last_run=last_run_dict,
|
||||
current=current,
|
||||
error_message=last_run.error,
|
||||
instruction=instruction,
|
||||
node_type=last_run.node_type,
|
||||
ideal_output=ideal_output,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def __instruction_modify_common(
|
||||
tenant_id: str,
|
||||
model_config: dict,
|
||||
last_run: dict | None,
|
||||
current: str | None,
|
||||
error_message: str | None,
|
||||
instruction: str,
|
||||
node_type: str,
|
||||
ideal_output: str | None,
|
||||
):
|
||||
LAST_RUN = "{{#last_run#}}"
|
||||
CURRENT = "{{#current#}}"
|
||||
ERROR_MESSAGE = "{{#error_message#}}"
|
||||
injected_instruction = instruction
|
||||
if LAST_RUN in injected_instruction:
|
||||
injected_instruction = injected_instruction.replace(LAST_RUN, json.dumps(last_run))
|
||||
if CURRENT in injected_instruction:
|
||||
injected_instruction = injected_instruction.replace(CURRENT, current or "null")
|
||||
if ERROR_MESSAGE in injected_instruction:
|
||||
injected_instruction = injected_instruction.replace(ERROR_MESSAGE, error_message or "null")
|
||||
model_instance = ModelManager().get_model_instance(
|
||||
tenant_id=tenant_id,
|
||||
model_type=ModelType.LLM,
|
||||
provider=model_config.get("provider", ""),
|
||||
model=model_config.get("name", ""),
|
||||
)
|
||||
match node_type:
|
||||
case "llm" | "agent":
|
||||
system_prompt = LLM_MODIFY_PROMPT_SYSTEM
|
||||
case "code":
|
||||
system_prompt = LLM_MODIFY_CODE_SYSTEM
|
||||
case _:
|
||||
system_prompt = LLM_MODIFY_PROMPT_SYSTEM
|
||||
prompt_messages = [
|
||||
SystemPromptMessage(content=system_prompt),
|
||||
UserPromptMessage(
|
||||
content=json.dumps(
|
||||
{
|
||||
"current": current,
|
||||
"last_run": last_run,
|
||||
"instruction": injected_instruction,
|
||||
"ideal_output": ideal_output,
|
||||
}
|
||||
)
|
||||
),
|
||||
]
|
||||
model_parameters = {"temperature": 0.4}
|
||||
|
||||
try:
|
||||
response: LLMResult = model_instance.invoke_llm(
|
||||
prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
|
||||
)
|
||||
|
||||
generated_raw = cast(str, response.message.content)
|
||||
first_brace = generated_raw.find("{")
|
||||
last_brace = generated_raw.rfind("}")
|
||||
return {**json.loads(generated_raw[first_brace : last_brace + 1])}
|
||||
|
||||
except InvokeError as e:
|
||||
error = str(e)
|
||||
return {"error": f"Failed to generate code. Error: {error}"}
|
||||
except Exception as e:
|
||||
logger.exception(
|
||||
"Failed to invoke LLM model, model: %s", json.dumps(model_config.get("name")), exc_info=True
|
||||
)
|
||||
return {"error": f"An unexpected error occurred: {str(e)}"}
|
||||
2
dify/api/core/llm_generator/output_parser/errors.py
Normal file
2
dify/api/core/llm_generator/output_parser/errors.py
Normal file
@@ -0,0 +1,2 @@
|
||||
class OutputParserError(ValueError):
|
||||
pass
|
||||
@@ -0,0 +1,30 @@
|
||||
from core.llm_generator.output_parser.errors import OutputParserError
|
||||
from core.llm_generator.prompts import (
|
||||
RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE,
|
||||
RULE_CONFIG_PROMPT_GENERATE_TEMPLATE,
|
||||
RULE_CONFIG_STATEMENT_GENERATE_TEMPLATE,
|
||||
)
|
||||
from libs.json_in_md_parser import parse_and_check_json_markdown
|
||||
|
||||
|
||||
class RuleConfigGeneratorOutputParser:
|
||||
def get_format_instructions(self) -> tuple[str, str, str]:
|
||||
return (
|
||||
RULE_CONFIG_PROMPT_GENERATE_TEMPLATE,
|
||||
RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE,
|
||||
RULE_CONFIG_STATEMENT_GENERATE_TEMPLATE,
|
||||
)
|
||||
|
||||
def parse(self, text: str):
|
||||
try:
|
||||
expected_keys = ["prompt", "variables", "opening_statement"]
|
||||
parsed = parse_and_check_json_markdown(text, expected_keys)
|
||||
if not isinstance(parsed["prompt"], str):
|
||||
raise ValueError("Expected 'prompt' to be a string.")
|
||||
if not isinstance(parsed["variables"], list):
|
||||
raise ValueError("Expected 'variables' to be a list.")
|
||||
if not isinstance(parsed["opening_statement"], str):
|
||||
raise ValueError("Expected 'opening_statement' to be a str.")
|
||||
return parsed
|
||||
except Exception as e:
|
||||
raise OutputParserError(f"Parsing text\n{text}\n of rule config generator raised following error:\n{e}")
|
||||
378
dify/api/core/llm_generator/output_parser/structured_output.py
Normal file
378
dify/api/core/llm_generator/output_parser/structured_output.py
Normal file
@@ -0,0 +1,378 @@
|
||||
import json
|
||||
from collections.abc import Generator, Mapping, Sequence
|
||||
from copy import deepcopy
|
||||
from enum import StrEnum
|
||||
from typing import Any, Literal, cast, overload
|
||||
|
||||
import json_repair
|
||||
from pydantic import TypeAdapter, ValidationError
|
||||
|
||||
from core.llm_generator.output_parser.errors import OutputParserError
|
||||
from core.llm_generator.prompts import STRUCTURED_OUTPUT_PROMPT
|
||||
from core.model_manager import ModelInstance
|
||||
from core.model_runtime.callbacks.base_callback import Callback
|
||||
from core.model_runtime.entities.llm_entities import (
|
||||
LLMResult,
|
||||
LLMResultChunk,
|
||||
LLMResultChunkDelta,
|
||||
LLMResultChunkWithStructuredOutput,
|
||||
LLMResultWithStructuredOutput,
|
||||
)
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
AssistantPromptMessage,
|
||||
PromptMessage,
|
||||
PromptMessageTool,
|
||||
SystemPromptMessage,
|
||||
TextPromptMessageContent,
|
||||
)
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, ParameterRule
|
||||
|
||||
|
||||
class ResponseFormat(StrEnum):
|
||||
"""Constants for model response formats"""
|
||||
|
||||
JSON_SCHEMA = "json_schema" # model's structured output mode. some model like gemini, gpt-4o, support this mode.
|
||||
JSON = "JSON" # model's json mode. some model like claude support this mode.
|
||||
JSON_OBJECT = "json_object" # json mode's another alias. some model like deepseek-chat, qwen use this alias.
|
||||
|
||||
|
||||
class SpecialModelType(StrEnum):
|
||||
"""Constants for identifying model types"""
|
||||
|
||||
GEMINI = "gemini"
|
||||
OLLAMA = "ollama"
|
||||
|
||||
|
||||
@overload
|
||||
def invoke_llm_with_structured_output(
|
||||
*,
|
||||
provider: str,
|
||||
model_schema: AIModelEntity,
|
||||
model_instance: ModelInstance,
|
||||
prompt_messages: Sequence[PromptMessage],
|
||||
json_schema: Mapping[str, Any],
|
||||
model_parameters: Mapping | None = None,
|
||||
tools: Sequence[PromptMessageTool] | None = None,
|
||||
stop: list[str] | None = None,
|
||||
stream: Literal[True],
|
||||
user: str | None = None,
|
||||
callbacks: list[Callback] | None = None,
|
||||
) -> Generator[LLMResultChunkWithStructuredOutput, None, None]: ...
|
||||
@overload
|
||||
def invoke_llm_with_structured_output(
|
||||
*,
|
||||
provider: str,
|
||||
model_schema: AIModelEntity,
|
||||
model_instance: ModelInstance,
|
||||
prompt_messages: Sequence[PromptMessage],
|
||||
json_schema: Mapping[str, Any],
|
||||
model_parameters: Mapping | None = None,
|
||||
tools: Sequence[PromptMessageTool] | None = None,
|
||||
stop: list[str] | None = None,
|
||||
stream: Literal[False],
|
||||
user: str | None = None,
|
||||
callbacks: list[Callback] | None = None,
|
||||
) -> LLMResultWithStructuredOutput: ...
|
||||
@overload
|
||||
def invoke_llm_with_structured_output(
|
||||
*,
|
||||
provider: str,
|
||||
model_schema: AIModelEntity,
|
||||
model_instance: ModelInstance,
|
||||
prompt_messages: Sequence[PromptMessage],
|
||||
json_schema: Mapping[str, Any],
|
||||
model_parameters: Mapping | None = None,
|
||||
tools: Sequence[PromptMessageTool] | None = None,
|
||||
stop: list[str] | None = None,
|
||||
stream: bool = True,
|
||||
user: str | None = None,
|
||||
callbacks: list[Callback] | None = None,
|
||||
) -> LLMResultWithStructuredOutput | Generator[LLMResultChunkWithStructuredOutput, None, None]: ...
|
||||
def invoke_llm_with_structured_output(
|
||||
*,
|
||||
provider: str,
|
||||
model_schema: AIModelEntity,
|
||||
model_instance: ModelInstance,
|
||||
prompt_messages: Sequence[PromptMessage],
|
||||
json_schema: Mapping[str, Any],
|
||||
model_parameters: Mapping | None = None,
|
||||
tools: Sequence[PromptMessageTool] | None = None,
|
||||
stop: list[str] | None = None,
|
||||
stream: bool = True,
|
||||
user: str | None = None,
|
||||
callbacks: list[Callback] | None = None,
|
||||
) -> LLMResultWithStructuredOutput | Generator[LLMResultChunkWithStructuredOutput, None, None]:
|
||||
"""
|
||||
Invoke large language model with structured output
|
||||
1. This method invokes model_instance.invoke_llm with json_schema
|
||||
2. Try to parse the result as structured output
|
||||
|
||||
:param prompt_messages: prompt messages
|
||||
:param json_schema: json schema
|
||||
:param model_parameters: model parameters
|
||||
:param tools: tools for tool calling
|
||||
:param stop: stop words
|
||||
:param stream: is stream response
|
||||
:param user: unique user id
|
||||
:param callbacks: callbacks
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
|
||||
# handle native json schema
|
||||
model_parameters_with_json_schema: dict[str, Any] = {
|
||||
**(model_parameters or {}),
|
||||
}
|
||||
|
||||
if model_schema.support_structure_output:
|
||||
model_parameters = _handle_native_json_schema(
|
||||
provider, model_schema, json_schema, model_parameters_with_json_schema, model_schema.parameter_rules
|
||||
)
|
||||
else:
|
||||
# Set appropriate response format based on model capabilities
|
||||
_set_response_format(model_parameters_with_json_schema, model_schema.parameter_rules)
|
||||
|
||||
# handle prompt based schema
|
||||
prompt_messages = _handle_prompt_based_schema(
|
||||
prompt_messages=prompt_messages,
|
||||
structured_output_schema=json_schema,
|
||||
)
|
||||
|
||||
llm_result = model_instance.invoke_llm(
|
||||
prompt_messages=list(prompt_messages),
|
||||
model_parameters=model_parameters_with_json_schema,
|
||||
tools=tools,
|
||||
stop=stop,
|
||||
stream=stream,
|
||||
user=user,
|
||||
callbacks=callbacks,
|
||||
)
|
||||
|
||||
if isinstance(llm_result, LLMResult):
|
||||
if not isinstance(llm_result.message.content, str):
|
||||
raise OutputParserError(
|
||||
f"Failed to parse structured output, LLM result is not a string: {llm_result.message.content}"
|
||||
)
|
||||
|
||||
return LLMResultWithStructuredOutput(
|
||||
structured_output=_parse_structured_output(llm_result.message.content),
|
||||
model=llm_result.model,
|
||||
message=llm_result.message,
|
||||
usage=llm_result.usage,
|
||||
system_fingerprint=llm_result.system_fingerprint,
|
||||
prompt_messages=llm_result.prompt_messages,
|
||||
)
|
||||
else:
|
||||
|
||||
def generator() -> Generator[LLMResultChunkWithStructuredOutput, None, None]:
|
||||
result_text: str = ""
|
||||
prompt_messages: Sequence[PromptMessage] = []
|
||||
system_fingerprint: str | None = None
|
||||
for event in llm_result:
|
||||
if isinstance(event, LLMResultChunk):
|
||||
prompt_messages = event.prompt_messages
|
||||
system_fingerprint = event.system_fingerprint
|
||||
|
||||
if isinstance(event.delta.message.content, str):
|
||||
result_text += event.delta.message.content
|
||||
elif isinstance(event.delta.message.content, list):
|
||||
for item in event.delta.message.content:
|
||||
if isinstance(item, TextPromptMessageContent):
|
||||
result_text += item.data
|
||||
|
||||
yield LLMResultChunkWithStructuredOutput(
|
||||
model=model_schema.model,
|
||||
prompt_messages=prompt_messages,
|
||||
system_fingerprint=system_fingerprint,
|
||||
delta=event.delta,
|
||||
)
|
||||
|
||||
yield LLMResultChunkWithStructuredOutput(
|
||||
structured_output=_parse_structured_output(result_text),
|
||||
model=model_schema.model,
|
||||
prompt_messages=prompt_messages,
|
||||
system_fingerprint=system_fingerprint,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=0,
|
||||
message=AssistantPromptMessage(content=""),
|
||||
usage=None,
|
||||
finish_reason=None,
|
||||
),
|
||||
)
|
||||
|
||||
return generator()
|
||||
|
||||
|
||||
def _handle_native_json_schema(
|
||||
provider: str,
|
||||
model_schema: AIModelEntity,
|
||||
structured_output_schema: Mapping,
|
||||
model_parameters: dict,
|
||||
rules: list[ParameterRule],
|
||||
):
|
||||
"""
|
||||
Handle structured output for models with native JSON schema support.
|
||||
|
||||
:param model_parameters: Model parameters to update
|
||||
:param rules: Model parameter rules
|
||||
:return: Updated model parameters with JSON schema configuration
|
||||
"""
|
||||
# Process schema according to model requirements
|
||||
schema_json = _prepare_schema_for_model(provider, model_schema, structured_output_schema)
|
||||
|
||||
# Set JSON schema in parameters
|
||||
model_parameters["json_schema"] = json.dumps(schema_json, ensure_ascii=False)
|
||||
|
||||
# Set appropriate response format if required by the model
|
||||
for rule in rules:
|
||||
if rule.name == "response_format" and ResponseFormat.JSON_SCHEMA in rule.options:
|
||||
model_parameters["response_format"] = ResponseFormat.JSON_SCHEMA
|
||||
|
||||
return model_parameters
|
||||
|
||||
|
||||
def _set_response_format(model_parameters: dict, rules: list):
|
||||
"""
|
||||
Set the appropriate response format parameter based on model rules.
|
||||
|
||||
:param model_parameters: Model parameters to update
|
||||
:param rules: Model parameter rules
|
||||
"""
|
||||
for rule in rules:
|
||||
if rule.name == "response_format":
|
||||
if ResponseFormat.JSON in rule.options:
|
||||
model_parameters["response_format"] = ResponseFormat.JSON
|
||||
elif ResponseFormat.JSON_OBJECT in rule.options:
|
||||
model_parameters["response_format"] = ResponseFormat.JSON_OBJECT
|
||||
|
||||
|
||||
def _handle_prompt_based_schema(
|
||||
prompt_messages: Sequence[PromptMessage], structured_output_schema: Mapping
|
||||
) -> list[PromptMessage]:
|
||||
"""
|
||||
Handle structured output for models without native JSON schema support.
|
||||
This function modifies the prompt messages to include schema-based output requirements.
|
||||
|
||||
Args:
|
||||
prompt_messages: Original sequence of prompt messages
|
||||
|
||||
Returns:
|
||||
list[PromptMessage]: Updated prompt messages with structured output requirements
|
||||
"""
|
||||
# Convert schema to string format
|
||||
schema_str = json.dumps(structured_output_schema, ensure_ascii=False)
|
||||
|
||||
# Find existing system prompt with schema placeholder
|
||||
system_prompt = next(
|
||||
(prompt for prompt in prompt_messages if isinstance(prompt, SystemPromptMessage)),
|
||||
None,
|
||||
)
|
||||
structured_output_prompt = STRUCTURED_OUTPUT_PROMPT.replace("{{schema}}", schema_str)
|
||||
# Prepare system prompt content
|
||||
system_prompt_content = (
|
||||
structured_output_prompt + "\n\n" + system_prompt.content
|
||||
if system_prompt and isinstance(system_prompt.content, str)
|
||||
else structured_output_prompt
|
||||
)
|
||||
system_prompt = SystemPromptMessage(content=system_prompt_content)
|
||||
|
||||
# Extract content from the last user message
|
||||
|
||||
filtered_prompts = [prompt for prompt in prompt_messages if not isinstance(prompt, SystemPromptMessage)]
|
||||
updated_prompt = [system_prompt] + filtered_prompts
|
||||
|
||||
return updated_prompt
|
||||
|
||||
|
||||
def _parse_structured_output(result_text: str) -> Mapping[str, Any]:
|
||||
structured_output: Mapping[str, Any] = {}
|
||||
parsed: Mapping[str, Any] = {}
|
||||
try:
|
||||
parsed = TypeAdapter(Mapping).validate_json(result_text)
|
||||
if not isinstance(parsed, dict):
|
||||
raise OutputParserError(f"Failed to parse structured output: {result_text}")
|
||||
structured_output = parsed
|
||||
except ValidationError:
|
||||
# if the result_text is not a valid json, try to repair it
|
||||
temp_parsed = json_repair.loads(result_text)
|
||||
if not isinstance(temp_parsed, dict):
|
||||
# handle reasoning model like deepseek-r1 got '<think>\n\n</think>\n' prefix
|
||||
if isinstance(temp_parsed, list):
|
||||
temp_parsed = next((item for item in temp_parsed if isinstance(item, dict)), {})
|
||||
else:
|
||||
raise OutputParserError(f"Failed to parse structured output: {result_text}")
|
||||
structured_output = cast(dict, temp_parsed)
|
||||
return structured_output
|
||||
|
||||
|
||||
def _prepare_schema_for_model(provider: str, model_schema: AIModelEntity, schema: Mapping):
|
||||
"""
|
||||
Prepare JSON schema based on model requirements.
|
||||
|
||||
Different models have different requirements for JSON schema formatting.
|
||||
This function handles these differences.
|
||||
|
||||
:param schema: The original JSON schema
|
||||
:return: Processed schema compatible with the current model
|
||||
"""
|
||||
|
||||
# Deep copy to avoid modifying the original schema
|
||||
processed_schema = dict(deepcopy(schema))
|
||||
|
||||
# Convert boolean types to string types (common requirement)
|
||||
convert_boolean_to_string(processed_schema)
|
||||
|
||||
# Apply model-specific transformations
|
||||
if SpecialModelType.GEMINI in model_schema.model:
|
||||
remove_additional_properties(processed_schema)
|
||||
return processed_schema
|
||||
elif SpecialModelType.OLLAMA in provider:
|
||||
return processed_schema
|
||||
else:
|
||||
# Default format with name field
|
||||
return {"schema": processed_schema, "name": "llm_response"}
|
||||
|
||||
|
||||
def remove_additional_properties(schema: dict):
|
||||
"""
|
||||
Remove additionalProperties fields from JSON schema.
|
||||
Used for models like Gemini that don't support this property.
|
||||
|
||||
:param schema: JSON schema to modify in-place
|
||||
"""
|
||||
if not isinstance(schema, dict):
|
||||
return
|
||||
|
||||
# Remove additionalProperties at current level
|
||||
schema.pop("additionalProperties", None)
|
||||
|
||||
# Process nested structures recursively
|
||||
for value in schema.values():
|
||||
if isinstance(value, dict):
|
||||
remove_additional_properties(value)
|
||||
elif isinstance(value, list):
|
||||
for item in value:
|
||||
if isinstance(item, dict):
|
||||
remove_additional_properties(item)
|
||||
|
||||
|
||||
def convert_boolean_to_string(schema: dict):
|
||||
"""
|
||||
Convert boolean type specifications to string in JSON schema.
|
||||
|
||||
:param schema: JSON schema to modify in-place
|
||||
"""
|
||||
if not isinstance(schema, dict):
|
||||
return
|
||||
|
||||
# Check for boolean type at current level
|
||||
if schema.get("type") == "boolean":
|
||||
schema["type"] = "string"
|
||||
|
||||
# Process nested dictionaries and lists recursively
|
||||
for value in schema.values():
|
||||
if isinstance(value, dict):
|
||||
convert_boolean_to_string(value)
|
||||
elif isinstance(value, list):
|
||||
for item in value:
|
||||
if isinstance(item, dict):
|
||||
convert_boolean_to_string(item)
|
||||
@@ -0,0 +1,26 @@
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from collections.abc import Sequence
|
||||
|
||||
from core.llm_generator.prompts import SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SuggestedQuestionsAfterAnswerOutputParser:
|
||||
def get_format_instructions(self) -> str:
|
||||
return SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT
|
||||
|
||||
def parse(self, text: str) -> Sequence[str]:
|
||||
action_match = re.search(r"\[.*?\]", text.strip(), re.DOTALL)
|
||||
questions: list[str] = []
|
||||
if action_match is not None:
|
||||
try:
|
||||
json_obj = json.loads(action_match.group(0).strip())
|
||||
except json.JSONDecodeError as exc:
|
||||
logger.warning("Failed to decode suggested questions payload: %s", exc)
|
||||
else:
|
||||
if isinstance(json_obj, list):
|
||||
questions = [question for question in json_obj if isinstance(question, str)]
|
||||
return questions
|
||||
424
dify/api/core/llm_generator/prompts.py
Normal file
424
dify/api/core/llm_generator/prompts.py
Normal file
@@ -0,0 +1,424 @@
|
||||
# Written by YORKI MINAKO🤡, Edited by Xiaoyi, Edited by yasu-oh
|
||||
CONVERSATION_TITLE_PROMPT = """You are asked to generate a concise chat title by decomposing the user’s input into two parts: “Intention” and “Subject”.
|
||||
|
||||
1. Detect Input Language
|
||||
Automatically identify the language of the user’s input (e.g. English, Chinese, Italian, Español, Arabic, Japanese, French, and etc.).
|
||||
|
||||
2. Generate Title
|
||||
- Combine Intention + Subject into a single, as-short-as-possible phrase.
|
||||
- The title must be natural, friendly, and in the same language as the input.
|
||||
- If the input is a direct question to the model, you may add an emoji at the end.
|
||||
|
||||
3. Output Format
|
||||
Return **only** a valid JSON object with these exact keys and no additional text:
|
||||
{
|
||||
"Language Type": "<Detected language>",
|
||||
"Your Reasoning": "<Brief explanation in that language>",
|
||||
"Your Output": "<Intention + Subject>"
|
||||
}
|
||||
|
||||
User Input:
|
||||
""" # noqa: E501
|
||||
|
||||
PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE = (
|
||||
"You are an expert programmer. Generate code based on the following instructions:\n\n"
|
||||
"Instructions: {{INSTRUCTION}}\n\n"
|
||||
"Write the code in {{CODE_LANGUAGE}}.\n\n"
|
||||
"Please ensure that you meet the following requirements:\n"
|
||||
"1. Define a function named 'main'.\n"
|
||||
"2. The 'main' function must return a dictionary (dict).\n"
|
||||
"3. You may modify the arguments of the 'main' function, but include appropriate type hints.\n"
|
||||
"4. The returned dictionary should contain at least one key-value pair.\n\n"
|
||||
"5. You may ONLY use the following libraries in your code: \n"
|
||||
"- json\n"
|
||||
"- datetime\n"
|
||||
"- math\n"
|
||||
"- random\n"
|
||||
"- re\n"
|
||||
"- string\n"
|
||||
"- sys\n"
|
||||
"- time\n"
|
||||
"- traceback\n"
|
||||
"- uuid\n"
|
||||
"- os\n"
|
||||
"- base64\n"
|
||||
"- hashlib\n"
|
||||
"- hmac\n"
|
||||
"- binascii\n"
|
||||
"- collections\n"
|
||||
"- functools\n"
|
||||
"- operator\n"
|
||||
"- itertools\n\n"
|
||||
"Example:\n"
|
||||
"def main(arg1: str, arg2: int) -> dict:\n"
|
||||
" return {\n"
|
||||
' "result": arg1 * arg2,\n'
|
||||
" }\n\n"
|
||||
"IMPORTANT:\n"
|
||||
"- Provide ONLY the code without any additional explanations, comments, or markdown formatting.\n"
|
||||
"- DO NOT use markdown code blocks (``` or ``` python). Return the raw code directly.\n"
|
||||
"- The code should start immediately after this instruction, without any preceding newlines or spaces.\n"
|
||||
"- The code should be complete, functional, and follow best practices for {{CODE_LANGUAGE}}.\n\n"
|
||||
"- Always use the format return {'result': ...} for the output.\n\n"
|
||||
"Generated Code:\n"
|
||||
)
|
||||
JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE = (
|
||||
"You are an expert programmer. Generate code based on the following instructions:\n\n"
|
||||
"Instructions: {{INSTRUCTION}}\n\n"
|
||||
"Write the code in {{CODE_LANGUAGE}}.\n\n"
|
||||
"Please ensure that you meet the following requirements:\n"
|
||||
"1. Define a function named 'main'.\n"
|
||||
"2. The 'main' function must return an object.\n"
|
||||
"3. You may modify the arguments of the 'main' function, but include appropriate JSDoc annotations.\n"
|
||||
"4. The returned object should contain at least one key-value pair.\n\n"
|
||||
"5. The returned object should always be in the format: {result: ...}\n\n"
|
||||
"Example:\n"
|
||||
"/**\n"
|
||||
" * Multiplies two numbers together.\n"
|
||||
" *\n"
|
||||
" * @param {number} arg1 - The first number to multiply.\n"
|
||||
" * @param {number} arg2 - The second number to multiply.\n"
|
||||
" * @returns {{ result: number }} The result of the multiplication.\n"
|
||||
" */\n"
|
||||
"function main(arg1, arg2) {\n"
|
||||
" return {\n"
|
||||
" result: arg1 * arg2\n"
|
||||
" };\n"
|
||||
"}\n\n"
|
||||
"IMPORTANT:\n"
|
||||
"- Provide ONLY the code without any additional explanations, comments, or markdown formatting.\n"
|
||||
"- DO NOT use markdown code blocks (``` or ``` javascript). Return the raw code directly.\n"
|
||||
"- The code should start immediately after this instruction, without any preceding newlines or spaces.\n"
|
||||
"- The code should be complete, functional, and follow best practices for {{CODE_LANGUAGE}}.\n\n"
|
||||
"Generated Code:\n"
|
||||
)
|
||||
|
||||
|
||||
SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
|
||||
"Please help me predict the three most likely questions that human would ask, "
|
||||
"and keep each question under 20 characters.\n"
|
||||
"MAKE SURE your output is the SAME language as the Assistant's latest response. "
|
||||
"The output must be an array in JSON format following the specified schema:\n"
|
||||
'["question1","question2","question3"]\n'
|
||||
)
|
||||
|
||||
GENERATOR_QA_PROMPT = (
|
||||
"<Task> The user will send a long text. Generate a Question and Answer pairs only using the knowledge"
|
||||
" in the long text. Please think step by step."
|
||||
"Step 1: Understand and summarize the main content of this text.\n"
|
||||
"Step 2: What key information or concepts are mentioned in this text?\n"
|
||||
"Step 3: Decompose or combine multiple pieces of information and concepts.\n"
|
||||
"Step 4: Generate questions and answers based on these key information and concepts.\n"
|
||||
"<Constraints> The questions should be clear and detailed, and the answers should be detailed and complete. "
|
||||
"You must answer in {language}, in a style that is clear and detailed in {language}."
|
||||
" No language other than {language} should be used. \n"
|
||||
"<Format> Use the following format: Q1:\nA1:\nQ2:\nA2:...\n"
|
||||
"<QA Pairs>"
|
||||
)
|
||||
|
||||
WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE = """
|
||||
Here is a task description for which I would like you to create a high-quality prompt template for:
|
||||
<task_description>
|
||||
{{TASK_DESCRIPTION}}
|
||||
</task_description>
|
||||
Based on task description, please create a well-structured prompt template that another AI could use to consistently complete the task. The prompt template should include:
|
||||
- Do not include <input> or <output> section and variables in the prompt, assume user will add them at their own will.
|
||||
- Clear instructions for the AI that will be using this prompt, demarcated with <instruction> tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also Specifies in the instructions that the output should not contain any xml tag.
|
||||
- Relevant examples if needed to clarify the task further, demarcated with <example> tags. Do not include variables in the prompt. Give three pairs of input and output examples.
|
||||
- Include other relevant sections demarcated with appropriate XML tags like <examples>, <instruction>.
|
||||
- Use the same language as task description.
|
||||
- Output in ``` xml ``` and start with <instruction>
|
||||
Please generate the full prompt template with at least 300 words and output only the prompt template.
|
||||
""" # noqa: E501
|
||||
|
||||
RULE_CONFIG_PROMPT_GENERATE_TEMPLATE = """
|
||||
Here is a task description for which I would like you to create a high-quality prompt template for:
|
||||
<task_description>
|
||||
{{TASK_DESCRIPTION}}
|
||||
</task_description>
|
||||
Based on task description, please create a well-structured prompt template that another AI could use to consistently complete the task. The prompt template should include:
|
||||
- Descriptive variable names surrounded by {{ }} (two curly brackets) to indicate where the actual values will be substituted in. Choose variable names that clearly indicate the type of value expected. Variable names have to be composed of number, english alphabets and underline and nothing else.
|
||||
- Clear instructions for the AI that will be using this prompt, demarcated with <instruction> tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also Specifies in the instructions that the output should not contain any xml tag.
|
||||
- Relevant examples if needed to clarify the task further, demarcated with <example> tags. Do not use curly brackets any other than in <instruction> section.
|
||||
- Any other relevant sections demarcated with appropriate XML tags like <input>, <output>, etc.
|
||||
- Use the same language as task description.
|
||||
- Output in ``` xml ``` and start with <instruction>
|
||||
Please generate the full prompt template and output only the prompt template.
|
||||
""" # noqa: E501
|
||||
|
||||
RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE = """
|
||||
I need to extract the following information from the input text. The <information to be extracted> tag specifies the 'type', 'description' and 'required' of the information to be extracted.
|
||||
<information to be extracted>
|
||||
variables name bounded two double curly brackets. Variable name has to be composed of number, english alphabets and underline and nothing else.
|
||||
</information to be extracted>
|
||||
|
||||
Step 1: Carefully read the input and understand the structure of the expected output.
|
||||
Step 2: Extract relevant parameters from the provided text based on the name and description of object.
|
||||
Step 3: Structure the extracted parameters to JSON object as specified in <structure>.
|
||||
Step 4: Ensure that the list of variable_names is properly formatted and valid. The output should not contain any XML tags. Output an empty list if there is no valid variable name in input text.
|
||||
|
||||
### Structure
|
||||
Here is the structure of the expected output, I should always follow the output structure.
|
||||
["variable_name_1", "variable_name_2"]
|
||||
|
||||
### Input Text
|
||||
Inside <text></text> XML tags, there is a text that I should extract parameters and convert to a JSON object.
|
||||
<text>
|
||||
{{INPUT_TEXT}}
|
||||
</text>
|
||||
|
||||
### Answer
|
||||
I should always output a valid list. Output nothing other than the list of variable_name. Output an empty list if there is no variable name in input text.
|
||||
""" # noqa: E501
|
||||
|
||||
RULE_CONFIG_STATEMENT_GENERATE_TEMPLATE = """
|
||||
<instruction>
|
||||
Step 1: Identify the purpose of the chatbot from the variable {{TASK_DESCRIPTION}} and infer chatbot's tone (e.g., friendly, professional, etc.) to add personality traits.
|
||||
Step 2: Create a coherent and engaging opening statement.
|
||||
Step 3: Ensure the output is welcoming and clearly explains what the chatbot is designed to do. Do not include any XML tags in the output.
|
||||
Please use the same language as the user's input language. If user uses chinese then generate opening statement in chinese, if user uses english then generate opening statement in english.
|
||||
Example Input:
|
||||
Provide customer support for an e-commerce website
|
||||
Example Output:
|
||||
Welcome! I'm here to assist you with any questions or issues you might have with your shopping experience. Whether you're looking for product information, need help with your order, or have any other inquiries, feel free to ask. I'm friendly, helpful, and ready to support you in any way I can.
|
||||
<Task>
|
||||
Here is the task description: {{INPUT_TEXT}}
|
||||
|
||||
You just need to generate the output
|
||||
""" # noqa: E501
|
||||
|
||||
SYSTEM_STRUCTURED_OUTPUT_GENERATE = """
|
||||
Your task is to convert simple user descriptions into properly formatted JSON Schema definitions. When a user describes data fields they need, generate a complete, valid JSON Schema that accurately represents those fields with appropriate types and requirements.
|
||||
|
||||
## Instructions:
|
||||
|
||||
1. Analyze the user's description of their data needs
|
||||
2. Identify each property that should be included in the schema
|
||||
3. Determine the appropriate data type for each property
|
||||
4. Decide which properties should be required
|
||||
5. Generate a complete JSON Schema with proper syntax
|
||||
6. Include appropriate constraints when specified (min/max values, patterns, formats)
|
||||
7. Provide ONLY the JSON Schema without any additional explanations, comments, or markdown formatting.
|
||||
8. DO NOT use markdown code blocks (``` or ``` json). Return the raw JSON Schema directly.
|
||||
|
||||
## Examples:
|
||||
|
||||
### Example 1:
|
||||
**User Input:** I need name and age
|
||||
**JSON Schema Output:**
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": { "type": "string" },
|
||||
"age": { "type": "number" }
|
||||
},
|
||||
"required": ["name", "age"]
|
||||
}
|
||||
|
||||
### Example 2:
|
||||
**User Input:** I want to store information about books including title, author, publication year and optional page count
|
||||
**JSON Schema Output:**
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"title": { "type": "string" },
|
||||
"author": { "type": "string" },
|
||||
"publicationYear": { "type": "integer" },
|
||||
"pageCount": { "type": "integer" }
|
||||
},
|
||||
"required": ["title", "author", "publicationYear"]
|
||||
}
|
||||
|
||||
### Example 3:
|
||||
**User Input:** Create a schema for user profiles with email, password, and age (must be at least 18)
|
||||
**JSON Schema Output:**
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"email": {
|
||||
"type": "string",
|
||||
"format": "email"
|
||||
},
|
||||
"password": {
|
||||
"type": "string",
|
||||
"minLength": 8
|
||||
},
|
||||
"age": {
|
||||
"type": "integer",
|
||||
"minimum": 18
|
||||
}
|
||||
},
|
||||
"required": ["email", "password", "age"]
|
||||
}
|
||||
|
||||
### Example 4:
|
||||
**User Input:** I need album schema, the ablum has songs, and each song has name, duration, and artist.
|
||||
**JSON Schema Output:**
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"songs": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"duration": {
|
||||
"type": "string"
|
||||
},
|
||||
"aritst": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"id",
|
||||
"duration",
|
||||
"aritst"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"songs"
|
||||
]
|
||||
}
|
||||
|
||||
Now, generate a JSON Schema based on my description
|
||||
""" # noqa: E501
|
||||
|
||||
STRUCTURED_OUTPUT_PROMPT = """You’re a helpful AI assistant. You could answer questions and output in JSON format.
|
||||
constraints:
|
||||
- You must output in JSON format.
|
||||
- Do not output boolean value, use string type instead.
|
||||
- Do not output integer or float value, use number type instead.
|
||||
eg:
|
||||
Here is the JSON schema:
|
||||
{"additionalProperties": false, "properties": {"age": {"type": "number"}, "name": {"type": "string"}}, "required": ["name", "age"], "type": "object"}
|
||||
|
||||
Here is the user's question:
|
||||
My name is John Doe and I am 30 years old.
|
||||
|
||||
output:
|
||||
{"name": "John Doe", "age": 30}
|
||||
Here is the JSON schema:
|
||||
{{schema}}
|
||||
""" # noqa: E501
|
||||
|
||||
LLM_MODIFY_PROMPT_SYSTEM = """
|
||||
Both your input and output should be in JSON format.
|
||||
|
||||
! Below is the schema for input content !
|
||||
{
|
||||
"type": "object",
|
||||
"description": "The user is trying to process some content with a prompt, but the output is not as expected. They hope to achieve their goal by modifying the prompt.",
|
||||
"properties": {
|
||||
"current": {
|
||||
"type": "string",
|
||||
"description": "The prompt before modification, where placeholders {{}} will be replaced with actual values for the large language model. The content in the placeholders should not be changed."
|
||||
},
|
||||
"last_run": {
|
||||
"type": "object",
|
||||
"description": "The output result from the large language model after receiving the prompt.",
|
||||
},
|
||||
"instruction": {
|
||||
"type": "string",
|
||||
"description": "User's instruction to edit the current prompt"
|
||||
},
|
||||
"ideal_output": {
|
||||
"type": "string",
|
||||
"description": "The ideal output that the user expects from the large language model after modifying the prompt. You should compare the last output with the ideal output and make changes to the prompt to achieve the goal."
|
||||
}
|
||||
}
|
||||
}
|
||||
! Above is the schema for input content !
|
||||
|
||||
! Below is the schema for output content !
|
||||
{
|
||||
"type": "object",
|
||||
"description": "Your feedback to the user after they provide modification suggestions.",
|
||||
"properties": {
|
||||
"modified": {
|
||||
"type": "string",
|
||||
"description": "Your modified prompt. You should change the original prompt as little as possible to achieve the goal. Keep the language of prompt if not asked to change"
|
||||
},
|
||||
"message": {
|
||||
"type": "string",
|
||||
"description": "Your feedback to the user, in the user's language, explaining what you did and your thought process in text, providing sufficient emotional value to the user."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"modified",
|
||||
"message"
|
||||
]
|
||||
}
|
||||
! Above is the schema for output content !
|
||||
|
||||
Your output must strictly follow the schema format, do not output any content outside of the JSON body.
|
||||
""" # noqa: E501
|
||||
|
||||
LLM_MODIFY_CODE_SYSTEM = """
|
||||
Both your input and output should be in JSON format.
|
||||
|
||||
! Below is the schema for input content !
|
||||
{
|
||||
"type": "object",
|
||||
"description": "The user is trying to process some data with a code snippet, but the result is not as expected. They hope to achieve their goal by modifying the code.",
|
||||
"properties": {
|
||||
"current": {
|
||||
"type": "string",
|
||||
"description": "The code before modification."
|
||||
},
|
||||
"last_run": {
|
||||
"type": "object",
|
||||
"description": "The result of the code.",
|
||||
},
|
||||
"message": {
|
||||
"type": "string",
|
||||
"description": "User's instruction to edit the current code"
|
||||
}
|
||||
}
|
||||
}
|
||||
! Above is the schema for input content !
|
||||
|
||||
! Below is the schema for output content !
|
||||
{
|
||||
"type": "object",
|
||||
"description": "Your feedback to the user after they provide modification suggestions.",
|
||||
"properties": {
|
||||
"modified": {
|
||||
"type": "string",
|
||||
"description": "Your modified code. You should change the original code as little as possible to achieve the goal. Keep the programming language of code if not asked to change"
|
||||
},
|
||||
"message": {
|
||||
"type": "string",
|
||||
"description": "Your feedback to the user, in the user's language, explaining what you did and your thought process in text, providing sufficient emotional value to the user."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"modified",
|
||||
"message"
|
||||
]
|
||||
}
|
||||
! Above is the schema for output content !
|
||||
|
||||
When you are modifying the code, you should remember:
|
||||
- Do not use print, this not work in dify sandbox.
|
||||
- Do not try dangerous call like deleting files. It's PROHIBITED.
|
||||
- Do not use any library that is not built-in in with Python.
|
||||
- Get inputs from the parameters of the function and have explicit type annotations.
|
||||
- Write proper imports at the top of the code.
|
||||
- Use return statement to return the result.
|
||||
- You should return a `dict`. If you need to return a `result: str`, you should `return {"result": result}`.
|
||||
Your output must strictly follow the schema format, do not output any content outside of the JSON body.
|
||||
""" # noqa: E501
|
||||
|
||||
INSTRUCTION_GENERATE_TEMPLATE_PROMPT = """The output of this prompt is not as expected: {{#last_run#}}.
|
||||
You should edit the prompt according to the IDEAL OUTPUT."""
|
||||
|
||||
INSTRUCTION_GENERATE_TEMPLATE_CODE = """Please fix the errors in the {{#error_message#}}."""
|
||||
Reference in New Issue
Block a user