2025-12-01 17:21:38 +08:00
parent 32fee2b8ab
commit fab8c13cb3
7511 changed files with 996300 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
from .code_executor import CodeExecutor, CodeLanguage
__all__ = ["CodeExecutor", "CodeLanguage"]

View File

@@ -0,0 +1,161 @@
import logging
from collections.abc import Mapping
from enum import StrEnum
from threading import Lock
from typing import Any
import httpx
from pydantic import BaseModel
from yarl import URL
from configs import dify_config
from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer
from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer
from core.helper.http_client_pooling import get_pooled_http_client
logger = logging.getLogger(__name__)
code_execution_endpoint_url = URL(str(dify_config.CODE_EXECUTION_ENDPOINT))
CODE_EXECUTION_SSL_VERIFY = dify_config.CODE_EXECUTION_SSL_VERIFY
_CODE_EXECUTOR_CLIENT_LIMITS = httpx.Limits(
max_connections=dify_config.CODE_EXECUTION_POOL_MAX_CONNECTIONS,
max_keepalive_connections=dify_config.CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS,
keepalive_expiry=dify_config.CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY,
)
_CODE_EXECUTOR_CLIENT_KEY = "code_executor:http_client"
class CodeExecutionError(Exception):
pass
class CodeExecutionResponse(BaseModel):
class Data(BaseModel):
stdout: str | None = None
error: str | None = None
code: int
message: str
data: Data
class CodeLanguage(StrEnum):
PYTHON3 = "python3"
JINJA2 = "jinja2"
JAVASCRIPT = "javascript"
def _build_code_executor_client() -> httpx.Client:
return httpx.Client(
verify=CODE_EXECUTION_SSL_VERIFY,
limits=_CODE_EXECUTOR_CLIENT_LIMITS,
)
class CodeExecutor:
dependencies_cache: dict[str, str] = {}
dependencies_cache_lock = Lock()
code_template_transformers: dict[CodeLanguage, type[TemplateTransformer]] = {
CodeLanguage.PYTHON3: Python3TemplateTransformer,
CodeLanguage.JINJA2: Jinja2TemplateTransformer,
CodeLanguage.JAVASCRIPT: NodeJsTemplateTransformer,
}
code_language_to_running_language = {
CodeLanguage.JAVASCRIPT: "nodejs",
CodeLanguage.JINJA2: CodeLanguage.PYTHON3,
CodeLanguage.PYTHON3: CodeLanguage.PYTHON3,
}
supported_dependencies_languages: set[CodeLanguage] = {CodeLanguage.PYTHON3}
@classmethod
def execute_code(cls, language: CodeLanguage, preload: str, code: str) -> str:
"""
Execute code
:param language: code language
:param preload: the preload script
:param code: code
:return:
"""
url = code_execution_endpoint_url / "v1" / "sandbox" / "run"
headers = {"X-Api-Key": dify_config.CODE_EXECUTION_API_KEY}
data = {
"language": cls.code_language_to_running_language.get(language),
"code": code,
"preload": preload,
"enable_network": True,
}
timeout = httpx.Timeout(
connect=dify_config.CODE_EXECUTION_CONNECT_TIMEOUT,
read=dify_config.CODE_EXECUTION_READ_TIMEOUT,
write=dify_config.CODE_EXECUTION_WRITE_TIMEOUT,
pool=None,
)
client = get_pooled_http_client(_CODE_EXECUTOR_CLIENT_KEY, _build_code_executor_client)
try:
response = client.post(
str(url),
json=data,
headers=headers,
timeout=timeout,
)
            if response.status_code == 503:
                raise CodeExecutionError("Code execution service is unavailable")
            elif response.status_code != 200:
                raise CodeExecutionError(
                    f"Failed to execute code, got status code {response.status_code};"
                    " please check if the sandbox service is running"
                )
        except CodeExecutionError:
            raise
        except Exception as e:
            raise CodeExecutionError(
                "Failed to execute code, which is likely a network issue;"
                " please check if the sandbox service is running."
                f" (Error: {str(e)})"
            ) from e
try:
response_data = response.json()
except Exception as e:
raise CodeExecutionError("Failed to parse response") from e
if (code := response_data.get("code")) != 0:
raise CodeExecutionError(f"Got error code: {code}. Got error msg: {response_data.get('message')}")
response_code = CodeExecutionResponse.model_validate(response_data)
if response_code.data.error:
raise CodeExecutionError(response_code.data.error)
return response_code.data.stdout or ""
@classmethod
def execute_workflow_code_template(cls, language: CodeLanguage, code: str, inputs: Mapping[str, Any]):
"""
Execute code
:param language: code language
:param code: code
:param inputs: inputs
:return:
"""
template_transformer = cls.code_template_transformers.get(language)
if not template_transformer:
raise CodeExecutionError(f"Unsupported language {language}")
runner, preload = template_transformer.transform_caller(code, inputs)
        response = cls.execute_code(language, preload, runner)
return template_transformer.transform_response(response)
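
For reference, the wire contract that execute_code speaks is a single POST; a minimal standalone sketch, assuming a hypothetical local sandbox at http://localhost:8194 and a placeholder API key:

import httpx

SANDBOX_URL = "http://localhost:8194/v1/sandbox/run"  # hypothetical local endpoint
API_KEY = "dify-sandbox"  # placeholder key, not a real credential

resp = httpx.post(
    SANDBOX_URL,
    headers={"X-Api-Key": API_KEY},
    json={
        "language": "python3",
        "code": "print('<<RESULT>>{\"ok\": true}<<RESULT>>')",
        "preload": "",
        "enable_network": True,
    },
    timeout=10.0,
)
body = resp.json()  # expected shape: {"code": int, "message": str, "data": {"stdout": ..., "error": ...}}
assert body["code"] == 0, body["message"]
print(body["data"]["stdout"])  # the runner's stdout, including the <<RESULT>> markers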

View File

@@ -0,0 +1,61 @@
from abc import ABC, abstractmethod
from collections.abc import Mapping, Sequence
from typing import TypedDict
from pydantic import BaseModel
class VariableConfig(TypedDict):
variable: str
value_selector: Sequence[str | int]
class OutputConfig(TypedDict):
type: str
children: None
class CodeConfig(TypedDict):
variables: Sequence[VariableConfig]
code_language: str
code: str
outputs: Mapping[str, OutputConfig]
class DefaultConfig(TypedDict):
type: str
config: CodeConfig
class CodeNodeProvider(BaseModel, ABC):
@staticmethod
@abstractmethod
def get_language() -> str:
pass
@classmethod
def is_accept_language(cls, language: str) -> bool:
return language == cls.get_language()
@classmethod
@abstractmethod
def get_default_code(cls) -> str:
"""
get default code in specific programming language for the code node
"""
pass
@classmethod
def get_default_config(cls) -> DefaultConfig:
return {
"type": "code",
"config": {
"variables": [
{"variable": "arg1", "value_selector": []},
{"variable": "arg2", "value_selector": []},
],
"code_language": cls.get_language(),
"code": cls.get_default_code(),
"outputs": {"result": {"type": "string", "children": None}},
},
}

View File

@@ -0,0 +1,22 @@
from textwrap import dedent
from core.helper.code_executor.code_executor import CodeLanguage
from core.helper.code_executor.code_node_provider import CodeNodeProvider
class JavascriptCodeProvider(CodeNodeProvider):
@staticmethod
def get_language() -> str:
return CodeLanguage.JAVASCRIPT
@classmethod
def get_default_code(cls) -> str:
return dedent(
"""
function main({arg1, arg2}) {
return {
result: arg1 + arg2
}
}
"""
)

View File

@@ -0,0 +1,22 @@
from textwrap import dedent
from core.helper.code_executor.template_transformer import TemplateTransformer
class NodeJsTemplateTransformer(TemplateTransformer):
@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(f""" {cls._code_placeholder}
// decode and prepare input object
var inputs_obj = JSON.parse(Buffer.from('{cls._inputs_placeholder}', 'base64').toString('utf-8'))
// execute main function
var output_obj = main(inputs_obj)
// convert output to json and print
var output_json = JSON.stringify(output_obj)
var result = `<<RESULT>>${{output_json}}<<RESULT>>`
console.log(result)
""")
return runner_script

View File

@@ -0,0 +1,16 @@
from collections.abc import Mapping
from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage
class Jinja2Formatter:
@classmethod
def format(cls, template: str, inputs: Mapping[str, str]) -> str:
"""
Format template
:param template: template
:param inputs: inputs
:return:
"""
result = CodeExecutor.execute_workflow_code_template(language=CodeLanguage.JINJA2, code=template, inputs=inputs)
return str(result.get("result", ""))

View File

@@ -0,0 +1,57 @@
from textwrap import dedent
from core.helper.code_executor.template_transformer import TemplateTransformer
class Jinja2TemplateTransformer(TemplateTransformer):
@classmethod
def transform_response(cls, response: str):
"""
Transform response to dict
:param response: response
:return:
"""
return {"result": cls.extract_result_str_from_response(response)}
@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(f"""
# declare main function
def main(**inputs):
import jinja2
template = jinja2.Template('''{cls._code_placeholder}''')
return template.render(**inputs)
import json
from base64 import b64decode
# decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
# execute main function
output = main(**inputs_obj)
# convert output and print
result = f'''<<RESULT>>{{output}}<<RESULT>>'''
print(result)
""")
return runner_script
@classmethod
def get_preload_script(cls) -> str:
preload_script = dedent("""
import jinja2
from base64 import b64decode
def _jinja2_preload_():
                # prepare the jinja2 environment and render a template once up front to avoid issues inside the sandbox
template = jinja2.Template('{{s}}')
template.render(s='a')
if __name__ == '__main__':
_jinja2_preload_()
""")
return preload_script
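
Stripped of the transport, the generated script is a plain Jinja2 render; reproduced locally for illustration (requires the jinja2 package):

import jinja2

template = jinja2.Template("Hello, {{ name }}! You have {{ count }} new messages.")
print(template.render(name="Dify", count=3))  # Hello, Dify! You have 3 new messages.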

View File

@@ -0,0 +1,21 @@
from textwrap import dedent
from core.helper.code_executor.code_executor import CodeLanguage
from core.helper.code_executor.code_node_provider import CodeNodeProvider
class Python3CodeProvider(CodeNodeProvider):
@staticmethod
def get_language() -> str:
return CodeLanguage.PYTHON3
@classmethod
def get_default_code(cls) -> str:
return dedent(
"""
def main(arg1: str, arg2: str):
return {
"result": arg1 + arg2,
}
"""
)

View File

@@ -0,0 +1,25 @@
from textwrap import dedent
from core.helper.code_executor.template_transformer import TemplateTransformer
class Python3TemplateTransformer(TemplateTransformer):
@classmethod
def get_runner_script(cls) -> str:
runner_script = dedent(f""" {cls._code_placeholder}
import json
from base64 import b64decode
# decode and prepare input dict
inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))
# execute main function
output_obj = main(**inputs_obj)
# convert output to json and print
output_json = json.dumps(output_obj, indent=4)
result = f'''<<RESULT>>{{output_json}}<<RESULT>>'''
print(result)
""")
return runner_script
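
The transform-and-run cycle can be simulated without the sandbox; a sketch that substitutes both placeholders and executes the assembled runner with the local interpreter (illustrative only, with none of the sandbox's isolation):

import json
import re
import subprocess
import sys
from base64 import b64encode

CODE = 'def main(arg1, arg2):\n    return {"result": arg1 + arg2}\n'
RUNNER = """{{code}}
import json
from base64 import b64decode
inputs_obj = json.loads(b64decode('{{inputs}}').decode('utf-8'))
output_obj = main(**inputs_obj)
print(f'''<<RESULT>>{json.dumps(output_obj)}<<RESULT>>''')
"""

inputs_b64 = b64encode(json.dumps({"arg1": "foo", "arg2": "bar"}).encode()).decode()
script = RUNNER.replace("{{code}}", CODE).replace("{{inputs}}", inputs_b64)
stdout = subprocess.run([sys.executable, "-c", script], capture_output=True, text=True).stdout
match = re.search(r"<<RESULT>>(.*)<<RESULT>>", stdout, re.DOTALL)
print(json.loads(match.group(1)))  # {'result': 'foobar'}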

View File

@@ -0,0 +1,114 @@
import json
import re
from abc import ABC, abstractmethod
from base64 import b64encode
from collections.abc import Mapping
from typing import Any
from core.variables.utils import dumps_with_segments
class TemplateTransformer(ABC):
_code_placeholder: str = "{{code}}"
_inputs_placeholder: str = "{{inputs}}"
_result_tag: str = "<<RESULT>>"
@classmethod
def transform_caller(cls, code: str, inputs: Mapping[str, Any]) -> tuple[str, str]:
"""
Transform code to python runner
:param code: code
:param inputs: inputs
:return: runner, preload
"""
runner_script = cls.assemble_runner_script(code, inputs)
preload_script = cls.get_preload_script()
return runner_script, preload_script
@classmethod
def extract_result_str_from_response(cls, response: str):
result = re.search(rf"{cls._result_tag}(.*){cls._result_tag}", response, re.DOTALL)
if not result:
raise ValueError(f"Failed to parse result: no result tag found in response. Response: {response[:200]}...")
return result.group(1)
@classmethod
def transform_response(cls, response: str) -> Mapping[str, Any]:
"""
Transform response to dict
:param response: response
:return:
"""
try:
result_str = cls.extract_result_str_from_response(response)
result = json.loads(result_str)
        except json.JSONDecodeError as e:
            raise ValueError(f"Failed to parse JSON response: {str(e)}.") from e
        except ValueError:
            # Re-raise ValueError from extract_result_str_from_response
            raise
        except Exception as e:
            raise ValueError(f"Unexpected error during response transformation: {str(e)}") from e
if not isinstance(result, dict):
raise ValueError(f"Result must be a dict, got {type(result).__name__}")
if not all(isinstance(k, str) for k in result):
raise ValueError("Result keys must be strings")
# Post-process the result to convert scientific notation strings back to numbers
result = cls._post_process_result(result)
return result
@classmethod
def _post_process_result(cls, result: dict[Any, Any]) -> dict[Any, Any]:
"""
Post-process the result to convert scientific notation strings back to numbers
"""
def convert_scientific_notation(value):
if isinstance(value, str):
# Check if the string looks like scientific notation
if re.match(r"^-?\d+\.?\d*e[+-]\d+$", value, re.IGNORECASE):
try:
return float(value)
except ValueError:
pass
elif isinstance(value, dict):
return {k: convert_scientific_notation(v) for k, v in value.items()}
elif isinstance(value, list):
return [convert_scientific_notation(v) for v in value]
return value
return convert_scientific_notation(result) # type: ignore[no-any-return]
@classmethod
@abstractmethod
def get_runner_script(cls) -> str:
"""
Get runner script
"""
pass
@classmethod
def serialize_inputs(cls, inputs: Mapping[str, Any]) -> str:
inputs_json_str = dumps_with_segments(inputs, ensure_ascii=False).encode()
input_base64_encoded = b64encode(inputs_json_str).decode("utf-8")
return input_base64_encoded
@classmethod
def assemble_runner_script(cls, code: str, inputs: Mapping[str, Any]) -> str:
# assemble runner script
script = cls.get_runner_script()
script = script.replace(cls._code_placeholder, code)
inputs_str = cls.serialize_inputs(inputs)
script = script.replace(cls._inputs_placeholder, inputs_str)
return script
@classmethod
def get_preload_script(cls) -> str:
"""
Get preload script
"""
return ""

View File

@@ -0,0 +1,75 @@
"""
Credential utility functions for checking credential existence and policy compliance.
"""
from services.enterprise.plugin_manager_service import PluginCredentialType
def is_credential_exists(credential_id: str, credential_type: "PluginCredentialType") -> bool:
"""
Check if the credential still exists in the database.
:param credential_id: The credential ID to check
:param credential_type: The type of credential (MODEL or TOOL)
:return: True if credential exists, False otherwise
"""
from sqlalchemy import select
from sqlalchemy.orm import Session
from extensions.ext_database import db
from models.provider import ProviderCredential, ProviderModelCredential
from models.tools import BuiltinToolProvider
with Session(db.engine) as session:
if credential_type == PluginCredentialType.MODEL:
# Check both pre-defined and custom model credentials using a single UNION query
stmt = (
select(ProviderCredential.id)
.where(ProviderCredential.id == credential_id)
.union(select(ProviderModelCredential.id).where(ProviderModelCredential.id == credential_id))
)
return session.scalar(stmt) is not None
if credential_type == PluginCredentialType.TOOL:
return (
session.scalar(select(BuiltinToolProvider.id).where(BuiltinToolProvider.id == credential_id))
is not None
)
return False
def check_credential_policy_compliance(
credential_id: str, provider: str, credential_type: "PluginCredentialType", check_existence: bool = True
) -> None:
"""
Check credential policy compliance for the given credential ID.
:param credential_id: The credential ID to check
:param provider: The provider name
:param credential_type: The type of credential (MODEL or TOOL)
:param check_existence: Whether to check if credential exists in database first
:raises ValueError: If credential policy compliance check fails
"""
from services.enterprise.plugin_manager_service import (
CheckCredentialPolicyComplianceRequest,
PluginManagerService,
)
from services.feature_service import FeatureService
if not FeatureService.get_system_features().plugin_manager.enabled or not credential_id:
return
# Check if credential exists in database first (if requested)
if check_existence:
if not is_credential_exists(credential_id, credential_type):
raise ValueError(f"Credential with id {credential_id} for provider {provider} not found.")
# Check policy compliance
PluginManagerService.check_credential_policy_compliance(
CheckCredentialPolicyComplianceRequest(
dify_credential_id=credential_id,
provider=provider,
credential_type=credential_type,
)
)

View File

@@ -0,0 +1,17 @@
from core.helper import ssrf_proxy
def download_with_size_limit(url, max_download_size: int, **kwargs):
response = ssrf_proxy.get(url, follow_redirects=True, **kwargs)
    if response.status_code == 404:
        raise ValueError("File not found")
    total_size = 0
    chunks = []
    # Note: the GET above buffers the full body (the request is not streamed),
    # so this loop enforces the limit only after the download completes.
    for chunk in response.iter_bytes():
        total_size += len(chunk)
        if total_size > max_download_size:
            raise ValueError("Max file size reached")
        chunks.append(chunk)
content = b"".join(chunks)
return content
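
A hedged streaming alternative (a sketch, not the repo's implementation; note it talks to the network directly rather than through the SSRF proxy), which rejects oversized files while the body is still arriving:

import httpx

def download_streaming_with_limit(url: str, max_download_size: int) -> bytes:
    chunks: list[bytes] = []
    total = 0
    with httpx.Client(follow_redirects=True) as client:
        with client.stream("GET", url) as response:
            response.raise_for_status()
            for chunk in response.iter_bytes():
                total += len(chunk)
                if total > max_download_size:
                    raise ValueError("Max file size reached")  # abort mid-transfer
                chunks.append(chunk)
    return b"".join(chunks)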

View File

@@ -0,0 +1,44 @@
import base64
from libs import rsa
def obfuscated_token(token: str) -> str:
if not token:
return token
if len(token) <= 8:
return "*" * 20
return token[:6] + "*" * 12 + token[-2:]
def full_mask_token(token_length=20):
return "*" * token_length
def encrypt_token(tenant_id: str, token: str):
from extensions.ext_database import db
from models.account import Tenant
if not (tenant := db.session.query(Tenant).where(Tenant.id == tenant_id).first()):
raise ValueError(f"Tenant with id {tenant_id} not found")
assert tenant.encrypt_public_key is not None
encrypted_token = rsa.encrypt(token, tenant.encrypt_public_key)
return base64.b64encode(encrypted_token).decode()
def decrypt_token(tenant_id: str, token: str) -> str:
return rsa.decrypt(base64.b64decode(token), tenant_id)
def batch_decrypt_token(tenant_id: str, tokens: list[str]):
rsa_key, cipher_rsa = rsa.get_decrypt_decoding(tenant_id)
return [rsa.decrypt_token_with_decoding(base64.b64decode(token), rsa_key, cipher_rsa) for token in tokens]
def get_decrypt_decoding(tenant_id: str):
return rsa.get_decrypt_decoding(tenant_id)
def decrypt_token_with_decoding(token: str, rsa_key, cipher_rsa):
return rsa.decrypt_token_with_decoding(base64.b64decode(token), rsa_key, cipher_rsa)
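
The masking rules in obfuscated_token, spelled out with hypothetical token values:

from core.helper.encrypter import obfuscated_token

assert obfuscated_token("") == ""                      # empty tokens pass through
assert obfuscated_token("short") == "*" * 20           # <= 8 chars: fixed-width full mask
assert obfuscated_token("sk-1234567890abcdef") == "sk-123" + "*" * 12 + "ef"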

View File

@@ -0,0 +1,59 @@
"""HTTP client pooling utilities."""
from __future__ import annotations
import atexit
import threading
from collections.abc import Callable
import httpx
ClientBuilder = Callable[[], httpx.Client]
class HttpClientPoolFactory:
"""Thread-safe factory that maintains reusable HTTP client instances."""
def __init__(self) -> None:
self._clients: dict[str, httpx.Client] = {}
self._lock = threading.Lock()
def get_or_create(self, key: str, builder: ClientBuilder) -> httpx.Client:
"""Return a pooled client associated with ``key`` creating it on demand."""
client = self._clients.get(key)
if client is not None:
return client
with self._lock:
client = self._clients.get(key)
if client is None:
client = builder()
self._clients[key] = client
return client
def close_all(self) -> None:
"""Close all pooled clients and clear the pool."""
with self._lock:
for client in self._clients.values():
client.close()
self._clients.clear()
_factory = HttpClientPoolFactory()
def get_pooled_http_client(key: str, builder: ClientBuilder) -> httpx.Client:
"""Return a pooled client for the given ``key`` using ``builder`` when missing."""
return _factory.get_or_create(key, builder)
def close_all_pooled_clients() -> None:
"""Close every client created through the pooling factory."""
_factory.close_all()
def _register_shutdown_hook() -> None:
atexit.register(close_all_pooled_clients)
_register_shutdown_hook()
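
A usage sketch (the pool key is an arbitrary string; repeated lookups with the same key return the same client, so the builder runs at most once):

import httpx
from core.helper.http_client_pooling import get_pooled_http_client

def _build_client() -> httpx.Client:
    return httpx.Client(timeout=httpx.Timeout(5.0))

first = get_pooled_http_client("example:client", _build_client)
second = get_pooled_http_client("example:client", _build_client)
assert first is second  # one pooled instance, keep-alive connections are reused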

View File

@@ -0,0 +1,66 @@
from collections.abc import Sequence
import httpx
from yarl import URL
from configs import dify_config
from core.helper.download import download_with_size_limit
from core.plugin.entities.marketplace import MarketplacePluginDeclaration
marketplace_api_url = URL(str(dify_config.MARKETPLACE_API_URL))
def get_plugin_pkg_url(plugin_unique_identifier: str) -> str:
return str((marketplace_api_url / "api/v1/plugins/download").with_query(unique_identifier=plugin_unique_identifier))
def download_plugin_pkg(plugin_unique_identifier: str):
return download_with_size_limit(get_plugin_pkg_url(plugin_unique_identifier), dify_config.PLUGIN_MAX_PACKAGE_SIZE)
def batch_fetch_plugin_manifests(plugin_ids: list[str]) -> Sequence[MarketplacePluginDeclaration]:
if len(plugin_ids) == 0:
return []
url = str(marketplace_api_url / "api/v1/plugins/batch")
response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
response.raise_for_status()
return [MarketplacePluginDeclaration.model_validate(plugin) for plugin in response.json()["data"]["plugins"]]
def batch_fetch_plugin_by_ids(plugin_ids: list[str]) -> list[dict]:
if not plugin_ids:
return []
url = str(marketplace_api_url / "api/v1/plugins/batch")
response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
response.raise_for_status()
data = response.json()
return data.get("data", {}).get("plugins", [])
def batch_fetch_plugin_manifests_ignore_deserialization_error(
plugin_ids: list[str],
) -> Sequence[MarketplacePluginDeclaration]:
if len(plugin_ids) == 0:
return []
url = str(marketplace_api_url / "api/v1/plugins/batch")
response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
response.raise_for_status()
result: list[MarketplacePluginDeclaration] = []
for plugin in response.json()["data"]["plugins"]:
try:
result.append(MarketplacePluginDeclaration.model_validate(plugin))
except Exception:
pass
return result
def record_install_plugin_event(plugin_unique_identifier: str):
url = str(marketplace_api_url / "api/v1/stats/plugins/install_count")
response = httpx.post(url, json={"unique_identifier": plugin_unique_identifier})
response.raise_for_status()

View File

@@ -0,0 +1,51 @@
import json
from enum import StrEnum
from json import JSONDecodeError
from extensions.ext_redis import redis_client
class ProviderCredentialsCacheType(StrEnum):
PROVIDER = "provider"
MODEL = "provider_model"
LOAD_BALANCING_MODEL = "load_balancing_provider_model"
class ProviderCredentialsCache:
def __init__(self, tenant_id: str, identity_id: str, cache_type: ProviderCredentialsCacheType):
self.cache_key = f"{cache_type}_credentials:tenant_id:{tenant_id}:id:{identity_id}"
def get(self) -> dict | None:
"""
Get cached model provider credentials.
:return:
"""
cached_provider_credentials = redis_client.get(self.cache_key)
if cached_provider_credentials:
try:
cached_provider_credentials = cached_provider_credentials.decode("utf-8")
cached_provider_credentials = json.loads(cached_provider_credentials)
except JSONDecodeError:
return None
return dict(cached_provider_credentials)
else:
return None
def set(self, credentials: dict):
"""
Cache model provider credentials.
:param credentials: provider credentials
:return:
"""
redis_client.setex(self.cache_key, 86400, json.dumps(credentials))
def delete(self):
"""
Delete cached model provider credentials.
:return:
"""
redis_client.delete(self.cache_key)

View File

@@ -0,0 +1,61 @@
import logging
import secrets
from typing import cast
from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
from core.entities import DEFAULT_PLUGIN_ID
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.model_providers.__base.moderation_model import ModerationModel
from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory
from extensions.ext_hosting_provider import hosting_configuration
from models.provider import ProviderType
logger = logging.getLogger(__name__)
def check_moderation(tenant_id: str, model_config: ModelConfigWithCredentialsEntity, text: str) -> bool:
moderation_config = hosting_configuration.moderation_config
openai_provider_name = f"{DEFAULT_PLUGIN_ID}/openai/openai"
if (
moderation_config
and moderation_config.enabled is True
and openai_provider_name in hosting_configuration.provider_map
and hosting_configuration.provider_map[openai_provider_name].enabled is True
):
using_provider_type = model_config.provider_model_bundle.configuration.using_provider_type
provider_name = model_config.provider
if using_provider_type == ProviderType.SYSTEM and provider_name in moderation_config.providers:
hosting_openai_config = hosting_configuration.provider_map[openai_provider_name]
if hosting_openai_config.credentials is None:
return False
            # split the text into chunks of 2,000 characters
length = 2000
text_chunks = [text[i : i + length] for i in range(0, len(text), length)]
if len(text_chunks) == 0:
return True
text_chunk = secrets.choice(text_chunks)
try:
model_provider_factory = ModelProviderFactory(tenant_id)
# Get model instance of LLM
model_type_instance = model_provider_factory.get_model_type_instance(
provider=openai_provider_name, model_type=ModelType.MODERATION
)
model_type_instance = cast(ModerationModel, model_type_instance)
moderation_result = model_type_instance.invoke(
model="omni-moderation-latest", credentials=hosting_openai_config.credentials, text=text_chunk
)
if moderation_result is True:
return True
except Exception:
logger.exception("Fails to check moderation, provider_name: %s", provider_name)
raise InvokeBadRequestError("Rate limit exceeded, please try again later.")
return False

View File

@@ -0,0 +1,65 @@
import importlib.util
import logging
import sys
from types import ModuleType
from typing import AnyStr
logger = logging.getLogger(__name__)
def import_module_from_source(*, module_name: str, py_file_path: AnyStr, use_lazy_loader: bool = False) -> ModuleType:
"""
Importing a module from the source file directly
"""
try:
existed_spec = importlib.util.find_spec(module_name)
if existed_spec:
spec = existed_spec
if not spec.loader:
raise Exception(f"Failed to load module {module_name} from {py_file_path!r}")
else:
# Refer to: https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
# FIXME: mypy does not support the type of spec.loader
spec = importlib.util.spec_from_file_location(module_name, py_file_path) # type: ignore[assignment]
if not spec or not spec.loader:
raise Exception(f"Failed to load module {module_name} from {py_file_path!r}")
if use_lazy_loader:
# Refer to: https://docs.python.org/3/library/importlib.html#implementing-lazy-imports
spec.loader = importlib.util.LazyLoader(spec.loader)
module = importlib.util.module_from_spec(spec)
if not existed_spec:
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
except Exception as e:
logger.exception("Failed to load module %s from script file '%s'", module_name, repr(py_file_path))
raise e
def get_subclasses_from_module(mod: ModuleType, parent_type: type) -> list[type]:
"""
Get all the subclasses of the parent type from the module
"""
classes = [
x for _, x in vars(mod).items() if isinstance(x, type) and x != parent_type and issubclass(x, parent_type)
]
return classes
def load_single_subclass_from_source(
*, module_name: str, script_path: str, parent_type: type, use_lazy_loader: bool = False
) -> type:
"""
Load a single subclass from the source
"""
module = import_module_from_source(
module_name=module_name, py_file_path=script_path, use_lazy_loader=use_lazy_loader
)
subclasses = get_subclasses_from_module(module, parent_type)
match len(subclasses):
case 1:
return subclasses[0]
case 0:
raise Exception(f"Missing subclass of {parent_type.__name__} in {script_path!r}")
case _:
raise Exception(f"Multiple subclasses of {parent_type.__name__} in {script_path!r}")

View File

@@ -0,0 +1,42 @@
import logging
import re
from collections.abc import Sequence
from typing import Any
from core.plugin.entities.plugin_daemon import CredentialType
logger = logging.getLogger(__name__)
def generate_provider_name(
providers: Sequence[Any], credential_type: CredentialType, fallback_context: str = "provider"
) -> str:
try:
        return generate_incremental_name(
            [provider.name for provider in providers],
            credential_type.get_name(),
        )
except Exception as e:
logger.warning("Error generating next provider name for %r: %r", fallback_context, e)
return f"{credential_type.get_name()} 1"
def generate_incremental_name(
names: Sequence[str],
default_pattern: str,
) -> str:
pattern = rf"^{re.escape(default_pattern)}\s+(\d+)$"
numbers = []
for name in names:
if not name:
continue
match = re.match(pattern, name.strip())
if match:
numbers.append(int(match.group(1)))
if not numbers:
return f"{default_pattern} 1"
max_number = max(numbers)
return f"{default_pattern} {max_number + 1}"

View File

@@ -0,0 +1,133 @@
import os
from collections import OrderedDict
from collections.abc import Callable
from functools import lru_cache
from typing import TypeVar
from configs import dify_config
from core.tools.utils.yaml_utils import load_yaml_file_cached
@lru_cache(maxsize=128)
def get_position_map(folder_path: str, *, file_name: str = "_position.yaml") -> dict[str, int]:
"""
Get the mapping from name to index from a YAML file
:param folder_path:
:param file_name: the YAML file name, default to '_position.yaml'
:return: a dict with name as key and index as value
"""
# FIXME(-LAN-): Cache position maps to prevent file descriptor exhaustion during high-load benchmarks
position_file_path = os.path.join(folder_path, file_name)
try:
yaml_content = load_yaml_file_cached(file_path=position_file_path)
except Exception:
yaml_content = []
positions = [item.strip() for item in yaml_content if item and isinstance(item, str) and item.strip()]
return {name: index for index, name in enumerate(positions)}
@lru_cache(maxsize=128)
def get_tool_position_map(folder_path: str, file_name: str = "_position.yaml") -> dict[str, int]:
"""
Get the mapping for tools from name to index from a YAML file.
:param folder_path:
:param file_name: the YAML file name, default to '_position.yaml'
:return: a dict with name as key and index as value
"""
position_map = get_position_map(folder_path, file_name=file_name)
return pin_position_map(
position_map,
pin_list=dify_config.POSITION_TOOL_PINS_LIST,
)
def pin_position_map(original_position_map: dict[str, int], pin_list: list[str]) -> dict[str, int]:
"""
Pin the items in the pin list to the beginning of the position map.
Overall logic: exclude > include > pin
:param original_position_map: the position map to be sorted and filtered
:param pin_list: the list of pins to be put at the beginning
:return: the sorted position map
"""
positions = sorted(original_position_map.keys(), key=lambda x: original_position_map[x])
# Add pins to position map
position_map = {name: idx for idx, name in enumerate(pin_list)}
# Add remaining positions to position map
start_idx = len(position_map)
for name in positions:
if name not in position_map:
position_map[name] = start_idx
start_idx += 1
return position_map
T = TypeVar("T")
def is_filtered(
include_set: set[str],
exclude_set: set[str],
data: T,
name_func: Callable[[T], str],
) -> bool:
"""
Check if the object should be filtered out.
Overall logic: exclude > include > pin
:param include_set: the set of names to be included
:param exclude_set: the set of names to be excluded
:param name_func: the function to get the name of the object
:param data: the data to be filtered
:return: True if the object should be filtered out, False otherwise
"""
if not data:
return False
if not include_set and not exclude_set:
return False
name = name_func(data)
if name in exclude_set: # exclude_set is prioritized
return True
if include_set and name not in include_set: # filter out only if include_set is not empty
return True
return False
def sort_by_position_map(
position_map: dict[str, int],
data: list[T],
name_func: Callable[[T], str],
):
"""
Sort the objects by the position map.
If the name of the object is not in the position map, it will be put at the end.
:param position_map: the map holding positions in the form of {name: index}
:param name_func: the function to get the name of the object
:param data: the data to be sorted
:return: the sorted objects
"""
if not position_map or not data:
return data
return sorted(data, key=lambda x: position_map.get(name_func(x), float("inf")))
def sort_to_dict_by_position_map(
position_map: dict[str, int],
data: list[T],
name_func: Callable[[T], str],
):
"""
    Sort the objects into an ordered dict by the position map.
If the name of the object is not in the position map, it will be put at the end.
:param position_map: the map holding positions in the form of {name: index}
:param name_func: the function to get the name of the object
:param data: the data to be sorted
:return: an OrderedDict with the sorted pairs of name and object
"""
sorted_items = sort_by_position_map(position_map, data, name_func)
return OrderedDict((name_func(item), item) for item in sorted_items)
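
A small illustration of the pinning logic (provider names are hypothetical, and the import path for this helper is assumed):

from core.helper.position_helper import pin_position_map

positions = {"anthropic": 0, "openai": 1, "zhipuai": 2}
print(pin_position_map(positions, pin_list=["zhipuai"]))
# {'zhipuai': 0, 'anthropic': 1, 'openai': 2}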

View File

@@ -0,0 +1,84 @@
import json
from abc import ABC, abstractmethod
from json import JSONDecodeError
from typing import Any
from extensions.ext_redis import redis_client
class ProviderCredentialsCache(ABC):
"""Base class for provider credentials cache"""
def __init__(self, **kwargs):
self.cache_key = self._generate_cache_key(**kwargs)
@abstractmethod
def _generate_cache_key(self, **kwargs) -> str:
"""Generate cache key based on subclass implementation"""
pass
def get(self) -> dict | None:
"""Get cached provider credentials"""
cached_credentials = redis_client.get(self.cache_key)
if cached_credentials:
try:
cached_credentials = cached_credentials.decode("utf-8")
return dict(json.loads(cached_credentials))
except JSONDecodeError:
return None
return None
def set(self, config: dict[str, Any]):
"""Cache provider credentials"""
redis_client.setex(self.cache_key, 86400, json.dumps(config))
def delete(self):
"""Delete cached provider credentials"""
redis_client.delete(self.cache_key)
class SingletonProviderCredentialsCache(ProviderCredentialsCache):
"""Cache for tool single provider credentials"""
def __init__(self, tenant_id: str, provider_type: str, provider_identity: str):
super().__init__(
tenant_id=tenant_id,
provider_type=provider_type,
provider_identity=provider_identity,
)
def _generate_cache_key(self, **kwargs) -> str:
tenant_id = kwargs["tenant_id"]
provider_type = kwargs["provider_type"]
identity_name = kwargs["provider_identity"]
identity_id = f"{provider_type}.{identity_name}"
return f"{provider_type}_credentials:tenant_id:{tenant_id}:id:{identity_id}"
class ToolProviderCredentialsCache(ProviderCredentialsCache):
"""Cache for tool provider credentials"""
def __init__(self, tenant_id: str, provider: str, credential_id: str):
super().__init__(tenant_id=tenant_id, provider=provider, credential_id=credential_id)
def _generate_cache_key(self, **kwargs) -> str:
tenant_id = kwargs["tenant_id"]
provider = kwargs["provider"]
credential_id = kwargs["credential_id"]
return f"tool_credentials:tenant_id:{tenant_id}:provider:{provider}:credential_id:{credential_id}"
class NoOpProviderCredentialCache:
"""No-op provider credential cache"""
def get(self) -> dict | None:
"""Get cached provider credentials"""
return None
def set(self, config: dict[str, Any]):
"""Cache provider credentials"""
pass
def delete(self):
"""Delete cached provider credentials"""
pass

View File

@@ -0,0 +1,129 @@
import contextlib
from collections.abc import Mapping
from copy import deepcopy
from typing import Any, Protocol
from core.entities.provider_entities import BasicProviderConfig
from core.helper import encrypter
class ProviderConfigCache(Protocol):
"""
Interface for provider configuration cache operations
"""
def get(self) -> dict[str, Any] | None:
"""Get cached provider configuration"""
...
def set(self, config: dict[str, Any]) -> None:
"""Cache provider configuration"""
...
def delete(self) -> None:
"""Delete cached provider configuration"""
...
class ProviderConfigEncrypter:
tenant_id: str
config: list[BasicProviderConfig]
provider_config_cache: ProviderConfigCache
def __init__(
self,
tenant_id: str,
config: list[BasicProviderConfig],
provider_config_cache: ProviderConfigCache,
):
self.tenant_id = tenant_id
self.config = config
self.provider_config_cache = provider_config_cache
def _deep_copy(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
"""
deep copy data
"""
return deepcopy(data)
def encrypt(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
"""
encrypt tool credentials with tenant id
return a deep copy of credentials with encrypted values
"""
data = dict(self._deep_copy(data))
        # collect the fields that need to be encrypted
fields = dict[str, BasicProviderConfig]()
for credential in self.config:
fields[credential.name] = credential
for field_name, field in fields.items():
if field.type == BasicProviderConfig.Type.SECRET_INPUT:
if field_name in data:
encrypted = encrypter.encrypt_token(self.tenant_id, data[field_name] or "")
data[field_name] = encrypted
return data
def mask_credentials(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
"""
mask credentials
return a deep copy of credentials with masked values
"""
data = dict(self._deep_copy(data))
        # collect the secret fields that need to be masked
fields = dict[str, BasicProviderConfig]()
for credential in self.config:
fields[credential.name] = credential
for field_name, field in fields.items():
if field.type == BasicProviderConfig.Type.SECRET_INPUT:
if field_name in data:
if len(data[field_name]) > 6:
data[field_name] = (
data[field_name][:2] + "*" * (len(data[field_name]) - 4) + data[field_name][-2:]
)
else:
data[field_name] = "*" * len(data[field_name])
return data
def mask_plugin_credentials(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
return self.mask_credentials(data)
def decrypt(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
"""
decrypt tool credentials with tenant id
return a deep copy of credentials with decrypted values
"""
cached_credentials = self.provider_config_cache.get()
if cached_credentials:
return cached_credentials
data = dict(self._deep_copy(data))
        # collect the fields that need to be decrypted
fields = dict[str, BasicProviderConfig]()
for credential in self.config:
fields[credential.name] = credential
for field_name, field in fields.items():
if field.type == BasicProviderConfig.Type.SECRET_INPUT:
if field_name in data:
with contextlib.suppress(Exception):
# if the value is None or empty string, skip decrypt
if not data[field_name]:
continue
data[field_name] = encrypter.decrypt_token(self.tenant_id, data[field_name])
self.provider_config_cache.set(dict(data))
return data
def create_provider_encrypter(tenant_id: str, config: list[BasicProviderConfig], cache: ProviderConfigCache):
return ProviderConfigEncrypter(tenant_id=tenant_id, config=config, provider_config_cache=cache), cache
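
The secret-masking rule used by mask_credentials, extracted as a standalone function for illustration (values are hypothetical):

def mask_secret(value: str) -> str:
    # longer than 6 chars: keep two characters at each end; otherwise mask everything
    if len(value) > 6:
        return value[:2] + "*" * (len(value) - 4) + value[-2:]
    return "*" * len(value)

print(mask_secret("sk-abcdef123456"))  # sk***********56
print(mask_secret("abc"))              # ***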

View File

@@ -0,0 +1,138 @@
"""
Proxy requests to avoid SSRF
"""
import logging
import time
import httpx
from configs import dify_config
from core.helper.http_client_pooling import get_pooled_http_client
logger = logging.getLogger(__name__)
SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES
BACKOFF_FACTOR = 0.5
STATUS_FORCELIST = [429, 500, 502, 503, 504]
_SSL_VERIFIED_POOL_KEY = "ssrf:verified"
_SSL_UNVERIFIED_POOL_KEY = "ssrf:unverified"
_SSRF_CLIENT_LIMITS = httpx.Limits(
max_connections=dify_config.SSRF_POOL_MAX_CONNECTIONS,
max_keepalive_connections=dify_config.SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS,
keepalive_expiry=dify_config.SSRF_POOL_KEEPALIVE_EXPIRY,
)
class MaxRetriesExceededError(ValueError):
"""Raised when the maximum number of retries is exceeded."""
pass
def _create_proxy_mounts() -> dict[str, httpx.HTTPTransport]:
return {
"http://": httpx.HTTPTransport(
proxy=dify_config.SSRF_PROXY_HTTP_URL,
),
"https://": httpx.HTTPTransport(
proxy=dify_config.SSRF_PROXY_HTTPS_URL,
),
}
def _build_ssrf_client(verify: bool) -> httpx.Client:
if dify_config.SSRF_PROXY_ALL_URL:
return httpx.Client(
proxy=dify_config.SSRF_PROXY_ALL_URL,
verify=verify,
limits=_SSRF_CLIENT_LIMITS,
)
if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
return httpx.Client(
mounts=_create_proxy_mounts(),
verify=verify,
limits=_SSRF_CLIENT_LIMITS,
)
return httpx.Client(verify=verify, limits=_SSRF_CLIENT_LIMITS)
def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client:
if not isinstance(ssl_verify_enabled, bool):
raise ValueError("SSRF client verify flag must be a boolean")
return get_pooled_http_client(
_SSL_VERIFIED_POOL_KEY if ssl_verify_enabled else _SSL_UNVERIFIED_POOL_KEY,
lambda: _build_ssrf_client(verify=ssl_verify_enabled),
)
def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
if "allow_redirects" in kwargs:
allow_redirects = kwargs.pop("allow_redirects")
if "follow_redirects" not in kwargs:
kwargs["follow_redirects"] = allow_redirects
if "timeout" not in kwargs:
kwargs["timeout"] = httpx.Timeout(
timeout=dify_config.SSRF_DEFAULT_TIME_OUT,
connect=dify_config.SSRF_DEFAULT_CONNECT_TIME_OUT,
read=dify_config.SSRF_DEFAULT_READ_TIME_OUT,
write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT,
)
# prioritize per-call option, which can be switched on and off inside the HTTP node on the web UI
verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY)
client = _get_ssrf_client(verify_option)
retries = 0
while retries <= max_retries:
try:
response = client.request(method=method, url=url, **kwargs)
if response.status_code not in STATUS_FORCELIST:
return response
else:
logger.warning(
"Received status code %s for URL %s which is in the force list",
response.status_code,
url,
)
except httpx.RequestError as e:
logger.warning("Request to URL %s failed on attempt %s: %s", url, retries + 1, e)
if max_retries == 0:
raise
retries += 1
if retries <= max_retries:
time.sleep(BACKOFF_FACTOR * (2 ** (retries - 1)))
raise MaxRetriesExceededError(f"Reached maximum retries ({max_retries}) for URL {url}")
def get(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
return make_request("GET", url, max_retries=max_retries, **kwargs)
def post(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
return make_request("POST", url, max_retries=max_retries, **kwargs)
def put(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
return make_request("PUT", url, max_retries=max_retries, **kwargs)
def patch(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
return make_request("PATCH", url, max_retries=max_retries, **kwargs)
def delete(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
return make_request("DELETE", url, max_retries=max_retries, **kwargs)
def head(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
return make_request("HEAD", url, max_retries=max_retries, **kwargs)

View File

@@ -0,0 +1,49 @@
import json
from enum import StrEnum
from json import JSONDecodeError
from extensions.ext_redis import redis_client
class ToolParameterCacheType(StrEnum):
PARAMETER = "tool_parameter"
class ToolParameterCache:
def __init__(
self, tenant_id: str, provider: str, tool_name: str, cache_type: ToolParameterCacheType, identity_id: str
):
self.cache_key = (
f"{cache_type}_secret:tenant_id:{tenant_id}:provider:{provider}:tool_name:{tool_name}"
f":identity_id:{identity_id}"
)
def get(self) -> dict | None:
"""
Get cached model provider credentials.
:return:
"""
cached_tool_parameter = redis_client.get(self.cache_key)
if cached_tool_parameter:
try:
cached_tool_parameter = cached_tool_parameter.decode("utf-8")
cached_tool_parameter = json.loads(cached_tool_parameter)
except JSONDecodeError:
return None
return dict(cached_tool_parameter)
else:
return None
def set(self, parameters: dict):
"""Cache model provider credentials."""
redis_client.setex(self.cache_key, 86400, json.dumps(parameters))
def delete(self):
"""
Delete cached model provider credentials.
:return:
"""
redis_client.delete(self.cache_key)

View File

@@ -0,0 +1,105 @@
import contextlib
import re
from collections.abc import Mapping
from typing import Any
def is_valid_trace_id(trace_id: str) -> bool:
"""
Check if the trace_id is valid.
Requirements: 1-128 characters, only letters, numbers, '-', and '_'.
"""
return bool(re.match(r"^[a-zA-Z0-9\-_]{1,128}$", trace_id))
def get_external_trace_id(request: Any) -> str | None:
"""
Retrieve the trace_id from the request.
Priority:
1. header ('X-Trace-Id')
2. parameters
3. JSON body
4. Current OpenTelemetry context (if enabled)
5. OpenTelemetry traceparent header (if present and valid)
Returns None if no valid trace_id is provided.
"""
trace_id = request.headers.get("X-Trace-Id")
if not trace_id:
trace_id = request.args.get("trace_id")
if not trace_id and getattr(request, "is_json", False):
json_data = getattr(request, "json", None)
if json_data:
trace_id = json_data.get("trace_id")
if not trace_id:
trace_id = get_trace_id_from_otel_context()
if not trace_id:
traceparent = request.headers.get("traceparent")
if traceparent:
trace_id = parse_traceparent_header(traceparent)
if isinstance(trace_id, str) and is_valid_trace_id(trace_id):
return trace_id
return None
def extract_external_trace_id_from_args(args: Mapping[str, Any]):
"""
Extract 'external_trace_id' from args.
Returns a dict suitable for use in extras. Returns an empty dict if not found.
"""
trace_id = args.get("external_trace_id")
if trace_id:
return {"external_trace_id": trace_id}
return {}
def get_trace_id_from_otel_context() -> str | None:
"""
Retrieve the current trace ID from the active OpenTelemetry trace context.
Returns None if:
1. OpenTelemetry SDK is not installed or enabled.
2. There is no active span or trace context.
"""
try:
from opentelemetry.trace import SpanContext, get_current_span
from opentelemetry.trace.span import INVALID_TRACE_ID
span = get_current_span()
if not span:
return None
span_context: SpanContext = span.get_span_context()
if not span_context or span_context.trace_id == INVALID_TRACE_ID:
return None
trace_id_hex = f"{span_context.trace_id:032x}"
return trace_id_hex
except Exception:
return None
def parse_traceparent_header(traceparent: str) -> str | None:
"""
Parse the `traceparent` header to extract the trace_id.
Expected format:
'version-trace_id-span_id-flags'
Reference:
W3C Trace Context Specification: https://www.w3.org/TR/trace-context/
"""
with contextlib.suppress(Exception):
parts = traceparent.split("-")
if len(parts) == 4 and len(parts[1]) == 32:
return parts[1]
return None
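
Parsing the example traceparent from the W3C Trace Context spec with the helpers above:

traceparent = "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"
print(parse_traceparent_header(traceparent))  # 0af7651916cd43dd8448eb211c80319c
print(is_valid_trace_id("0af7651916cd43dd8448eb211c80319c"))  # True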