dify
dify/api/core/helper/__init__.py (new file, 0 lines)

dify/api/core/helper/code_executor/__init__.py (new file, 3 lines)
@@ -0,0 +1,3 @@
from .code_executor import CodeExecutor, CodeLanguage

__all__ = ["CodeExecutor", "CodeLanguage"]
dify/api/core/helper/code_executor/code_executor.py (new file, 161 lines)
@@ -0,0 +1,161 @@
import logging
from collections.abc import Mapping
from enum import StrEnum
from threading import Lock
from typing import Any

import httpx
from pydantic import BaseModel
from yarl import URL

from configs import dify_config
from core.helper.code_executor.javascript.javascript_transformer import NodeJsTemplateTransformer
from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer
from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer
from core.helper.code_executor.template_transformer import TemplateTransformer
from core.helper.http_client_pooling import get_pooled_http_client

logger = logging.getLogger(__name__)

code_execution_endpoint_url = URL(str(dify_config.CODE_EXECUTION_ENDPOINT))
CODE_EXECUTION_SSL_VERIFY = dify_config.CODE_EXECUTION_SSL_VERIFY
_CODE_EXECUTOR_CLIENT_LIMITS = httpx.Limits(
    max_connections=dify_config.CODE_EXECUTION_POOL_MAX_CONNECTIONS,
    max_keepalive_connections=dify_config.CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS,
    keepalive_expiry=dify_config.CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY,
)
_CODE_EXECUTOR_CLIENT_KEY = "code_executor:http_client"


class CodeExecutionError(Exception):
    pass


class CodeExecutionResponse(BaseModel):
    class Data(BaseModel):
        stdout: str | None = None
        error: str | None = None

    code: int
    message: str
    data: Data


class CodeLanguage(StrEnum):
    PYTHON3 = "python3"
    JINJA2 = "jinja2"
    JAVASCRIPT = "javascript"


def _build_code_executor_client() -> httpx.Client:
    return httpx.Client(
        verify=CODE_EXECUTION_SSL_VERIFY,
        limits=_CODE_EXECUTOR_CLIENT_LIMITS,
    )


class CodeExecutor:
    dependencies_cache: dict[str, str] = {}
    dependencies_cache_lock = Lock()

    code_template_transformers: dict[CodeLanguage, type[TemplateTransformer]] = {
        CodeLanguage.PYTHON3: Python3TemplateTransformer,
        CodeLanguage.JINJA2: Jinja2TemplateTransformer,
        CodeLanguage.JAVASCRIPT: NodeJsTemplateTransformer,
    }

    code_language_to_running_language = {
        CodeLanguage.JAVASCRIPT: "nodejs",
        CodeLanguage.JINJA2: CodeLanguage.PYTHON3,
        CodeLanguage.PYTHON3: CodeLanguage.PYTHON3,
    }

    supported_dependencies_languages: set[CodeLanguage] = {CodeLanguage.PYTHON3}

    @classmethod
    def execute_code(cls, language: CodeLanguage, preload: str, code: str) -> str:
        """
        Execute code
        :param language: code language
        :param preload: the preload script
        :param code: code
        :return:
        """
        url = code_execution_endpoint_url / "v1" / "sandbox" / "run"

        headers = {"X-Api-Key": dify_config.CODE_EXECUTION_API_KEY}

        data = {
            "language": cls.code_language_to_running_language.get(language),
            "code": code,
            "preload": preload,
            "enable_network": True,
        }

        timeout = httpx.Timeout(
            connect=dify_config.CODE_EXECUTION_CONNECT_TIMEOUT,
            read=dify_config.CODE_EXECUTION_READ_TIMEOUT,
            write=dify_config.CODE_EXECUTION_WRITE_TIMEOUT,
            pool=None,
        )

        client = get_pooled_http_client(_CODE_EXECUTOR_CLIENT_KEY, _build_code_executor_client)

        try:
            response = client.post(
                str(url),
                json=data,
                headers=headers,
                timeout=timeout,
            )
            if response.status_code == 503:
                raise CodeExecutionError("Code execution service is unavailable")
            elif response.status_code != 200:
                raise Exception(
                    f"Failed to execute code, got status code {response.status_code},"
                    f" please check if the sandbox service is running"
                )
        except CodeExecutionError as e:
            raise e
        except Exception as e:
            raise CodeExecutionError(
                "Failed to execute code, which is likely a network issue,"
                " please check if the sandbox service is running."
                f" ( Error: {str(e)} )"
            )

        try:
            response_data = response.json()
        except Exception as e:
            raise CodeExecutionError("Failed to parse response") from e

        if (code := response_data.get("code")) != 0:
            raise CodeExecutionError(f"Got error code: {code}. Got error msg: {response_data.get('message')}")

        response_code = CodeExecutionResponse.model_validate(response_data)

        if response_code.data.error:
            raise CodeExecutionError(response_code.data.error)

        return response_code.data.stdout or ""

    @classmethod
    def execute_workflow_code_template(cls, language: CodeLanguage, code: str, inputs: Mapping[str, Any]):
        """
        Execute a workflow code template
        :param language: code language
        :param code: code
        :param inputs: inputs
        :return:
        """
        template_transformer = cls.code_template_transformers.get(language)
        if not template_transformer:
            raise CodeExecutionError(f"Unsupported language {language}")

        runner, preload = template_transformer.transform_caller(code, inputs)

        try:
            response = cls.execute_code(language, preload, runner)
        except CodeExecutionError as e:
            raise e

        return template_transformer.transform_response(response)
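
Note: a minimal usage sketch of the executor above, not part of the commit. It assumes a dify-sandbox service is reachable at dify_config.CODE_EXECUTION_ENDPOINT with a valid CODE_EXECUTION_API_KEY:

    # Usage sketch (illustrative only)
    from core.helper.code_executor import CodeExecutor, CodeLanguage

    code = "def main(a: int, b: int):\n    return {'sum': a + b}"
    result = CodeExecutor.execute_workflow_code_template(
        language=CodeLanguage.PYTHON3,
        code=code,
        inputs={"a": 1, "b": 2},
    )
    print(result)  # {'sum': 3}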
dify/api/core/helper/code_executor/code_node_provider.py (new file, 61 lines)
@@ -0,0 +1,61 @@
from abc import ABC, abstractmethod
from collections.abc import Mapping, Sequence
from typing import TypedDict

from pydantic import BaseModel


class VariableConfig(TypedDict):
    variable: str
    value_selector: Sequence[str | int]


class OutputConfig(TypedDict):
    type: str
    children: None


class CodeConfig(TypedDict):
    variables: Sequence[VariableConfig]
    code_language: str
    code: str
    outputs: Mapping[str, OutputConfig]


class DefaultConfig(TypedDict):
    type: str
    config: CodeConfig


class CodeNodeProvider(BaseModel, ABC):
    @staticmethod
    @abstractmethod
    def get_language() -> str:
        pass

    @classmethod
    def is_accept_language(cls, language: str) -> bool:
        return language == cls.get_language()

    @classmethod
    @abstractmethod
    def get_default_code(cls) -> str:
        """
        get default code in specific programming language for the code node
        """
        pass

    @classmethod
    def get_default_config(cls) -> DefaultConfig:
        return {
            "type": "code",
            "config": {
                "variables": [
                    {"variable": "arg1", "value_selector": []},
                    {"variable": "arg2", "value_selector": []},
                ],
                "code_language": cls.get_language(),
                "code": cls.get_default_code(),
                "outputs": {"result": {"type": "string", "children": None}},
            },
        }
@@ -0,0 +1,22 @@
from textwrap import dedent

from core.helper.code_executor.code_executor import CodeLanguage
from core.helper.code_executor.code_node_provider import CodeNodeProvider


class JavascriptCodeProvider(CodeNodeProvider):
    @staticmethod
    def get_language() -> str:
        return CodeLanguage.JAVASCRIPT

    @classmethod
    def get_default_code(cls) -> str:
        return dedent(
            """
            function main({arg1, arg2}) {
                return {
                    result: arg1 + arg2
                }
            }
            """
        )
@@ -0,0 +1,22 @@
from textwrap import dedent

from core.helper.code_executor.template_transformer import TemplateTransformer


class NodeJsTemplateTransformer(TemplateTransformer):
    @classmethod
    def get_runner_script(cls) -> str:
        runner_script = dedent(f""" {cls._code_placeholder}

        // decode and prepare input object
        var inputs_obj = JSON.parse(Buffer.from('{cls._inputs_placeholder}', 'base64').toString('utf-8'))

        // execute main function
        var output_obj = main(inputs_obj)

        // convert output to json and print
        var output_json = JSON.stringify(output_obj)
        var result = `<<RESULT>>${{output_json}}<<RESULT>>`
        console.log(result)
        """)
        return runner_script
@@ -0,0 +1,16 @@
from collections.abc import Mapping

from core.helper.code_executor.code_executor import CodeExecutor, CodeLanguage


class Jinja2Formatter:
    @classmethod
    def format(cls, template: str, inputs: Mapping[str, str]) -> str:
        """
        Format template
        :param template: template
        :param inputs: inputs
        :return:
        """
        result = CodeExecutor.execute_workflow_code_template(language=CodeLanguage.JINJA2, code=template, inputs=inputs)
        return str(result.get("result", ""))
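
Note: a usage sketch for Jinja2Formatter, not part of the commit; the module path is assumed from the package layout, and a running sandbox is required:

    from core.helper.code_executor.jinja2.jinja2_formatter import Jinja2Formatter  # path assumed

    text = Jinja2Formatter.format("Hello, {{ name }}!", {"name": "Dify"})
    print(text)  # Hello, Dify!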
@@ -0,0 +1,57 @@
from textwrap import dedent

from core.helper.code_executor.template_transformer import TemplateTransformer


class Jinja2TemplateTransformer(TemplateTransformer):
    @classmethod
    def transform_response(cls, response: str):
        """
        Transform response to dict
        :param response: response
        :return:
        """
        return {"result": cls.extract_result_str_from_response(response)}

    @classmethod
    def get_runner_script(cls) -> str:
        runner_script = dedent(f"""
        # declare main function
        def main(**inputs):
            import jinja2
            template = jinja2.Template('''{cls._code_placeholder}''')
            return template.render(**inputs)

        import json
        from base64 import b64decode

        # decode and prepare input dict
        inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))

        # execute main function
        output = main(**inputs_obj)

        # convert output and print
        result = f'''<<RESULT>>{{output}}<<RESULT>>'''
        print(result)

        """)
        return runner_script

    @classmethod
    def get_preload_script(cls) -> str:
        preload_script = dedent("""
        import jinja2
        from base64 import b64decode

        def _jinja2_preload_():
            # prepare jinja2 environment, load template and render before to avoid sandbox issue
            template = jinja2.Template('{{s}}')
            template.render(s='a')

        if __name__ == '__main__':
            _jinja2_preload_()

        """)

        return preload_script
@@ -0,0 +1,21 @@
from textwrap import dedent

from core.helper.code_executor.code_executor import CodeLanguage
from core.helper.code_executor.code_node_provider import CodeNodeProvider


class Python3CodeProvider(CodeNodeProvider):
    @staticmethod
    def get_language() -> str:
        return CodeLanguage.PYTHON3

    @classmethod
    def get_default_code(cls) -> str:
        return dedent(
            """
            def main(arg1: str, arg2: str):
                return {
                    "result": arg1 + arg2,
                }
            """
        )
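
Note: for illustration (not part of the commit), Python3CodeProvider.get_default_config() yields the DefaultConfig dict below; the "code" value is the dedented default main() shown above:

    {
        "type": "code",
        "config": {
            "variables": [
                {"variable": "arg1", "value_selector": []},
                {"variable": "arg2", "value_selector": []},
            ],
            "code_language": "python3",
            "code": "<dedented default main() from get_default_code()>",
            "outputs": {"result": {"type": "string", "children": None}},
        },
    }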
@@ -0,0 +1,25 @@
from textwrap import dedent

from core.helper.code_executor.template_transformer import TemplateTransformer


class Python3TemplateTransformer(TemplateTransformer):
    @classmethod
    def get_runner_script(cls) -> str:
        runner_script = dedent(f""" {cls._code_placeholder}

        import json
        from base64 import b64decode

        # decode and prepare input dict
        inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))

        # execute main function
        output_obj = main(**inputs_obj)

        # convert output to json and print
        output_json = json.dumps(output_obj, indent=4)
        result = f'''<<RESULT>>{{output_json}}<<RESULT>>'''
        print(result)
        """)
        return runner_script
dify/api/core/helper/code_executor/template_transformer.py (new file, 114 lines)
@@ -0,0 +1,114 @@
import json
import re
from abc import ABC, abstractmethod
from base64 import b64encode
from collections.abc import Mapping
from typing import Any

from core.variables.utils import dumps_with_segments


class TemplateTransformer(ABC):
    _code_placeholder: str = "{{code}}"
    _inputs_placeholder: str = "{{inputs}}"
    _result_tag: str = "<<RESULT>>"

    @classmethod
    def transform_caller(cls, code: str, inputs: Mapping[str, Any]) -> tuple[str, str]:
        """
        Transform code to python runner
        :param code: code
        :param inputs: inputs
        :return: runner, preload
        """
        runner_script = cls.assemble_runner_script(code, inputs)
        preload_script = cls.get_preload_script()

        return runner_script, preload_script

    @classmethod
    def extract_result_str_from_response(cls, response: str):
        result = re.search(rf"{cls._result_tag}(.*){cls._result_tag}", response, re.DOTALL)
        if not result:
            raise ValueError(f"Failed to parse result: no result tag found in response. Response: {response[:200]}...")
        return result.group(1)

    @classmethod
    def transform_response(cls, response: str) -> Mapping[str, Any]:
        """
        Transform response to dict
        :param response: response
        :return:
        """
        try:
            result_str = cls.extract_result_str_from_response(response)
            result = json.loads(result_str)
        except json.JSONDecodeError as e:
            raise ValueError(f"Failed to parse JSON response: {str(e)}.")
        except ValueError as e:
            # Re-raise ValueError from extract_result_str_from_response
            raise e
        except Exception as e:
            raise ValueError(f"Unexpected error during response transformation: {str(e)}")

        if not isinstance(result, dict):
            raise ValueError(f"Result must be a dict, got {type(result).__name__}")
        if not all(isinstance(k, str) for k in result):
            raise ValueError("Result keys must be strings")

        # Post-process the result to convert scientific notation strings back to numbers
        result = cls._post_process_result(result)
        return result

    @classmethod
    def _post_process_result(cls, result: dict[Any, Any]) -> dict[Any, Any]:
        """
        Post-process the result to convert scientific notation strings back to numbers
        """

        def convert_scientific_notation(value):
            if isinstance(value, str):
                # Check if the string looks like scientific notation
                if re.match(r"^-?\d+\.?\d*e[+-]\d+$", value, re.IGNORECASE):
                    try:
                        return float(value)
                    except ValueError:
                        pass
            elif isinstance(value, dict):
                return {k: convert_scientific_notation(v) for k, v in value.items()}
            elif isinstance(value, list):
                return [convert_scientific_notation(v) for v in value]
            return value

        return convert_scientific_notation(result)  # type: ignore[no-any-return]

    @classmethod
    @abstractmethod
    def get_runner_script(cls) -> str:
        """
        Get runner script
        """
        pass

    @classmethod
    def serialize_inputs(cls, inputs: Mapping[str, Any]) -> str:
        inputs_json_str = dumps_with_segments(inputs, ensure_ascii=False).encode()
        input_base64_encoded = b64encode(inputs_json_str).decode("utf-8")
        return input_base64_encoded

    @classmethod
    def assemble_runner_script(cls, code: str, inputs: Mapping[str, Any]) -> str:
        # assemble runner script
        script = cls.get_runner_script()
        script = script.replace(cls._code_placeholder, code)
        inputs_str = cls.serialize_inputs(inputs)
        script = script.replace(cls._inputs_placeholder, inputs_str)
        return script

    @classmethod
    def get_preload_script(cls) -> str:
        """
        Get preload script
        """
        return ""
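
Note: a self-contained sketch (not part of the commit) of the placeholder substitution and result extraction that TemplateTransformer performs; it assumes dumps_with_segments serializes plain dicts like json.dumps:

    from core.helper.code_executor.template_transformer import TemplateTransformer

    class EchoTransformer(TemplateTransformer):
        @classmethod
        def get_runner_script(cls) -> str:
            return f"code={cls._code_placeholder} inputs={cls._inputs_placeholder}"

    runner, preload = EchoTransformer.transform_caller("print(1)", {"x": 1})
    # runner == "code=print(1) inputs=eyJ4IjogMX0="  (inputs are base64-encoded JSON)
    # preload == ""  (default get_preload_script)

    EchoTransformer.transform_response('noise <<RESULT>>{"x": 1}<<RESULT>> noise')
    # -> {"x": 1}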
dify/api/core/helper/credential_utils.py (new file, 75 lines)
@@ -0,0 +1,75 @@
"""
Credential utility functions for checking credential existence and policy compliance.
"""

from services.enterprise.plugin_manager_service import PluginCredentialType


def is_credential_exists(credential_id: str, credential_type: "PluginCredentialType") -> bool:
    """
    Check if the credential still exists in the database.

    :param credential_id: The credential ID to check
    :param credential_type: The type of credential (MODEL or TOOL)
    :return: True if credential exists, False otherwise
    """
    from sqlalchemy import select
    from sqlalchemy.orm import Session

    from extensions.ext_database import db
    from models.provider import ProviderCredential, ProviderModelCredential
    from models.tools import BuiltinToolProvider

    with Session(db.engine) as session:
        if credential_type == PluginCredentialType.MODEL:
            # Check both pre-defined and custom model credentials using a single UNION query
            stmt = (
                select(ProviderCredential.id)
                .where(ProviderCredential.id == credential_id)
                .union(select(ProviderModelCredential.id).where(ProviderModelCredential.id == credential_id))
            )
            return session.scalar(stmt) is not None

        if credential_type == PluginCredentialType.TOOL:
            return (
                session.scalar(select(BuiltinToolProvider.id).where(BuiltinToolProvider.id == credential_id))
                is not None
            )

        return False


def check_credential_policy_compliance(
    credential_id: str, provider: str, credential_type: "PluginCredentialType", check_existence: bool = True
) -> None:
    """
    Check credential policy compliance for the given credential ID.

    :param credential_id: The credential ID to check
    :param provider: The provider name
    :param credential_type: The type of credential (MODEL or TOOL)
    :param check_existence: Whether to check if credential exists in database first
    :raises ValueError: If credential policy compliance check fails
    """
    from services.enterprise.plugin_manager_service import (
        CheckCredentialPolicyComplianceRequest,
        PluginManagerService,
    )
    from services.feature_service import FeatureService

    if not FeatureService.get_system_features().plugin_manager.enabled or not credential_id:
        return

    # Check if credential exists in database first (if requested)
    if check_existence:
        if not is_credential_exists(credential_id, credential_type):
            raise ValueError(f"Credential with id {credential_id} for provider {provider} not found.")

    # Check policy compliance
    PluginManagerService.check_credential_policy_compliance(
        CheckCredentialPolicyComplianceRequest(
            dify_credential_id=credential_id,
            provider=provider,
            credential_type=credential_type,
        )
    )
dify/api/core/helper/download.py (new file, 17 lines)
@@ -0,0 +1,17 @@
from core.helper import ssrf_proxy


def download_with_size_limit(url, max_download_size: int, **kwargs):
    response = ssrf_proxy.get(url, follow_redirects=True, **kwargs)
    if response.status_code == 404:
        raise ValueError("file not found")

    total_size = 0
    chunks = []
    for chunk in response.iter_bytes():
        total_size += len(chunk)
        if total_size > max_download_size:
            raise ValueError("Max file size reached")
        chunks.append(chunk)
    content = b"".join(chunks)
    return content
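
Note: a usage sketch (not part of the commit) that fetches a remote file through the SSRF-safe proxy, aborting once the body exceeds 10 MiB; the URL is illustrative:

    from core.helper.download import download_with_size_limit

    content = download_with_size_limit("https://example.com/plugin.difypkg", 10 * 1024 * 1024)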
dify/api/core/helper/encrypter.py (new file, 44 lines)
@@ -0,0 +1,44 @@
import base64

from libs import rsa


def obfuscated_token(token: str) -> str:
    if not token:
        return token
    if len(token) <= 8:
        return "*" * 20
    return token[:6] + "*" * 12 + token[-2:]


def full_mask_token(token_length=20):
    return "*" * token_length


def encrypt_token(tenant_id: str, token: str):
    from extensions.ext_database import db
    from models.account import Tenant

    if not (tenant := db.session.query(Tenant).where(Tenant.id == tenant_id).first()):
        raise ValueError(f"Tenant with id {tenant_id} not found")
    assert tenant.encrypt_public_key is not None
    encrypted_token = rsa.encrypt(token, tenant.encrypt_public_key)
    return base64.b64encode(encrypted_token).decode()


def decrypt_token(tenant_id: str, token: str) -> str:
    return rsa.decrypt(base64.b64decode(token), tenant_id)


def batch_decrypt_token(tenant_id: str, tokens: list[str]):
    rsa_key, cipher_rsa = rsa.get_decrypt_decoding(tenant_id)

    return [rsa.decrypt_token_with_decoding(base64.b64decode(token), rsa_key, cipher_rsa) for token in tokens]


def get_decrypt_decoding(tenant_id: str):
    return rsa.get_decrypt_decoding(tenant_id)


def decrypt_token_with_decoding(token: str, rsa_key, cipher_rsa):
    return rsa.decrypt_token_with_decoding(base64.b64decode(token), rsa_key, cipher_rsa)
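
Note: behavior sketch (not part of the commit) for the masking helpers above:

    obfuscated_token("sk-0123456789abcdef")  # -> "sk-012************ef"
    obfuscated_token("short")                # -> "********************"  (tokens of <= 8 chars are fully masked)
    full_mask_token()                        # -> "********************"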
dify/api/core/helper/http_client_pooling.py (new file, 59 lines)
@@ -0,0 +1,59 @@
"""HTTP client pooling utilities."""

from __future__ import annotations

import atexit
import threading
from collections.abc import Callable

import httpx

ClientBuilder = Callable[[], httpx.Client]


class HttpClientPoolFactory:
    """Thread-safe factory that maintains reusable HTTP client instances."""

    def __init__(self) -> None:
        self._clients: dict[str, httpx.Client] = {}
        self._lock = threading.Lock()

    def get_or_create(self, key: str, builder: ClientBuilder) -> httpx.Client:
        """Return a pooled client associated with ``key``, creating it on demand."""
        client = self._clients.get(key)
        if client is not None:
            return client

        with self._lock:
            client = self._clients.get(key)
            if client is None:
                client = builder()
                self._clients[key] = client
            return client

    def close_all(self) -> None:
        """Close all pooled clients and clear the pool."""
        with self._lock:
            for client in self._clients.values():
                client.close()
            self._clients.clear()


_factory = HttpClientPoolFactory()


def get_pooled_http_client(key: str, builder: ClientBuilder) -> httpx.Client:
    """Return a pooled client for the given ``key`` using ``builder`` when missing."""
    return _factory.get_or_create(key, builder)


def close_all_pooled_clients() -> None:
    """Close every client created through the pooling factory."""
    _factory.close_all()


def _register_shutdown_hook() -> None:
    atexit.register(close_all_pooled_clients)


_register_shutdown_hook()
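
Note: a usage sketch (not part of the commit) — share one keep-alive client per key across calls instead of opening a new connection pool each time; the key name is illustrative:

    import httpx
    from core.helper.http_client_pooling import get_pooled_http_client

    client = get_pooled_http_client("my_feature:http_client", lambda: httpx.Client(timeout=10))
    response = client.get("https://example.com/health")
    # Subsequent calls with the same key return the same httpx.Client instance.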
dify/api/core/helper/marketplace.py (new file, 66 lines)
@@ -0,0 +1,66 @@
from collections.abc import Sequence

import httpx
from yarl import URL

from configs import dify_config
from core.helper.download import download_with_size_limit
from core.plugin.entities.marketplace import MarketplacePluginDeclaration

marketplace_api_url = URL(str(dify_config.MARKETPLACE_API_URL))


def get_plugin_pkg_url(plugin_unique_identifier: str) -> str:
    return str((marketplace_api_url / "api/v1/plugins/download").with_query(unique_identifier=plugin_unique_identifier))


def download_plugin_pkg(plugin_unique_identifier: str):
    return download_with_size_limit(get_plugin_pkg_url(plugin_unique_identifier), dify_config.PLUGIN_MAX_PACKAGE_SIZE)


def batch_fetch_plugin_manifests(plugin_ids: list[str]) -> Sequence[MarketplacePluginDeclaration]:
    if len(plugin_ids) == 0:
        return []

    url = str(marketplace_api_url / "api/v1/plugins/batch")
    response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
    response.raise_for_status()

    return [MarketplacePluginDeclaration.model_validate(plugin) for plugin in response.json()["data"]["plugins"]]


def batch_fetch_plugin_by_ids(plugin_ids: list[str]) -> list[dict]:
    if not plugin_ids:
        return []

    url = str(marketplace_api_url / "api/v1/plugins/batch")
    response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
    response.raise_for_status()

    data = response.json()
    return data.get("data", {}).get("plugins", [])


def batch_fetch_plugin_manifests_ignore_deserialization_error(
    plugin_ids: list[str],
) -> Sequence[MarketplacePluginDeclaration]:
    if len(plugin_ids) == 0:
        return []

    url = str(marketplace_api_url / "api/v1/plugins/batch")
    response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
    response.raise_for_status()
    result: list[MarketplacePluginDeclaration] = []
    for plugin in response.json()["data"]["plugins"]:
        try:
            result.append(MarketplacePluginDeclaration.model_validate(plugin))
        except Exception:
            pass

    return result


def record_install_plugin_event(plugin_unique_identifier: str):
    url = str(marketplace_api_url / "api/v1/stats/plugins/install_count")
    response = httpx.post(url, json={"unique_identifier": plugin_unique_identifier})
    response.raise_for_status()
dify/api/core/helper/model_provider_cache.py (new file, 51 lines)
@@ -0,0 +1,51 @@
import json
from enum import StrEnum
from json import JSONDecodeError

from extensions.ext_redis import redis_client


class ProviderCredentialsCacheType(StrEnum):
    PROVIDER = "provider"
    MODEL = "provider_model"
    LOAD_BALANCING_MODEL = "load_balancing_provider_model"


class ProviderCredentialsCache:
    def __init__(self, tenant_id: str, identity_id: str, cache_type: ProviderCredentialsCacheType):
        self.cache_key = f"{cache_type}_credentials:tenant_id:{tenant_id}:id:{identity_id}"

    def get(self) -> dict | None:
        """
        Get cached model provider credentials.

        :return:
        """
        cached_provider_credentials = redis_client.get(self.cache_key)
        if cached_provider_credentials:
            try:
                cached_provider_credentials = cached_provider_credentials.decode("utf-8")
                cached_provider_credentials = json.loads(cached_provider_credentials)
            except JSONDecodeError:
                return None

            return dict(cached_provider_credentials)
        else:
            return None

    def set(self, credentials: dict):
        """
        Cache model provider credentials.

        :param credentials: provider credentials
        :return:
        """
        redis_client.setex(self.cache_key, 86400, json.dumps(credentials))

    def delete(self):
        """
        Delete cached model provider credentials.

        :return:
        """
        redis_client.delete(self.cache_key)
dify/api/core/helper/moderation.py (new file, 61 lines)
@@ -0,0 +1,61 @@
import logging
import secrets
from typing import cast

from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
from core.entities import DEFAULT_PLUGIN_ID
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.model_providers.__base.moderation_model import ModerationModel
from core.model_runtime.model_providers.model_provider_factory import ModelProviderFactory
from extensions.ext_hosting_provider import hosting_configuration
from models.provider import ProviderType

logger = logging.getLogger(__name__)


def check_moderation(tenant_id: str, model_config: ModelConfigWithCredentialsEntity, text: str) -> bool:
    moderation_config = hosting_configuration.moderation_config
    openai_provider_name = f"{DEFAULT_PLUGIN_ID}/openai/openai"
    if (
        moderation_config
        and moderation_config.enabled is True
        and openai_provider_name in hosting_configuration.provider_map
        and hosting_configuration.provider_map[openai_provider_name].enabled is True
    ):
        using_provider_type = model_config.provider_model_bundle.configuration.using_provider_type
        provider_name = model_config.provider
        if using_provider_type == ProviderType.SYSTEM and provider_name in moderation_config.providers:
            hosting_openai_config = hosting_configuration.provider_map[openai_provider_name]

            if hosting_openai_config.credentials is None:
                return False

            # 2,000 characters per chunk
            length = 2000
            text_chunks = [text[i : i + length] for i in range(0, len(text), length)]

            if len(text_chunks) == 0:
                return True

            text_chunk = secrets.choice(text_chunks)

            try:
                model_provider_factory = ModelProviderFactory(tenant_id)

                # Get model instance of LLM
                model_type_instance = model_provider_factory.get_model_type_instance(
                    provider=openai_provider_name, model_type=ModelType.MODERATION
                )
                model_type_instance = cast(ModerationModel, model_type_instance)
                moderation_result = model_type_instance.invoke(
                    model="omni-moderation-latest", credentials=hosting_openai_config.credentials, text=text_chunk
                )

                if moderation_result is True:
                    return True
            except Exception:
                logger.exception("Failed to check moderation, provider_name: %s", provider_name)
                raise InvokeBadRequestError("Rate limit exceeded, please try again later.")

    return False
dify/api/core/helper/module_import_helper.py (new file, 65 lines)
@@ -0,0 +1,65 @@
import importlib.util
import logging
import sys
from types import ModuleType
from typing import AnyStr

logger = logging.getLogger(__name__)


def import_module_from_source(*, module_name: str, py_file_path: AnyStr, use_lazy_loader: bool = False) -> ModuleType:
    """
    Importing a module from the source file directly
    """
    try:
        existed_spec = importlib.util.find_spec(module_name)
        if existed_spec:
            spec = existed_spec
            if not spec.loader:
                raise Exception(f"Failed to load module {module_name} from {py_file_path!r}")
        else:
            # Refer to: https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
            # FIXME: mypy does not support the type of spec.loader
            spec = importlib.util.spec_from_file_location(module_name, py_file_path)  # type: ignore[assignment]
            if not spec or not spec.loader:
                raise Exception(f"Failed to load module {module_name} from {py_file_path!r}")
            if use_lazy_loader:
                # Refer to: https://docs.python.org/3/library/importlib.html#implementing-lazy-imports
                spec.loader = importlib.util.LazyLoader(spec.loader)
        module = importlib.util.module_from_spec(spec)
        if not existed_spec:
            sys.modules[module_name] = module
        spec.loader.exec_module(module)
        return module
    except Exception as e:
        logger.exception("Failed to load module %s from script file '%s'", module_name, repr(py_file_path))
        raise e


def get_subclasses_from_module(mod: ModuleType, parent_type: type) -> list[type]:
    """
    Get all the subclasses of the parent type from the module
    """
    classes = [
        x for _, x in vars(mod).items() if isinstance(x, type) and x != parent_type and issubclass(x, parent_type)
    ]
    return classes


def load_single_subclass_from_source(
    *, module_name: str, script_path: str, parent_type: type, use_lazy_loader: bool = False
) -> type:
    """
    Load a single subclass from the source
    """
    module = import_module_from_source(
        module_name=module_name, py_file_path=script_path, use_lazy_loader=use_lazy_loader
    )
    subclasses = get_subclasses_from_module(module, parent_type)
    match len(subclasses):
        case 1:
            return subclasses[0]
        case 0:
            raise Exception(f"Missing subclass of {parent_type.__name__} in {script_path!r}")
        case _:
            raise Exception(f"Multiple subclasses of {parent_type.__name__} in {script_path!r}")
dify/api/core/helper/name_generator.py (new file, 42 lines)
@@ -0,0 +1,42 @@
import logging
import re
from collections.abc import Sequence
from typing import Any

from core.plugin.entities.plugin_daemon import CredentialType

logger = logging.getLogger(__name__)


def generate_provider_name(
    providers: Sequence[Any], credential_type: CredentialType, fallback_context: str = "provider"
) -> str:
    try:
        return generate_incremental_name(
            [provider.name for provider in providers],
            f"{credential_type.get_name()}",
        )
    except Exception as e:
        logger.warning("Error generating next provider name for %r: %r", fallback_context, e)
        return f"{credential_type.get_name()} 1"


def generate_incremental_name(
    names: Sequence[str],
    default_pattern: str,
) -> str:
    pattern = rf"^{re.escape(default_pattern)}\s+(\d+)$"
    numbers = []

    for name in names:
        if not name:
            continue
        match = re.match(pattern, name.strip())
        if match:
            numbers.append(int(match.group(1)))

    if not numbers:
        return f"{default_pattern} 1"

    max_number = max(numbers)
    return f"{default_pattern} {max_number + 1}"
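
Note: behavior sketch (not part of the commit) for generate_incremental_name:

    from core.helper.name_generator import generate_incremental_name

    generate_incremental_name(["API KEY 1", "API KEY 3", "other"], "API KEY")  # -> "API KEY 4"
    generate_incremental_name([], "API KEY")                                   # -> "API KEY 1"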
dify/api/core/helper/position_helper.py (new file, 133 lines)
@@ -0,0 +1,133 @@
import os
from collections import OrderedDict
from collections.abc import Callable
from functools import lru_cache
from typing import TypeVar

from configs import dify_config
from core.tools.utils.yaml_utils import load_yaml_file_cached


@lru_cache(maxsize=128)
def get_position_map(folder_path: str, *, file_name: str = "_position.yaml") -> dict[str, int]:
    """
    Get the mapping from name to index from a YAML file
    :param folder_path:
    :param file_name: the YAML file name, default to '_position.yaml'
    :return: a dict with name as key and index as value
    """
    # FIXME(-LAN-): Cache position maps to prevent file descriptor exhaustion during high-load benchmarks
    position_file_path = os.path.join(folder_path, file_name)
    try:
        yaml_content = load_yaml_file_cached(file_path=position_file_path)
    except Exception:
        yaml_content = []
    positions = [item.strip() for item in yaml_content if item and isinstance(item, str) and item.strip()]
    return {name: index for index, name in enumerate(positions)}


@lru_cache(maxsize=128)
def get_tool_position_map(folder_path: str, file_name: str = "_position.yaml") -> dict[str, int]:
    """
    Get the mapping for tools from name to index from a YAML file.
    :param folder_path:
    :param file_name: the YAML file name, default to '_position.yaml'
    :return: a dict with name as key and index as value
    """
    position_map = get_position_map(folder_path, file_name=file_name)

    return pin_position_map(
        position_map,
        pin_list=dify_config.POSITION_TOOL_PINS_LIST,
    )


def pin_position_map(original_position_map: dict[str, int], pin_list: list[str]) -> dict[str, int]:
    """
    Pin the items in the pin list to the beginning of the position map.
    Overall logic: exclude > include > pin
    :param original_position_map: the position map to be sorted and filtered
    :param pin_list: the list of pins to be put at the beginning
    :return: the sorted position map
    """
    positions = sorted(original_position_map.keys(), key=lambda x: original_position_map[x])

    # Add pins to position map
    position_map = {name: idx for idx, name in enumerate(pin_list)}

    # Add remaining positions to position map
    start_idx = len(position_map)
    for name in positions:
        if name not in position_map:
            position_map[name] = start_idx
            start_idx += 1

    return position_map


T = TypeVar("T")


def is_filtered(
    include_set: set[str],
    exclude_set: set[str],
    data: T,
    name_func: Callable[[T], str],
) -> bool:
    """
    Check if the object should be filtered out.
    Overall logic: exclude > include > pin
    :param include_set: the set of names to be included
    :param exclude_set: the set of names to be excluded
    :param name_func: the function to get the name of the object
    :param data: the data to be filtered
    :return: True if the object should be filtered out, False otherwise
    """
    if not data:
        return False
    if not include_set and not exclude_set:
        return False

    name = name_func(data)

    if name in exclude_set:  # exclude_set is prioritized
        return True
    if include_set and name not in include_set:  # filter out only if include_set is not empty
        return True
    return False


def sort_by_position_map(
    position_map: dict[str, int],
    data: list[T],
    name_func: Callable[[T], str],
):
    """
    Sort the objects by the position map.
    If the name of the object is not in the position map, it will be put at the end.
    :param position_map: the map holding positions in the form of {name: index}
    :param name_func: the function to get the name of the object
    :param data: the data to be sorted
    :return: the sorted objects
    """
    if not position_map or not data:
        return data

    return sorted(data, key=lambda x: position_map.get(name_func(x), float("inf")))


def sort_to_dict_by_position_map(
    position_map: dict[str, int],
    data: list[T],
    name_func: Callable[[T], str],
):
    """
    Sort the objects into an ordered dict by the position map.
    If the name of the object is not in the position map, it will be put at the end.
    :param position_map: the map holding positions in the form of {name: index}
    :param name_func: the function to get the name of the object
    :param data: the data to be sorted
    :return: an OrderedDict with the sorted pairs of name and object
    """
    sorted_items = sort_by_position_map(position_map, data, name_func)
    return OrderedDict((name_func(item), item) for item in sorted_items)
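
Note: behavior sketch (not part of the commit) for the position helpers above:

    from core.helper.position_helper import pin_position_map, sort_by_position_map

    pin_position_map({"alpha": 0, "beta": 1, "gamma": 2}, pin_list=["gamma"])
    # -> {"gamma": 0, "alpha": 1, "beta": 2}

    sort_by_position_map({"b": 0, "a": 1}, ["a", "b", "c"], name_func=lambda x: x)
    # -> ["b", "a", "c"]  ("c" is missing from the map, so it sorts last)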
dify/api/core/helper/provider_cache.py (new file, 84 lines)
@@ -0,0 +1,84 @@
import json
from abc import ABC, abstractmethod
from json import JSONDecodeError
from typing import Any

from extensions.ext_redis import redis_client


class ProviderCredentialsCache(ABC):
    """Base class for provider credentials cache"""

    def __init__(self, **kwargs):
        self.cache_key = self._generate_cache_key(**kwargs)

    @abstractmethod
    def _generate_cache_key(self, **kwargs) -> str:
        """Generate cache key based on subclass implementation"""
        pass

    def get(self) -> dict | None:
        """Get cached provider credentials"""
        cached_credentials = redis_client.get(self.cache_key)
        if cached_credentials:
            try:
                cached_credentials = cached_credentials.decode("utf-8")
                return dict(json.loads(cached_credentials))
            except JSONDecodeError:
                return None
        return None

    def set(self, config: dict[str, Any]):
        """Cache provider credentials"""
        redis_client.setex(self.cache_key, 86400, json.dumps(config))

    def delete(self):
        """Delete cached provider credentials"""
        redis_client.delete(self.cache_key)


class SingletonProviderCredentialsCache(ProviderCredentialsCache):
    """Cache for tool single provider credentials"""

    def __init__(self, tenant_id: str, provider_type: str, provider_identity: str):
        super().__init__(
            tenant_id=tenant_id,
            provider_type=provider_type,
            provider_identity=provider_identity,
        )

    def _generate_cache_key(self, **kwargs) -> str:
        tenant_id = kwargs["tenant_id"]
        provider_type = kwargs["provider_type"]
        identity_name = kwargs["provider_identity"]
        identity_id = f"{provider_type}.{identity_name}"
        return f"{provider_type}_credentials:tenant_id:{tenant_id}:id:{identity_id}"


class ToolProviderCredentialsCache(ProviderCredentialsCache):
    """Cache for tool provider credentials"""

    def __init__(self, tenant_id: str, provider: str, credential_id: str):
        super().__init__(tenant_id=tenant_id, provider=provider, credential_id=credential_id)

    def _generate_cache_key(self, **kwargs) -> str:
        tenant_id = kwargs["tenant_id"]
        provider = kwargs["provider"]
        credential_id = kwargs["credential_id"]
        return f"tool_credentials:tenant_id:{tenant_id}:provider:{provider}:credential_id:{credential_id}"


class NoOpProviderCredentialCache:
    """No-op provider credential cache"""

    def get(self) -> dict | None:
        """Get cached provider credentials"""
        return None

    def set(self, config: dict[str, Any]):
        """Cache provider credentials"""
        pass

    def delete(self):
        """Delete cached provider credentials"""
        pass
dify/api/core/helper/provider_encryption.py (new file, 129 lines)
@@ -0,0 +1,129 @@
import contextlib
from collections.abc import Mapping
from copy import deepcopy
from typing import Any, Protocol

from core.entities.provider_entities import BasicProviderConfig
from core.helper import encrypter


class ProviderConfigCache(Protocol):
    """
    Interface for provider configuration cache operations
    """

    def get(self) -> dict[str, Any] | None:
        """Get cached provider configuration"""
        ...

    def set(self, config: dict[str, Any]) -> None:
        """Cache provider configuration"""
        ...

    def delete(self) -> None:
        """Delete cached provider configuration"""
        ...


class ProviderConfigEncrypter:
    tenant_id: str
    config: list[BasicProviderConfig]
    provider_config_cache: ProviderConfigCache

    def __init__(
        self,
        tenant_id: str,
        config: list[BasicProviderConfig],
        provider_config_cache: ProviderConfigCache,
    ):
        self.tenant_id = tenant_id
        self.config = config
        self.provider_config_cache = provider_config_cache

    def _deep_copy(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        deep copy data
        """
        return deepcopy(data)

    def encrypt(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        encrypt tool credentials with tenant id

        return a deep copy of credentials with encrypted values
        """
        data = dict(self._deep_copy(data))

        # get the fields that need to be encrypted
        fields = dict[str, BasicProviderConfig]()
        for credential in self.config:
            fields[credential.name] = credential

        for field_name, field in fields.items():
            if field.type == BasicProviderConfig.Type.SECRET_INPUT:
                if field_name in data:
                    encrypted = encrypter.encrypt_token(self.tenant_id, data[field_name] or "")
                    data[field_name] = encrypted

        return data

    def mask_credentials(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        mask credentials

        return a deep copy of credentials with masked values
        """
        data = dict(self._deep_copy(data))

        # get the fields that need to be masked
        fields = dict[str, BasicProviderConfig]()
        for credential in self.config:
            fields[credential.name] = credential

        for field_name, field in fields.items():
            if field.type == BasicProviderConfig.Type.SECRET_INPUT:
                if field_name in data:
                    if len(data[field_name]) > 6:
                        data[field_name] = (
                            data[field_name][:2] + "*" * (len(data[field_name]) - 4) + data[field_name][-2:]
                        )
                    else:
                        data[field_name] = "*" * len(data[field_name])

        return data

    def mask_plugin_credentials(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
        return self.mask_credentials(data)

    def decrypt(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        decrypt tool credentials with tenant id

        return a deep copy of credentials with decrypted values
        """
        cached_credentials = self.provider_config_cache.get()
        if cached_credentials:
            return cached_credentials

        data = dict(self._deep_copy(data))
        # get the fields that need to be decrypted
        fields = dict[str, BasicProviderConfig]()
        for credential in self.config:
            fields[credential.name] = credential

        for field_name, field in fields.items():
            if field.type == BasicProviderConfig.Type.SECRET_INPUT:
                if field_name in data:
                    with contextlib.suppress(Exception):
                        # if the value is None or empty string, skip decrypt
                        if not data[field_name]:
                            continue

                        data[field_name] = encrypter.decrypt_token(self.tenant_id, data[field_name])

        self.provider_config_cache.set(dict(data))
        return data


def create_provider_encrypter(tenant_id: str, config: list[BasicProviderConfig], cache: ProviderConfigCache):
    return ProviderConfigEncrypter(tenant_id=tenant_id, config=config, provider_config_cache=cache), cache
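
Note: an isolated illustration (not part of the commit) of the masking rule that mask_credentials applies to SECRET_INPUT fields:

    def mask(value: str) -> str:
        # same rule as mask_credentials: keep first 2 and last 2 chars of long secrets
        if len(value) > 6:
            return value[:2] + "*" * (len(value) - 4) + value[-2:]
        return "*" * len(value)

    mask("sk-1234567890")  # -> "sk*********90"
    mask("abc")            # -> "***"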
dify/api/core/helper/ssrf_proxy.py (new file, 138 lines)
@@ -0,0 +1,138 @@
"""
Proxy requests to avoid SSRF
"""

import logging
import time

import httpx

from configs import dify_config
from core.helper.http_client_pooling import get_pooled_http_client

logger = logging.getLogger(__name__)

SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES

BACKOFF_FACTOR = 0.5
STATUS_FORCELIST = [429, 500, 502, 503, 504]

_SSL_VERIFIED_POOL_KEY = "ssrf:verified"
_SSL_UNVERIFIED_POOL_KEY = "ssrf:unverified"
_SSRF_CLIENT_LIMITS = httpx.Limits(
    max_connections=dify_config.SSRF_POOL_MAX_CONNECTIONS,
    max_keepalive_connections=dify_config.SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS,
    keepalive_expiry=dify_config.SSRF_POOL_KEEPALIVE_EXPIRY,
)


class MaxRetriesExceededError(ValueError):
    """Raised when the maximum number of retries is exceeded."""

    pass


def _create_proxy_mounts() -> dict[str, httpx.HTTPTransport]:
    return {
        "http://": httpx.HTTPTransport(
            proxy=dify_config.SSRF_PROXY_HTTP_URL,
        ),
        "https://": httpx.HTTPTransport(
            proxy=dify_config.SSRF_PROXY_HTTPS_URL,
        ),
    }


def _build_ssrf_client(verify: bool) -> httpx.Client:
    if dify_config.SSRF_PROXY_ALL_URL:
        return httpx.Client(
            proxy=dify_config.SSRF_PROXY_ALL_URL,
            verify=verify,
            limits=_SSRF_CLIENT_LIMITS,
        )

    if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
        return httpx.Client(
            mounts=_create_proxy_mounts(),
            verify=verify,
            limits=_SSRF_CLIENT_LIMITS,
        )

    return httpx.Client(verify=verify, limits=_SSRF_CLIENT_LIMITS)


def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client:
    if not isinstance(ssl_verify_enabled, bool):
        raise ValueError("SSRF client verify flag must be a boolean")

    return get_pooled_http_client(
        _SSL_VERIFIED_POOL_KEY if ssl_verify_enabled else _SSL_UNVERIFIED_POOL_KEY,
        lambda: _build_ssrf_client(verify=ssl_verify_enabled),
    )


def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
    if "allow_redirects" in kwargs:
        allow_redirects = kwargs.pop("allow_redirects")
        if "follow_redirects" not in kwargs:
            kwargs["follow_redirects"] = allow_redirects

    if "timeout" not in kwargs:
        kwargs["timeout"] = httpx.Timeout(
            timeout=dify_config.SSRF_DEFAULT_TIME_OUT,
            connect=dify_config.SSRF_DEFAULT_CONNECT_TIME_OUT,
            read=dify_config.SSRF_DEFAULT_READ_TIME_OUT,
            write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT,
        )

    # prioritize per-call option, which can be switched on and off inside the HTTP node on the web UI
    verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY)
    client = _get_ssrf_client(verify_option)

    retries = 0
    while retries <= max_retries:
        try:
            response = client.request(method=method, url=url, **kwargs)

            if response.status_code not in STATUS_FORCELIST:
                return response
            else:
                logger.warning(
                    "Received status code %s for URL %s which is in the force list",
                    response.status_code,
                    url,
                )

        except httpx.RequestError as e:
            logger.warning("Request to URL %s failed on attempt %s: %s", url, retries + 1, e)
            if max_retries == 0:
                raise

        retries += 1
        if retries <= max_retries:
            time.sleep(BACKOFF_FACTOR * (2 ** (retries - 1)))
    raise MaxRetriesExceededError(f"Reached maximum retries ({max_retries}) for URL {url}")


def get(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
    return make_request("GET", url, max_retries=max_retries, **kwargs)


def post(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
    return make_request("POST", url, max_retries=max_retries, **kwargs)


def put(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
    return make_request("PUT", url, max_retries=max_retries, **kwargs)


def patch(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
    return make_request("PATCH", url, max_retries=max_retries, **kwargs)


def delete(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
    return make_request("DELETE", url, max_retries=max_retries, **kwargs)


def head(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
    return make_request("HEAD", url, max_retries=max_retries, **kwargs)
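
Note: a usage sketch (not part of the commit). The module mirrors requests-style helpers while routing through the configured SSRF proxy and retrying on 429/5xx responses; the URL is illustrative:

    from core.helper import ssrf_proxy

    response = ssrf_proxy.get(
        "https://example.com/data.json",
        max_retries=3,
        ssl_verify=False,  # per-call override of HTTP_REQUEST_NODE_SSL_VERIFY
    )
    response.raise_for_status()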
dify/api/core/helper/tool_parameter_cache.py (new file, 49 lines)
@@ -0,0 +1,49 @@
import json
from enum import StrEnum
from json import JSONDecodeError

from extensions.ext_redis import redis_client


class ToolParameterCacheType(StrEnum):
    PARAMETER = "tool_parameter"


class ToolParameterCache:
    def __init__(
        self, tenant_id: str, provider: str, tool_name: str, cache_type: ToolParameterCacheType, identity_id: str
    ):
        self.cache_key = (
            f"{cache_type}_secret:tenant_id:{tenant_id}:provider:{provider}:tool_name:{tool_name}"
            f":identity_id:{identity_id}"
        )

    def get(self) -> dict | None:
        """
        Get cached tool parameters.

        :return:
        """
        cached_tool_parameter = redis_client.get(self.cache_key)
        if cached_tool_parameter:
            try:
                cached_tool_parameter = cached_tool_parameter.decode("utf-8")
                cached_tool_parameter = json.loads(cached_tool_parameter)
            except JSONDecodeError:
                return None

            return dict(cached_tool_parameter)
        else:
            return None

    def set(self, parameters: dict):
        """Cache tool parameters."""
        redis_client.setex(self.cache_key, 86400, json.dumps(parameters))

    def delete(self):
        """
        Delete cached tool parameters.

        :return:
        """
        redis_client.delete(self.cache_key)
dify/api/core/helper/trace_id_helper.py (new file, 105 lines)
@@ -0,0 +1,105 @@
import contextlib
import re
from collections.abc import Mapping
from typing import Any


def is_valid_trace_id(trace_id: str) -> bool:
    """
    Check if the trace_id is valid.

    Requirements: 1-128 characters, only letters, numbers, '-', and '_'.
    """
    return bool(re.match(r"^[a-zA-Z0-9\-_]{1,128}$", trace_id))


def get_external_trace_id(request: Any) -> str | None:
    """
    Retrieve the trace_id from the request.

    Priority:
    1. header ('X-Trace-Id')
    2. parameters
    3. JSON body
    4. Current OpenTelemetry context (if enabled)
    5. OpenTelemetry traceparent header (if present and valid)

    Returns None if no valid trace_id is provided.
    """
    trace_id = request.headers.get("X-Trace-Id")

    if not trace_id:
        trace_id = request.args.get("trace_id")

    if not trace_id and getattr(request, "is_json", False):
        json_data = getattr(request, "json", None)
        if json_data:
            trace_id = json_data.get("trace_id")

    if not trace_id:
        trace_id = get_trace_id_from_otel_context()

    if not trace_id:
        traceparent = request.headers.get("traceparent")
        if traceparent:
            trace_id = parse_traceparent_header(traceparent)

    if isinstance(trace_id, str) and is_valid_trace_id(trace_id):
        return trace_id
    return None


def extract_external_trace_id_from_args(args: Mapping[str, Any]):
    """
    Extract 'external_trace_id' from args.

    Returns a dict suitable for use in extras. Returns an empty dict if not found.
    """
    trace_id = args.get("external_trace_id")
    if trace_id:
        return {"external_trace_id": trace_id}
    return {}


def get_trace_id_from_otel_context() -> str | None:
    """
    Retrieve the current trace ID from the active OpenTelemetry trace context.
    Returns None if:
    1. OpenTelemetry SDK is not installed or enabled.
    2. There is no active span or trace context.
    """
    try:
        from opentelemetry.trace import SpanContext, get_current_span
        from opentelemetry.trace.span import INVALID_TRACE_ID

        span = get_current_span()
        if not span:
            return None

        span_context: SpanContext = span.get_span_context()

        if not span_context or span_context.trace_id == INVALID_TRACE_ID:
            return None

        trace_id_hex = f"{span_context.trace_id:032x}"
        return trace_id_hex

    except Exception:
        return None


def parse_traceparent_header(traceparent: str) -> str | None:
    """
    Parse the `traceparent` header to extract the trace_id.

    Expected format:
        'version-trace_id-span_id-flags'

    Reference:
        W3C Trace Context Specification: https://www.w3.org/TR/trace-context/
    """
    with contextlib.suppress(Exception):
        parts = traceparent.split("-")
        if len(parts) == 4 and len(parts[1]) == 32:
            return parts[1]
    return None
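
Note: behavior sketch (not part of the commit) for parse_traceparent_header, using the sample traceparent from the W3C specification:

    from core.helper.trace_id_helper import is_valid_trace_id, parse_traceparent_header

    parse_traceparent_header("00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01")
    # -> "0af7651916cd43dd8448eb211c80319c"
    is_valid_trace_id("0af7651916cd43dd8448eb211c80319c")  # -> True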