2025-12-01 17:21:38 +08:00
parent 32fee2b8ab
commit fab8c13cb3
7511 changed files with 996300 additions and 0 deletions


@@ -0,0 +1,21 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/.."
# Get the path argument if provided
PATH_TO_CHECK="$1"
# Determine CPU core count based on OS
CPU_CORES=$(
if [[ "$(uname -s)" == "Darwin" ]]; then
sysctl -n hw.ncpu 2>/dev/null
else
nproc
fi
)
# Run basedpyright checks
uv run --directory api --dev -- basedpyright --threads "$CPU_CORES" ${PATH_TO_CHECK:+"$PATH_TO_CHECK"}
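# Example usage (run from the repository root; the subpath below is illustrative —
# any file or directory inside the api project can be passed):
#   dev/basedpyright-check              # check the whole api project
#   dev/basedpyright-check core/tools   # limit the check to one subtree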


@@ -0,0 +1,20 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/../.."
# ModelRuntime
dev/pytest/pytest_model_runtime.sh
# Tools
dev/pytest/pytest_tools.sh
# Workflow
dev/pytest/pytest_workflow.sh
# Unit tests
dev/pytest/pytest_unit_tests.sh
# TestContainers tests
dev/pytest/pytest_testcontainers.sh


@@ -0,0 +1,9 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/../.."
PYTEST_TIMEOUT="${PYTEST_TIMEOUT:-120}"
pytest --timeout "${PYTEST_TIMEOUT}" api/tests/artifact_tests/


@@ -0,0 +1,116 @@
from pathlib import Path
import yaml # type: ignore
from dotenv import dotenv_values
BASE_API_AND_DOCKER_CONFIG_SET_DIFF = {
"APP_MAX_EXECUTION_TIME",
"BATCH_UPLOAD_LIMIT",
"CELERY_BEAT_SCHEDULER_TIME",
"CODE_EXECUTION_API_KEY",
"HTTP_REQUEST_MAX_CONNECT_TIMEOUT",
"HTTP_REQUEST_MAX_READ_TIMEOUT",
"HTTP_REQUEST_MAX_WRITE_TIMEOUT",
"INNER_API_KEY",
"INNER_API_KEY_FOR_PLUGIN",
"KEYWORD_DATA_SOURCE_TYPE",
"LOGIN_LOCKOUT_DURATION",
"LOG_FORMAT",
"OCI_ACCESS_KEY",
"OCI_BUCKET_NAME",
"OCI_ENDPOINT",
"OCI_REGION",
"OCI_SECRET_KEY",
"PLUGIN_DAEMON_KEY",
"PLUGIN_DAEMON_URL",
"PLUGIN_REMOTE_INSTALL_HOST",
"PLUGIN_REMOTE_INSTALL_PORT",
"REDIS_DB",
"RESEND_API_URL",
"RESPECT_XFORWARD_HEADERS_ENABLED",
"SENTRY_DSN",
"SSRF_DEFAULT_CONNECT_TIME_OUT",
"SSRF_DEFAULT_MAX_RETRIES",
"SSRF_DEFAULT_READ_TIME_OUT",
"SSRF_DEFAULT_TIME_OUT",
"SSRF_DEFAULT_WRITE_TIME_OUT",
"UPSTASH_VECTOR_TOKEN",
"UPSTASH_VECTOR_URL",
"USING_UGC_INDEX",
"WEAVIATE_BATCH_SIZE",
"WEAVIATE_GRPC_ENABLED",
}
BASE_API_AND_DOCKER_COMPOSE_CONFIG_SET_DIFF = {
"BATCH_UPLOAD_LIMIT",
"CELERY_BEAT_SCHEDULER_TIME",
"HTTP_REQUEST_MAX_CONNECT_TIMEOUT",
"HTTP_REQUEST_MAX_READ_TIMEOUT",
"HTTP_REQUEST_MAX_WRITE_TIMEOUT",
"INNER_API_KEY",
"INNER_API_KEY_FOR_PLUGIN",
"KEYWORD_DATA_SOURCE_TYPE",
"LOGIN_LOCKOUT_DURATION",
"LOG_FORMAT",
"OPENDAL_FS_ROOT",
"OPENDAL_S3_ACCESS_KEY_ID",
"OPENDAL_S3_BUCKET",
"OPENDAL_S3_ENDPOINT",
"OPENDAL_S3_REGION",
"OPENDAL_S3_ROOT",
"OPENDAL_S3_SECRET_ACCESS_KEY",
"OPENDAL_S3_SERVER_SIDE_ENCRYPTION",
"PGVECTOR_MAX_CONNECTION",
"PGVECTOR_MIN_CONNECTION",
"PGVECTO_RS_DATABASE",
"PGVECTO_RS_HOST",
"PGVECTO_RS_PASSWORD",
"PGVECTO_RS_PORT",
"PGVECTO_RS_USER",
"PLUGIN_DAEMON_KEY",
"PLUGIN_DAEMON_URL",
"PLUGIN_REMOTE_INSTALL_HOST",
"PLUGIN_REMOTE_INSTALL_PORT",
"RESPECT_XFORWARD_HEADERS_ENABLED",
"SCARF_NO_ANALYTICS",
"SSRF_DEFAULT_CONNECT_TIME_OUT",
"SSRF_DEFAULT_MAX_RETRIES",
"SSRF_DEFAULT_READ_TIME_OUT",
"SSRF_DEFAULT_TIME_OUT",
"SSRF_DEFAULT_WRITE_TIME_OUT",
"STORAGE_OPENDAL_SCHEME",
"SUPABASE_API_KEY",
"SUPABASE_BUCKET_NAME",
"SUPABASE_URL",
"USING_UGC_INDEX",
"VIKINGDB_CONNECTION_TIMEOUT",
"VIKINGDB_SOCKET_TIMEOUT",
"WEAVIATE_BATCH_SIZE",
"WEAVIATE_GRPC_ENABLED",
}
API_CONFIG_SET = set(dotenv_values(Path("api") / Path(".env.example")).keys())
DOCKER_CONFIG_SET = set(dotenv_values(Path("docker") / Path(".env.example")).keys())
DOCKER_COMPOSE_CONFIG_SET = set()
with open(Path("docker") / Path("docker-compose.yaml")) as f:
    DOCKER_COMPOSE_CONFIG_SET = set(yaml.safe_load(f.read())["x-shared-env"].keys())


def test_yaml_config():
    # Set difference: keys present in the API config but missing from the Docker
    # config and not covered by the known-difference allowlist above.
    DIFF_API_WITH_DOCKER = API_CONFIG_SET - DOCKER_CONFIG_SET - BASE_API_AND_DOCKER_CONFIG_SET_DIFF
    if DIFF_API_WITH_DOCKER:
        print(f"API and Docker config sets differ on keys: {DIFF_API_WITH_DOCKER}")
        raise Exception("API and Docker config sets are different")
    DIFF_API_WITH_DOCKER_COMPOSE = (
        API_CONFIG_SET - DOCKER_COMPOSE_CONFIG_SET - BASE_API_AND_DOCKER_COMPOSE_CONFIG_SET_DIFF
    )
    if DIFF_API_WITH_DOCKER_COMPOSE:
        print(f"API and Docker Compose config sets differ on keys: {DIFF_API_WITH_DOCKER_COMPOSE}")
        raise Exception("API and Docker Compose config sets are different")
    print("All tests passed!")


if __name__ == "__main__":
    test_yaml_config()
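# When this check fails, the usual fix is to add the missing key to the other
# template or to the allowlists above. Illustrative example for a new key:
#   echo 'MY_NEW_SETTING=default-value' >> docker/.env.example
# and mirror it in the x-shared-env block of docker/docker-compose.yaml.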


@@ -0,0 +1,18 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/../.."
PYTEST_TIMEOUT="${PYTEST_TIMEOUT:-180}"
pytest --timeout "${PYTEST_TIMEOUT}" api/tests/integration_tests/model_runtime/anthropic \
api/tests/integration_tests/model_runtime/azure_openai \
api/tests/integration_tests/model_runtime/openai api/tests/integration_tests/model_runtime/chatglm \
api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \
api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \
api/tests/integration_tests/model_runtime/upstage \
api/tests/integration_tests/model_runtime/fireworks \
api/tests/integration_tests/model_runtime/nomic \
api/tests/integration_tests/model_runtime/mixedbread \
api/tests/integration_tests/model_runtime/voyage


@@ -0,0 +1,9 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/../.."
PYTEST_TIMEOUT="${PYTEST_TIMEOUT:-120}"
pytest --timeout "${PYTEST_TIMEOUT}" api/tests/test_containers_integration_tests


@@ -0,0 +1,9 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/../.."
PYTEST_TIMEOUT="${PYTEST_TIMEOUT:-120}"
pytest --timeout "${PYTEST_TIMEOUT}" api/tests/integration_tests/tools


@@ -0,0 +1,10 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/../.."
PYTEST_TIMEOUT="${PYTEST_TIMEOUT:-20}"
# run all unit tests under api/tests/unit_tests
pytest --timeout "${PYTEST_TIMEOUT}" api/tests/unit_tests
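# The per-test timeout can be raised on slower machines by overriding the
# variable at invocation time, for example:
#   PYTEST_TIMEOUT=60 dev/pytest/pytest_unit_tests.sh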


@@ -0,0 +1,23 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/../.."
PYTEST_TIMEOUT="${PYTEST_TIMEOUT:-180}"
pytest --timeout "${PYTEST_TIMEOUT}" api/tests/integration_tests/vdb/chroma \
api/tests/integration_tests/vdb/milvus \
api/tests/integration_tests/vdb/pgvecto_rs \
api/tests/integration_tests/vdb/pgvector \
api/tests/integration_tests/vdb/qdrant \
api/tests/integration_tests/vdb/weaviate \
api/tests/integration_tests/vdb/elasticsearch \
api/tests/integration_tests/vdb/vikingdb \
api/tests/integration_tests/vdb/baidu \
api/tests/integration_tests/vdb/tcvectordb \
api/tests/integration_tests/vdb/upstash \
api/tests/integration_tests/vdb/couchbase \
api/tests/integration_tests/vdb/oceanbase \
api/tests/integration_tests/vdb/tidb_vector \
api/tests/integration_tests/vdb/huawei


@@ -0,0 +1,9 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/../.."
PYTEST_TIMEOUT="${PYTEST_TIMEOUT:-120}"
pytest --timeout "${PYTEST_TIMEOUT}" api/tests/integration_tests/workflow

dify/dev/reformat Normal file

@@ -0,0 +1,21 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/.."
# Import linter
uv run --directory api --dev lint-imports
# run ruff linter
uv run --directory api --dev ruff check --fix ./
# run ruff formatter
uv run --directory api --dev ruff format ./
# run dotenv-linter linter
uv run --project api --dev dotenv-linter ./api/.env.example ./web/.env.example
# run basedpyright check
dev/basedpyright-check

dify/dev/start-api Normal file

@@ -0,0 +1,10 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/.."
uv --directory api run \
flask run --host 0.0.0.0 --port=5001 --debug

dify/dev/start-beat Normal file

@@ -0,0 +1,60 @@
#!/bin/bash
set -x
# Help function
show_help() {
echo "Usage: $0 [OPTIONS]"
echo ""
echo "Options:"
echo " --loglevel LEVEL Log level (default: INFO)"
echo " --scheduler SCHEDULER Scheduler class (default: celery.beat:PersistentScheduler)"
echo " -h, --help Show this help message"
echo ""
echo "Examples:"
echo " $0"
echo " $0 --loglevel DEBUG"
echo " $0 --scheduler django_celery_beat.schedulers:DatabaseScheduler"
echo ""
echo "Description:"
echo " Starts Celery Beat scheduler for periodic task execution."
echo " Beat sends scheduled tasks to worker queues at specified intervals."
}
# Parse command line arguments
LOGLEVEL="INFO"
SCHEDULER="celery.beat:PersistentScheduler"
while [[ $# -gt 0 ]]; do
case $1 in
--loglevel)
LOGLEVEL="$2"
shift 2
;;
--scheduler)
SCHEDULER="$2"
shift 2
;;
-h|--help)
show_help
exit 0
;;
*)
echo "Unknown option: $1"
show_help
exit 1
;;
esac
done
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/.."
echo "Starting Celery Beat with:"
echo " Log Level: ${LOGLEVEL}"
echo " Scheduler: ${SCHEDULER}"
uv --directory api run \
celery -A app.celery beat \
--loglevel "${LOGLEVEL}" \
--scheduler "${SCHEDULER}"
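# Beat only enqueues scheduled work; pair it with a worker that consumes the
# schedule queues (queue names as defined in dev/start-worker), e.g.:
#   dev/start-beat --loglevel DEBUG &
#   dev/start-worker --queues schedule_poller,schedule_executor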

dify/dev/start-web Normal file

@@ -0,0 +1,8 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/../web"
pnpm install && pnpm dev

dify/dev/start-worker Normal file

@@ -0,0 +1,127 @@
#!/bin/bash
set -x
# Help function
show_help() {
echo "Usage: $0 [OPTIONS]"
echo ""
echo "Options:"
echo " -q, --queues QUEUES Comma-separated list of queues to process"
echo " -c, --concurrency NUM Number of worker processes (default: 1)"
echo " -P, --pool POOL Pool implementation (default: gevent)"
echo " --loglevel LEVEL Log level (default: INFO)"
echo " -e, --env-file FILE Path to an env file to source before starting"
echo " -h, --help Show this help message"
echo ""
echo "Examples:"
echo " $0 --queues dataset,workflow"
echo " $0 --queues workflow_professional,workflow_team --concurrency 4"
echo " $0 --queues dataset --concurrency 2 --pool prefork"
echo ""
echo "Available queues:"
echo " dataset - RAG indexing and document processing"
echo " workflow - Workflow triggers (community edition)"
echo " workflow_professional - Professional tier workflows (cloud edition)"
echo " workflow_team - Team tier workflows (cloud edition)"
echo " workflow_sandbox - Sandbox tier workflows (cloud edition)"
echo " schedule_poller - Schedule polling tasks"
echo " schedule_executor - Schedule execution tasks"
echo " mail - Email notifications"
echo " ops_trace - Operations tracing"
echo " app_deletion - Application cleanup"
echo " plugin - Plugin operations"
echo " workflow_storage - Workflow storage tasks"
echo " conversation - Conversation tasks"
echo " priority_pipeline - High priority pipeline tasks"
echo " pipeline - Standard pipeline tasks"
echo " triggered_workflow_dispatcher - Trigger dispatcher tasks"
echo " trigger_refresh_executor - Trigger refresh tasks"
}
# Parse command line arguments
QUEUES=""
CONCURRENCY=1
POOL="gevent"
LOGLEVEL="INFO"
ENV_FILE=""
while [[ $# -gt 0 ]]; do
case $1 in
-q|--queues)
QUEUES="$2"
shift 2
;;
-c|--concurrency)
CONCURRENCY="$2"
shift 2
;;
-P|--pool)
POOL="$2"
shift 2
;;
--loglevel)
LOGLEVEL="$2"
shift 2
;;
-e|--env-file)
ENV_FILE="$2"
shift 2
;;
-h|--help)
show_help
exit 0
;;
*)
echo "Unknown option: $1"
show_help
exit 1
;;
esac
done
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/.."
if [[ -n "${ENV_FILE}" ]]; then
if [[ ! -f "${ENV_FILE}" ]]; then
echo "Env file ${ENV_FILE} not found"
exit 1
fi
echo "Loading environment variables from ${ENV_FILE}"
# Export everything sourced from the env file
set -a
source "${ENV_FILE}"
set +a
fi
# If no queues specified, use edition-based defaults
if [[ -z "${QUEUES}" ]]; then
# Get EDITION from environment, default to SELF_HOSTED (community edition)
EDITION=${EDITION:-"SELF_HOSTED"}
# Configure queues based on edition
if [[ "${EDITION}" == "CLOUD" ]]; then
# Cloud edition: separate queues for dataset and trigger tasks
QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor"
else
# Community edition (SELF_HOSTED): dataset and workflow have separate queues
QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor"
fi
echo "No queues specified, using edition-based defaults: ${QUEUES}"
else
echo "Using specified queues: ${QUEUES}"
fi
echo "Starting Celery worker with:"
echo " Queues: ${QUEUES}"
echo " Concurrency: ${CONCURRENCY}"
echo " Pool: ${POOL}"
echo " Log Level: ${LOGLEVEL}"
uv --directory api run \
celery -A app.celery worker \
-P "${POOL}" -c "${CONCURRENCY}" --loglevel "${LOGLEVEL}" -Q "${QUEUES}"
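# Example: a prefork worker with an explicit env file (file path illustrative):
#   dev/start-worker --env-file api/.env --queues dataset,workflow --concurrency 2 --pool prefork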

dify/dev/sync-uv Normal file

@@ -0,0 +1,13 @@
#!/bin/bash
# rely on `uv` in path
if ! command -v uv &> /dev/null; then
echo "Installing uv ..."
pip install uv
fi
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/.."
# refresh uv.lock so it stays in sync with pyproject.toml
uv lock --project api
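# Note: `uv lock` rewrites a stale lockfile; to only verify it without modifying
# anything, use the check form employed by dev/update-uv:
#   uv lock --project api --check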

dify/dev/ty-check Normal file

@@ -0,0 +1,10 @@
#!/bin/bash
set -x
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
cd "$SCRIPT_DIR/.."
# run ty checks
uv run --directory api --dev \
ty check

dify/dev/update-uv Normal file

@@ -0,0 +1,22 @@
#!/bin/bash
# Update dependencies in dify/api project using uv
set -e
set -o pipefail
SCRIPT_DIR="$(dirname "$(realpath "$0")")"
REPO_ROOT="$(dirname "${SCRIPT_DIR}")"
# rely on `uv` in path
if ! command -v uv &> /dev/null; then
echo "Installing uv ..."
pip install uv
fi
cd "${REPO_ROOT}"
# refresh the lockfile, upgrading locked versions
uv lock --project api --upgrade
# check uv.lock in sync with pyproject.toml
uv lock --project api --check
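# Typical flow: run dev/update-uv, then review the lockfile changes before committing, e.g.:
#   git diff api/uv.lock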