2025-12-01 17:21:38 +08:00
parent 32fee2b8ab
commit fab8c13cb3
7511 changed files with 996300 additions and 0 deletions

View File

@@ -0,0 +1,197 @@
"""
Service-layer repository protocol for WorkflowNodeExecutionModel operations.
This module provides a protocol interface for service-layer operations on WorkflowNodeExecutionModel
that abstracts database queries currently done directly in service classes. This repository is
specifically designed for service-layer needs and is separate from the core domain repository.
The service repository handles operations that require access to database-specific fields like
tenant_id, app_id, triggered_from, etc., which are not part of the core domain model.
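Example:
A minimal usage sketch (identifiers such as "tenant-123" are illustrative; the sessionmaker
and `db.engine` setup follow the factory example used elsewhere in this commit):
```python
from sqlalchemy.orm import sessionmaker
from repositories.dify_api_repository_factory import DifyAPIRepositoryFactory
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository(session_maker)
last_run = repo.get_node_last_execution(
    tenant_id="tenant-123",
    app_id="app-456",
    workflow_id="workflow-789",
    node_id="node-1",
)
```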
"""
from collections.abc import Sequence
from datetime import datetime
from typing import Protocol
from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
from models.workflow import WorkflowNodeExecutionModel
class DifyAPIWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository, Protocol):
"""
Protocol for service-layer operations on WorkflowNodeExecutionModel.
This repository provides database access patterns specifically needed by service classes,
handling queries that involve database-specific fields and multi-tenancy concerns.
Key responsibilities:
- Manages database operations for workflow node executions
- Handles multi-tenant data isolation
- Provides batch processing capabilities
- Supports execution lifecycle management
Implementation notes:
- Returns database models directly (WorkflowNodeExecutionModel)
- Handles tenant/app filtering automatically
- Provides service-specific query patterns
- Focuses on database operations without domain logic
- Supports cleanup and maintenance operations
"""
def get_node_last_execution(
self,
tenant_id: str,
app_id: str,
workflow_id: str,
node_id: str,
) -> WorkflowNodeExecutionModel | None:
"""
Get the most recent execution for a specific node.
This method finds the latest execution of a specific node within a workflow,
ordered by creation time. Used primarily for debugging and inspection purposes.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_id: The workflow identifier
node_id: The node identifier
Returns:
The most recent WorkflowNodeExecutionModel for the node, or None if not found
"""
...
def get_executions_by_workflow_run(
self,
tenant_id: str,
app_id: str,
workflow_run_id: str,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get all node executions for a specific workflow run.
This method retrieves all node executions that belong to a specific workflow run,
ordered by index in descending order for proper trace visualization.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_run_id: The workflow run identifier
Returns:
A sequence of WorkflowNodeExecutionModel instances ordered by index (desc)
"""
...
def get_execution_by_id(
self,
execution_id: str,
tenant_id: str | None = None,
) -> WorkflowNodeExecutionModel | None:
"""
Get a workflow node execution by its ID.
This method retrieves a specific execution by its unique identifier.
Tenant filtering is optional for cases where the execution ID is globally unique.
When `tenant_id` is None, it's the caller's responsibility to ensure proper data isolation between tenants.
If the `execution_id` comes from untrusted sources (e.g., retrieved from an API request), the caller should
set `tenant_id` to prevent horizontal privilege escalation.
Args:
execution_id: The execution identifier
tenant_id: Optional tenant identifier for additional filtering
Returns:
The WorkflowNodeExecutionModel if found, or None if not found
"""
...
def delete_expired_executions(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> int:
"""
Delete workflow node executions that are older than the specified date.
This method is used for cleanup operations to remove expired executions
in batches to avoid overwhelming the database.
Args:
tenant_id: The tenant identifier
before_date: Delete executions created before this date
batch_size: Maximum number of executions to delete in one batch
Returns:
The number of executions deleted
"""
...
def delete_executions_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow node executions for a specific app.
This method is used when removing an app and all its related data.
Executions are deleted in batches to avoid overwhelming the database.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
batch_size: Maximum number of executions to delete in one batch
Returns:
The total number of executions deleted
"""
...
def get_expired_executions_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get a batch of expired workflow node executions for backup purposes.
This method retrieves expired executions without deleting them,
allowing the caller to backup the data before deletion.
Args:
tenant_id: The tenant identifier
before_date: Get executions created before this date
batch_size: Maximum number of executions to retrieve
Returns:
A sequence of WorkflowNodeExecutionModel instances
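Example:
A minimal backup-then-delete sketch; `repo`, `cutoff`, and `backup_to_storage` are
illustrative placeholders supplied by the caller:
```python
while True:
    batch = repo.get_expired_executions_batch(
        tenant_id="tenant-123",
        before_date=cutoff,
        batch_size=1000,
    )
    if not batch:
        break
    backup_to_storage(batch)  # back up the records before hard deletion
    repo.delete_executions_by_ids([e.id for e in batch])
```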
"""
...
def delete_executions_by_ids(
self,
execution_ids: Sequence[str],
) -> int:
"""
Delete workflow node executions by their IDs.
This method deletes specific executions by their IDs,
typically used after backing up the data.
This method does not perform tenant isolation checks. The caller is responsible for ensuring proper
data isolation between tenants. When execution IDs come from untrusted sources (e.g., API requests),
additional tenant validation should be implemented to prevent unauthorized access.
Args:
execution_ids: List of execution IDs to delete
Returns:
The number of executions deleted
"""
...

View File

@@ -0,0 +1,479 @@
"""
API WorkflowRun Repository Protocol
This module defines the protocol for service-layer WorkflowRun operations.
The repository provides an abstraction layer for WorkflowRun database operations
used by service classes, separating service-layer concerns from core domain logic.
Key Features:
- Paginated workflow run queries with filtering
- Bulk deletion operations with OSS backup support
- Multi-tenant data isolation
- Expired record cleanup with data retention
- Service-layer specific query patterns
Usage:
This protocol should be used by service classes that need to perform
WorkflowRun database operations. It provides a clean interface that
hides implementation details and supports dependency injection.
Example:
```python
from repositories.dify_api_repository_factory import DifyAPIRepositoryFactory
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
# Get paginated workflow runs
runs = repo.get_paginated_workflow_runs(
tenant_id="tenant-123",
app_id="app-456",
triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
limit=20
)
```
"""
from collections.abc import Sequence
from datetime import datetime
from typing import Protocol
from core.workflow.entities.workflow_pause import WorkflowPauseEntity
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import WorkflowRun
from repositories.types import (
AverageInteractionStats,
DailyRunsStats,
DailyTerminalsStats,
DailyTokenCostStats,
)
class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol):
"""
Protocol for service-layer WorkflowRun repository operations.
This protocol defines the interface for WorkflowRun database operations
that are specific to service-layer needs, including pagination, filtering,
and bulk operations with data backup support.
"""
def get_paginated_workflow_runs(
self,
tenant_id: str,
app_id: str,
triggered_from: WorkflowRunTriggeredFrom | Sequence[WorkflowRunTriggeredFrom],
limit: int = 20,
last_id: str | None = None,
status: str | None = None,
) -> InfiniteScrollPagination:
"""
Get paginated workflow runs with filtering.
Retrieves workflow runs for a specific app and trigger source with
cursor-based pagination support. Used primarily for debugging and
workflow run listing in the UI.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source(s) (e.g., "debugging", "app-run", or list of values)
limit: Maximum number of records to return (default: 20)
last_id: Cursor for pagination - ID of the last record from previous page
status: Optional filter by status (e.g., "running", "succeeded", "failed")
Returns:
InfiniteScrollPagination object containing:
- data: List of WorkflowRun objects
- limit: Applied limit
- has_more: Boolean indicating if more records exist
Raises:
ValueError: If last_id is provided but the corresponding record doesn't exist
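Example:
A cursor-based pagination sketch (identifiers are illustrative):
```python
page = repo.get_paginated_workflow_runs(
    tenant_id="tenant-123",
    app_id="app-456",
    triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
    limit=20,
)
while page.has_more:
    page = repo.get_paginated_workflow_runs(
        tenant_id="tenant-123",
        app_id="app-456",
        triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
        limit=20,
        last_id=page.data[-1].id,
    )
```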
"""
...
def get_workflow_run_by_id(
self,
tenant_id: str,
app_id: str,
run_id: str,
) -> WorkflowRun | None:
"""
Get a specific workflow run by ID.
Retrieves a single workflow run with tenant and app isolation.
Used for workflow run detail views and execution tracking.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
run_id: Workflow run identifier
Returns:
WorkflowRun object if found, None otherwise
"""
...
def get_workflow_run_by_id_without_tenant(
self,
run_id: str,
) -> WorkflowRun | None:
"""
Get a specific workflow run by ID without tenant/app context.
Retrieves a single workflow run using only the run ID, without
requiring tenant_id or app_id. This method is intended for internal
system operations like tracing and monitoring where the tenant context
is not available upfront.
Args:
run_id: Workflow run identifier
Returns:
WorkflowRun object if found, None otherwise
Note:
This method bypasses tenant isolation checks and should only be used
in trusted system contexts like ops trace collection. For user-facing
operations, use get_workflow_run_by_id() with proper tenant isolation.
"""
...
def get_workflow_runs_count(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
status: str | None = None,
time_range: str | None = None,
) -> dict[str, int]:
"""
Get workflow runs count statistics.
Retrieves total count and count by status for workflow runs
matching the specified filters.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "debugging", "app-run")
status: Optional filter by specific status
time_range: Optional time range filter (e.g., "7d", "4h", "30m", "30s")
Filters records based on created_at field
Returns:
Dictionary containing:
- total: Total count of all workflow runs (or filtered by status)
- running: Count of workflow runs with status "running"
- succeeded: Count of workflow runs with status "succeeded"
- failed: Count of workflow runs with status "failed"
- stopped: Count of workflow runs with status "stopped"
- partial-succeeded: Count of workflow runs with status "partial-succeeded"
Note: If a status is provided, 'total' will be the count for that status,
and the specific status count will also be set to this value, with all
other status counts being 0.
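Example:
An illustrative return value when no status filter is given:
```python
{
    "total": 42,
    "running": 2,
    "succeeded": 35,
    "failed": 3,
    "stopped": 1,
    "partial-succeeded": 1,
}
```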
"""
...
def get_expired_runs_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowRun]:
"""
Get a batch of expired workflow runs for cleanup.
Retrieves workflow runs created before the specified date for
cleanup operations. Used by scheduled tasks to remove old data
while maintaining data retention policies.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
before_date: Only return runs created before this date
batch_size: Maximum number of records to return
Returns:
Sequence of WorkflowRun objects to be processed for cleanup
"""
...
def delete_runs_by_ids(
self,
run_ids: Sequence[str],
) -> int:
"""
Delete workflow runs by their IDs.
Performs bulk deletion of workflow runs by ID. This method should
be used after backing up the data to OSS storage for retention.
Args:
run_ids: Sequence of workflow run IDs to delete
Returns:
Number of records actually deleted
Note:
This method performs hard deletion. Ensure data is backed up
to OSS storage before calling this method for compliance with
data retention policies.
"""
...
def delete_runs_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow runs for a specific app.
Performs bulk deletion of all workflow runs associated with an app.
Used during app cleanup operations. Processes records in batches
to avoid memory issues and long-running transactions.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
batch_size: Number of records to process in each batch
Returns:
Total number of records deleted across all batches
Note:
This method performs hard deletion without backup. Use with caution
and ensure proper data retention policies are followed.
"""
...
def create_workflow_pause(
self,
workflow_run_id: str,
state_owner_user_id: str,
state: str,
) -> WorkflowPauseEntity:
"""
Create a new workflow pause state.
Creates a pause state for a workflow run, storing the current execution
state and marking the workflow as paused. This is used when a workflow
needs to be suspended and later resumed.
Args:
workflow_run_id: Identifier of the workflow run to pause
state_owner_user_id: User ID who owns the pause state for file storage
state: Serialized workflow execution state (JSON string)
Returns:
WorkflowPauseEntity representing the created pause state
Raises:
ValueError: If workflow_run_id is invalid or workflow run doesn't exist
RuntimeError: If workflow is already paused or in invalid state
"""
# NOTE: we may remove `state_owner_user_id` from the parameter list.
# However, removing it would require an extra query for the `Workflow` model
# while creating a pause.
...
def resume_workflow_pause(
self,
workflow_run_id: str,
pause_entity: WorkflowPauseEntity,
) -> WorkflowPauseEntity:
"""
Resume a paused workflow.
Marks a paused workflow as resumed, sets the `resumed_at` field of the pause record,
and returns the workflow to running status. Returns the pause entity
that was resumed, with `resumed_at` set.
NOTE: this method does not delete the corresponding `WorkflowPauseEntity` record or its associated state.
It is the caller's responsibility to clear that state with `delete_workflow_pause`.
Args:
workflow_run_id: Identifier of the workflow run to resume
pause_entity: The pause entity to resume
Returns:
WorkflowPauseEntity representing the resumed pause state
Raises:
ValueError: If workflow_run_id is invalid
RuntimeError: If workflow is not paused or already resumed
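Example:
A minimal pause/resume lifecycle sketch (identifiers and `serialized_state` are illustrative);
note the explicit cleanup call, since resuming does not delete the pause record:
```python
pause = repo.create_workflow_pause(
    workflow_run_id="run-123",
    state_owner_user_id="user-456",
    state=serialized_state,  # JSON string produced by the caller
)
resumed = repo.resume_workflow_pause(workflow_run_id="run-123", pause_entity=pause)
repo.delete_workflow_pause(resumed)
```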
"""
...
def delete_workflow_pause(
self,
pause_entity: WorkflowPauseEntity,
) -> None:
"""
Delete a workflow pause state.
Permanently removes the pause state for a workflow run, including
the stored state file. Used for cleanup operations when a paused
workflow is no longer needed.
Args:
pause_entity: The pause entity to delete
Raises:
ValueError: If pause_entity is invalid
RuntimeError: If workflow is not paused
Note:
This operation is irreversible. The stored workflow state will be
permanently deleted along with the pause record.
"""
...
def prune_pauses(
self,
expiration: datetime,
resumption_expiration: datetime,
limit: int | None = None,
) -> Sequence[str]:
"""
Clean up expired and old pause states.
Removes pause states that have expired (created before `expiration`)
and pause states that were resumed before `resumption_expiration`.
This is used for maintenance and cleanup operations.
Args:
expiration: Remove pause states created before this time
resumption_expiration: Remove pause states resumed before this time
limit: Maximum number of records to delete in one call
Returns:
A list of IDs of the pause records that were pruned
Raises:
ValueError: If parameters are invalid
"""
...
def get_daily_runs_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyRunsStats]:
"""
Get daily runs statistics.
Retrieves daily workflow runs count grouped by date for a specific app
and trigger source. Used for workflow statistics dashboard.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "app-run")
start_date: Optional start date filter
end_date: Optional end date filter
timezone: Timezone for date grouping (default: "UTC")
Returns:
List of dictionaries containing date and runs count:
[{"date": "2024-01-01", "runs": 10}, ...]
"""
...
def get_daily_terminals_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyTerminalsStats]:
"""
Get daily terminals statistics.
Retrieves daily unique terminal count grouped by date for a specific app
and trigger source. Used for workflow statistics dashboard.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "app-run")
start_date: Optional start date filter
end_date: Optional end date filter
timezone: Timezone for date grouping (default: "UTC")
Returns:
List of dictionaries containing date and terminal count:
[{"date": "2024-01-01", "terminal_count": 5}, ...]
"""
...
def get_daily_token_cost_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyTokenCostStats]:
"""
Get daily token cost statistics.
Retrieves daily total token count grouped by date for a specific app
and trigger source. Used for workflow statistics dashboard.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "app-run")
start_date: Optional start date filter
end_date: Optional end date filter
timezone: Timezone for date grouping (default: "UTC")
Returns:
List of dictionaries containing date and token count:
[{"date": "2024-01-01", "token_count": 1000}, ...]
"""
...
def get_average_app_interaction_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[AverageInteractionStats]:
"""
Get average app interaction statistics.
Retrieves daily average interactions per user grouped by date for a specific app
and trigger source. Used for workflow statistics dashboard.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "app-run")
start_date: Optional start date filter
end_date: Optional end date filter
timezone: Timezone for date grouping (default: "UTC")
Returns:
List of dictionaries containing date and average interactions:
[{"date": "2024-01-01", "interactions": 2.5}, ...]
"""
...

View File

@@ -0,0 +1,82 @@
"""
DifyAPI Repository Factory for creating repository instances.
This factory is specifically designed for DifyAPI repositories that handle
service-layer operations with dependency injection patterns.
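Example:
A minimal usage sketch (`db.engine` refers to the application's SQLAlchemy engine):
```python
from sqlalchemy.orm import sessionmaker
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
node_execution_repo = DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository(session_maker)
workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
```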
"""
from sqlalchemy.orm import Session, sessionmaker
from configs import dify_config
from core.repositories import DifyCoreRepositoryFactory, RepositoryImportError
from libs.module_loading import import_string
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory):
"""
Factory for creating DifyAPI repository instances based on configuration.
This factory handles the creation of repositories that are specifically designed
for service-layer operations and use dependency injection with sessionmaker
for better testability and separation of concerns.
"""
@classmethod
def create_api_workflow_node_execution_repository(
cls, session_maker: sessionmaker[Session]
) -> DifyAPIWorkflowNodeExecutionRepository:
"""
Create a DifyAPIWorkflowNodeExecutionRepository instance based on configuration.
This repository is designed for service-layer operations and uses dependency injection
with a sessionmaker for better testability and separation of concerns. It provides
database access patterns specifically needed by service classes, handling queries
that involve database-specific fields and multi-tenancy concerns.
Args:
session_maker: SQLAlchemy sessionmaker to inject for database session management.
Returns:
Configured DifyAPIWorkflowNodeExecutionRepository instance
Raises:
RepositoryImportError: If the configured repository cannot be imported or instantiated
"""
class_path = dify_config.API_WORKFLOW_NODE_EXECUTION_REPOSITORY
try:
repository_class = import_string(class_path)
return repository_class(session_maker=session_maker)
except Exception as e:
raise RepositoryImportError(
f"Failed to create DifyAPIWorkflowNodeExecutionRepository from '{class_path}': {e}"
) from e
@classmethod
def create_api_workflow_run_repository(cls, session_maker: sessionmaker[Session]) -> APIWorkflowRunRepository:
"""
Create an APIWorkflowRunRepository instance based on configuration.
This repository is designed for service-layer WorkflowRun operations and uses dependency
injection with a sessionmaker for better testability and separation of concerns. It provides
database access patterns specifically needed by service classes for workflow run management,
including pagination, filtering, and bulk operations.
Args:
session_maker: SQLAlchemy sessionmaker to inject for database session management.
Returns:
Configured APIWorkflowRunRepository instance
Raises:
RepositoryImportError: If the configured repository cannot be imported or instantiated
"""
class_path = dify_config.API_WORKFLOW_RUN_REPOSITORY
try:
repository_class = import_string(class_path)
return repository_class(session_maker=session_maker)
except Exception as e:
raise RepositoryImportError(f"Failed to create APIWorkflowRunRepository from '{class_path}': {e}") from e

View File

@@ -0,0 +1,292 @@
"""
SQLAlchemy implementation of DifyAPIWorkflowNodeExecutionRepository.
This module provides a concrete implementation of the service repository protocol
using SQLAlchemy 2.0 style queries for WorkflowNodeExecutionModel operations.
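Example:
A minimal construction sketch (the factory in this commit performs the same injection;
identifiers are illustrative):
```python
from sqlalchemy.orm import sessionmaker
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = DifyAPISQLAlchemyWorkflowNodeExecutionRepository(session_maker=session_maker)
executions = repo.get_executions_by_workflow_run(
    tenant_id="tenant-123",
    app_id="app-456",
    workflow_run_id="run-789",
)
```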
"""
from collections.abc import Sequence
from datetime import datetime
from typing import cast
from sqlalchemy import asc, delete, desc, select
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session, sessionmaker
from models.workflow import WorkflowNodeExecutionModel
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecutionRepository):
"""
SQLAlchemy implementation of DifyAPIWorkflowNodeExecutionRepository.
This repository provides service-layer database operations for WorkflowNodeExecutionModel
using SQLAlchemy 2.0 style queries. It implements the DifyAPIWorkflowNodeExecutionRepository
protocol with the following features:
- Multi-tenancy data isolation through tenant_id filtering
- Direct database model operations without domain conversion
- Batch processing for efficient large-scale operations
- Optimized query patterns for common access patterns
- Dependency injection for better testability and maintainability
- Session management and transaction handling with proper cleanup
- Maintenance operations for data lifecycle management
- Thread-safe database operations using session-per-request pattern
"""
def __init__(self, session_maker: sessionmaker[Session]):
"""
Initialize the repository with a sessionmaker.
Args:
session_maker: SQLAlchemy sessionmaker for creating database sessions
"""
self._session_maker = session_maker
def get_node_last_execution(
self,
tenant_id: str,
app_id: str,
workflow_id: str,
node_id: str,
) -> WorkflowNodeExecutionModel | None:
"""
Get the most recent execution for a specific node.
This method replicates the query pattern from WorkflowService.get_node_last_run()
using SQLAlchemy 2.0 style syntax.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_id: The workflow identifier
node_id: The node identifier
Returns:
The most recent WorkflowNodeExecutionModel for the node, or None if not found.
The returned WorkflowNodeExecutionModel will have `offload_data` preloaded.
"""
stmt = select(WorkflowNodeExecutionModel)
stmt = WorkflowNodeExecutionModel.preload_offload_data(stmt)
stmt = (
stmt.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.app_id == app_id,
WorkflowNodeExecutionModel.workflow_id == workflow_id,
WorkflowNodeExecutionModel.node_id == node_id,
)
.order_by(desc(WorkflowNodeExecutionModel.created_at))
.limit(1)
)
with self._session_maker() as session:
return session.scalar(stmt)
def get_executions_by_workflow_run(
self,
tenant_id: str,
app_id: str,
workflow_run_id: str,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get all node executions for a specific workflow run.
This method replicates the query pattern from WorkflowRunService.get_workflow_run_node_executions()
using SQLAlchemy 2.0 style syntax.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_run_id: The workflow run identifier
Returns:
A sequence of WorkflowNodeExecutionModel instances ordered by created_at (ascending)
"""
stmt = WorkflowNodeExecutionModel.preload_offload_data(select(WorkflowNodeExecutionModel))
stmt = stmt.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.app_id == app_id,
WorkflowNodeExecutionModel.workflow_run_id == workflow_run_id,
).order_by(asc(WorkflowNodeExecutionModel.created_at))
with self._session_maker() as session:
return session.execute(stmt).scalars().all()
def get_execution_by_id(
self,
execution_id: str,
tenant_id: str | None = None,
) -> WorkflowNodeExecutionModel | None:
"""
Get a workflow node execution by its ID.
This method replicates the query pattern from WorkflowDraftVariableService
and WorkflowService.single_step_run_workflow_node() using SQLAlchemy 2.0 style syntax.
When `tenant_id` is None, it's the caller's responsibility to ensure proper data isolation between tenants.
If the `execution_id` comes from untrusted sources (e.g., retrieved from an API request), the caller should
set `tenant_id` to prevent horizontal privilege escalation.
Args:
execution_id: The execution identifier
tenant_id: Optional tenant identifier for additional filtering
Returns:
The WorkflowNodeExecutionModel if found, or None if not found
"""
stmt = WorkflowNodeExecutionModel.preload_offload_data(select(WorkflowNodeExecutionModel))
stmt = stmt.where(WorkflowNodeExecutionModel.id == execution_id)
# Add tenant filtering if provided
if tenant_id is not None:
stmt = stmt.where(WorkflowNodeExecutionModel.tenant_id == tenant_id)
with self._session_maker() as session:
return session.scalar(stmt)
def delete_expired_executions(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> int:
"""
Delete workflow node executions that are older than the specified date.
Args:
tenant_id: The tenant identifier
before_date: Delete executions created before this date
batch_size: Maximum number of executions to delete in one batch
Returns:
The number of executions deleted
"""
total_deleted = 0
while True:
with self._session_maker() as session:
# Find executions to delete in batches
stmt = (
select(WorkflowNodeExecutionModel.id)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.created_at < before_date,
)
.limit(batch_size)
)
execution_ids = session.execute(stmt).scalars().all()
if not execution_ids:
break
# Delete the batch
delete_stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
result = cast(CursorResult, session.execute(delete_stmt))
session.commit()
total_deleted += result.rowcount
# If we deleted fewer than the batch size, we're done
if len(execution_ids) < batch_size:
break
return total_deleted
def delete_executions_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow node executions for a specific app.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
batch_size: Maximum number of executions to delete in one batch
Returns:
The total number of executions deleted
"""
total_deleted = 0
while True:
with self._session_maker() as session:
# Find executions to delete in batches
stmt = (
select(WorkflowNodeExecutionModel.id)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.app_id == app_id,
)
.limit(batch_size)
)
execution_ids = session.execute(stmt).scalars().all()
if not execution_ids:
break
# Delete the batch
delete_stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
result = cast(CursorResult, session.execute(delete_stmt))
session.commit()
total_deleted += result.rowcount
# If we deleted fewer than the batch size, we're done
if len(execution_ids) < batch_size:
break
return total_deleted
def get_expired_executions_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get a batch of expired workflow node executions for backup purposes.
Args:
tenant_id: The tenant identifier
before_date: Get executions created before this date
batch_size: Maximum number of executions to retrieve
Returns:
A sequence of WorkflowNodeExecutionModel instances
"""
stmt = (
select(WorkflowNodeExecutionModel)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.created_at < before_date,
)
.limit(batch_size)
)
with self._session_maker() as session:
return session.execute(stmt).scalars().all()
def delete_executions_by_ids(
self,
execution_ids: Sequence[str],
) -> int:
"""
Delete workflow node executions by their IDs.
Args:
execution_ids: List of execution IDs to delete
Returns:
The number of executions deleted
"""
if not execution_ids:
return 0
with self._session_maker() as session:
stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
result = cast(CursorResult, session.execute(stmt))
session.commit()
return result.rowcount

View File

@@ -0,0 +1,869 @@
"""
SQLAlchemy API WorkflowRun Repository Implementation
This module provides the SQLAlchemy-based implementation of the APIWorkflowRunRepository
protocol. It handles service-layer WorkflowRun database operations using SQLAlchemy 2.0
style queries with proper session management and multi-tenant data isolation.
Key Features:
- SQLAlchemy 2.0 style queries for modern database operations
- Cursor-based pagination for efficient large dataset handling
- Bulk operations with batch processing for performance
- Multi-tenant data isolation and security
- Proper session management with dependency injection
Implementation Notes:
- Uses sessionmaker for consistent session management
- Implements cursor-based pagination using created_at timestamps
- Provides efficient bulk deletion with batch processing
- Maintains data consistency with proper transaction handling
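Example:
A minimal construction sketch (the factory in this commit performs the same injection;
identifiers are illustrative):
```python
from sqlalchemy.orm import sessionmaker
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = DifyAPISQLAlchemyWorkflowRunRepository(session_maker=session_maker)
run = repo.get_workflow_run_by_id(
    tenant_id="tenant-123",
    app_id="app-456",
    run_id="run-789",
)
```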
"""
import logging
import uuid
from collections.abc import Sequence
from datetime import datetime
from decimal import Decimal
from typing import Any, cast
import sqlalchemy as sa
from sqlalchemy import and_, delete, func, null, or_, select
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session, selectinload, sessionmaker
from core.workflow.entities.workflow_pause import WorkflowPauseEntity
from core.workflow.enums import WorkflowExecutionStatus
from extensions.ext_storage import storage
from libs.datetime_utils import naive_utc_now
from libs.helper import convert_datetime_to_date
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from libs.time_parser import get_time_threshold
from libs.uuid_utils import uuidv7
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import WorkflowPause as WorkflowPauseModel
from models.workflow import WorkflowRun
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
from repositories.types import (
AverageInteractionStats,
DailyRunsStats,
DailyTerminalsStats,
DailyTokenCostStats,
)
logger = logging.getLogger(__name__)
class _WorkflowRunError(Exception):
pass
class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):
"""
SQLAlchemy implementation of APIWorkflowRunRepository.
Provides service-layer WorkflowRun database operations using SQLAlchemy 2.0
style queries. Supports dependency injection through sessionmaker and
maintains proper multi-tenant data isolation.
Args:
session_maker: SQLAlchemy sessionmaker instance for database connections
"""
def __init__(self, session_maker: sessionmaker[Session]):
"""
Initialize the repository with a sessionmaker.
Args:
session_maker: SQLAlchemy sessionmaker for database connections
"""
self._session_maker = session_maker
def get_paginated_workflow_runs(
self,
tenant_id: str,
app_id: str,
triggered_from: WorkflowRunTriggeredFrom | Sequence[WorkflowRunTriggeredFrom],
limit: int = 20,
last_id: str | None = None,
status: str | None = None,
) -> InfiniteScrollPagination:
"""
Get paginated workflow runs with filtering.
Implements cursor-based pagination using created_at timestamps for
efficient handling of large datasets. Filters by tenant, app, and
trigger source for proper data isolation.
"""
with self._session_maker() as session:
# Build base query with filters
base_stmt = select(WorkflowRun).where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
)
# Handle triggered_from values
if isinstance(triggered_from, WorkflowRunTriggeredFrom):
triggered_from = [triggered_from]
if triggered_from:
base_stmt = base_stmt.where(WorkflowRun.triggered_from.in_(triggered_from))
# Add optional status filter
if status:
base_stmt = base_stmt.where(WorkflowRun.status == status)
if last_id:
# Get the last workflow run for cursor-based pagination
last_run_stmt = base_stmt.where(WorkflowRun.id == last_id)
last_workflow_run = session.scalar(last_run_stmt)
if not last_workflow_run:
raise ValueError("Last workflow run does not exist")
# Get records created before the last run's timestamp
base_stmt = base_stmt.where(
WorkflowRun.created_at < last_workflow_run.created_at,
WorkflowRun.id != last_workflow_run.id,
)
# First page - get most recent records
workflow_runs = session.scalars(base_stmt.order_by(WorkflowRun.created_at.desc()).limit(limit + 1)).all()
# Check if there are more records for pagination
has_more = len(workflow_runs) > limit
if has_more:
workflow_runs = workflow_runs[:-1]
return InfiniteScrollPagination(data=workflow_runs, limit=limit, has_more=has_more)
def get_workflow_run_by_id(
self,
tenant_id: str,
app_id: str,
run_id: str,
) -> WorkflowRun | None:
"""
Get a specific workflow run by ID with tenant and app isolation.
"""
with self._session_maker() as session:
stmt = select(WorkflowRun).where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
WorkflowRun.id == run_id,
)
return session.scalar(stmt)
def get_workflow_run_by_id_without_tenant(
self,
run_id: str,
) -> WorkflowRun | None:
"""
Get a specific workflow run by ID without tenant/app context.
"""
with self._session_maker() as session:
stmt = select(WorkflowRun).where(WorkflowRun.id == run_id)
return session.scalar(stmt)
def get_workflow_runs_count(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
status: str | None = None,
time_range: str | None = None,
) -> dict[str, int]:
"""
Get workflow runs count statistics grouped by status.
"""
_initial_status_counts = {
"running": 0,
"succeeded": 0,
"failed": 0,
"stopped": 0,
"partial-succeeded": 0,
}
with self._session_maker() as session:
# Build base where conditions
base_conditions = [
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
WorkflowRun.triggered_from == triggered_from,
]
# Add time range filter if provided
if time_range:
time_threshold = get_time_threshold(time_range)
if time_threshold:
base_conditions.append(WorkflowRun.created_at >= time_threshold)
# If status filter is provided, return simple count
if status:
count_stmt = select(func.count(WorkflowRun.id)).where(*base_conditions, WorkflowRun.status == status)
total = session.scalar(count_stmt) or 0
result = {"total": total} | _initial_status_counts
# Set the count for the filtered status
if status in result:
result[status] = total
return result
# No status filter - get counts grouped by status
base_stmt = (
select(WorkflowRun.status, func.count(WorkflowRun.id).label("count"))
.where(*base_conditions)
.group_by(WorkflowRun.status)
)
# Execute query
results = session.execute(base_stmt).all()
# Build response dictionary
status_counts = _initial_status_counts.copy()
total = 0
for status_val, count in results:
total += count
if status_val in status_counts:
status_counts[status_val] = count
return {"total": total} | status_counts
def get_expired_runs_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowRun]:
"""
Get a batch of expired workflow runs for cleanup operations.
"""
with self._session_maker() as session:
stmt = (
select(WorkflowRun)
.where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.created_at < before_date,
)
.limit(batch_size)
)
return session.scalars(stmt).all()
def delete_runs_by_ids(
self,
run_ids: Sequence[str],
) -> int:
"""
Delete workflow runs by their IDs using bulk deletion.
"""
if not run_ids:
return 0
with self._session_maker() as session:
stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
result = cast(CursorResult, session.execute(stmt))
session.commit()
deleted_count = result.rowcount
logger.info("Deleted %s workflow runs by IDs", deleted_count)
return deleted_count
def delete_runs_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow runs for a specific app in batches.
"""
total_deleted = 0
while True:
with self._session_maker() as session:
# Get a batch of run IDs to delete
stmt = (
select(WorkflowRun.id)
.where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
)
.limit(batch_size)
)
run_ids = session.scalars(stmt).all()
if not run_ids:
break
# Delete the batch
delete_stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
result = cast(CursorResult, session.execute(delete_stmt))
session.commit()
batch_deleted = result.rowcount
total_deleted += batch_deleted
logger.info("Deleted batch of %s workflow runs for app %s", batch_deleted, app_id)
# If we deleted fewer records than the batch size, we're done
if batch_deleted < batch_size:
break
logger.info("Total deleted %s workflow runs for app %s", total_deleted, app_id)
return total_deleted
def create_workflow_pause(
self,
workflow_run_id: str,
state_owner_user_id: str,
state: str,
) -> WorkflowPauseEntity:
"""
Create a new workflow pause state.
Creates a pause state for a workflow run, storing the current execution
state and marking the workflow as paused. This is used when a workflow
needs to be suspended and later resumed.
Args:
workflow_run_id: Identifier of the workflow run to pause
state_owner_user_id: User ID who owns the pause state for file storage
state: Serialized workflow execution state (JSON string)
Returns:
WorkflowPauseEntity representing the created pause state
Raises:
ValueError: If workflow_run_id is invalid or workflow run doesn't exist
RuntimeError: If workflow is already paused or in invalid state
"""
previous_pause_model_query = select(WorkflowPauseModel).where(
WorkflowPauseModel.workflow_run_id == workflow_run_id
)
with self._session_maker() as session, session.begin():
# Get the workflow run
workflow_run = session.get(WorkflowRun, workflow_run_id)
if workflow_run is None:
raise ValueError(f"WorkflowRun not found: {workflow_run_id}")
# Check if workflow is in RUNNING status
if workflow_run.status != WorkflowExecutionStatus.RUNNING:
raise _WorkflowRunError(
f"Only WorkflowRun with RUNNING status can be paused, "
f"workflow_run_id={workflow_run_id}, current_status={workflow_run.status}"
)
# Remove any existing pause record for this run before creating a new one
previous_pause = session.scalars(previous_pause_model_query).first()
if previous_pause:
self._delete_pause_model(session, previous_pause)
# we need to flush here to ensure that the old one is actually deleted.
session.flush()
# Upload the state file
state_obj_key = f"workflow-state-{uuid.uuid4()}.json"
storage.save(state_obj_key, state.encode())
# Create the pause record
pause_model = WorkflowPauseModel()
pause_model.id = str(uuidv7())
pause_model.workflow_id = workflow_run.workflow_id
pause_model.workflow_run_id = workflow_run.id
pause_model.state_object_key = state_obj_key
pause_model.created_at = naive_utc_now()
# Update workflow run status
workflow_run.status = WorkflowExecutionStatus.PAUSED
# Save everything in a transaction
session.add(pause_model)
session.add(workflow_run)
logger.info("Created workflow pause %s for workflow run %s", pause_model.id, workflow_run_id)
return _PrivateWorkflowPauseEntity.from_models(pause_model)
def get_workflow_pause(
self,
workflow_run_id: str,
) -> WorkflowPauseEntity | None:
"""
Get an existing workflow pause state.
Retrieves the pause state for a specific workflow run if it exists.
Used to check if a workflow is paused and to retrieve its saved state.
Args:
workflow_run_id: Identifier of the workflow run to get pause state for
Returns:
WorkflowPauseEntity if pause state exists, None otherwise
Raises:
ValueError: If workflow_run_id is invalid
"""
with self._session_maker() as session:
# Query workflow run with pause and state file
stmt = select(WorkflowRun).options(selectinload(WorkflowRun.pause)).where(WorkflowRun.id == workflow_run_id)
workflow_run = session.scalar(stmt)
if workflow_run is None:
raise ValueError(f"WorkflowRun not found: {workflow_run_id}")
pause_model = workflow_run.pause
if pause_model is None:
return None
return _PrivateWorkflowPauseEntity.from_models(pause_model)
def resume_workflow_pause(
self,
workflow_run_id: str,
pause_entity: WorkflowPauseEntity,
) -> WorkflowPauseEntity:
"""
Resume a paused workflow.
Marks a paused workflow as resumed, setting `resumed_at` on the pause record and
returning the workflow to running status. Returns the pause entity
that was resumed; the pause record itself is not deleted (see delete_workflow_pause).
Args:
workflow_run_id: Identifier of the workflow run to resume
pause_entity: The pause entity to resume
Returns:
WorkflowPauseEntity representing the resumed pause state
Raises:
ValueError: If workflow_run_id is invalid
RuntimeError: If workflow is not paused or already resumed
"""
with self._session_maker() as session, session.begin():
# Get the workflow run with pause
stmt = select(WorkflowRun).options(selectinload(WorkflowRun.pause)).where(WorkflowRun.id == workflow_run_id)
workflow_run = session.scalar(stmt)
if workflow_run is None:
raise ValueError(f"WorkflowRun not found: {workflow_run_id}")
if workflow_run.status != WorkflowExecutionStatus.PAUSED:
raise _WorkflowRunError(
f"WorkflowRun is not in PAUSED status, workflow_run_id={workflow_run_id}, "
f"current_status={workflow_run.status}"
)
pause_model = workflow_run.pause
if pause_model is None:
raise _WorkflowRunError(f"No pause state found for workflow run: {workflow_run_id}")
if pause_model.id != pause_entity.id:
raise _WorkflowRunError(
"different id in WorkflowPause and WorkflowPauseEntity, "
f"WorkflowPause.id={pause_model.id}, "
f"WorkflowPauseEntity.id={pause_entity.id}"
)
if pause_model.resumed_at is not None:
raise _WorkflowRunError(f"Cannot resume an already resumed pause, pause_id={pause_model.id}")
# Mark as resumed
pause_model.resumed_at = naive_utc_now()
workflow_run.pause_id = None # type: ignore
workflow_run.status = WorkflowExecutionStatus.RUNNING
session.add(pause_model)
session.add(workflow_run)
logger.info("Resumed workflow pause %s for workflow run %s", pause_model.id, workflow_run_id)
return _PrivateWorkflowPauseEntity.from_models(pause_model)
def delete_workflow_pause(
self,
pause_entity: WorkflowPauseEntity,
) -> None:
"""
Delete a workflow pause state.
Permanently removes the pause state for a workflow run, including
the stored state file. Used for cleanup operations when a paused
workflow is no longer needed.
Args:
pause_entity: The pause entity to delete
Raises:
ValueError: If pause_entity is invalid
_WorkflowRunError: If workflow is not paused
Note:
This operation is irreversible. The stored workflow state will be
permanently deleted along with the pause record.
"""
with self._session_maker() as session, session.begin():
# Get the pause model by ID
pause_model = session.get(WorkflowPauseModel, pause_entity.id)
if pause_model is None:
raise _WorkflowRunError(f"WorkflowPause not found: {pause_entity.id}")
self._delete_pause_model(session, pause_model)
@staticmethod
def _delete_pause_model(session: Session, pause_model: WorkflowPauseModel):
storage.delete(pause_model.state_object_key)
# Delete the pause record
session.delete(pause_model)
logger.info("Deleted workflow pause %s for workflow run %s", pause_model.id, pause_model.workflow_run_id)
def prune_pauses(
self,
expiration: datetime,
resumption_expiration: datetime,
limit: int | None = None,
) -> Sequence[str]:
"""
Clean up expired and old pause states.
Removes pause states that have expired (created before `expiration`)
and pause states that were resumed before `resumption_expiration`.
This is used for maintenance and cleanup operations.
Args:
expiration: Remove pause states created before this time
resumption_expiration: Remove pause states resumed before this time
limit: Maximum number of records to delete in one call
Returns:
A list of IDs of the pause records that were pruned
Raises:
ValueError: If parameters are invalid
"""
_limit: int = limit or 1000
pruned_record_ids: list[str] = []
cond = or_(
WorkflowPauseModel.created_at < expiration,
and_(
WorkflowPauseModel.resumed_at.is_not(null()),
WorkflowPauseModel.resumed_at < resumption_expiration,
),
)
# Select the pause records to prune: expired pauses (created before `expiration`) and
# pauses resumed before `resumption_expiration`, up to `_limit` records per call.
stmt = select(WorkflowPauseModel).where(cond).limit(_limit)
with self._session_maker(expire_on_commit=False) as session:
# Collect the matching records along with their state object keys
pauses_to_delete = session.scalars(stmt).all()
# Delete state files from storage
for pause in pauses_to_delete:
with self._session_maker(expire_on_commit=False) as session, session.begin():
# todo: this issues a separate query for each WorkflowPauseModel record.
# consider batching this lookup.
try:
storage.delete(pause.state_object_key)
logger.info(
"Deleted state object for pause, pause_id=%s, object_key=%s",
pause.id,
pause.state_object_key,
)
except Exception:
logger.exception(
"Failed to delete state file for pause, pause_id=%s, object_key=%s",
pause.id,
pause.state_object_key,
)
continue
session.delete(pause)
pruned_record_ids.append(pause.id)
logger.info(
"workflow pause records deleted, id=%s, resumed_at=%s",
pause.id,
pause.resumed_at,
)
return pruned_record_ids
def get_daily_runs_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyRunsStats]:
"""
Get daily runs statistics using raw SQL for optimal performance.
"""
converted_created_at = convert_datetime_to_date("created_at")
sql_query = f"""SELECT
{converted_created_at} AS date,
COUNT(id) AS runs
FROM
workflow_runs
WHERE
tenant_id = :tenant_id
AND app_id = :app_id
AND triggered_from = :triggered_from"""
arg_dict: dict[str, Any] = {
"tz": timezone,
"tenant_id": tenant_id,
"app_id": app_id,
"triggered_from": triggered_from,
}
if start_date:
sql_query += " AND created_at >= :start_date"
arg_dict["start_date"] = start_date
if end_date:
sql_query += " AND created_at < :end_date"
arg_dict["end_date"] = end_date
sql_query += " GROUP BY date ORDER BY date"
response_data = []
with self._session_maker() as session:
rs = session.execute(sa.text(sql_query), arg_dict)
for row in rs:
response_data.append({"date": str(row.date), "runs": row.runs})
return cast(list[DailyRunsStats], response_data)
def get_daily_terminals_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyTerminalsStats]:
"""
Get daily terminals statistics using raw SQL for optimal performance.
"""
converted_created_at = convert_datetime_to_date("created_at")
sql_query = f"""SELECT
{converted_created_at} AS date,
COUNT(DISTINCT created_by) AS terminal_count
FROM
workflow_runs
WHERE
tenant_id = :tenant_id
AND app_id = :app_id
AND triggered_from = :triggered_from"""
arg_dict: dict[str, Any] = {
"tz": timezone,
"tenant_id": tenant_id,
"app_id": app_id,
"triggered_from": triggered_from,
}
if start_date:
sql_query += " AND created_at >= :start_date"
arg_dict["start_date"] = start_date
if end_date:
sql_query += " AND created_at < :end_date"
arg_dict["end_date"] = end_date
sql_query += " GROUP BY date ORDER BY date"
response_data = []
with self._session_maker() as session:
rs = session.execute(sa.text(sql_query), arg_dict)
for row in rs:
response_data.append({"date": str(row.date), "terminal_count": row.terminal_count})
return cast(list[DailyTerminalsStats], response_data)
def get_daily_token_cost_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyTokenCostStats]:
"""
Get daily token cost statistics using raw SQL for optimal performance.
"""
converted_created_at = convert_datetime_to_date("created_at")
sql_query = f"""SELECT
{converted_created_at} AS date,
SUM(total_tokens) AS token_count
FROM
workflow_runs
WHERE
tenant_id = :tenant_id
AND app_id = :app_id
AND triggered_from = :triggered_from"""
arg_dict: dict[str, Any] = {
"tz": timezone,
"tenant_id": tenant_id,
"app_id": app_id,
"triggered_from": triggered_from,
}
if start_date:
sql_query += " AND created_at >= :start_date"
arg_dict["start_date"] = start_date
if end_date:
sql_query += " AND created_at < :end_date"
arg_dict["end_date"] = end_date
sql_query += " GROUP BY date ORDER BY date"
response_data = []
with self._session_maker() as session:
rs = session.execute(sa.text(sql_query), arg_dict)
for row in rs:
response_data.append(
{
"date": str(row.date),
"token_count": row.token_count,
}
)
return cast(list[DailyTokenCostStats], response_data)
def get_average_app_interaction_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[AverageInteractionStats]:
"""
Get average app interaction statistics using raw SQL for optimal performance.
"""
converted_created_at = convert_datetime_to_date("c.created_at")
sql_query = f"""SELECT
AVG(sub.interactions) AS interactions,
sub.date
FROM
(
SELECT
{converted_created_at} AS date,
c.created_by,
COUNT(c.id) AS interactions
FROM
workflow_runs c
WHERE
c.tenant_id = :tenant_id
AND c.app_id = :app_id
AND c.triggered_from = :triggered_from
{{{{start}}}}
{{{{end}}}}
GROUP BY
date, c.created_by
) sub
GROUP BY
sub.date"""
arg_dict: dict[str, Any] = {
"tz": timezone,
"tenant_id": tenant_id,
"app_id": app_id,
"triggered_from": triggered_from,
}
if start_date:
sql_query = sql_query.replace("{{start}}", " AND c.created_at >= :start_date")
arg_dict["start_date"] = start_date
else:
sql_query = sql_query.replace("{{start}}", "")
if end_date:
sql_query = sql_query.replace("{{end}}", " AND c.created_at < :end_date")
arg_dict["end_date"] = end_date
else:
sql_query = sql_query.replace("{{end}}", "")
response_data = []
with self._session_maker() as session:
rs = session.execute(sa.text(sql_query), arg_dict)
for row in rs:
response_data.append(
{"date": str(row.date), "interactions": float(row.interactions.quantize(Decimal("0.01")))}
)
return cast(list[AverageInteractionStats], response_data)
class _PrivateWorkflowPauseEntity(WorkflowPauseEntity):
"""
Private implementation of WorkflowPauseEntity for SQLAlchemy repository.
This implementation is internal to the repository layer and provides
the concrete implementation of the WorkflowPauseEntity interface.
"""
def __init__(
self,
*,
pause_model: WorkflowPauseModel,
) -> None:
self._pause_model = pause_model
self._cached_state: bytes | None = None
@classmethod
def from_models(cls, workflow_pause_model: WorkflowPauseModel) -> "_PrivateWorkflowPauseEntity":
"""
Create a _PrivateWorkflowPauseEntity from a database model.
Args:
workflow_pause_model: The WorkflowPause database model
Returns:
_PrivateWorkflowPauseEntity: The constructed entity
"""
return cls(pause_model=workflow_pause_model)
@property
def id(self) -> str:
return self._pause_model.id
@property
def workflow_execution_id(self) -> str:
return self._pause_model.workflow_run_id
def get_state(self) -> bytes:
"""
Retrieve the serialized workflow state from storage.
Returns:
bytes: The serialized workflow state
Raises:
FileNotFoundError: If the state object cannot be found
IOError: If there are issues reading the state object
"""
if self._cached_state is not None:
return self._cached_state
# Load the state from storage
state_data = storage.load(self._pause_model.state_object_key)
self._cached_state = state_data
return state_data
@property
def resumed_at(self) -> datetime | None:
return self._pause_model.resumed_at

View File

@@ -0,0 +1,86 @@
"""
SQLAlchemy implementation of WorkflowTriggerLogRepository.
"""
from collections.abc import Sequence
from datetime import UTC, datetime, timedelta
from sqlalchemy import and_, select
from sqlalchemy.orm import Session
from models.enums import WorkflowTriggerStatus
from models.trigger import WorkflowTriggerLog
from repositories.workflow_trigger_log_repository import WorkflowTriggerLogRepository
class SQLAlchemyWorkflowTriggerLogRepository(WorkflowTriggerLogRepository):
"""
SQLAlchemy implementation of WorkflowTriggerLogRepository.
Optimized for large table operations with proper indexing and batch processing.
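Example:
A minimal usage sketch (`session` is an active SQLAlchemy Session managed by the caller;
identifiers are illustrative):
```python
repo = SQLAlchemyWorkflowTriggerLogRepository(session)
recent = repo.get_recent_logs(tenant_id="tenant-123", app_id="app-456", hours=24)
retryable = repo.get_failed_for_retry(tenant_id="tenant-123", max_retry_count=3)
```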
"""
def __init__(self, session: Session):
self.session = session
def create(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
"""Create a new trigger log entry."""
self.session.add(trigger_log)
self.session.flush()
return trigger_log
def update(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
"""Update an existing trigger log entry."""
# merge() returns the instance attached to this session; return that copy
merged = self.session.merge(trigger_log)
self.session.flush()
return merged
def get_by_id(self, trigger_log_id: str, tenant_id: str | None = None) -> WorkflowTriggerLog | None:
"""Get a trigger log by its ID."""
query = select(WorkflowTriggerLog).where(WorkflowTriggerLog.id == trigger_log_id)
if tenant_id:
query = query.where(WorkflowTriggerLog.tenant_id == tenant_id)
return self.session.scalar(query)
def get_failed_for_retry(
self, tenant_id: str, max_retry_count: int = 3, limit: int = 100
) -> Sequence[WorkflowTriggerLog]:
"""Get failed trigger logs eligible for retry."""
query = (
select(WorkflowTriggerLog)
.where(
and_(
WorkflowTriggerLog.tenant_id == tenant_id,
WorkflowTriggerLog.status.in_([WorkflowTriggerStatus.FAILED, WorkflowTriggerStatus.RATE_LIMITED]),
WorkflowTriggerLog.retry_count < max_retry_count,
)
)
.order_by(WorkflowTriggerLog.created_at.asc())
.limit(limit)
)
return list(self.session.scalars(query).all())
def get_recent_logs(
self, tenant_id: str, app_id: str, hours: int = 24, limit: int = 100, offset: int = 0
) -> Sequence[WorkflowTriggerLog]:
"""Get recent trigger logs within specified hours."""
since = datetime.now(UTC) - timedelta(hours=hours)
query = (
select(WorkflowTriggerLog)
.where(
and_(
WorkflowTriggerLog.tenant_id == tenant_id,
WorkflowTriggerLog.app_id == app_id,
WorkflowTriggerLog.created_at >= since,
)
)
.order_by(WorkflowTriggerLog.created_at.desc())
.limit(limit)
.offset(offset)
)
return list(self.session.scalars(query).all())

View File

@@ -0,0 +1,21 @@
from typing import TypedDict
class DailyRunsStats(TypedDict):
date: str
runs: int
class DailyTerminalsStats(TypedDict):
date: str
terminal_count: int
class DailyTokenCostStats(TypedDict):
date: str
token_count: int
class AverageInteractionStats(TypedDict):
date: str
interactions: float

View File

@@ -0,0 +1,111 @@
"""
Repository protocol for WorkflowTriggerLog operations.
This module provides a protocol interface for operations on WorkflowTriggerLog,
designed to efficiently handle a potentially large volume of trigger logs with
proper indexing and batch operations.
"""
from collections.abc import Sequence
from enum import StrEnum
from typing import Protocol
from models.trigger import WorkflowTriggerLog
class TriggerLogOrderBy(StrEnum):
"""Fields available for ordering trigger logs"""
CREATED_AT = "created_at"
TRIGGERED_AT = "triggered_at"
FINISHED_AT = "finished_at"
STATUS = "status"
class WorkflowTriggerLogRepository(Protocol):
"""
Protocol for operations on WorkflowTriggerLog.
This repository provides efficient access patterns for the trigger log table,
which is expected to grow large over time. It includes:
- Batch operations for cleanup
- Efficient queries with proper indexing
- Pagination support
- Status-based filtering
Implementation notes:
- Leverage database indexes on (tenant_id, app_id), status, and created_at
- Use batch operations for deletions to avoid locking
- Support pagination for large result sets
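Example:
A retry-loop sketch; `repo` and `redispatch_trigger` are illustrative placeholders,
and only the repository calls are part of this protocol:
```python
for log in repo.get_failed_for_retry(tenant_id="tenant-123", max_retry_count=3, limit=100):
    redispatch_trigger(log)  # illustrative re-dispatch of the failed trigger
    log.retry_count += 1
    repo.update(log)
```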
"""
def create(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
"""
Create a new trigger log entry.
Args:
trigger_log: The WorkflowTriggerLog instance to create
Returns:
The created WorkflowTriggerLog with generated ID
"""
...
def update(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
"""
Update an existing trigger log entry.
Args:
trigger_log: The WorkflowTriggerLog instance to update
Returns:
The updated WorkflowTriggerLog
"""
...
def get_by_id(self, trigger_log_id: str, tenant_id: str | None = None) -> WorkflowTriggerLog | None:
"""
Get a trigger log by its ID.
Args:
trigger_log_id: The trigger log identifier
tenant_id: Optional tenant identifier for additional security
Returns:
The WorkflowTriggerLog if found, None otherwise
"""
...
def get_failed_for_retry(
self, tenant_id: str, max_retry_count: int = 3, limit: int = 100
) -> Sequence[WorkflowTriggerLog]:
"""
Get failed trigger logs that are eligible for retry.
Args:
tenant_id: The tenant identifier
max_retry_count: Maximum retry count to consider
limit: Maximum number of results
Returns:
A sequence of WorkflowTriggerLog instances eligible for retry
"""
...
def get_recent_logs(
self, tenant_id: str, app_id: str, hours: int = 24, limit: int = 100, offset: int = 0
) -> Sequence[WorkflowTriggerLog]:
"""
Get recent trigger logs within specified hours.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
hours: Number of hours to look back
limit: Maximum number of results
offset: Number of results to skip
Returns:
A sequence of recent WorkflowTriggerLog instances
"""
...