2025-12-01 17:21:38 +08:00
parent 32fee2b8ab
commit fab8c13cb3
7511 changed files with 996300 additions and 0 deletions

View File

@@ -0,0 +1,197 @@
"""
Service-layer repository protocol for WorkflowNodeExecutionModel operations.
This module provides a protocol interface for service-layer operations on WorkflowNodeExecutionModel
that abstracts database queries currently done directly in service classes. This repository is
specifically designed for service-layer needs and is separate from the core domain repository.
The service repository handles operations that require access to database-specific fields like
tenant_id, app_id, triggered_from, etc., which are not part of the core domain model.
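Example:
A minimal usage sketch (identifiers such as "tenant-123" are illustrative; the sessionmaker
and `db.engine` setup follow the factory example used elsewhere in this commit):
```python
from sqlalchemy.orm import sessionmaker
from repositories.dify_api_repository_factory import DifyAPIRepositoryFactory
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository(session_maker)
last_run = repo.get_node_last_execution(
    tenant_id="tenant-123",
    app_id="app-456",
    workflow_id="workflow-789",
    node_id="node-1",
)
```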
"""
from collections.abc import Sequence
from datetime import datetime
from typing import Protocol
from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
from models.workflow import WorkflowNodeExecutionModel
class DifyAPIWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository, Protocol):
"""
Protocol for service-layer operations on WorkflowNodeExecutionModel.
This repository provides database access patterns specifically needed by service classes,
handling queries that involve database-specific fields and multi-tenancy concerns.
Key responsibilities:
- Manages database operations for workflow node executions
- Handles multi-tenant data isolation
- Provides batch processing capabilities
- Supports execution lifecycle management
Implementation notes:
- Returns database models directly (WorkflowNodeExecutionModel)
- Handles tenant/app filtering automatically
- Provides service-specific query patterns
- Focuses on database operations without domain logic
- Supports cleanup and maintenance operations
"""
def get_node_last_execution(
self,
tenant_id: str,
app_id: str,
workflow_id: str,
node_id: str,
) -> WorkflowNodeExecutionModel | None:
"""
Get the most recent execution for a specific node.
This method finds the latest execution of a specific node within a workflow,
ordered by creation time. Used primarily for debugging and inspection purposes.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_id: The workflow identifier
node_id: The node identifier
Returns:
The most recent WorkflowNodeExecutionModel for the node, or None if not found
"""
...
def get_executions_by_workflow_run(
self,
tenant_id: str,
app_id: str,
workflow_run_id: str,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get all node executions for a specific workflow run.
This method retrieves all node executions that belong to a specific workflow run,
ordered by index in descending order for proper trace visualization.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_run_id: The workflow run identifier
Returns:
A sequence of WorkflowNodeExecutionModel instances ordered by index (desc)
"""
...
def get_execution_by_id(
self,
execution_id: str,
tenant_id: str | None = None,
) -> WorkflowNodeExecutionModel | None:
"""
Get a workflow node execution by its ID.
This method retrieves a specific execution by its unique identifier.
Tenant filtering is optional for cases where the execution ID is globally unique.
When `tenant_id` is None, it's the caller's responsibility to ensure proper data isolation between tenants.
If the `execution_id` comes from untrusted sources (e.g., retrieved from an API request), the caller should
set `tenant_id` to prevent horizontal privilege escalation.
Args:
execution_id: The execution identifier
tenant_id: Optional tenant identifier for additional filtering
Returns:
The WorkflowNodeExecutionModel if found, or None if not found
"""
...
def delete_expired_executions(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> int:
"""
Delete workflow node executions that are older than the specified date.
This method is used for cleanup operations to remove expired executions
in batches to avoid overwhelming the database.
Args:
tenant_id: The tenant identifier
before_date: Delete executions created before this date
batch_size: Maximum number of executions to delete in one batch
Returns:
The number of executions deleted
"""
...
def delete_executions_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow node executions for a specific app.
This method is used when removing an app and all its related data.
Executions are deleted in batches to avoid overwhelming the database.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
batch_size: Maximum number of executions to delete in one batch
Returns:
The total number of executions deleted
"""
...
def get_expired_executions_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get a batch of expired workflow node executions for backup purposes.
This method retrieves expired executions without deleting them,
allowing the caller to backup the data before deletion.
Args:
tenant_id: The tenant identifier
before_date: Get executions created before this date
batch_size: Maximum number of executions to retrieve
Returns:
A sequence of WorkflowNodeExecutionModel instances
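Example:
A minimal backup-then-delete sketch; `repo`, `cutoff`, and `backup_to_storage` are
illustrative placeholders supplied by the caller:
```python
while True:
    batch = repo.get_expired_executions_batch(
        tenant_id="tenant-123",
        before_date=cutoff,
        batch_size=1000,
    )
    if not batch:
        break
    backup_to_storage(batch)  # back up the records before hard deletion
    repo.delete_executions_by_ids([e.id for e in batch])
```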
"""
...
def delete_executions_by_ids(
self,
execution_ids: Sequence[str],
) -> int:
"""
Delete workflow node executions by their IDs.
This method deletes specific executions by their IDs,
typically used after backing up the data.
This method does not perform tenant isolation checks. The caller is responsible for ensuring proper
data isolation between tenants. When execution IDs come from untrusted sources (e.g., API requests),
additional tenant validation should be implemented to prevent unauthorized access.
Args:
execution_ids: List of execution IDs to delete
Returns:
The number of executions deleted
"""
...

View File

@@ -0,0 +1,479 @@
"""
API WorkflowRun Repository Protocol
This module defines the protocol for service-layer WorkflowRun operations.
The repository provides an abstraction layer for WorkflowRun database operations
used by service classes, separating service-layer concerns from core domain logic.
Key Features:
- Paginated workflow run queries with filtering
- Bulk deletion operations with OSS backup support
- Multi-tenant data isolation
- Expired record cleanup with data retention
- Service-layer specific query patterns
Usage:
This protocol should be used by service classes that need to perform
WorkflowRun database operations. It provides a clean interface that
hides implementation details and supports dependency injection.
Example:
```python
from repositories.dify_api_repository_factory import DifyAPIRepositoryFactory
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
# Get paginated workflow runs
runs = repo.get_paginated_workflow_runs(
tenant_id="tenant-123",
app_id="app-456",
triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
limit=20
)
```
"""
from collections.abc import Sequence
from datetime import datetime
from typing import Protocol
from core.workflow.entities.workflow_pause import WorkflowPauseEntity
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import WorkflowRun
from repositories.types import (
AverageInteractionStats,
DailyRunsStats,
DailyTerminalsStats,
DailyTokenCostStats,
)
class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol):
"""
Protocol for service-layer WorkflowRun repository operations.
This protocol defines the interface for WorkflowRun database operations
that are specific to service-layer needs, including pagination, filtering,
and bulk operations with data backup support.
"""
def get_paginated_workflow_runs(
self,
tenant_id: str,
app_id: str,
triggered_from: WorkflowRunTriggeredFrom | Sequence[WorkflowRunTriggeredFrom],
limit: int = 20,
last_id: str | None = None,
status: str | None = None,
) -> InfiniteScrollPagination:
"""
Get paginated workflow runs with filtering.
Retrieves workflow runs for a specific app and trigger source with
cursor-based pagination support. Used primarily for debugging and
workflow run listing in the UI.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source(s) (e.g., "debugging", "app-run", or list of values)
limit: Maximum number of records to return (default: 20)
last_id: Cursor for pagination - ID of the last record from previous page
status: Optional filter by status (e.g., "running", "succeeded", "failed")
Returns:
InfiniteScrollPagination object containing:
- data: List of WorkflowRun objects
- limit: Applied limit
- has_more: Boolean indicating if more records exist
Raises:
ValueError: If last_id is provided but the corresponding record doesn't exist
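Example:
A cursor-based pagination sketch (identifiers are illustrative):
```python
page = repo.get_paginated_workflow_runs(
    tenant_id="tenant-123",
    app_id="app-456",
    triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
    limit=20,
)
while page.has_more:
    page = repo.get_paginated_workflow_runs(
        tenant_id="tenant-123",
        app_id="app-456",
        triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
        limit=20,
        last_id=page.data[-1].id,
    )
```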
"""
...
def get_workflow_run_by_id(
self,
tenant_id: str,
app_id: str,
run_id: str,
) -> WorkflowRun | None:
"""
Get a specific workflow run by ID.
Retrieves a single workflow run with tenant and app isolation.
Used for workflow run detail views and execution tracking.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
run_id: Workflow run identifier
Returns:
WorkflowRun object if found, None otherwise
"""
...
def get_workflow_run_by_id_without_tenant(
self,
run_id: str,
) -> WorkflowRun | None:
"""
Get a specific workflow run by ID without tenant/app context.
Retrieves a single workflow run using only the run ID, without
requiring tenant_id or app_id. This method is intended for internal
system operations like tracing and monitoring where the tenant context
is not available upfront.
Args:
run_id: Workflow run identifier
Returns:
WorkflowRun object if found, None otherwise
Note:
This method bypasses tenant isolation checks and should only be used
in trusted system contexts like ops trace collection. For user-facing
operations, use get_workflow_run_by_id() with proper tenant isolation.
"""
...
def get_workflow_runs_count(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
status: str | None = None,
time_range: str | None = None,
) -> dict[str, int]:
"""
Get workflow runs count statistics.
Retrieves total count and count by status for workflow runs
matching the specified filters.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "debugging", "app-run")
status: Optional filter by specific status
time_range: Optional time range filter (e.g., "7d", "4h", "30m", "30s")
Filters records based on created_at field
Returns:
Dictionary containing:
- total: Total count of all workflow runs (or filtered by status)
- running: Count of workflow runs with status "running"
- succeeded: Count of workflow runs with status "succeeded"
- failed: Count of workflow runs with status "failed"
- stopped: Count of workflow runs with status "stopped"
- partial-succeeded: Count of workflow runs with status "partial-succeeded"
Note: If a status is provided, 'total' will be the count for that status,
and the specific status count will also be set to this value, with all
other status counts being 0.
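Example:
An illustrative return value when no status filter is given:
```python
{
    "total": 42,
    "running": 2,
    "succeeded": 35,
    "failed": 3,
    "stopped": 1,
    "partial-succeeded": 1,
}
```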
"""
...
def get_expired_runs_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowRun]:
"""
Get a batch of expired workflow runs for cleanup.
Retrieves workflow runs created before the specified date for
cleanup operations. Used by scheduled tasks to remove old data
while maintaining data retention policies.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
before_date: Only return runs created before this date
batch_size: Maximum number of records to return
Returns:
Sequence of WorkflowRun objects to be processed for cleanup
"""
...
def delete_runs_by_ids(
self,
run_ids: Sequence[str],
) -> int:
"""
Delete workflow runs by their IDs.
Performs bulk deletion of workflow runs by ID. This method should
be used after backing up the data to OSS storage for retention.
Args:
run_ids: Sequence of workflow run IDs to delete
Returns:
Number of records actually deleted
Note:
This method performs hard deletion. Ensure data is backed up
to OSS storage before calling this method for compliance with
data retention policies.
"""
...
def delete_runs_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow runs for a specific app.
Performs bulk deletion of all workflow runs associated with an app.
Used during app cleanup operations. Processes records in batches
to avoid memory issues and long-running transactions.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
batch_size: Number of records to process in each batch
Returns:
Total number of records deleted across all batches
Note:
This method performs hard deletion without backup. Use with caution
and ensure proper data retention policies are followed.
"""
...
def create_workflow_pause(
self,
workflow_run_id: str,
state_owner_user_id: str,
state: str,
) -> WorkflowPauseEntity:
"""
Create a new workflow pause state.
Creates a pause state for a workflow run, storing the current execution
state and marking the workflow as paused. This is used when a workflow
needs to be suspended and later resumed.
Args:
workflow_run_id: Identifier of the workflow run to pause
state_owner_user_id: User ID who owns the pause state for file storage
state: Serialized workflow execution state (JSON string)
Returns:
WorkflowPauseEntity representing the created pause state
Raises:
ValueError: If workflow_run_id is invalid or workflow run doesn't exist
RuntimeError: If workflow is already paused or in invalid state
"""
# NOTE: we may remove `state_owner_user_id` from the parameter list.
# However, removing it would require an extra query for the `Workflow` model
# while creating a pause.
...
def resume_workflow_pause(
self,
workflow_run_id: str,
pause_entity: WorkflowPauseEntity,
) -> WorkflowPauseEntity:
"""
Resume a paused workflow.
Marks a paused workflow as resumed, sets the `resumed_at` field of the pause record,
and returns the workflow to running status. Returns the pause entity
that was resumed, with `resumed_at` set.
NOTE: this method does not delete the corresponding `WorkflowPauseEntity` record or its associated state.
It is the caller's responsibility to clear that state with `delete_workflow_pause`.
Args:
workflow_run_id: Identifier of the workflow run to resume
pause_entity: The pause entity to resume
Returns:
WorkflowPauseEntity representing the resumed pause state
Raises:
ValueError: If workflow_run_id is invalid
RuntimeError: If workflow is not paused or already resumed
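Example:
A minimal pause/resume lifecycle sketch (identifiers and `serialized_state` are illustrative);
note the explicit cleanup call, since resuming does not delete the pause record:
```python
pause = repo.create_workflow_pause(
    workflow_run_id="run-123",
    state_owner_user_id="user-456",
    state=serialized_state,  # JSON string produced by the caller
)
resumed = repo.resume_workflow_pause(workflow_run_id="run-123", pause_entity=pause)
repo.delete_workflow_pause(resumed)
```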
"""
...
def delete_workflow_pause(
self,
pause_entity: WorkflowPauseEntity,
) -> None:
"""
Delete a workflow pause state.
Permanently removes the pause state for a workflow run, including
the stored state file. Used for cleanup operations when a paused
workflow is no longer needed.
Args:
pause_entity: The pause entity to delete
Raises:
ValueError: If pause_entity is invalid
RuntimeError: If workflow is not paused
Note:
This operation is irreversible. The stored workflow state will be
permanently deleted along with the pause record.
"""
...
def prune_pauses(
self,
expiration: datetime,
resumption_expiration: datetime,
limit: int | None = None,
) -> Sequence[str]:
"""
Clean up expired and old pause states.
Removes pause states that have expired (created before `expiration`)
and pause states that were resumed before `resumption_expiration`.
This is used for maintenance and cleanup operations.
Args:
expiration: Remove pause states created before this time
resumption_expiration: Remove pause states resumed before this time
limit: Maximum number of records to delete in one call
Returns:
A list of IDs of the pause records that were pruned
Raises:
ValueError: If parameters are invalid
"""
...
def get_daily_runs_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyRunsStats]:
"""
Get daily runs statistics.
Retrieves daily workflow runs count grouped by date for a specific app
and trigger source. Used for workflow statistics dashboard.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "app-run")
start_date: Optional start date filter
end_date: Optional end date filter
timezone: Timezone for date grouping (default: "UTC")
Returns:
List of dictionaries containing date and runs count:
[{"date": "2024-01-01", "runs": 10}, ...]
"""
...
def get_daily_terminals_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyTerminalsStats]:
"""
Get daily terminals statistics.
Retrieves daily unique terminal count grouped by date for a specific app
and trigger source. Used for workflow statistics dashboard.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "app-run")
start_date: Optional start date filter
end_date: Optional end date filter
timezone: Timezone for date grouping (default: "UTC")
Returns:
List of dictionaries containing date and terminal count:
[{"date": "2024-01-01", "terminal_count": 5}, ...]
"""
...
def get_daily_token_cost_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyTokenCostStats]:
"""
Get daily token cost statistics.
Retrieves daily total token count grouped by date for a specific app
and trigger source. Used for workflow statistics dashboard.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "app-run")
start_date: Optional start date filter
end_date: Optional end date filter
timezone: Timezone for date grouping (default: "UTC")
Returns:
List of dictionaries containing date and token count:
[{"date": "2024-01-01", "token_count": 1000}, ...]
"""
...
def get_average_app_interaction_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[AverageInteractionStats]:
"""
Get average app interaction statistics.
Retrieves daily average interactions per user grouped by date for a specific app
and trigger source. Used for workflow statistics dashboard.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source (e.g., "app-run")
start_date: Optional start date filter
end_date: Optional end date filter
timezone: Timezone for date grouping (default: "UTC")
Returns:
List of dictionaries containing date and average interactions:
[{"date": "2024-01-01", "interactions": 2.5}, ...]
"""
...

View File

@@ -0,0 +1,82 @@
"""
DifyAPI Repository Factory for creating repository instances.
This factory is specifically designed for DifyAPI repositories that handle
service-layer operations with dependency injection patterns.
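Example:
A minimal usage sketch (`db.engine` refers to the application's SQLAlchemy engine):
```python
from sqlalchemy.orm import sessionmaker
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
node_execution_repo = DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository(session_maker)
workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
```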
"""
from sqlalchemy.orm import Session, sessionmaker
from configs import dify_config
from core.repositories import DifyCoreRepositoryFactory, RepositoryImportError
from libs.module_loading import import_string
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory):
"""
Factory for creating DifyAPI repository instances based on configuration.
This factory handles the creation of repositories that are specifically designed
for service-layer operations and use dependency injection with sessionmaker
for better testability and separation of concerns.
"""
@classmethod
def create_api_workflow_node_execution_repository(
cls, session_maker: sessionmaker[Session]
) -> DifyAPIWorkflowNodeExecutionRepository:
"""
Create a DifyAPIWorkflowNodeExecutionRepository instance based on configuration.
This repository is designed for service-layer operations and uses dependency injection
with a sessionmaker for better testability and separation of concerns. It provides
database access patterns specifically needed by service classes, handling queries
that involve database-specific fields and multi-tenancy concerns.
Args:
session_maker: SQLAlchemy sessionmaker to inject for database session management.
Returns:
Configured DifyAPIWorkflowNodeExecutionRepository instance
Raises:
RepositoryImportError: If the configured repository cannot be imported or instantiated
"""
class_path = dify_config.API_WORKFLOW_NODE_EXECUTION_REPOSITORY
try:
repository_class = import_string(class_path)
return repository_class(session_maker=session_maker)
except Exception as e:
raise RepositoryImportError(
f"Failed to create DifyAPIWorkflowNodeExecutionRepository from '{class_path}': {e}"
) from e
@classmethod
def create_api_workflow_run_repository(cls, session_maker: sessionmaker[Session]) -> APIWorkflowRunRepository:
"""
Create an APIWorkflowRunRepository instance based on configuration.
This repository is designed for service-layer WorkflowRun operations and uses dependency
injection with a sessionmaker for better testability and separation of concerns. It provides
database access patterns specifically needed by service classes for workflow run management,
including pagination, filtering, and bulk operations.
Args:
session_maker: SQLAlchemy sessionmaker to inject for database session management.
Returns:
Configured APIWorkflowRunRepository instance
Raises:
RepositoryImportError: If the configured repository cannot be imported or instantiated
"""
class_path = dify_config.API_WORKFLOW_RUN_REPOSITORY
try:
repository_class = import_string(class_path)
return repository_class(session_maker=session_maker)
except Exception as e:
raise RepositoryImportError(f"Failed to create APIWorkflowRunRepository from '{class_path}': {e}") from e

View File

@@ -0,0 +1,292 @@
"""
SQLAlchemy implementation of DifyAPIWorkflowNodeExecutionRepository.
This module provides a concrete implementation of the service repository protocol
using SQLAlchemy 2.0 style queries for WorkflowNodeExecutionModel operations.
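Example:
A minimal construction sketch (the factory in this commit performs the same injection;
identifiers are illustrative):
```python
from sqlalchemy.orm import sessionmaker
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = DifyAPISQLAlchemyWorkflowNodeExecutionRepository(session_maker=session_maker)
executions = repo.get_executions_by_workflow_run(
    tenant_id="tenant-123",
    app_id="app-456",
    workflow_run_id="run-789",
)
```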
"""
from collections.abc import Sequence
from datetime import datetime
from typing import cast
from sqlalchemy import asc, delete, desc, select
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session, sessionmaker
from models.workflow import WorkflowNodeExecutionModel
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecutionRepository):
"""
SQLAlchemy implementation of DifyAPIWorkflowNodeExecutionRepository.
This repository provides service-layer database operations for WorkflowNodeExecutionModel
using SQLAlchemy 2.0 style queries. It implements the DifyAPIWorkflowNodeExecutionRepository
protocol with the following features:
- Multi-tenancy data isolation through tenant_id filtering
- Direct database model operations without domain conversion
- Batch processing for efficient large-scale operations
- Optimized query patterns for common access patterns
- Dependency injection for better testability and maintainability
- Session management and transaction handling with proper cleanup
- Maintenance operations for data lifecycle management
- Thread-safe database operations using session-per-request pattern
"""
def __init__(self, session_maker: sessionmaker[Session]):
"""
Initialize the repository with a sessionmaker.
Args:
session_maker: SQLAlchemy sessionmaker for creating database sessions
"""
self._session_maker = session_maker
def get_node_last_execution(
self,
tenant_id: str,
app_id: str,
workflow_id: str,
node_id: str,
) -> WorkflowNodeExecutionModel | None:
"""
Get the most recent execution for a specific node.
This method replicates the query pattern from WorkflowService.get_node_last_run()
using SQLAlchemy 2.0 style syntax.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_id: The workflow identifier
node_id: The node identifier
Returns:
The most recent WorkflowNodeExecutionModel for the node, or None if not found.
The returned WorkflowNodeExecutionModel will have `offload_data` preloaded.
"""
stmt = select(WorkflowNodeExecutionModel)
stmt = WorkflowNodeExecutionModel.preload_offload_data(stmt)
stmt = (
stmt.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.app_id == app_id,
WorkflowNodeExecutionModel.workflow_id == workflow_id,
WorkflowNodeExecutionModel.node_id == node_id,
)
.order_by(desc(WorkflowNodeExecutionModel.created_at))
.limit(1)
)
with self._session_maker() as session:
return session.scalar(stmt)
def get_executions_by_workflow_run(
self,
tenant_id: str,
app_id: str,
workflow_run_id: str,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get all node executions for a specific workflow run.
This method replicates the query pattern from WorkflowRunService.get_workflow_run_node_executions()
using SQLAlchemy 2.0 style syntax.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
workflow_run_id: The workflow run identifier
Returns:
A sequence of WorkflowNodeExecutionModel instances ordered by created_at (ascending)
"""
stmt = WorkflowNodeExecutionModel.preload_offload_data(select(WorkflowNodeExecutionModel))
stmt = stmt.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.app_id == app_id,
WorkflowNodeExecutionModel.workflow_run_id == workflow_run_id,
).order_by(asc(WorkflowNodeExecutionModel.created_at))
with self._session_maker() as session:
return session.execute(stmt).scalars().all()
def get_execution_by_id(
self,
execution_id: str,
tenant_id: str | None = None,
) -> WorkflowNodeExecutionModel | None:
"""
Get a workflow node execution by its ID.
This method replicates the query pattern from WorkflowDraftVariableService
and WorkflowService.single_step_run_workflow_node() using SQLAlchemy 2.0 style syntax.
When `tenant_id` is None, it's the caller's responsibility to ensure proper data isolation between tenants.
If the `execution_id` comes from untrusted sources (e.g., retrieved from an API request), the caller should
set `tenant_id` to prevent horizontal privilege escalation.
Args:
execution_id: The execution identifier
tenant_id: Optional tenant identifier for additional filtering
Returns:
The WorkflowNodeExecutionModel if found, or None if not found
"""
stmt = WorkflowNodeExecutionModel.preload_offload_data(select(WorkflowNodeExecutionModel))
stmt = stmt.where(WorkflowNodeExecutionModel.id == execution_id)
# Add tenant filtering if provided
if tenant_id is not None:
stmt = stmt.where(WorkflowNodeExecutionModel.tenant_id == tenant_id)
with self._session_maker() as session:
return session.scalar(stmt)
def delete_expired_executions(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> int:
"""
Delete workflow node executions that are older than the specified date.
Args:
tenant_id: The tenant identifier
before_date: Delete executions created before this date
batch_size: Maximum number of executions to delete in one batch
Returns:
The number of executions deleted
"""
total_deleted = 0
while True:
with self._session_maker() as session:
# Find executions to delete in batches
stmt = (
select(WorkflowNodeExecutionModel.id)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.created_at < before_date,
)
.limit(batch_size)
)
execution_ids = session.execute(stmt).scalars().all()
if not execution_ids:
break
# Delete the batch
delete_stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
result = cast(CursorResult, session.execute(delete_stmt))
session.commit()
total_deleted += result.rowcount
# If we deleted fewer than the batch size, we're done
if len(execution_ids) < batch_size:
break
return total_deleted
def delete_executions_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow node executions for a specific app.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
batch_size: Maximum number of executions to delete in one batch
Returns:
The total number of executions deleted
"""
total_deleted = 0
while True:
with self._session_maker() as session:
# Find executions to delete in batches
stmt = (
select(WorkflowNodeExecutionModel.id)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.app_id == app_id,
)
.limit(batch_size)
)
execution_ids = session.execute(stmt).scalars().all()
if not execution_ids:
break
# Delete the batch
delete_stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
result = cast(CursorResult, session.execute(delete_stmt))
session.commit()
total_deleted += result.rowcount
# If we deleted fewer than the batch size, we're done
if len(execution_ids) < batch_size:
break
return total_deleted
def get_expired_executions_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get a batch of expired workflow node executions for backup purposes.
Args:
tenant_id: The tenant identifier
before_date: Get executions created before this date
batch_size: Maximum number of executions to retrieve
Returns:
A sequence of WorkflowNodeExecutionModel instances
"""
stmt = (
select(WorkflowNodeExecutionModel)
.where(
WorkflowNodeExecutionModel.tenant_id == tenant_id,
WorkflowNodeExecutionModel.created_at < before_date,
)
.limit(batch_size)
)
with self._session_maker() as session:
return session.execute(stmt).scalars().all()
def delete_executions_by_ids(
self,
execution_ids: Sequence[str],
) -> int:
"""
Delete workflow node executions by their IDs.
Args:
execution_ids: List of execution IDs to delete
Returns:
The number of executions deleted
"""
if not execution_ids:
return 0
with self._session_maker() as session:
stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
result = cast(CursorResult, session.execute(stmt))
session.commit()
return result.rowcount

View File

@@ -0,0 +1,869 @@
"""
SQLAlchemy API WorkflowRun Repository Implementation
This module provides the SQLAlchemy-based implementation of the APIWorkflowRunRepository
protocol. It handles service-layer WorkflowRun database operations using SQLAlchemy 2.0
style queries with proper session management and multi-tenant data isolation.
Key Features:
- SQLAlchemy 2.0 style queries for modern database operations
- Cursor-based pagination for efficient large dataset handling
- Bulk operations with batch processing for performance
- Multi-tenant data isolation and security
- Proper session management with dependency injection
Implementation Notes:
- Uses sessionmaker for consistent session management
- Implements cursor-based pagination using created_at timestamps
- Provides efficient bulk deletion with batch processing
- Maintains data consistency with proper transaction handling
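Example:
A minimal construction sketch (the factory in this commit performs the same injection;
identifiers are illustrative):
```python
from sqlalchemy.orm import sessionmaker
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
repo = DifyAPISQLAlchemyWorkflowRunRepository(session_maker=session_maker)
run = repo.get_workflow_run_by_id(
    tenant_id="tenant-123",
    app_id="app-456",
    run_id="run-789",
)
```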
"""
import logging
import uuid
from collections.abc import Sequence
from datetime import datetime
from decimal import Decimal
from typing import Any, cast
import sqlalchemy as sa
from sqlalchemy import and_, delete, func, null, or_, select
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session, selectinload, sessionmaker
from core.workflow.entities.workflow_pause import WorkflowPauseEntity
from core.workflow.enums import WorkflowExecutionStatus
from extensions.ext_storage import storage
from libs.datetime_utils import naive_utc_now
from libs.helper import convert_datetime_to_date
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from libs.time_parser import get_time_threshold
from libs.uuid_utils import uuidv7
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import WorkflowPause as WorkflowPauseModel
from models.workflow import WorkflowRun
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
from repositories.types import (
AverageInteractionStats,
DailyRunsStats,
DailyTerminalsStats,
DailyTokenCostStats,
)
logger = logging.getLogger(__name__)
class _WorkflowRunError(Exception):
pass
class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):
"""
SQLAlchemy implementation of APIWorkflowRunRepository.
Provides service-layer WorkflowRun database operations using SQLAlchemy 2.0
style queries. Supports dependency injection through sessionmaker and
maintains proper multi-tenant data isolation.
Args:
session_maker: SQLAlchemy sessionmaker instance for database connections
"""
def __init__(self, session_maker: sessionmaker[Session]):
"""
Initialize the repository with a sessionmaker.
Args:
session_maker: SQLAlchemy sessionmaker for database connections
"""
self._session_maker = session_maker
def get_paginated_workflow_runs(
self,
tenant_id: str,
app_id: str,
triggered_from: WorkflowRunTriggeredFrom | Sequence[WorkflowRunTriggeredFrom],
limit: int = 20,
last_id: str | None = None,
status: str | None = None,
) -> InfiniteScrollPagination:
"""
Get paginated workflow runs with filtering.
Implements cursor-based pagination using created_at timestamps for
efficient handling of large datasets. Filters by tenant, app, and
trigger source for proper data isolation.
"""
with self._session_maker() as session:
# Build base query with filters
base_stmt = select(WorkflowRun).where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
)
# Handle triggered_from values
if isinstance(triggered_from, WorkflowRunTriggeredFrom):
triggered_from = [triggered_from]
if triggered_from:
base_stmt = base_stmt.where(WorkflowRun.triggered_from.in_(triggered_from))
# Add optional status filter
if status:
base_stmt = base_stmt.where(WorkflowRun.status == status)
if last_id:
# Get the last workflow run for cursor-based pagination
last_run_stmt = base_stmt.where(WorkflowRun.id == last_id)
last_workflow_run = session.scalar(last_run_stmt)
if not last_workflow_run:
raise ValueError("Last workflow run does not exist")
# Get records created before the last run's timestamp
base_stmt = base_stmt.where(
WorkflowRun.created_at < last_workflow_run.created_at,
WorkflowRun.id != last_workflow_run.id,
)
# First page - get most recent records
workflow_runs = session.scalars(base_stmt.order_by(WorkflowRun.created_at.desc()).limit(limit + 1)).all()
# Check if there are more records for pagination
has_more = len(workflow_runs) > limit
if has_more:
workflow_runs = workflow_runs[:-1]
return InfiniteScrollPagination(data=workflow_runs, limit=limit, has_more=has_more)
def get_workflow_run_by_id(
self,
tenant_id: str,
app_id: str,
run_id: str,
) -> WorkflowRun | None:
"""
Get a specific workflow run by ID with tenant and app isolation.
"""
with self._session_maker() as session:
stmt = select(WorkflowRun).where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
WorkflowRun.id == run_id,
)
return session.scalar(stmt)
def get_workflow_run_by_id_without_tenant(
self,
run_id: str,
) -> WorkflowRun | None:
"""
Get a specific workflow run by ID without tenant/app context.
"""
with self._session_maker() as session:
stmt = select(WorkflowRun).where(WorkflowRun.id == run_id)
return session.scalar(stmt)
def get_workflow_runs_count(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
status: str | None = None,
time_range: str | None = None,
) -> dict[str, int]:
"""
Get workflow runs count statistics grouped by status.
"""
_initial_status_counts = {
"running": 0,
"succeeded": 0,
"failed": 0,
"stopped": 0,
"partial-succeeded": 0,
}
with self._session_maker() as session:
# Build base where conditions
base_conditions = [
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
WorkflowRun.triggered_from == triggered_from,
]
# Add time range filter if provided
if time_range:
time_threshold = get_time_threshold(time_range)
if time_threshold:
base_conditions.append(WorkflowRun.created_at >= time_threshold)
# If status filter is provided, return simple count
if status:
count_stmt = select(func.count(WorkflowRun.id)).where(*base_conditions, WorkflowRun.status == status)
total = session.scalar(count_stmt) or 0
result = {"total": total} | _initial_status_counts
# Set the count for the filtered status
if status in result:
result[status] = total
return result
# No status filter - get counts grouped by status
base_stmt = (
select(WorkflowRun.status, func.count(WorkflowRun.id).label("count"))
.where(*base_conditions)
.group_by(WorkflowRun.status)
)
# Execute query
results = session.execute(base_stmt).all()
# Build response dictionary
status_counts = _initial_status_counts.copy()
total = 0
for status_val, count in results:
total += count
if status_val in status_counts:
status_counts[status_val] = count
return {"total": total} | status_counts
def get_expired_runs_batch(
self,
tenant_id: str,
before_date: datetime,
batch_size: int = 1000,
) -> Sequence[WorkflowRun]:
"""
Get a batch of expired workflow runs for cleanup operations.
"""
with self._session_maker() as session:
stmt = (
select(WorkflowRun)
.where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.created_at < before_date,
)
.limit(batch_size)
)
return session.scalars(stmt).all()
def delete_runs_by_ids(
self,
run_ids: Sequence[str],
) -> int:
"""
Delete workflow runs by their IDs using bulk deletion.
"""
if not run_ids:
return 0
with self._session_maker() as session:
stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
result = cast(CursorResult, session.execute(stmt))
session.commit()
deleted_count = result.rowcount
logger.info("Deleted %s workflow runs by IDs", deleted_count)
return deleted_count
def delete_runs_by_app(
self,
tenant_id: str,
app_id: str,
batch_size: int = 1000,
) -> int:
"""
Delete all workflow runs for a specific app in batches.
"""
total_deleted = 0
while True:
with self._session_maker() as session:
# Get a batch of run IDs to delete
stmt = (
select(WorkflowRun.id)
.where(
WorkflowRun.tenant_id == tenant_id,
WorkflowRun.app_id == app_id,
)
.limit(batch_size)
)
run_ids = session.scalars(stmt).all()
if not run_ids:
break
# Delete the batch
delete_stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
result = cast(CursorResult, session.execute(delete_stmt))
session.commit()
batch_deleted = result.rowcount
total_deleted += batch_deleted
logger.info("Deleted batch of %s workflow runs for app %s", batch_deleted, app_id)
# If we deleted fewer records than the batch size, we're done
if batch_deleted < batch_size:
break
logger.info("Total deleted %s workflow runs for app %s", total_deleted, app_id)
return total_deleted
def create_workflow_pause(
self,
workflow_run_id: str,
state_owner_user_id: str,
state: str,
) -> WorkflowPauseEntity:
"""
Create a new workflow pause state.
Creates a pause state for a workflow run, storing the current execution
state and marking the workflow as paused. This is used when a workflow
needs to be suspended and later resumed.
Args:
workflow_run_id: Identifier of the workflow run to pause
state_owner_user_id: User ID who owns the pause state for file storage
state: Serialized workflow execution state (JSON string)
Returns:
WorkflowPauseEntity representing the created pause state
Raises:
ValueError: If workflow_run_id is invalid or workflow run doesn't exist
RuntimeError: If workflow is already paused or in invalid state
"""
previous_pause_model_query = select(WorkflowPauseModel).where(
WorkflowPauseModel.workflow_run_id == workflow_run_id
)
with self._session_maker() as session, session.begin():
# Get the workflow run
workflow_run = session.get(WorkflowRun, workflow_run_id)
if workflow_run is None:
raise ValueError(f"WorkflowRun not found: {workflow_run_id}")
# Check if workflow is in RUNNING status
if workflow_run.status != WorkflowExecutionStatus.RUNNING:
raise _WorkflowRunError(
f"Only WorkflowRun with RUNNING status can be paused, "
f"workflow_run_id={workflow_run_id}, current_status={workflow_run.status}"
)
# Remove any existing pause record for this run before creating a new one
previous_pause = session.scalars(previous_pause_model_query).first()
if previous_pause:
self._delete_pause_model(session, previous_pause)
# we need to flush here to ensure that the old one is actually deleted.
session.flush()
# Upload the state file
state_obj_key = f"workflow-state-{uuid.uuid4()}.json"
storage.save(state_obj_key, state.encode())
# Create the pause record
pause_model = WorkflowPauseModel()
pause_model.id = str(uuidv7())
pause_model.workflow_id = workflow_run.workflow_id
pause_model.workflow_run_id = workflow_run.id
pause_model.state_object_key = state_obj_key
pause_model.created_at = naive_utc_now()
# Update workflow run status
workflow_run.status = WorkflowExecutionStatus.PAUSED
# Save everything in a transaction
session.add(pause_model)
session.add(workflow_run)
logger.info("Created workflow pause %s for workflow run %s", pause_model.id, workflow_run_id)
return _PrivateWorkflowPauseEntity.from_models(pause_model)
def get_workflow_pause(
self,
workflow_run_id: str,
) -> WorkflowPauseEntity | None:
"""
Get an existing workflow pause state.
Retrieves the pause state for a specific workflow run if it exists.
Used to check if a workflow is paused and to retrieve its saved state.
Args:
workflow_run_id: Identifier of the workflow run to get pause state for
Returns:
WorkflowPauseEntity if pause state exists, None otherwise
Raises:
ValueError: If workflow_run_id is invalid
"""
with self._session_maker() as session:
# Query workflow run with pause and state file
stmt = select(WorkflowRun).options(selectinload(WorkflowRun.pause)).where(WorkflowRun.id == workflow_run_id)
workflow_run = session.scalar(stmt)
if workflow_run is None:
raise ValueError(f"WorkflowRun not found: {workflow_run_id}")
pause_model = workflow_run.pause
if pause_model is None:
return None
return _PrivateWorkflowPauseEntity.from_models(pause_model)
def resume_workflow_pause(
self,
workflow_run_id: str,
pause_entity: WorkflowPauseEntity,
) -> WorkflowPauseEntity:
"""
Resume a paused workflow.
Marks a paused workflow as resumed, setting `resumed_at` on the pause record and
returning the workflow to running status. Returns the pause entity
that was resumed; the pause record itself is not deleted (see delete_workflow_pause).
Args:
workflow_run_id: Identifier of the workflow run to resume
pause_entity: The pause entity to resume
Returns:
WorkflowPauseEntity representing the resumed pause state
Raises:
ValueError: If workflow_run_id is invalid
RuntimeError: If workflow is not paused or already resumed
"""
with self._session_maker() as session, session.begin():
# Get the workflow run with pause
stmt = select(WorkflowRun).options(selectinload(WorkflowRun.pause)).where(WorkflowRun.id == workflow_run_id)
workflow_run = session.scalar(stmt)
if workflow_run is None:
raise ValueError(f"WorkflowRun not found: {workflow_run_id}")
if workflow_run.status != WorkflowExecutionStatus.PAUSED:
raise _WorkflowRunError(
f"WorkflowRun is not in PAUSED status, workflow_run_id={workflow_run_id}, "
f"current_status={workflow_run.status}"
)
pause_model = workflow_run.pause
if pause_model is None:
raise _WorkflowRunError(f"No pause state found for workflow run: {workflow_run_id}")
if pause_model.id != pause_entity.id:
raise _WorkflowRunError(
"different id in WorkflowPause and WorkflowPauseEntity, "
f"WorkflowPause.id={pause_model.id}, "
f"WorkflowPauseEntity.id={pause_entity.id}"
)
if pause_model.resumed_at is not None:
raise _WorkflowRunError(f"Cannot resume an already resumed pause, pause_id={pause_model.id}")
# Mark as resumed
pause_model.resumed_at = naive_utc_now()
workflow_run.pause_id = None # type: ignore
workflow_run.status = WorkflowExecutionStatus.RUNNING
session.add(pause_model)
session.add(workflow_run)
logger.info("Resumed workflow pause %s for workflow run %s", pause_model.id, workflow_run_id)
return _PrivateWorkflowPauseEntity.from_models(pause_model)
def delete_workflow_pause(
self,
pause_entity: WorkflowPauseEntity,
) -> None:
"""
Delete a workflow pause state.
Permanently removes the pause state for a workflow run, including
the stored state file. Used for cleanup operations when a paused
workflow is no longer needed.
Args:
pause_entity: The pause entity to delete
Raises:
ValueError: If pause_entity is invalid
_WorkflowRunError: If workflow is not paused
Note:
This operation is irreversible. The stored workflow state will be
permanently deleted along with the pause record.
"""
with self._session_maker() as session, session.begin():
# Get the pause model by ID
pause_model = session.get(WorkflowPauseModel, pause_entity.id)
if pause_model is None:
raise _WorkflowRunError(f"WorkflowPause not found: {pause_entity.id}")
self._delete_pause_model(session, pause_model)
@staticmethod
def _delete_pause_model(session: Session, pause_model: WorkflowPauseModel):
storage.delete(pause_model.state_object_key)
# Delete the pause record
session.delete(pause_model)
logger.info("Deleted workflow pause %s for workflow run %s", pause_model.id, pause_model.workflow_run_id)
def prune_pauses(
self,
expiration: datetime,
resumption_expiration: datetime,
limit: int | None = None,
) -> Sequence[str]:
"""
Clean up expired and old pause states.
Removes pause states that have expired (created before `expiration`)
and pause states that were resumed before `resumption_expiration`.
This is used for maintenance and cleanup operations.
Args:
expiration: Remove pause states created before this time
resumption_expiration: Remove pause states resumed before this time
limit: Maximum number of records to delete in one call
Returns:
A list of IDs of the pause records that were pruned
Raises:
ValueError: If parameters are invalid
"""
_limit: int = limit or 1000
pruned_record_ids: list[str] = []
cond = or_(
WorkflowPauseModel.created_at < expiration,
and_(
WorkflowPauseModel.resumed_at.is_not(null()),
WorkflowPauseModel.resumed_at < resumption_expiration,
),
)
# Select the pause records to prune: expired pauses (created before `expiration`) and
# pauses resumed before `resumption_expiration`, up to `_limit` records per call.
stmt = select(WorkflowPauseModel).where(cond).limit(_limit)
with self._session_maker(expire_on_commit=False) as session:
# Collect the matching records along with their state object keys
pauses_to_delete = session.scalars(stmt).all()
# Delete state files from storage
for pause in pauses_to_delete:
with self._session_maker(expire_on_commit=False) as session, session.begin():
# todo: this issues a separate query for each WorkflowPauseModel record.
# consider batching this lookup.
try:
storage.delete(pause.state_object_key)
logger.info(
"Deleted state object for pause, pause_id=%s, object_key=%s",
pause.id,
pause.state_object_key,
)
except Exception:
logger.exception(
"Failed to delete state file for pause, pause_id=%s, object_key=%s",
pause.id,
pause.state_object_key,
)
continue
session.delete(pause)
pruned_record_ids.append(pause.id)
logger.info(
"workflow pause records deleted, id=%s, resumed_at=%s",
pause.id,
pause.resumed_at,
)
return pruned_record_ids
def get_daily_runs_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyRunsStats]:
"""
Get daily runs statistics using raw SQL for optimal performance.
"""
converted_created_at = convert_datetime_to_date("created_at")
sql_query = f"""SELECT
{converted_created_at} AS date,
COUNT(id) AS runs
FROM
workflow_runs
WHERE
tenant_id = :tenant_id
AND app_id = :app_id
AND triggered_from = :triggered_from"""
arg_dict: dict[str, Any] = {
"tz": timezone,
"tenant_id": tenant_id,
"app_id": app_id,
"triggered_from": triggered_from,
}
if start_date:
sql_query += " AND created_at >= :start_date"
arg_dict["start_date"] = start_date
if end_date:
sql_query += " AND created_at < :end_date"
arg_dict["end_date"] = end_date
sql_query += " GROUP BY date ORDER BY date"
response_data = []
with self._session_maker() as session:
rs = session.execute(sa.text(sql_query), arg_dict)
for row in rs:
response_data.append({"date": str(row.date), "runs": row.runs})
return cast(list[DailyRunsStats], response_data)
def get_daily_terminals_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyTerminalsStats]:
"""
Get daily terminals statistics using raw SQL for optimal performance.
"""
converted_created_at = convert_datetime_to_date("created_at")
sql_query = f"""SELECT
{converted_created_at} AS date,
COUNT(DISTINCT created_by) AS terminal_count
FROM
workflow_runs
WHERE
tenant_id = :tenant_id
AND app_id = :app_id
AND triggered_from = :triggered_from"""
arg_dict: dict[str, Any] = {
"tz": timezone,
"tenant_id": tenant_id,
"app_id": app_id,
"triggered_from": triggered_from,
}
if start_date:
sql_query += " AND created_at >= :start_date"
arg_dict["start_date"] = start_date
if end_date:
sql_query += " AND created_at < :end_date"
arg_dict["end_date"] = end_date
sql_query += " GROUP BY date ORDER BY date"
response_data = []
with self._session_maker() as session:
rs = session.execute(sa.text(sql_query), arg_dict)
for row in rs:
response_data.append({"date": str(row.date), "terminal_count": row.terminal_count})
return cast(list[DailyTerminalsStats], response_data)
def get_daily_token_cost_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyTokenCostStats]:
"""
Get daily token cost statistics using raw SQL for optimal performance.
"""
converted_created_at = convert_datetime_to_date("created_at")
sql_query = f"""SELECT
{converted_created_at} AS date,
SUM(total_tokens) AS token_count
FROM
workflow_runs
WHERE
tenant_id = :tenant_id
AND app_id = :app_id
AND triggered_from = :triggered_from"""
arg_dict: dict[str, Any] = {
"tz": timezone,
"tenant_id": tenant_id,
"app_id": app_id,
"triggered_from": triggered_from,
}
if start_date:
sql_query += " AND created_at >= :start_date"
arg_dict["start_date"] = start_date
if end_date:
sql_query += " AND created_at < :end_date"
arg_dict["end_date"] = end_date
sql_query += " GROUP BY date ORDER BY date"
response_data = []
with self._session_maker() as session:
rs = session.execute(sa.text(sql_query), arg_dict)
for row in rs:
response_data.append(
{
"date": str(row.date),
"token_count": row.token_count,
}
)
return cast(list[DailyTokenCostStats], response_data)
def get_average_app_interaction_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[AverageInteractionStats]:
"""
Get average app interaction statistics using raw SQL for optimal performance.
"""
converted_created_at = convert_datetime_to_date("c.created_at")
sql_query = f"""SELECT
AVG(sub.interactions) AS interactions,
sub.date
FROM
(
SELECT
{converted_created_at} AS date,
c.created_by,
COUNT(c.id) AS interactions
FROM
workflow_runs c
WHERE
c.tenant_id = :tenant_id
AND c.app_id = :app_id
AND c.triggered_from = :triggered_from
{{{{start}}}}
{{{{end}}}}
GROUP BY
date, c.created_by
) sub
GROUP BY
sub.date"""
arg_dict: dict[str, Any] = {
"tz": timezone,
"tenant_id": tenant_id,
"app_id": app_id,
"triggered_from": triggered_from,
}
if start_date:
sql_query = sql_query.replace("{{start}}", " AND c.created_at >= :start_date")
arg_dict["start_date"] = start_date
else:
sql_query = sql_query.replace("{{start}}", "")
if end_date:
sql_query = sql_query.replace("{{end}}", " AND c.created_at < :end_date")
arg_dict["end_date"] = end_date
else:
sql_query = sql_query.replace("{{end}}", "")
response_data = []
with self._session_maker() as session:
rs = session.execute(sa.text(sql_query), arg_dict)
for row in rs:
response_data.append(
{"date": str(row.date), "interactions": float(row.interactions.quantize(Decimal("0.01")))}
)
return cast(list[AverageInteractionStats], response_data)
class _PrivateWorkflowPauseEntity(WorkflowPauseEntity):
"""
Private implementation of WorkflowPauseEntity for SQLAlchemy repository.
This implementation is internal to the repository layer and provides
the concrete implementation of the WorkflowPauseEntity interface.
"""
def __init__(
self,
*,
pause_model: WorkflowPauseModel,
) -> None:
self._pause_model = pause_model
self._cached_state: bytes | None = None
@classmethod
def from_models(cls, workflow_pause_model: WorkflowPauseModel) -> "_PrivateWorkflowPauseEntity":
"""
Create a _PrivateWorkflowPauseEntity from a database model.
Args:
workflow_pause_model: The WorkflowPause database model
Returns:
_PrivateWorkflowPauseEntity: The constructed entity
"""
return cls(pause_model=workflow_pause_model)
@property
def id(self) -> str:
return self._pause_model.id
@property
def workflow_execution_id(self) -> str:
return self._pause_model.workflow_run_id
def get_state(self) -> bytes:
"""
Retrieve the serialized workflow state from storage.
Returns:
bytes: The serialized workflow state
Raises:
FileNotFoundError: If the state object cannot be found
IOError: If there are issues reading the state object
"""
if self._cached_state is not None:
return self._cached_state
# Load the state from storage
state_data = storage.load(self._pause_model.state_object_key)
self._cached_state = state_data
return state_data
@property
def resumed_at(self) -> datetime | None:
return self._pause_model.resumed_at

View File

@@ -0,0 +1,86 @@
"""
SQLAlchemy implementation of WorkflowTriggerLogRepository.
"""
from collections.abc import Sequence
from datetime import UTC, datetime, timedelta
from sqlalchemy import and_, select
from sqlalchemy.orm import Session
from models.enums import WorkflowTriggerStatus
from models.trigger import WorkflowTriggerLog
from repositories.workflow_trigger_log_repository import WorkflowTriggerLogRepository
class SQLAlchemyWorkflowTriggerLogRepository(WorkflowTriggerLogRepository):
"""
SQLAlchemy implementation of WorkflowTriggerLogRepository.
Optimized for large table operations with proper indexing and batch processing.
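Example:
A minimal usage sketch (`session` is an active SQLAlchemy Session managed by the caller;
identifiers are illustrative):
```python
repo = SQLAlchemyWorkflowTriggerLogRepository(session)
recent = repo.get_recent_logs(tenant_id="tenant-123", app_id="app-456", hours=24)
retryable = repo.get_failed_for_retry(tenant_id="tenant-123", max_retry_count=3)
```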
"""
def __init__(self, session: Session):
self.session = session
def create(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
"""Create a new trigger log entry."""
self.session.add(trigger_log)
self.session.flush()
return trigger_log
def update(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
"""Update an existing trigger log entry."""
# merge() returns the instance attached to this session; return that copy
merged = self.session.merge(trigger_log)
self.session.flush()
return merged
def get_by_id(self, trigger_log_id: str, tenant_id: str | None = None) -> WorkflowTriggerLog | None:
"""Get a trigger log by its ID."""
query = select(WorkflowTriggerLog).where(WorkflowTriggerLog.id == trigger_log_id)
if tenant_id:
query = query.where(WorkflowTriggerLog.tenant_id == tenant_id)
return self.session.scalar(query)
def get_failed_for_retry(
self, tenant_id: str, max_retry_count: int = 3, limit: int = 100
) -> Sequence[WorkflowTriggerLog]:
"""Get failed trigger logs eligible for retry."""
query = (
select(WorkflowTriggerLog)
.where(
and_(
WorkflowTriggerLog.tenant_id == tenant_id,
WorkflowTriggerLog.status.in_([WorkflowTriggerStatus.FAILED, WorkflowTriggerStatus.RATE_LIMITED]),
WorkflowTriggerLog.retry_count < max_retry_count,
)
)
.order_by(WorkflowTriggerLog.created_at.asc())
.limit(limit)
)
return list(self.session.scalars(query).all())
def get_recent_logs(
self, tenant_id: str, app_id: str, hours: int = 24, limit: int = 100, offset: int = 0
) -> Sequence[WorkflowTriggerLog]:
"""Get recent trigger logs within specified hours."""
since = datetime.now(UTC) - timedelta(hours=hours)
query = (
select(WorkflowTriggerLog)
.where(
and_(
WorkflowTriggerLog.tenant_id == tenant_id,
WorkflowTriggerLog.app_id == app_id,
WorkflowTriggerLog.created_at >= since,
)
)
.order_by(WorkflowTriggerLog.created_at.desc())
.limit(limit)
.offset(offset)
)
return list(self.session.scalars(query).all())

View File

@@ -0,0 +1,21 @@
from typing import TypedDict
class DailyRunsStats(TypedDict):
date: str
runs: int
class DailyTerminalsStats(TypedDict):
date: str
terminal_count: int
class DailyTokenCostStats(TypedDict):
date: str
token_count: int
class AverageInteractionStats(TypedDict):
date: str
interactions: float

View File

@@ -0,0 +1,111 @@
"""
Repository protocol for WorkflowTriggerLog operations.
This module provides a protocol interface for operations on WorkflowTriggerLog,
designed to efficiently handle a potentially large volume of trigger logs with
proper indexing and batch operations.
"""
from collections.abc import Sequence
from enum import StrEnum
from typing import Protocol
from models.trigger import WorkflowTriggerLog
class TriggerLogOrderBy(StrEnum):
"""Fields available for ordering trigger logs"""
CREATED_AT = "created_at"
TRIGGERED_AT = "triggered_at"
FINISHED_AT = "finished_at"
STATUS = "status"
class WorkflowTriggerLogRepository(Protocol):
"""
Protocol for operations on WorkflowTriggerLog.
This repository provides efficient access patterns for the trigger log table,
which is expected to grow large over time. It includes:
- Batch operations for cleanup
- Efficient queries with proper indexing
- Pagination support
- Status-based filtering
Implementation notes:
- Leverage database indexes on (tenant_id, app_id), status, and created_at
- Use batch operations for deletions to avoid locking
- Support pagination for large result sets
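Example:
A retry-loop sketch; `repo` and `redispatch_trigger` are illustrative placeholders,
and only the repository calls are part of this protocol:
```python
for log in repo.get_failed_for_retry(tenant_id="tenant-123", max_retry_count=3, limit=100):
    redispatch_trigger(log)  # illustrative re-dispatch of the failed trigger
    log.retry_count += 1
    repo.update(log)
```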
"""
def create(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
"""
Create a new trigger log entry.
Args:
trigger_log: The WorkflowTriggerLog instance to create
Returns:
The created WorkflowTriggerLog with generated ID
"""
...
def update(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
"""
Update an existing trigger log entry.
Args:
trigger_log: The WorkflowTriggerLog instance to update
Returns:
The updated WorkflowTriggerLog
"""
...
def get_by_id(self, trigger_log_id: str, tenant_id: str | None = None) -> WorkflowTriggerLog | None:
"""
Get a trigger log by its ID.
Args:
trigger_log_id: The trigger log identifier
tenant_id: Optional tenant identifier for additional security
Returns:
The WorkflowTriggerLog if found, None otherwise
"""
...
def get_failed_for_retry(
self, tenant_id: str, max_retry_count: int = 3, limit: int = 100
) -> Sequence[WorkflowTriggerLog]:
"""
Get failed trigger logs that are eligible for retry.
Args:
tenant_id: The tenant identifier
max_retry_count: Maximum retry count to consider
limit: Maximum number of results
Returns:
A sequence of WorkflowTriggerLog instances eligible for retry
"""
...
def get_recent_logs(
self, tenant_id: str, app_id: str, hours: int = 24, limit: int = 100, offset: int = 0
) -> Sequence[WorkflowTriggerLog]:
"""
Get recent trigger logs within specified hours.
Args:
tenant_id: The tenant identifier
app_id: The application identifier
hours: Number of hours to look back
limit: Maximum number of results
offset: Number of results to skip
Returns:
A sequence of recent WorkflowTriggerLog instances
"""
...