dify
0
dify/api/repositories/__init__.py
Normal file
197
dify/api/repositories/api_workflow_node_execution_repository.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""
|
||||
Service-layer repository protocol for WorkflowNodeExecutionModel operations.
|
||||
|
||||
This module provides a protocol interface for service-layer operations on WorkflowNodeExecutionModel
|
||||
that abstracts database queries currently done directly in service classes. This repository is
|
||||
specifically designed for service-layer needs and is separate from the core domain repository.
|
||||
|
||||
The service repository handles operations that require access to database-specific fields like
|
||||
tenant_id, app_id, triggered_from, etc., which are not part of the core domain model.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Protocol
|
||||
|
||||
from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository
|
||||
from models.workflow import WorkflowNodeExecutionModel
|
||||
|
||||
|
||||
class DifyAPIWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository, Protocol):
|
||||
"""
|
||||
Protocol for service-layer operations on WorkflowNodeExecutionModel.
|
||||
|
||||
This repository provides database access patterns specifically needed by service classes,
|
||||
handling queries that involve database-specific fields and multi-tenancy concerns.
|
||||
|
||||
Key responsibilities:
|
||||
- Manages database operations for workflow node executions
|
||||
- Handles multi-tenant data isolation
|
||||
- Provides batch processing capabilities
|
||||
- Supports execution lifecycle management
|
||||
|
||||
Implementation notes:
|
||||
- Returns database models directly (WorkflowNodeExecutionModel)
|
||||
- Handles tenant/app filtering automatically
|
||||
- Provides service-specific query patterns
|
||||
- Focuses on database operations without domain logic
|
||||
- Supports cleanup and maintenance operations
|
||||
"""
|
||||
|
||||
def get_node_last_execution(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
workflow_id: str,
|
||||
node_id: str,
|
||||
) -> WorkflowNodeExecutionModel | None:
|
||||
"""
|
||||
Get the most recent execution for a specific node.
|
||||
|
||||
This method finds the latest execution of a specific node within a workflow,
|
||||
ordered by creation time. Used primarily for debugging and inspection purposes.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant identifier
|
||||
app_id: The application identifier
|
||||
workflow_id: The workflow identifier
|
||||
node_id: The node identifier
|
||||
|
||||
Returns:
|
||||
The most recent WorkflowNodeExecutionModel for the node, or None if not found
|
||||
"""
|
||||
...
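For illustration, a sketch of how a service-layer caller might consume this method. Only the protocol method and its signature come from this file; the helper name and the way the repository instance is obtained are assumptions.

```python
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository


def describe_last_node_run(
    repo: DifyAPIWorkflowNodeExecutionRepository,
    tenant_id: str,
    app_id: str,
    workflow_id: str,
    node_id: str,
) -> str:
    """Hypothetical helper: report when a node last executed, for debugging."""
    execution = repo.get_node_last_execution(
        tenant_id=tenant_id,
        app_id=app_id,
        workflow_id=workflow_id,
        node_id=node_id,
    )
    if execution is None:
        return f"node {node_id} has no recorded executions"
    # The repository returns the database model directly, so created_at is available as-is.
    return f"node {node_id} last executed at {execution.created_at}"
```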
|
||||
|
||||
def get_executions_by_workflow_run(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
workflow_run_id: str,
|
||||
) -> Sequence[WorkflowNodeExecutionModel]:
|
||||
"""
|
||||
Get all node executions for a specific workflow run.
|
||||
|
||||
This method retrieves all node executions that belong to a specific workflow run,
|
||||
ordered by index in descending order for proper trace visualization.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant identifier
|
||||
app_id: The application identifier
|
||||
workflow_run_id: The workflow run identifier
|
||||
|
||||
Returns:
|
||||
A sequence of WorkflowNodeExecutionModel instances ordered by index (desc)
|
||||
"""
|
||||
...
|
||||
|
||||
def get_execution_by_id(
|
||||
self,
|
||||
execution_id: str,
|
||||
tenant_id: str | None = None,
|
||||
) -> WorkflowNodeExecutionModel | None:
|
||||
"""
|
||||
Get a workflow node execution by its ID.
|
||||
|
||||
This method retrieves a specific execution by its unique identifier.
|
||||
Tenant filtering is optional for cases where the execution ID is globally unique.
|
||||
|
||||
When `tenant_id` is None, it's the caller's responsibility to ensure proper data isolation between tenants.
|
||||
If the `execution_id` comes from untrusted sources (e.g., retrieved from an API request), the caller should
|
||||
set `tenant_id` to prevent horizontal privilege escalation.
|
||||
|
||||
Args:
|
||||
execution_id: The execution identifier
|
||||
tenant_id: Optional tenant identifier for additional filtering
|
||||
|
||||
Returns:
|
||||
The WorkflowNodeExecutionModel if found, or None if not found
|
||||
"""
|
||||
...
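A minimal sketch of the tenant-isolation guidance above: when the execution ID comes from an API request, pass the authenticated tenant's ID so the lookup cannot cross tenants. The function name and its parameters are illustrative, not part of this module.

```python
from models.workflow import WorkflowNodeExecutionModel
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository


def load_execution_for_request(
    repo: DifyAPIWorkflowNodeExecutionRepository,
    current_tenant_id: str,
    requested_execution_id: str,
) -> WorkflowNodeExecutionModel:
    """Hypothetical helper: resolve an execution ID taken from an untrusted API request."""
    execution = repo.get_execution_by_id(
        execution_id=requested_execution_id,
        tenant_id=current_tenant_id,  # scopes the lookup to the caller's tenant
    )
    if execution is None:
        # Missing and foreign-tenant records look the same to the caller,
        # which avoids leaking whether the ID exists at all.
        raise ValueError("workflow node execution not found")
    return execution
```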
|
||||
|
||||
def delete_expired_executions(
|
||||
self,
|
||||
tenant_id: str,
|
||||
before_date: datetime,
|
||||
batch_size: int = 1000,
|
||||
) -> int:
|
||||
"""
|
||||
Delete workflow node executions that are older than the specified date.
|
||||
|
||||
This method is used for cleanup operations to remove expired executions
|
||||
in batches to avoid overwhelming the database.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant identifier
|
||||
before_date: Delete executions created before this date
|
||||
batch_size: Maximum number of executions to delete in one batch
|
||||
|
||||
Returns:
|
||||
The number of executions deleted
|
||||
"""
|
||||
...
|
||||
|
||||
def delete_executions_by_app(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
batch_size: int = 1000,
|
||||
) -> int:
|
||||
"""
|
||||
Delete all workflow node executions for a specific app.
|
||||
|
||||
This method is used when removing an app and all its related data.
|
||||
Executions are deleted in batches to avoid overwhelming the database.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant identifier
|
||||
app_id: The application identifier
|
||||
batch_size: Maximum number of executions to delete in one batch
|
||||
|
||||
Returns:
|
||||
The total number of executions deleted
|
||||
"""
|
||||
...
|
||||
|
||||
def get_expired_executions_batch(
|
||||
self,
|
||||
tenant_id: str,
|
||||
before_date: datetime,
|
||||
batch_size: int = 1000,
|
||||
) -> Sequence[WorkflowNodeExecutionModel]:
|
||||
"""
|
||||
Get a batch of expired workflow node executions for backup purposes.
|
||||
|
||||
This method retrieves expired executions without deleting them,
|
||||
allowing the caller to backup the data before deletion.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant identifier
|
||||
before_date: Get executions created before this date
|
||||
batch_size: Maximum number of executions to retrieve
|
||||
|
||||
Returns:
|
||||
A sequence of WorkflowNodeExecutionModel instances
|
||||
"""
|
||||
...
|
||||
|
||||
def delete_executions_by_ids(
|
||||
self,
|
||||
execution_ids: Sequence[str],
|
||||
) -> int:
|
||||
"""
|
||||
Delete workflow node executions by their IDs.
|
||||
|
||||
This method deletes specific executions by their IDs,
|
||||
typically used after backing up the data.
|
||||
|
||||
This method does not perform tenant isolation checks. The caller is responsible for ensuring proper
|
||||
data isolation between tenants. When execution IDs come from untrusted sources (e.g., API requests),
|
||||
additional tenant validation should be implemented to prevent unauthorized access.
|
||||
|
||||
Args:
|
||||
execution_ids: List of execution IDs to delete
|
||||
|
||||
Returns:
|
||||
The number of executions deleted
|
||||
"""
|
||||
...
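The batch methods above are designed to be combined. Below is a sketch of an expiry job that backs data up before deleting it; it assumes some `backup` callable exists, and nothing beyond the repository methods comes from this file.

```python
from collections.abc import Callable, Sequence
from datetime import datetime

from models.workflow import WorkflowNodeExecutionModel
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository


def archive_then_delete_expired(
    repo: DifyAPIWorkflowNodeExecutionRepository,
    tenant_id: str,
    before_date: datetime,
    backup: Callable[[Sequence[WorkflowNodeExecutionModel]], None],
    batch_size: int = 1000,
) -> int:
    """Hypothetical cleanup job: back up each batch of expired executions, then delete it."""
    total_deleted = 0
    while True:
        batch = repo.get_expired_executions_batch(
            tenant_id=tenant_id,
            before_date=before_date,
            batch_size=batch_size,
        )
        if not batch:
            break
        backup(batch)  # persist the rows (e.g. to object storage) before removal
        total_deleted += repo.delete_executions_by_ids([execution.id for execution in batch])
        if len(batch) < batch_size:
            break
    return total_deleted
```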
|
||||
479
dify/api/repositories/api_workflow_run_repository.py
Normal file
@@ -0,0 +1,479 @@
|
||||
"""
|
||||
API WorkflowRun Repository Protocol
|
||||
|
||||
This module defines the protocol for service-layer WorkflowRun operations.
|
||||
The repository provides an abstraction layer for WorkflowRun database operations
|
||||
used by service classes, separating service-layer concerns from core domain logic.
|
||||
|
||||
Key Features:
|
||||
- Paginated workflow run queries with filtering
|
||||
- Bulk deletion operations with OSS backup support
|
||||
- Multi-tenant data isolation
|
||||
- Expired record cleanup with data retention
|
||||
- Service-layer specific query patterns
|
||||
|
||||
Usage:
|
||||
This protocol should be used by service classes that need to perform
|
||||
WorkflowRun database operations. It provides a clean interface that
|
||||
hides implementation details and supports dependency injection.
|
||||
|
||||
Example:
|
||||
```python
|
||||
from repositories.dify_api_repository_factory import DifyAPIRepositoryFactory
|
||||
|
||||
session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
|
||||
repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
|
||||
|
||||
# Get paginated workflow runs
|
||||
runs = repo.get_paginated_workflow_runs(
|
||||
tenant_id="tenant-123",
|
||||
app_id="app-456",
|
||||
triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
|
||||
limit=20
|
||||
)
|
||||
```
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import Protocol
|
||||
|
||||
from core.workflow.entities.workflow_pause import WorkflowPauseEntity
|
||||
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
|
||||
from libs.infinite_scroll_pagination import InfiniteScrollPagination
|
||||
from models.enums import WorkflowRunTriggeredFrom
|
||||
from models.workflow import WorkflowRun
|
||||
from repositories.types import (
|
||||
AverageInteractionStats,
|
||||
DailyRunsStats,
|
||||
DailyTerminalsStats,
|
||||
DailyTokenCostStats,
|
||||
)
|
||||
|
||||
|
||||
class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol):
|
||||
"""
|
||||
Protocol for service-layer WorkflowRun repository operations.
|
||||
|
||||
This protocol defines the interface for WorkflowRun database operations
|
||||
that are specific to service-layer needs, including pagination, filtering,
|
||||
and bulk operations with data backup support.
|
||||
"""
|
||||
|
||||
def get_paginated_workflow_runs(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
triggered_from: WorkflowRunTriggeredFrom | Sequence[WorkflowRunTriggeredFrom],
|
||||
limit: int = 20,
|
||||
last_id: str | None = None,
|
||||
status: str | None = None,
|
||||
) -> InfiniteScrollPagination:
|
||||
"""
|
||||
Get paginated workflow runs with filtering.
|
||||
|
||||
Retrieves workflow runs for a specific app and trigger source with
|
||||
cursor-based pagination support. Used primarily for debugging and
|
||||
workflow run listing in the UI.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
triggered_from: Filter by trigger source(s) (e.g., "debugging", "app-run", or list of values)
|
||||
limit: Maximum number of records to return (default: 20)
|
||||
last_id: Cursor for pagination - ID of the last record from previous page
|
||||
status: Optional filter by status (e.g., "running", "succeeded", "failed")
|
||||
|
||||
Returns:
|
||||
InfiniteScrollPagination object containing:
|
||||
- data: List of WorkflowRun objects
|
||||
- limit: Applied limit
|
||||
- has_more: Boolean indicating if more records exist
|
||||
|
||||
Raises:
|
||||
ValueError: If last_id is provided but the corresponding record doesn't exist
|
||||
"""
|
||||
...
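To make the cursor contract concrete, here is a sketch of iterating through every page by feeding the last returned ID back in as `last_id`. The iteration helper is hypothetical; the repository call and the pagination fields are from this file.

```python
from collections.abc import Iterator

from models.enums import WorkflowRunTriggeredFrom
from models.workflow import WorkflowRun
from repositories.api_workflow_run_repository import APIWorkflowRunRepository


def iterate_debug_runs(
    repo: APIWorkflowRunRepository,
    tenant_id: str,
    app_id: str,
    page_size: int = 20,
) -> Iterator[WorkflowRun]:
    """Hypothetical helper: walk all debugging runs page by page using the cursor."""
    last_id: str | None = None
    while True:
        page = repo.get_paginated_workflow_runs(
            tenant_id=tenant_id,
            app_id=app_id,
            triggered_from=WorkflowRunTriggeredFrom.DEBUGGING,
            limit=page_size,
            last_id=last_id,
        )
        yield from page.data
        if not page.has_more:
            break
        last_id = page.data[-1].id  # cursor for the next page
```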
|
||||
|
||||
def get_workflow_run_by_id(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
run_id: str,
|
||||
) -> WorkflowRun | None:
|
||||
"""
|
||||
Get a specific workflow run by ID.
|
||||
|
||||
Retrieves a single workflow run with tenant and app isolation.
|
||||
Used for workflow run detail views and execution tracking.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
run_id: Workflow run identifier
|
||||
|
||||
Returns:
|
||||
WorkflowRun object if found, None otherwise
|
||||
"""
|
||||
...
|
||||
|
||||
def get_workflow_run_by_id_without_tenant(
|
||||
self,
|
||||
run_id: str,
|
||||
) -> WorkflowRun | None:
|
||||
"""
|
||||
Get a specific workflow run by ID without tenant/app context.
|
||||
|
||||
Retrieves a single workflow run using only the run ID, without
|
||||
requiring tenant_id or app_id. This method is intended for internal
|
||||
system operations like tracing and monitoring where the tenant context
|
||||
is not available upfront.
|
||||
|
||||
Args:
|
||||
run_id: Workflow run identifier
|
||||
|
||||
Returns:
|
||||
WorkflowRun object if found, None otherwise
|
||||
|
||||
Note:
|
||||
This method bypasses tenant isolation checks and should only be used
|
||||
in trusted system contexts like ops trace collection. For user-facing
|
||||
operations, use get_workflow_run_by_id() with proper tenant isolation.
|
||||
"""
|
||||
...
|
||||
|
||||
def get_workflow_runs_count(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
triggered_from: str,
|
||||
status: str | None = None,
|
||||
time_range: str | None = None,
|
||||
) -> dict[str, int]:
|
||||
"""
|
||||
Get workflow runs count statistics.
|
||||
|
||||
Retrieves total count and count by status for workflow runs
|
||||
matching the specified filters.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
triggered_from: Filter by trigger source (e.g., "debugging", "app-run")
|
||||
status: Optional filter by specific status
|
||||
time_range: Optional time range filter (e.g., "7d", "4h", "30m", "30s")
|
||||
Filters records based on created_at field
|
||||
|
||||
Returns:
|
||||
Dictionary containing:
|
||||
- total: Total count of all workflow runs (or filtered by status)
|
||||
- running: Count of workflow runs with status "running"
|
||||
- succeeded: Count of workflow runs with status "succeeded"
|
||||
- failed: Count of workflow runs with status "failed"
|
||||
- stopped: Count of workflow runs with status "stopped"
|
||||
- partial_succeeded: Count of workflow runs with status "partial-succeeded"
|
||||
|
||||
Note: If a status is provided, 'total' will be the count for that status,
|
||||
and the specific status count will also be set to this value, with all
|
||||
other status counts being 0.
|
||||
"""
|
||||
...
|
||||
|
||||
def get_expired_runs_batch(
|
||||
self,
|
||||
tenant_id: str,
|
||||
before_date: datetime,
|
||||
batch_size: int = 1000,
|
||||
) -> Sequence[WorkflowRun]:
|
||||
"""
|
||||
Get a batch of expired workflow runs for cleanup.
|
||||
|
||||
Retrieves workflow runs created before the specified date for
|
||||
cleanup operations. Used by scheduled tasks to remove old data
|
||||
while maintaining data retention policies.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
before_date: Only return runs created before this date
|
||||
batch_size: Maximum number of records to return
|
||||
|
||||
Returns:
|
||||
Sequence of WorkflowRun objects to be processed for cleanup
|
||||
"""
|
||||
...
|
||||
|
||||
def delete_runs_by_ids(
|
||||
self,
|
||||
run_ids: Sequence[str],
|
||||
) -> int:
|
||||
"""
|
||||
Delete workflow runs by their IDs.
|
||||
|
||||
Performs bulk deletion of workflow runs by ID. This method should
|
||||
be used after backing up the data to OSS storage for retention.
|
||||
|
||||
Args:
|
||||
run_ids: Sequence of workflow run IDs to delete
|
||||
|
||||
Returns:
|
||||
Number of records actually deleted
|
||||
|
||||
Note:
|
||||
This method performs hard deletion. Ensure data is backed up
|
||||
to OSS storage before calling this method for compliance with
|
||||
data retention policies.
|
||||
"""
|
||||
...
|
||||
|
||||
def delete_runs_by_app(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
batch_size: int = 1000,
|
||||
) -> int:
|
||||
"""
|
||||
Delete all workflow runs for a specific app.
|
||||
|
||||
Performs bulk deletion of all workflow runs associated with an app.
|
||||
Used during app cleanup operations. Processes records in batches
|
||||
to avoid memory issues and long-running transactions.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
batch_size: Number of records to process in each batch
|
||||
|
||||
Returns:
|
||||
Total number of records deleted across all batches
|
||||
|
||||
Note:
|
||||
This method performs hard deletion without backup. Use with caution
|
||||
and ensure proper data retention policies are followed.
|
||||
"""
|
||||
...
|
||||
|
||||
def create_workflow_pause(
|
||||
self,
|
||||
workflow_run_id: str,
|
||||
state_owner_user_id: str,
|
||||
state: str,
|
||||
) -> WorkflowPauseEntity:
|
||||
"""
|
||||
Create a new workflow pause state.
|
||||
|
||||
Creates a pause state for a workflow run, storing the current execution
|
||||
state and marking the workflow as paused. This is used when a workflow
|
||||
needs to be suspended and later resumed.
|
||||
|
||||
Args:
|
||||
workflow_run_id: Identifier of the workflow run to pause
|
||||
state_owner_user_id: User ID who owns the pause state for file storage
|
||||
state: Serialized workflow execution state (JSON string)
|
||||
|
||||
Returns:
|
||||
WorkflowPauseEntity representing the created pause state
|
||||
|
||||
Raises:
|
||||
ValueError: If workflow_run_id is invalid or workflow run doesn't exist
|
||||
RuntimeError: If workflow is already paused or in invalid state
|
||||
"""
|
||||
# NOTE: we may get rid of the `state_owner_user_id` parameter.
# However, removing it would require an extra lookup of the `Workflow` model
# while creating the pause.
|
||||
...
|
||||
|
||||
def resume_workflow_pause(
|
||||
self,
|
||||
workflow_run_id: str,
|
||||
pause_entity: WorkflowPauseEntity,
|
||||
) -> WorkflowPauseEntity:
|
||||
"""
|
||||
Resume a paused workflow.
|
||||
|
||||
Marks a paused workflow as resumed, sets the `resumed_at` field of WorkflowPauseEntity,
and returns the workflow to running status. Returns the pause entity
that was resumed.
|
||||
|
||||
The returned `WorkflowPauseEntity` model has `resumed_at` set.
|
||||
|
||||
NOTE: this method does not delete the corresponding `WorkflowPauseEntity` record and associated states.
It's the caller's responsibility to clear the corresponding state with `delete_workflow_pause`.
|
||||
|
||||
Args:
|
||||
workflow_run_id: Identifier of the workflow run to resume
|
||||
pause_entity: The pause entity to resume
|
||||
|
||||
Returns:
|
||||
WorkflowPauseEntity representing the resumed pause state
|
||||
|
||||
Raises:
|
||||
ValueError: If workflow_run_id is invalid
|
||||
RuntimeError: If workflow is not paused or already resumed
|
||||
"""
|
||||
...
|
||||
|
||||
def delete_workflow_pause(
|
||||
self,
|
||||
pause_entity: WorkflowPauseEntity,
|
||||
) -> None:
|
||||
"""
|
||||
Delete a workflow pause state.
|
||||
|
||||
Permanently removes the pause state for a workflow run, including
|
||||
the stored state file. Used for cleanup operations when a paused
|
||||
workflow is no longer needed.
|
||||
|
||||
Args:
|
||||
pause_entity: The pause entity to delete
|
||||
|
||||
Raises:
|
||||
ValueError: If pause_entity is invalid
|
||||
RuntimeError: If workflow is not paused
|
||||
|
||||
Note:
|
||||
This operation is irreversible. The stored workflow state will be
|
||||
permanently deleted along with the pause record.
|
||||
"""
|
||||
...
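Taken together, the three pause methods form a lifecycle: create a pause while the run is running, resume it later, and only then explicitly delete the record (per the NOTE on resume_workflow_pause). A sketch of that sequence follows; how the state string is produced is an assumption.

```python
from repositories.api_workflow_run_repository import APIWorkflowRunRepository


def pause_and_later_resume(repo: APIWorkflowRunRepository, workflow_run_id: str, owner_user_id: str) -> None:
    """Hypothetical flow showing the pause lifecycle end to end."""
    serialized_state = '{"nodes": {}}'  # placeholder for the real serialized execution state

    # 1. Suspend a RUNNING workflow run and persist its state.
    pause = repo.create_workflow_pause(
        workflow_run_id=workflow_run_id,
        state_owner_user_id=owner_user_id,
        state=serialized_state,
    )

    # 2. Later, bring the run back to RUNNING; the pause record is kept, with resumed_at set.
    resumed = repo.resume_workflow_pause(workflow_run_id=workflow_run_id, pause_entity=pause)

    # 3. Resuming does not clean up the record or its stored state, so do it explicitly.
    repo.delete_workflow_pause(pause_entity=resumed)
```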
|
||||
|
||||
def prune_pauses(
|
||||
self,
|
||||
expiration: datetime,
|
||||
resumption_expiration: datetime,
|
||||
limit: int | None = None,
|
||||
) -> Sequence[str]:
|
||||
"""
|
||||
Clean up expired and old pause states.
|
||||
|
||||
Removes pause states that have expired (created before expiration time)
|
||||
and pause states that were resumed more than resumption_duration ago.
|
||||
This is used for maintenance and cleanup operations.
|
||||
|
||||
Args:
|
||||
expiration: Remove pause states created before this time
|
||||
resumption_expiration: Remove pause states resumed before this time
|
||||
limit: Maximum number of records to delete in one call

Returns:
A list of IDs of the pause records that were pruned
|
||||
|
||||
Raises:
|
||||
ValueError: If parameters are invalid
|
||||
"""
|
||||
...
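A sketch of how a scheduled maintenance task might derive the two cut-off datetimes and call prune_pauses. The retention windows shown are arbitrary assumptions, not values from this codebase.

```python
from datetime import datetime, timedelta

from repositories.api_workflow_run_repository import APIWorkflowRunRepository


def prune_stale_pauses(repo: APIWorkflowRunRepository) -> int:
    """Hypothetical maintenance task: drop pauses older than 7 days and pauses resumed over 1 day ago."""
    now = datetime.utcnow()  # naive UTC, matching the created_at/resumed_at columns
    pruned_ids = repo.prune_pauses(
        expiration=now - timedelta(days=7),             # pauses created before this are removed
        resumption_expiration=now - timedelta(days=1),  # resumed pauses older than this are removed
        limit=1000,                                     # bound the work done per invocation
    )
    return len(pruned_ids)
```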
|
||||
|
||||
def get_daily_runs_statistics(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
triggered_from: str,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
timezone: str = "UTC",
|
||||
) -> list[DailyRunsStats]:
|
||||
"""
|
||||
Get daily runs statistics.
|
||||
|
||||
Retrieves daily workflow runs count grouped by date for a specific app
|
||||
and trigger source. Used for workflow statistics dashboard.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
triggered_from: Filter by trigger source (e.g., "app-run")
|
||||
start_date: Optional start date filter
|
||||
end_date: Optional end date filter
|
||||
timezone: Timezone for date grouping (default: "UTC")
|
||||
|
||||
Returns:
|
||||
List of dictionaries containing date and runs count:
|
||||
[{"date": "2024-01-01", "runs": 10}, ...]
|
||||
"""
|
||||
...
|
||||
|
||||
def get_daily_terminals_statistics(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
triggered_from: str,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
timezone: str = "UTC",
|
||||
) -> list[DailyTerminalsStats]:
|
||||
"""
|
||||
Get daily terminals statistics.
|
||||
|
||||
Retrieves daily unique terminal count grouped by date for a specific app
|
||||
and trigger source. Used for workflow statistics dashboard.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
triggered_from: Filter by trigger source (e.g., "app-run")
|
||||
start_date: Optional start date filter
|
||||
end_date: Optional end date filter
|
||||
timezone: Timezone for date grouping (default: "UTC")
|
||||
|
||||
Returns:
|
||||
List of dictionaries containing date and terminal count:
|
||||
[{"date": "2024-01-01", "terminal_count": 5}, ...]
|
||||
"""
|
||||
...
|
||||
|
||||
def get_daily_token_cost_statistics(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
triggered_from: str,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
timezone: str = "UTC",
|
||||
) -> list[DailyTokenCostStats]:
|
||||
"""
|
||||
Get daily token cost statistics.
|
||||
|
||||
Retrieves daily total token count grouped by date for a specific app
|
||||
and trigger source. Used for workflow statistics dashboard.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
triggered_from: Filter by trigger source (e.g., "app-run")
|
||||
start_date: Optional start date filter
|
||||
end_date: Optional end date filter
|
||||
timezone: Timezone for date grouping (default: "UTC")
|
||||
|
||||
Returns:
|
||||
List of dictionaries containing date and token count:
|
||||
[{"date": "2024-01-01", "token_count": 1000}, ...]
|
||||
"""
|
||||
...
|
||||
|
||||
def get_average_app_interaction_statistics(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
triggered_from: str,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
timezone: str = "UTC",
|
||||
) -> list[AverageInteractionStats]:
|
||||
"""
|
||||
Get average app interaction statistics.
|
||||
|
||||
Retrieves daily average interactions per user grouped by date for a specific app
|
||||
and trigger source. Used for workflow statistics dashboard.
|
||||
|
||||
Args:
|
||||
tenant_id: Tenant identifier for multi-tenant isolation
|
||||
app_id: Application identifier
|
||||
triggered_from: Filter by trigger source (e.g., "app-run")
|
||||
start_date: Optional start date filter
|
||||
end_date: Optional end date filter
|
||||
timezone: Timezone for date grouping (default: "UTC")
|
||||
|
||||
Returns:
|
||||
List of dictionaries containing date and average interactions:
|
||||
[{"date": "2024-01-01", "interactions": 2.5}, ...]
|
||||
"""
|
||||
...
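The four statistics methods share the same parameters, so a dashboard endpoint can fan out over them with one set of filters. A sketch follows, under the assumption that the caller assembles a plain dict for the response.

```python
from datetime import datetime

from repositories.api_workflow_run_repository import APIWorkflowRunRepository


def build_statistics_payload(
    repo: APIWorkflowRunRepository,
    tenant_id: str,
    app_id: str,
    start_date: datetime | None = None,
    end_date: datetime | None = None,
    timezone: str = "UTC",
) -> dict:
    """Hypothetical dashboard helper: gather all daily statistics with one set of filters."""
    common = dict(
        tenant_id=tenant_id,
        app_id=app_id,
        triggered_from="app-run",
        start_date=start_date,
        end_date=end_date,
        timezone=timezone,
    )
    return {
        "daily_runs": repo.get_daily_runs_statistics(**common),
        "daily_terminals": repo.get_daily_terminals_statistics(**common),
        "daily_token_costs": repo.get_daily_token_cost_statistics(**common),
        "average_interactions": repo.get_average_app_interaction_statistics(**common),
    }
```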
|
||||
82
dify/api/repositories/factory.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
DifyAPI Repository Factory for creating repository instances.
|
||||
|
||||
This factory is specifically designed for DifyAPI repositories that handle
|
||||
service-layer operations with dependency injection patterns.
|
||||
"""
|
||||
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from configs import dify_config
|
||||
from core.repositories import DifyCoreRepositoryFactory, RepositoryImportError
|
||||
from libs.module_loading import import_string
|
||||
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
|
||||
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
|
||||
|
||||
|
||||
class DifyAPIRepositoryFactory(DifyCoreRepositoryFactory):
|
||||
"""
|
||||
Factory for creating DifyAPI repository instances based on configuration.
|
||||
|
||||
This factory handles the creation of repositories that are specifically designed
|
||||
for service-layer operations and use dependency injection with sessionmaker
|
||||
for better testability and separation of concerns.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def create_api_workflow_node_execution_repository(
|
||||
cls, session_maker: sessionmaker[Session]
|
||||
) -> DifyAPIWorkflowNodeExecutionRepository:
|
||||
"""
|
||||
Create a DifyAPIWorkflowNodeExecutionRepository instance based on configuration.
|
||||
|
||||
This repository is designed for service-layer operations and uses dependency injection
|
||||
with a sessionmaker for better testability and separation of concerns. It provides
|
||||
database access patterns specifically needed by service classes, handling queries
|
||||
that involve database-specific fields and multi-tenancy concerns.
|
||||
|
||||
Args:
|
||||
session_maker: SQLAlchemy sessionmaker to inject for database session management.
|
||||
|
||||
Returns:
|
||||
Configured DifyAPIWorkflowNodeExecutionRepository instance
|
||||
|
||||
Raises:
|
||||
RepositoryImportError: If the configured repository cannot be imported or instantiated
|
||||
"""
|
||||
class_path = dify_config.API_WORKFLOW_NODE_EXECUTION_REPOSITORY
|
||||
|
||||
try:
|
||||
repository_class = import_string(class_path)
|
||||
return repository_class(session_maker=session_maker)
|
||||
except (ImportError, Exception) as e:
|
||||
raise RepositoryImportError(
|
||||
f"Failed to create DifyAPIWorkflowNodeExecutionRepository from '{class_path}': {e}"
|
||||
) from e
|
||||
|
||||
@classmethod
|
||||
def create_api_workflow_run_repository(cls, session_maker: sessionmaker[Session]) -> APIWorkflowRunRepository:
|
||||
"""
|
||||
Create an APIWorkflowRunRepository instance based on configuration.
|
||||
|
||||
This repository is designed for service-layer WorkflowRun operations and uses dependency
|
||||
injection with a sessionmaker for better testability and separation of concerns. It provides
|
||||
database access patterns specifically needed by service classes for workflow run management,
|
||||
including pagination, filtering, and bulk operations.
|
||||
|
||||
Args:
|
||||
session_maker: SQLAlchemy sessionmaker to inject for database session management.
|
||||
|
||||
Returns:
|
||||
Configured APIWorkflowRunRepository instance
|
||||
|
||||
Raises:
|
||||
RepositoryImportError: If the configured repository cannot be imported or instantiated
|
||||
"""
|
||||
class_path = dify_config.API_WORKFLOW_RUN_REPOSITORY
|
||||
|
||||
try:
|
||||
repository_class = import_string(class_path)
|
||||
return repository_class(session_maker=session_maker)
|
||||
except (ImportError, Exception) as e:
|
||||
raise RepositoryImportError(f"Failed to create APIWorkflowRunRepository from '{class_path}': {e}") from e
|
||||
@@ -0,0 +1,292 @@
|
||||
"""
|
||||
SQLAlchemy implementation of DifyAPIWorkflowNodeExecutionRepository.
|
||||
|
||||
This module provides a concrete implementation of the service repository protocol
|
||||
using SQLAlchemy 2.0 style queries for WorkflowNodeExecutionModel operations.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from typing import cast
|
||||
|
||||
from sqlalchemy import asc, delete, desc, select
|
||||
from sqlalchemy.engine import CursorResult
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from models.workflow import WorkflowNodeExecutionModel
|
||||
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
|
||||
|
||||
|
||||
class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecutionRepository):
|
||||
"""
|
||||
SQLAlchemy implementation of DifyAPIWorkflowNodeExecutionRepository.
|
||||
|
||||
This repository provides service-layer database operations for WorkflowNodeExecutionModel
|
||||
using SQLAlchemy 2.0 style queries. It implements the DifyAPIWorkflowNodeExecutionRepository
|
||||
protocol with the following features:
|
||||
|
||||
- Multi-tenancy data isolation through tenant_id filtering
|
||||
- Direct database model operations without domain conversion
|
||||
- Batch processing for efficient large-scale operations
|
||||
- Optimized query patterns for common access patterns
|
||||
- Dependency injection for better testability and maintainability
|
||||
- Session management and transaction handling with proper cleanup
|
||||
- Maintenance operations for data lifecycle management
|
||||
- Thread-safe database operations using session-per-request pattern
|
||||
"""
|
||||
|
||||
def __init__(self, session_maker: sessionmaker[Session]):
|
||||
"""
|
||||
Initialize the repository with a sessionmaker.
|
||||
|
||||
Args:
|
||||
session_maker: SQLAlchemy sessionmaker for creating database sessions
|
||||
"""
|
||||
self._session_maker = session_maker
|
||||
|
||||
def get_node_last_execution(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
workflow_id: str,
|
||||
node_id: str,
|
||||
) -> WorkflowNodeExecutionModel | None:
|
||||
"""
|
||||
Get the most recent execution for a specific node.
|
||||
|
||||
This method replicates the query pattern from WorkflowService.get_node_last_run()
|
||||
using SQLAlchemy 2.0 style syntax.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant identifier
|
||||
app_id: The application identifier
|
||||
workflow_id: The workflow identifier
|
||||
node_id: The node identifier
|
||||
|
||||
Returns:
|
||||
The most recent WorkflowNodeExecutionModel for the node, or None if not found.
|
||||
|
||||
The returned WorkflowNodeExecutionModel will have `offload_data` preloaded.
|
||||
"""
|
||||
stmt = select(WorkflowNodeExecutionModel)
|
||||
stmt = WorkflowNodeExecutionModel.preload_offload_data(stmt)
|
||||
stmt = (
|
||||
stmt.where(
|
||||
WorkflowNodeExecutionModel.tenant_id == tenant_id,
|
||||
WorkflowNodeExecutionModel.app_id == app_id,
|
||||
WorkflowNodeExecutionModel.workflow_id == workflow_id,
|
||||
WorkflowNodeExecutionModel.node_id == node_id,
|
||||
)
|
||||
.order_by(desc(WorkflowNodeExecutionModel.created_at))
|
||||
.limit(1)
|
||||
)
|
||||
|
||||
with self._session_maker() as session:
|
||||
return session.scalar(stmt)
|
||||
|
||||
def get_executions_by_workflow_run(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
workflow_run_id: str,
|
||||
) -> Sequence[WorkflowNodeExecutionModel]:
|
||||
"""
|
||||
Get all node executions for a specific workflow run.
|
||||
|
||||
This method replicates the query pattern from WorkflowRunService.get_workflow_run_node_executions()
|
||||
using SQLAlchemy 2.0 style syntax.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant identifier
|
||||
app_id: The application identifier
|
||||
workflow_run_id: The workflow run identifier
|
||||
|
||||
Returns:
|
||||
A sequence of WorkflowNodeExecutionModel instances ordered by created_at (ascending)
|
||||
"""
|
||||
stmt = WorkflowNodeExecutionModel.preload_offload_data(select(WorkflowNodeExecutionModel))
|
||||
stmt = stmt.where(
|
||||
WorkflowNodeExecutionModel.tenant_id == tenant_id,
|
||||
WorkflowNodeExecutionModel.app_id == app_id,
|
||||
WorkflowNodeExecutionModel.workflow_run_id == workflow_run_id,
|
||||
).order_by(asc(WorkflowNodeExecutionModel.created_at))
|
||||
|
||||
with self._session_maker() as session:
|
||||
return session.execute(stmt).scalars().all()
|
||||
|
||||
def get_execution_by_id(
|
||||
self,
|
||||
execution_id: str,
|
||||
tenant_id: str | None = None,
|
||||
) -> WorkflowNodeExecutionModel | None:
|
||||
"""
|
||||
Get a workflow node execution by its ID.
|
||||
|
||||
This method replicates the query pattern from WorkflowDraftVariableService
|
||||
and WorkflowService.single_step_run_workflow_node() using SQLAlchemy 2.0 style syntax.
|
||||
|
||||
When `tenant_id` is None, it's the caller's responsibility to ensure proper data isolation between tenants.
|
||||
If the `execution_id` comes from untrusted sources (e.g., retrieved from an API request), the caller should
|
||||
set `tenant_id` to prevent horizontal privilege escalation.
|
||||
|
||||
Args:
|
||||
execution_id: The execution identifier
|
||||
tenant_id: Optional tenant identifier for additional filtering
|
||||
|
||||
Returns:
|
||||
The WorkflowNodeExecutionModel if found, or None if not found
|
||||
"""
|
||||
stmt = WorkflowNodeExecutionModel.preload_offload_data(select(WorkflowNodeExecutionModel))
|
||||
stmt = stmt.where(WorkflowNodeExecutionModel.id == execution_id)
|
||||
|
||||
# Add tenant filtering if provided
|
||||
if tenant_id is not None:
|
||||
stmt = stmt.where(WorkflowNodeExecutionModel.tenant_id == tenant_id)
|
||||
|
||||
with self._session_maker() as session:
|
||||
return session.scalar(stmt)
|
||||
|
||||
def delete_expired_executions(
|
||||
self,
|
||||
tenant_id: str,
|
||||
before_date: datetime,
|
||||
batch_size: int = 1000,
|
||||
) -> int:
|
||||
"""
|
||||
Delete workflow node executions that are older than the specified date.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant identifier
|
||||
before_date: Delete executions created before this date
|
||||
batch_size: Maximum number of executions to delete in one batch
|
||||
|
||||
Returns:
|
||||
The number of executions deleted
|
||||
"""
|
||||
total_deleted = 0
|
||||
|
||||
while True:
|
||||
with self._session_maker() as session:
|
||||
# Find executions to delete in batches
|
||||
stmt = (
|
||||
select(WorkflowNodeExecutionModel.id)
|
||||
.where(
|
||||
WorkflowNodeExecutionModel.tenant_id == tenant_id,
|
||||
WorkflowNodeExecutionModel.created_at < before_date,
|
||||
)
|
||||
.limit(batch_size)
|
||||
)
|
||||
|
||||
execution_ids = session.execute(stmt).scalars().all()
|
||||
if not execution_ids:
|
||||
break
|
||||
|
||||
# Delete the batch
|
||||
delete_stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
|
||||
result = cast(CursorResult, session.execute(delete_stmt))
|
||||
session.commit()
|
||||
total_deleted += result.rowcount
|
||||
|
||||
# If we deleted fewer than the batch size, we're done
|
||||
if len(execution_ids) < batch_size:
|
||||
break
|
||||
|
||||
return total_deleted
|
||||
|
||||
def delete_executions_by_app(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
batch_size: int = 1000,
|
||||
) -> int:
|
||||
"""
|
||||
Delete all workflow node executions for a specific app.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant identifier
|
||||
app_id: The application identifier
|
||||
batch_size: Maximum number of executions to delete in one batch
|
||||
|
||||
Returns:
|
||||
The total number of executions deleted
|
||||
"""
|
||||
total_deleted = 0
|
||||
|
||||
while True:
|
||||
with self._session_maker() as session:
|
||||
# Find executions to delete in batches
|
||||
stmt = (
|
||||
select(WorkflowNodeExecutionModel.id)
|
||||
.where(
|
||||
WorkflowNodeExecutionModel.tenant_id == tenant_id,
|
||||
WorkflowNodeExecutionModel.app_id == app_id,
|
||||
)
|
||||
.limit(batch_size)
|
||||
)
|
||||
|
||||
execution_ids = session.execute(stmt).scalars().all()
|
||||
if not execution_ids:
|
||||
break
|
||||
|
||||
# Delete the batch
|
||||
delete_stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
|
||||
result = cast(CursorResult, session.execute(delete_stmt))
|
||||
session.commit()
|
||||
total_deleted += result.rowcount
|
||||
|
||||
# If we deleted fewer than the batch size, we're done
|
||||
if len(execution_ids) < batch_size:
|
||||
break
|
||||
|
||||
return total_deleted
|
||||
|
||||
def get_expired_executions_batch(
|
||||
self,
|
||||
tenant_id: str,
|
||||
before_date: datetime,
|
||||
batch_size: int = 1000,
|
||||
) -> Sequence[WorkflowNodeExecutionModel]:
|
||||
"""
|
||||
Get a batch of expired workflow node executions for backup purposes.
|
||||
|
||||
Args:
|
||||
tenant_id: The tenant identifier
|
||||
before_date: Get executions created before this date
|
||||
batch_size: Maximum number of executions to retrieve
|
||||
|
||||
Returns:
|
||||
A sequence of WorkflowNodeExecutionModel instances
|
||||
"""
|
||||
stmt = (
|
||||
select(WorkflowNodeExecutionModel)
|
||||
.where(
|
||||
WorkflowNodeExecutionModel.tenant_id == tenant_id,
|
||||
WorkflowNodeExecutionModel.created_at < before_date,
|
||||
)
|
||||
.limit(batch_size)
|
||||
)
|
||||
|
||||
with self._session_maker() as session:
|
||||
return session.execute(stmt).scalars().all()
|
||||
|
||||
def delete_executions_by_ids(
|
||||
self,
|
||||
execution_ids: Sequence[str],
|
||||
) -> int:
|
||||
"""
|
||||
Delete workflow node executions by their IDs.
|
||||
|
||||
Args:
|
||||
execution_ids: List of execution IDs to delete
|
||||
|
||||
Returns:
|
||||
The number of executions deleted
|
||||
"""
|
||||
if not execution_ids:
|
||||
return 0
|
||||
|
||||
with self._session_maker() as session:
|
||||
stmt = delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(execution_ids))
|
||||
result = cast(CursorResult, session.execute(stmt))
|
||||
session.commit()
|
||||
return result.rowcount
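Because the repository takes the sessionmaker through its constructor, it can be wired up directly, for example against a throwaway engine in a unit test. This is only a sketch: the module path of the concrete class is not visible in this diff hunk, so the import below is an assumption mirroring the naming of the sibling workflow-run file, and production code would normally go through DifyAPIRepositoryFactory instead.

```python
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Import path assumed; only the class name appears in this hunk.
from repositories.sqlalchemy_api_workflow_node_execution_repository import (
    DifyAPISQLAlchemyWorkflowNodeExecutionRepository,
)

engine = create_engine("sqlite://")  # stand-in engine; real deployments bind to the application database
session_maker = sessionmaker(bind=engine, expire_on_commit=False)
repo = DifyAPISQLAlchemyWorkflowNodeExecutionRepository(session_maker=session_maker)
```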
|
||||
869
dify/api/repositories/sqlalchemy_api_workflow_run_repository.py
Normal file
@@ -0,0 +1,869 @@
|
||||
"""
|
||||
SQLAlchemy API WorkflowRun Repository Implementation
|
||||
|
||||
This module provides the SQLAlchemy-based implementation of the APIWorkflowRunRepository
|
||||
protocol. It handles service-layer WorkflowRun database operations using SQLAlchemy 2.0
|
||||
style queries with proper session management and multi-tenant data isolation.
|
||||
|
||||
Key Features:
|
||||
- SQLAlchemy 2.0 style queries for modern database operations
|
||||
- Cursor-based pagination for efficient large dataset handling
|
||||
- Bulk operations with batch processing for performance
|
||||
- Multi-tenant data isolation and security
|
||||
- Proper session management with dependency injection
|
||||
|
||||
Implementation Notes:
|
||||
- Uses sessionmaker for consistent session management
|
||||
- Implements cursor-based pagination using created_at timestamps
|
||||
- Provides efficient bulk deletion with batch processing
|
||||
- Maintains data consistency with proper transaction handling
|
||||
"""
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from collections.abc import Sequence
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from typing import Any, cast
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import and_, delete, func, null, or_, select
|
||||
from sqlalchemy.engine import CursorResult
|
||||
from sqlalchemy.orm import Session, selectinload, sessionmaker
|
||||
|
||||
from core.workflow.entities.workflow_pause import WorkflowPauseEntity
|
||||
from core.workflow.enums import WorkflowExecutionStatus
|
||||
from extensions.ext_storage import storage
|
||||
from libs.datetime_utils import naive_utc_now
|
||||
from libs.helper import convert_datetime_to_date
|
||||
from libs.infinite_scroll_pagination import InfiniteScrollPagination
|
||||
from libs.time_parser import get_time_threshold
|
||||
from libs.uuid_utils import uuidv7
|
||||
from models.enums import WorkflowRunTriggeredFrom
|
||||
from models.workflow import WorkflowPause as WorkflowPauseModel
|
||||
from models.workflow import WorkflowRun
|
||||
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
|
||||
from repositories.types import (
|
||||
AverageInteractionStats,
|
||||
DailyRunsStats,
|
||||
DailyTerminalsStats,
|
||||
DailyTokenCostStats,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class _WorkflowRunError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):
|
||||
"""
|
||||
SQLAlchemy implementation of APIWorkflowRunRepository.
|
||||
|
||||
Provides service-layer WorkflowRun database operations using SQLAlchemy 2.0
|
||||
style queries. Supports dependency injection through sessionmaker and
|
||||
maintains proper multi-tenant data isolation.
|
||||
|
||||
Args:
|
||||
session_maker: SQLAlchemy sessionmaker instance for database connections
|
||||
"""
|
||||
|
||||
def __init__(self, session_maker: sessionmaker[Session]):
|
||||
"""
|
||||
Initialize the repository with a sessionmaker.
|
||||
|
||||
Args:
|
||||
session_maker: SQLAlchemy sessionmaker for database connections
|
||||
"""
|
||||
self._session_maker = session_maker
|
||||
|
||||
def get_paginated_workflow_runs(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
triggered_from: WorkflowRunTriggeredFrom | Sequence[WorkflowRunTriggeredFrom],
|
||||
limit: int = 20,
|
||||
last_id: str | None = None,
|
||||
status: str | None = None,
|
||||
) -> InfiniteScrollPagination:
|
||||
"""
|
||||
Get paginated workflow runs with filtering.
|
||||
|
||||
Implements cursor-based pagination using created_at timestamps for
|
||||
efficient handling of large datasets. Filters by tenant, app, and
|
||||
trigger source for proper data isolation.
|
||||
"""
|
||||
with self._session_maker() as session:
|
||||
# Build base query with filters
|
||||
base_stmt = select(WorkflowRun).where(
|
||||
WorkflowRun.tenant_id == tenant_id,
|
||||
WorkflowRun.app_id == app_id,
|
||||
)
|
||||
|
||||
# Handle triggered_from values
|
||||
if isinstance(triggered_from, WorkflowRunTriggeredFrom):
|
||||
triggered_from = [triggered_from]
|
||||
if triggered_from:
|
||||
base_stmt = base_stmt.where(WorkflowRun.triggered_from.in_(triggered_from))
|
||||
|
||||
# Add optional status filter
|
||||
if status:
|
||||
base_stmt = base_stmt.where(WorkflowRun.status == status)
|
||||
|
||||
if last_id:
|
||||
# Get the last workflow run for cursor-based pagination
|
||||
last_run_stmt = base_stmt.where(WorkflowRun.id == last_id)
|
||||
last_workflow_run = session.scalar(last_run_stmt)
|
||||
|
||||
if not last_workflow_run:
|
||||
raise ValueError("Last workflow run not exists")
|
||||
|
||||
# Get records created before the last run's timestamp
|
||||
base_stmt = base_stmt.where(
|
||||
WorkflowRun.created_at < last_workflow_run.created_at,
|
||||
WorkflowRun.id != last_workflow_run.id,
|
||||
)
|
||||
|
||||
# First page - get most recent records
|
||||
workflow_runs = session.scalars(base_stmt.order_by(WorkflowRun.created_at.desc()).limit(limit + 1)).all()
|
||||
|
||||
# Check if there are more records for pagination
|
||||
has_more = len(workflow_runs) > limit
|
||||
if has_more:
|
||||
workflow_runs = workflow_runs[:-1]
|
||||
|
||||
return InfiniteScrollPagination(data=workflow_runs, limit=limit, has_more=has_more)
|
||||
|
||||
def get_workflow_run_by_id(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
run_id: str,
|
||||
) -> WorkflowRun | None:
|
||||
"""
|
||||
Get a specific workflow run by ID with tenant and app isolation.
|
||||
"""
|
||||
with self._session_maker() as session:
|
||||
stmt = select(WorkflowRun).where(
|
||||
WorkflowRun.tenant_id == tenant_id,
|
||||
WorkflowRun.app_id == app_id,
|
||||
WorkflowRun.id == run_id,
|
||||
)
|
||||
return session.scalar(stmt)
|
||||
|
||||
def get_workflow_run_by_id_without_tenant(
|
||||
self,
|
||||
run_id: str,
|
||||
) -> WorkflowRun | None:
|
||||
"""
|
||||
Get a specific workflow run by ID without tenant/app context.
|
||||
"""
|
||||
with self._session_maker() as session:
|
||||
stmt = select(WorkflowRun).where(WorkflowRun.id == run_id)
|
||||
return session.scalar(stmt)
|
||||
|
||||
def get_workflow_runs_count(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
triggered_from: str,
|
||||
status: str | None = None,
|
||||
time_range: str | None = None,
|
||||
) -> dict[str, int]:
|
||||
"""
|
||||
Get workflow runs count statistics grouped by status.
|
||||
"""
|
||||
_initial_status_counts = {
|
||||
"running": 0,
|
||||
"succeeded": 0,
|
||||
"failed": 0,
|
||||
"stopped": 0,
|
||||
"partial-succeeded": 0,
|
||||
}
|
||||
|
||||
with self._session_maker() as session:
|
||||
# Build base where conditions
|
||||
base_conditions = [
|
||||
WorkflowRun.tenant_id == tenant_id,
|
||||
WorkflowRun.app_id == app_id,
|
||||
WorkflowRun.triggered_from == triggered_from,
|
||||
]
|
||||
|
||||
# Add time range filter if provided
|
||||
if time_range:
|
||||
time_threshold = get_time_threshold(time_range)
|
||||
if time_threshold:
|
||||
base_conditions.append(WorkflowRun.created_at >= time_threshold)
|
||||
|
||||
# If status filter is provided, return simple count
|
||||
if status:
|
||||
count_stmt = select(func.count(WorkflowRun.id)).where(*base_conditions, WorkflowRun.status == status)
|
||||
total = session.scalar(count_stmt) or 0
|
||||
|
||||
result = {"total": total} | _initial_status_counts
|
||||
|
||||
# Set the count for the filtered status
|
||||
if status in result:
|
||||
result[status] = total
|
||||
|
||||
return result
|
||||
|
||||
# No status filter - get counts grouped by status
|
||||
base_stmt = (
|
||||
select(WorkflowRun.status, func.count(WorkflowRun.id).label("count"))
|
||||
.where(*base_conditions)
|
||||
.group_by(WorkflowRun.status)
|
||||
)
|
||||
|
||||
# Execute query
|
||||
results = session.execute(base_stmt).all()
|
||||
|
||||
# Build response dictionary
|
||||
status_counts = _initial_status_counts.copy()
|
||||
|
||||
total = 0
|
||||
for status_val, count in results:
|
||||
total += count
|
||||
if status_val in status_counts:
|
||||
status_counts[status_val] = count
|
||||
|
||||
return {"total": total} | status_counts
|
||||
|
||||
def get_expired_runs_batch(
|
||||
self,
|
||||
tenant_id: str,
|
||||
before_date: datetime,
|
||||
batch_size: int = 1000,
|
||||
) -> Sequence[WorkflowRun]:
|
||||
"""
|
||||
Get a batch of expired workflow runs for cleanup operations.
|
||||
"""
|
||||
with self._session_maker() as session:
|
||||
stmt = (
|
||||
select(WorkflowRun)
|
||||
.where(
|
||||
WorkflowRun.tenant_id == tenant_id,
|
||||
WorkflowRun.created_at < before_date,
|
||||
)
|
||||
.limit(batch_size)
|
||||
)
|
||||
return session.scalars(stmt).all()
|
||||
|
||||
def delete_runs_by_ids(
|
||||
self,
|
||||
run_ids: Sequence[str],
|
||||
) -> int:
|
||||
"""
|
||||
Delete workflow runs by their IDs using bulk deletion.
|
||||
"""
|
||||
if not run_ids:
|
||||
return 0
|
||||
|
||||
with self._session_maker() as session:
|
||||
stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
|
||||
result = cast(CursorResult, session.execute(stmt))
|
||||
session.commit()
|
||||
|
||||
deleted_count = result.rowcount
|
||||
logger.info("Deleted %s workflow runs by IDs", deleted_count)
|
||||
return deleted_count
|
||||
|
||||
def delete_runs_by_app(
|
||||
self,
|
||||
tenant_id: str,
|
||||
app_id: str,
|
||||
batch_size: int = 1000,
|
||||
) -> int:
|
||||
"""
|
||||
Delete all workflow runs for a specific app in batches.
|
||||
"""
|
||||
total_deleted = 0
|
||||
|
||||
while True:
|
||||
with self._session_maker() as session:
|
||||
# Get a batch of run IDs to delete
|
||||
stmt = (
|
||||
select(WorkflowRun.id)
|
||||
.where(
|
||||
WorkflowRun.tenant_id == tenant_id,
|
||||
WorkflowRun.app_id == app_id,
|
||||
)
|
||||
.limit(batch_size)
|
||||
)
|
||||
run_ids = session.scalars(stmt).all()
|
||||
|
||||
if not run_ids:
|
||||
break
|
||||
|
||||
# Delete the batch
|
||||
delete_stmt = delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids))
|
||||
result = cast(CursorResult, session.execute(delete_stmt))
|
||||
session.commit()
|
||||
|
||||
batch_deleted = result.rowcount
|
||||
total_deleted += batch_deleted
|
||||
|
||||
logger.info("Deleted batch of %s workflow runs for app %s", batch_deleted, app_id)
|
||||
|
||||
# If we deleted fewer records than the batch size, we're done
|
||||
if batch_deleted < batch_size:
|
||||
break
|
||||
|
||||
logger.info("Total deleted %s workflow runs for app %s", total_deleted, app_id)
|
||||
return total_deleted
|
||||
|
||||
def create_workflow_pause(
|
||||
self,
|
||||
workflow_run_id: str,
|
||||
state_owner_user_id: str,
|
||||
state: str,
|
||||
) -> WorkflowPauseEntity:
|
||||
"""
|
||||
Create a new workflow pause state.
|
||||
|
||||
Creates a pause state for a workflow run, storing the current execution
|
||||
state and marking the workflow as paused. This is used when a workflow
|
||||
needs to be suspended and later resumed.
|
||||
|
||||
Args:
|
||||
workflow_run_id: Identifier of the workflow run to pause
|
||||
state_owner_user_id: User ID who owns the pause state for file storage
|
||||
state: Serialized workflow execution state (JSON string)
|
||||
|
||||
Returns:
|
||||
WorkflowPauseEntity representing the created pause state
|
||||
|
||||
Raises:
|
||||
ValueError: If workflow_run_id is invalid or workflow run doesn't exist
|
||||
RuntimeError: If workflow is already paused or in invalid state
|
||||
"""
|
||||
previous_pause_model_query = select(WorkflowPauseModel).where(
|
||||
WorkflowPauseModel.workflow_run_id == workflow_run_id
|
||||
)
|
||||
with self._session_maker() as session, session.begin():
|
||||
# Get the workflow run
|
||||
workflow_run = session.get(WorkflowRun, workflow_run_id)
|
||||
if workflow_run is None:
|
||||
raise ValueError(f"WorkflowRun not found: {workflow_run_id}")
|
||||
|
||||
# Check if workflow is in RUNNING status
|
||||
if workflow_run.status != WorkflowExecutionStatus.RUNNING:
|
||||
raise _WorkflowRunError(
|
||||
f"Only WorkflowRun with RUNNING status can be paused, "
|
||||
f"workflow_run_id={workflow_run_id}, current_status={workflow_run.status}"
|
||||
)
|
||||
# Replace any existing pause record for this workflow run
|
||||
previous_pause = session.scalars(previous_pause_model_query).first()
|
||||
if previous_pause:
|
||||
self._delete_pause_model(session, previous_pause)
|
||||
# we need to flush here to ensure that the old one is actually deleted.
|
||||
session.flush()
|
||||
|
||||
# Upload the state file
state_obj_key = f"workflow-state-{uuid.uuid4()}.json"
storage.save(state_obj_key, state.encode())
|
||||
|
||||
# Create the pause record
|
||||
pause_model = WorkflowPauseModel()
|
||||
pause_model.id = str(uuidv7())
|
||||
pause_model.workflow_id = workflow_run.workflow_id
|
||||
pause_model.workflow_run_id = workflow_run.id
|
||||
pause_model.state_object_key = state_obj_key
|
||||
pause_model.created_at = naive_utc_now()
|
||||
|
||||
# Update workflow run status
|
||||
workflow_run.status = WorkflowExecutionStatus.PAUSED
|
||||
|
||||
# Save everything in a transaction
|
||||
session.add(pause_model)
|
||||
session.add(workflow_run)
|
||||
|
||||
logger.info("Created workflow pause %s for workflow run %s", pause_model.id, workflow_run_id)
|
||||
|
||||
return _PrivateWorkflowPauseEntity.from_models(pause_model)
|
||||
|
||||
def get_workflow_pause(
|
||||
self,
|
||||
workflow_run_id: str,
|
||||
) -> WorkflowPauseEntity | None:
|
||||
"""
|
||||
Get an existing workflow pause state.
|
||||
|
||||
Retrieves the pause state for a specific workflow run if it exists.
|
||||
Used to check if a workflow is paused and to retrieve its saved state.
|
||||
|
||||
Args:
|
||||
workflow_run_id: Identifier of the workflow run to get pause state for
|
||||
|
||||
Returns:
|
||||
WorkflowPauseEntity if pause state exists, None otherwise
|
||||
|
||||
Raises:
|
||||
ValueError: If workflow_run_id is invalid
|
||||
"""
|
||||
with self._session_maker() as session:
|
||||
# Query workflow run with pause and state file
|
||||
stmt = select(WorkflowRun).options(selectinload(WorkflowRun.pause)).where(WorkflowRun.id == workflow_run_id)
|
||||
workflow_run = session.scalar(stmt)
|
||||
|
||||
if workflow_run is None:
|
||||
raise ValueError(f"WorkflowRun not found: {workflow_run_id}")
|
||||
|
||||
pause_model = workflow_run.pause
|
||||
if pause_model is None:
|
||||
return None
|
||||
|
||||
return _PrivateWorkflowPauseEntity.from_models(pause_model)
|
||||
|
||||
def resume_workflow_pause(
|
||||
self,
|
||||
workflow_run_id: str,
|
||||
pause_entity: WorkflowPauseEntity,
|
||||
) -> WorkflowPauseEntity:
|
||||
"""
|
||||
Resume a paused workflow.
|
||||
|
||||
Marks a paused workflow as resumed, clearing the pause state and
|
||||
returning the workflow to running status. Returns the pause entity
|
||||
that was resumed.
|
||||
|
||||
Args:
|
||||
workflow_run_id: Identifier of the workflow run to resume
|
||||
pause_entity: The pause entity to resume
|
||||
|
||||
Returns:
|
||||
WorkflowPauseEntity representing the resumed pause state
|
||||
|
||||
Raises:
|
||||
ValueError: If workflow_run_id is invalid
|
||||
RuntimeError: If workflow is not paused or already resumed
|
||||
"""
|
||||
with self._session_maker() as session, session.begin():
|
||||
# Get the workflow run with pause
|
||||
stmt = select(WorkflowRun).options(selectinload(WorkflowRun.pause)).where(WorkflowRun.id == workflow_run_id)
|
||||
workflow_run = session.scalar(stmt)
|
||||
|
||||
if workflow_run is None:
|
||||
raise ValueError(f"WorkflowRun not found: {workflow_run_id}")
|
||||
|
||||
if workflow_run.status != WorkflowExecutionStatus.PAUSED:
|
||||
raise _WorkflowRunError(
|
||||
f"WorkflowRun is not in PAUSED status, workflow_run_id={workflow_run_id}, "
|
||||
f"current_status={workflow_run.status}"
|
||||
)
|
||||
pause_model = workflow_run.pause
|
||||
if pause_model is None:
|
||||
raise _WorkflowRunError(f"No pause state found for workflow run: {workflow_run_id}")
|
||||
|
||||
if pause_model.id != pause_entity.id:
|
||||
raise _WorkflowRunError(
|
||||
"different id in WorkflowPause and WorkflowPauseEntity, "
|
||||
f"WorkflowPause.id={pause_model.id}, "
|
||||
f"WorkflowPauseEntity.id={pause_entity.id}"
|
||||
)
|
||||
|
||||
if pause_model.resumed_at is not None:
|
||||
raise _WorkflowRunError(f"Cannot resume an already resumed pause, pause_id={pause_model.id}")
|
||||
|
||||
# Mark as resumed
|
||||
pause_model.resumed_at = naive_utc_now()
|
||||
workflow_run.pause_id = None # type: ignore
|
||||
workflow_run.status = WorkflowExecutionStatus.RUNNING
|
||||
|
||||
session.add(pause_model)
|
||||
session.add(workflow_run)
|
||||
|
||||
logger.info("Resumed workflow pause %s for workflow run %s", pause_model.id, workflow_run_id)
|
||||
|
||||
return _PrivateWorkflowPauseEntity.from_models(pause_model)
|
||||
|
||||
def delete_workflow_pause(
|
||||
self,
|
||||
pause_entity: WorkflowPauseEntity,
|
||||
) -> None:
|
||||
"""
|
||||
Delete a workflow pause state.
|
||||
|
||||
Permanently removes the pause state for a workflow run, including
|
||||
the stored state file. Used for cleanup operations when a paused
|
||||
workflow is no longer needed.
|
||||
|
||||
Args:
|
||||
pause_entity: The pause entity to delete
|
||||
|
||||
Raises:
|
||||
ValueError: If pause_entity is invalid
|
||||
_WorkflowRunError: If workflow is not paused
|
||||
|
||||
Note:
|
||||
This operation is irreversible. The stored workflow state will be
|
||||
permanently deleted along with the pause record.
|
||||
"""
|
||||
        with self._session_maker() as session, session.begin():
            # Get the pause model by ID
            pause_model = session.get(WorkflowPauseModel, pause_entity.id)
            if pause_model is None:
                raise _WorkflowRunError(f"WorkflowPause not found: {pause_entity.id}")
            self._delete_pause_model(session, pause_model)

    @staticmethod
    def _delete_pause_model(session: Session, pause_model: WorkflowPauseModel):
        storage.delete(pause_model.state_object_key)

        # Delete the pause record
        session.delete(pause_model)

        logger.info("Deleted workflow pause %s for workflow run %s", pause_model.id, pause_model.workflow_run_id)

    def prune_pauses(
        self,
        expiration: datetime,
        resumption_expiration: datetime,
        limit: int | None = None,
    ) -> Sequence[str]:
        """
        Clean up expired and old pause states.

        Removes pause states that were created before the expiration time, as well
        as pause states that were resumed before the resumption expiration time.
        This is used for maintenance and cleanup operations.

        Args:
            expiration: Remove pause states created before this time
            resumption_expiration: Remove pause states resumed before this time
            limit: Maximum number of records deleted in one call (defaults to 1000)

        Returns:
            A list of IDs of the pause records that were pruned

        Raises:
            ValueError: If parameters are invalid
        """
        _limit: int = limit or 1000
        pruned_record_ids: list[str] = []
        # Match expired pauses (created before the expiration time) and old resumed
        # pauses (resumed before the resumption expiration time).
        cond = or_(
            WorkflowPauseModel.created_at < expiration,
            and_(
                WorkflowPauseModel.resumed_at.is_not(null()),
                WorkflowPauseModel.resumed_at < resumption_expiration,
            ),
        )
        # First, collect the pause records to delete together with their state files
        stmt = select(WorkflowPauseModel).where(cond).limit(_limit)

        with self._session_maker(expire_on_commit=False) as session:
            # Get all records to delete
            pauses_to_delete = session.scalars(stmt).all()

        # Delete the state file from storage, then the pause record itself
        for pause in pauses_to_delete:
            with self._session_maker(expire_on_commit=False) as session, session.begin():
                # TODO: this issues a separate query for each WorkflowPauseModel record;
                # consider batching this lookup.
                try:
                    storage.delete(pause.state_object_key)
                    logger.info(
                        "Deleted state object for pause, pause_id=%s, object_key=%s",
                        pause.id,
                        pause.state_object_key,
                    )
                except Exception:
                    logger.exception(
                        "Failed to delete state file for pause, pause_id=%s, object_key=%s",
                        pause.id,
                        pause.state_object_key,
                    )
                    continue
                session.delete(pause)
                pruned_record_ids.append(pause.id)
                logger.info(
                    "workflow pause records deleted, id=%s, resumed_at=%s",
                    pause.id,
                    pause.resumed_at,
                )

        return pruned_record_ids

    def get_daily_runs_statistics(
        self,
        tenant_id: str,
        app_id: str,
        triggered_from: str,
        start_date: datetime | None = None,
        end_date: datetime | None = None,
        timezone: str = "UTC",
    ) -> list[DailyRunsStats]:
        """
        Get daily runs statistics using raw SQL for optimal performance.
        """
        converted_created_at = convert_datetime_to_date("created_at")
        sql_query = f"""SELECT
    {converted_created_at} AS date,
    COUNT(id) AS runs
FROM
    workflow_runs
WHERE
    tenant_id = :tenant_id
    AND app_id = :app_id
    AND triggered_from = :triggered_from"""

        arg_dict: dict[str, Any] = {
            "tz": timezone,
            "tenant_id": tenant_id,
            "app_id": app_id,
            "triggered_from": triggered_from,
        }

        if start_date:
            sql_query += " AND created_at >= :start_date"
            arg_dict["start_date"] = start_date

        if end_date:
            sql_query += " AND created_at < :end_date"
            arg_dict["end_date"] = end_date

        sql_query += " GROUP BY date ORDER BY date"

        response_data = []
        with self._session_maker() as session:
            rs = session.execute(sa.text(sql_query), arg_dict)
            for row in rs:
                response_data.append({"date": str(row.date), "runs": row.runs})

        return cast(list[DailyRunsStats], response_data)

    def get_daily_terminals_statistics(
        self,
        tenant_id: str,
        app_id: str,
        triggered_from: str,
        start_date: datetime | None = None,
        end_date: datetime | None = None,
        timezone: str = "UTC",
    ) -> list[DailyTerminalsStats]:
        """
        Get daily terminals statistics using raw SQL for optimal performance.
        """
        converted_created_at = convert_datetime_to_date("created_at")
        sql_query = f"""SELECT
    {converted_created_at} AS date,
    COUNT(DISTINCT created_by) AS terminal_count
FROM
    workflow_runs
WHERE
    tenant_id = :tenant_id
    AND app_id = :app_id
    AND triggered_from = :triggered_from"""

        arg_dict: dict[str, Any] = {
            "tz": timezone,
            "tenant_id": tenant_id,
            "app_id": app_id,
            "triggered_from": triggered_from,
        }

        if start_date:
            sql_query += " AND created_at >= :start_date"
            arg_dict["start_date"] = start_date

        if end_date:
            sql_query += " AND created_at < :end_date"
            arg_dict["end_date"] = end_date

        sql_query += " GROUP BY date ORDER BY date"

        response_data = []
        with self._session_maker() as session:
            rs = session.execute(sa.text(sql_query), arg_dict)
            for row in rs:
                response_data.append({"date": str(row.date), "terminal_count": row.terminal_count})

        return cast(list[DailyTerminalsStats], response_data)

    def get_daily_token_cost_statistics(
        self,
        tenant_id: str,
        app_id: str,
        triggered_from: str,
        start_date: datetime | None = None,
        end_date: datetime | None = None,
        timezone: str = "UTC",
    ) -> list[DailyTokenCostStats]:
        """
        Get daily token cost statistics using raw SQL for optimal performance.
        """
        converted_created_at = convert_datetime_to_date("created_at")
        sql_query = f"""SELECT
    {converted_created_at} AS date,
    SUM(total_tokens) AS token_count
FROM
    workflow_runs
WHERE
    tenant_id = :tenant_id
    AND app_id = :app_id
    AND triggered_from = :triggered_from"""

        arg_dict: dict[str, Any] = {
            "tz": timezone,
            "tenant_id": tenant_id,
            "app_id": app_id,
            "triggered_from": triggered_from,
        }

        if start_date:
            sql_query += " AND created_at >= :start_date"
            arg_dict["start_date"] = start_date

        if end_date:
            sql_query += " AND created_at < :end_date"
            arg_dict["end_date"] = end_date

        sql_query += " GROUP BY date ORDER BY date"

        response_data = []
        with self._session_maker() as session:
            rs = session.execute(sa.text(sql_query), arg_dict)
            for row in rs:
                response_data.append(
                    {
                        "date": str(row.date),
                        "token_count": row.token_count,
                    }
                )

        return cast(list[DailyTokenCostStats], response_data)

    def get_average_app_interaction_statistics(
        self,
        tenant_id: str,
        app_id: str,
        triggered_from: str,
        start_date: datetime | None = None,
        end_date: datetime | None = None,
        timezone: str = "UTC",
    ) -> list[AverageInteractionStats]:
        """
        Get average app interaction statistics using raw SQL for optimal performance.
        """
        converted_created_at = convert_datetime_to_date("c.created_at")
        sql_query = f"""SELECT
    AVG(sub.interactions) AS interactions,
    sub.date
FROM
    (
        SELECT
            {converted_created_at} AS date,
            c.created_by,
            COUNT(c.id) AS interactions
        FROM
            workflow_runs c
        WHERE
            c.tenant_id = :tenant_id
            AND c.app_id = :app_id
            AND c.triggered_from = :triggered_from
            {{{{start}}}}
            {{{{end}}}}
        GROUP BY
            date, c.created_by
    ) sub
GROUP BY
    sub.date"""

        arg_dict: dict[str, Any] = {
            "tz": timezone,
            "tenant_id": tenant_id,
            "app_id": app_id,
            "triggered_from": triggered_from,
        }

        if start_date:
            sql_query = sql_query.replace("{{start}}", " AND c.created_at >= :start_date")
            arg_dict["start_date"] = start_date
        else:
            sql_query = sql_query.replace("{{start}}", "")

        if end_date:
            sql_query = sql_query.replace("{{end}}", " AND c.created_at < :end_date")
            arg_dict["end_date"] = end_date
        else:
            sql_query = sql_query.replace("{{end}}", "")

        response_data = []
        with self._session_maker() as session:
            rs = session.execute(sa.text(sql_query), arg_dict)
            for row in rs:
                response_data.append(
                    {"date": str(row.date), "interactions": float(row.interactions.quantize(Decimal("0.01")))}
                )

        return cast(list[AverageInteractionStats], response_data)


class _PrivateWorkflowPauseEntity(WorkflowPauseEntity):
    """
    Private implementation of WorkflowPauseEntity for the SQLAlchemy repository.

    This implementation is internal to the repository layer and provides
    the concrete implementation of the WorkflowPauseEntity interface.
    """

    def __init__(
        self,
        *,
        pause_model: WorkflowPauseModel,
    ) -> None:
        self._pause_model = pause_model
        self._cached_state: bytes | None = None

    @classmethod
    def from_models(cls, workflow_pause_model: WorkflowPauseModel) -> "_PrivateWorkflowPauseEntity":
        """
        Create a _PrivateWorkflowPauseEntity from database models.

        Args:
            workflow_pause_model: The WorkflowPause database model

        Returns:
            _PrivateWorkflowPauseEntity: The constructed entity

        Raises:
            ValueError: If required model attributes are missing
        """
        return cls(pause_model=workflow_pause_model)

    @property
    def id(self) -> str:
        return self._pause_model.id

    @property
    def workflow_execution_id(self) -> str:
        return self._pause_model.workflow_run_id

    def get_state(self) -> bytes:
        """
        Retrieve the serialized workflow state from storage.

        Returns:
            bytes: The serialized workflow state

        Raises:
            FileNotFoundError: If the state file cannot be found
            IOError: If there are issues reading the state file
        """
        if self._cached_state is not None:
            return self._cached_state

        # Load the state from storage
        state_data = storage.load(self._pause_model.state_object_key)
        self._cached_state = state_data
        return state_data

    @property
    def resumed_at(self) -> datetime | None:
        return self._pause_model.resumed_at
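
# Usage sketch (illustrative only; `repository`, the retention windows, and the
# `timedelta` import are assumptions, not part of this module): a periodic
# maintenance task could combine prune_pauses() with application-defined cutoffs,
# using the same naive_utc_now helper this module already relies on.
#
#     cutoff_created = naive_utc_now() - timedelta(days=7)
#     cutoff_resumed = naive_utc_now() - timedelta(hours=24)
#     pruned_ids = repository.prune_pauses(
#         expiration=cutoff_created,
#         resumption_expiration=cutoff_resumed,
#         limit=500,
#     )
#     logger.info("pruned %d workflow pause records", len(pruned_ids))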
@@ -0,0 +1,86 @@
"""
SQLAlchemy implementation of WorkflowTriggerLogRepository.
"""

from collections.abc import Sequence
from datetime import UTC, datetime, timedelta

from sqlalchemy import and_, select
from sqlalchemy.orm import Session

from models.enums import WorkflowTriggerStatus
from models.trigger import WorkflowTriggerLog
from repositories.workflow_trigger_log_repository import WorkflowTriggerLogRepository


class SQLAlchemyWorkflowTriggerLogRepository(WorkflowTriggerLogRepository):
    """
    SQLAlchemy implementation of WorkflowTriggerLogRepository.

    Optimized for large table operations with proper indexing and batch processing.
    """

    def __init__(self, session: Session):
        self.session = session

    def create(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
        """Create a new trigger log entry."""
        self.session.add(trigger_log)
        self.session.flush()
        return trigger_log

    def update(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
        """Update an existing trigger log entry."""
        # merge() returns the instance attached to this session; return that one
        trigger_log = self.session.merge(trigger_log)
        self.session.flush()
        return trigger_log

    def get_by_id(self, trigger_log_id: str, tenant_id: str | None = None) -> WorkflowTriggerLog | None:
        """Get a trigger log by its ID."""
        query = select(WorkflowTriggerLog).where(WorkflowTriggerLog.id == trigger_log_id)

        if tenant_id:
            query = query.where(WorkflowTriggerLog.tenant_id == tenant_id)

        return self.session.scalar(query)

    def get_failed_for_retry(
        self, tenant_id: str, max_retry_count: int = 3, limit: int = 100
    ) -> Sequence[WorkflowTriggerLog]:
        """Get failed trigger logs eligible for retry."""
        query = (
            select(WorkflowTriggerLog)
            .where(
                and_(
                    WorkflowTriggerLog.tenant_id == tenant_id,
                    WorkflowTriggerLog.status.in_([WorkflowTriggerStatus.FAILED, WorkflowTriggerStatus.RATE_LIMITED]),
                    WorkflowTriggerLog.retry_count < max_retry_count,
                )
            )
            .order_by(WorkflowTriggerLog.created_at.asc())
            .limit(limit)
        )

        return list(self.session.scalars(query).all())

    def get_recent_logs(
        self, tenant_id: str, app_id: str, hours: int = 24, limit: int = 100, offset: int = 0
    ) -> Sequence[WorkflowTriggerLog]:
        """Get recent trigger logs within specified hours."""
        since = datetime.now(UTC) - timedelta(hours=hours)

        query = (
            select(WorkflowTriggerLog)
            .where(
                and_(
                    WorkflowTriggerLog.tenant_id == tenant_id,
                    WorkflowTriggerLog.app_id == app_id,
                    WorkflowTriggerLog.created_at >= since,
                )
            )
            .order_by(WorkflowTriggerLog.created_at.desc())
            .limit(limit)
            .offset(offset)
        )

        return list(self.session.scalars(query).all())
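
# Usage sketch (illustrative only; `session_factory` and `trigger_log` are
# assumptions): the repository takes an externally managed Session, so a
# service-layer caller owns the transaction boundary.
#
#     with session_factory() as session, session.begin():
#         repo = SQLAlchemyWorkflowTriggerLogRepository(session)
#         repo.create(trigger_log)
#         recent = repo.get_recent_logs(tenant_id=trigger_log.tenant_id, app_id=trigger_log.app_id, hours=1)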
21
dify/api/repositories/types.py
Normal file
21
dify/api/repositories/types.py
Normal file
@@ -0,0 +1,21 @@
from typing import TypedDict


class DailyRunsStats(TypedDict):
    date: str
    runs: int


class DailyTerminalsStats(TypedDict):
    date: str
    terminal_count: int


class DailyTokenCostStats(TypedDict):
    date: str
    token_count: int


class AverageInteractionStats(TypedDict):
    date: str
    interactions: float
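
# These TypedDicts describe the row shape returned by the repository statistics
# helpers; for example (illustrative only):
#
#     stats: DailyRunsStats = {"date": "2024-01-01", "runs": 42}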
111
dify/api/repositories/workflow_trigger_log_repository.py
Normal file
111
dify/api/repositories/workflow_trigger_log_repository.py
Normal file
@@ -0,0 +1,111 @@
"""
Repository protocol for WorkflowTriggerLog operations.

This module provides a protocol interface for operations on WorkflowTriggerLog,
designed to efficiently handle a potentially large volume of trigger logs with
proper indexing and batch operations.
"""

from collections.abc import Sequence
from enum import StrEnum
from typing import Protocol

from models.trigger import WorkflowTriggerLog


class TriggerLogOrderBy(StrEnum):
    """Fields available for ordering trigger logs"""

    CREATED_AT = "created_at"
    TRIGGERED_AT = "triggered_at"
    FINISHED_AT = "finished_at"
    STATUS = "status"


class WorkflowTriggerLogRepository(Protocol):
    """
    Protocol for operations on WorkflowTriggerLog.

    This repository provides efficient access patterns for the trigger log table,
    which is expected to grow large over time. It includes:
    - Batch operations for cleanup
    - Efficient queries with proper indexing
    - Pagination support
    - Status-based filtering

    Implementation notes:
    - Leverage database indexes on (tenant_id, app_id), status, and created_at
    - Use batch operations for deletions to avoid locking
    - Support pagination for large result sets
    """

    def create(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
        """
        Create a new trigger log entry.

        Args:
            trigger_log: The WorkflowTriggerLog instance to create

        Returns:
            The created WorkflowTriggerLog with generated ID
        """
        ...

    def update(self, trigger_log: WorkflowTriggerLog) -> WorkflowTriggerLog:
        """
        Update an existing trigger log entry.

        Args:
            trigger_log: The WorkflowTriggerLog instance to update

        Returns:
            The updated WorkflowTriggerLog
        """
        ...

    def get_by_id(self, trigger_log_id: str, tenant_id: str | None = None) -> WorkflowTriggerLog | None:
        """
        Get a trigger log by its ID.

        Args:
            trigger_log_id: The trigger log identifier
            tenant_id: Optional tenant identifier for additional security

        Returns:
            The WorkflowTriggerLog if found, None otherwise
        """
        ...

    def get_failed_for_retry(
        self, tenant_id: str, max_retry_count: int = 3, limit: int = 100
    ) -> Sequence[WorkflowTriggerLog]:
        """
        Get failed trigger logs that are eligible for retry.

        Args:
            tenant_id: The tenant identifier
            max_retry_count: Maximum retry count to consider
            limit: Maximum number of results

        Returns:
            A sequence of WorkflowTriggerLog instances eligible for retry
        """
        ...

    def get_recent_logs(
        self, tenant_id: str, app_id: str, hours: int = 24, limit: int = 100, offset: int = 0
    ) -> Sequence[WorkflowTriggerLog]:
        """
        Get recent trigger logs within specified hours.

        Args:
            tenant_id: The tenant identifier
            app_id: The application identifier
            hours: Number of hours to look back
            limit: Maximum number of results
            offset: Number of results to skip

        Returns:
            A sequence of recent WorkflowTriggerLog instances
        """
        ...
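
# Usage sketch (illustrative only; `repo` and `redeliver` are assumptions): a
# background retry job could drain failed logs through this protocol.
#
#     def retry_failed_triggers(repo: WorkflowTriggerLogRepository, tenant_id: str, redeliver) -> int:
#         retried = 0
#         for trigger_log in repo.get_failed_for_retry(tenant_id, max_retry_count=3, limit=100):
#             redeliver(trigger_log)
#             repo.update(trigger_log)
#             retried += 1
#         return retried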