84 lines
3.5 KiB
Python
84 lines
3.5 KiB
Python
|
|
import logging
|
||
|
|
from collections.abc import Callable, Sequence
|
||
|
|
from dataclasses import asdict
|
||
|
|
from functools import cached_property
|
||
|
|
|
||
|
|
from core.entities.document_task import DocumentTask
|
||
|
|
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
|
||
|
|
from enums.cloud_plan import CloudPlan
|
||
|
|
from services.feature_service import FeatureService
|
||
|
|
from tasks.document_indexing_task import normal_document_indexing_task, priority_document_indexing_task
|
||
|
|
|
||
|
|
logger = logging.getLogger(__name__)
|
||
|
|
|
||
|
|
|
||
|
|
class DocumentIndexingTaskProxy:
|
||
|
|
def __init__(self, tenant_id: str, dataset_id: str, document_ids: Sequence[str]):
|
||
|
|
self._tenant_id = tenant_id
|
||
|
|
self._dataset_id = dataset_id
|
||
|
|
self._document_ids = document_ids
|
||
|
|
self._tenant_isolated_task_queue = TenantIsolatedTaskQueue(tenant_id, "document_indexing")
|
||
|
|
|
||
|
|
@cached_property
|
||
|
|
def features(self):
|
||
|
|
return FeatureService.get_features(self._tenant_id)
|
||
|
|
|
||
|
|
def _send_to_direct_queue(self, task_func: Callable[[str, str, Sequence[str]], None]):
|
||
|
|
logger.info("send dataset %s to direct queue", self._dataset_id)
|
||
|
|
task_func.delay( # type: ignore
|
||
|
|
tenant_id=self._tenant_id, dataset_id=self._dataset_id, document_ids=self._document_ids
|
||
|
|
)
|
||
|
|
|
||
|
|
def _send_to_tenant_queue(self, task_func: Callable[[str, str, Sequence[str]], None]):
|
||
|
|
logger.info("send dataset %s to tenant queue", self._dataset_id)
|
||
|
|
if self._tenant_isolated_task_queue.get_task_key():
|
||
|
|
# Add to waiting queue using List operations (lpush)
|
||
|
|
self._tenant_isolated_task_queue.push_tasks(
|
||
|
|
[
|
||
|
|
asdict(
|
||
|
|
DocumentTask(
|
||
|
|
tenant_id=self._tenant_id, dataset_id=self._dataset_id, document_ids=self._document_ids
|
||
|
|
)
|
||
|
|
)
|
||
|
|
]
|
||
|
|
)
|
||
|
|
logger.info("push tasks: %s - %s", self._dataset_id, self._document_ids)
|
||
|
|
else:
|
||
|
|
# Set flag and execute task
|
||
|
|
self._tenant_isolated_task_queue.set_task_waiting_time()
|
||
|
|
task_func.delay( # type: ignore
|
||
|
|
tenant_id=self._tenant_id, dataset_id=self._dataset_id, document_ids=self._document_ids
|
||
|
|
)
|
||
|
|
logger.info("init tasks: %s - %s", self._dataset_id, self._document_ids)
|
||
|
|
|
||
|
|
def _send_to_default_tenant_queue(self):
|
||
|
|
self._send_to_tenant_queue(normal_document_indexing_task)
|
||
|
|
|
||
|
|
def _send_to_priority_tenant_queue(self):
|
||
|
|
self._send_to_tenant_queue(priority_document_indexing_task)
|
||
|
|
|
||
|
|
def _send_to_priority_direct_queue(self):
|
||
|
|
self._send_to_direct_queue(priority_document_indexing_task)
|
||
|
|
|
||
|
|
def _dispatch(self):
|
||
|
|
logger.info(
|
||
|
|
"dispatch args: %s - %s - %s",
|
||
|
|
self._tenant_id,
|
||
|
|
self.features.billing.enabled,
|
||
|
|
self.features.billing.subscription.plan,
|
||
|
|
)
|
||
|
|
# dispatch to different indexing queue with tenant isolation when billing enabled
|
||
|
|
if self.features.billing.enabled:
|
||
|
|
if self.features.billing.subscription.plan == CloudPlan.SANDBOX:
|
||
|
|
# dispatch to normal pipeline queue with tenant self sub queue for sandbox plan
|
||
|
|
self._send_to_default_tenant_queue()
|
||
|
|
else:
|
||
|
|
# dispatch to priority pipeline queue with tenant self sub queue for other plans
|
||
|
|
self._send_to_priority_tenant_queue()
|
||
|
|
else:
|
||
|
|
# dispatch to priority queue without tenant isolation for others, e.g.: self-hosted or enterprise
|
||
|
|
self._send_to_priority_direct_queue()
|
||
|
|
|
||
|
|
def delay(self):
|
||
|
|
self._dispatch()
|