This commit is contained in:
2025-12-01 17:21:38 +08:00
parent 32fee2b8ab
commit fab8c13cb3
7511 changed files with 996300 additions and 0 deletions

View File

@@ -0,0 +1,793 @@
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from core.rag.index_processor.constant.index_type import IndexType
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, DatasetAutoDisableLog, Document, DocumentSegment
from tasks.add_document_to_index_task import add_document_to_index_task
class TestAddDocumentToIndexTask:
    """Integration tests for add_document_to_index_task using testcontainers."""
    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        # Only the IndexProcessorFactory used by the task is patched; the database
        # and Redis come from the real testcontainers infrastructure.
        with (
            patch("tasks.add_document_to_index_task.IndexProcessorFactory") as mock_index_processor_factory,
        ):
            # Setup mock index processor
            mock_processor = MagicMock()
            mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
            yield {
                "index_processor_factory": mock_index_processor_factory,
                "index_processor": mock_processor,
            }
    def _create_test_dataset_and_document(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Helper method to create a test dataset and document for testing.
        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            mock_external_service_dependencies: Mock dependencies
        Returns:
            tuple: (dataset, document) - Created dataset and document instances
        """
        fake = Faker()
        # Create account and tenant
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()
        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()
        # Create dataset
        dataset = Dataset(
            id=fake.uuid4(),
            tenant_id=tenant.id,
            name=fake.company(),
            description=fake.text(max_nb_chars=100),
            data_source_type="upload_file",
            indexing_technique="high_quality",
            created_by=account.id,
        )
        db.session.add(dataset)
        db.session.commit()
        # Create document
        document = Document(
            id=fake.uuid4(),
            tenant_id=tenant.id,
            dataset_id=dataset.id,
            position=1,
            data_source_type="upload_file",
            batch="test_batch",
            name=fake.file_name(),
            created_from="upload_file",
            created_by=account.id,
            # "completed" is required by the task; other statuses cause an early return.
            indexing_status="completed",
            enabled=True,
            doc_form=IndexType.PARAGRAPH_INDEX,
        )
        db.session.add(document)
        db.session.commit()
        # Refresh dataset to ensure doc_form property works correctly
        db.session.refresh(dataset)
        return dataset, document
    def _create_test_segments(self, db_session_with_containers, document, dataset):
        """
        Helper method to create test document segments.
        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            document: Document instance
            dataset: Dataset instance
        Returns:
            list: List of created DocumentSegment instances
        """
        fake = Faker()
        segments = []
        for i in range(3):
            segment = DocumentSegment(
                id=fake.uuid4(),
                tenant_id=document.tenant_id,
                dataset_id=dataset.id,
                document_id=document.id,
                position=i,
                content=fake.text(max_nb_chars=200),
                word_count=len(fake.text(max_nb_chars=200).split()),
                tokens=len(fake.text(max_nb_chars=200).split()) * 2,
                index_node_id=f"node_{i}",
                index_node_hash=f"hash_{i}",
                # Segments start disabled so tests can observe the task enabling them.
                enabled=False,
                status="completed",
                created_by=document.created_by,
            )
            db.session.add(segment)
            segments.append(segment)
        db.session.commit()
        return segments
    def test_add_document_to_index_success(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Test successful document indexing with paragraph index type.
        This test verifies:
        - Proper document retrieval from database
        - Correct segment processing and document creation
        - Index processor integration
        - Database state updates
        - Segment status changes
        - Redis cache key deletion
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        # Set up Redis cache key to simulate indexing in progress
        # NOTE(review): key format must match the convention used inside
        # add_document_to_index_task — confirm if the task changes.
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300) # 5 minutes expiry
        # Verify cache key exists
        assert redis_client.exists(indexing_cache_key) == 1
        # Act: Execute the task
        add_document_to_index_task(document.id)
        # Assert: Verify the expected outcomes
        # Verify index processor was called correctly
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()
        # Verify database state changes
        db.session.refresh(document)
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is True
            assert segment.disabled_at is None
            assert segment.disabled_by is None
        # Verify Redis cache key was deleted
        assert redis_client.exists(indexing_cache_key) == 0
    def test_add_document_to_index_with_different_index_type(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test document indexing with different index types.
        This test verifies:
        - Proper handling of different index types
        - Index processor factory integration
        - Document processing with various configurations
        - Redis cache key deletion
        """
        # Arrange: Create test data with different index type
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        # Update document to use different index type
        document.doc_form = IndexType.QA_INDEX
        db.session.commit()
        # Refresh dataset to ensure doc_form property reflects the updated document
        db.session.refresh(dataset)
        # Create segments
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)
        # Act: Execute the task
        add_document_to_index_task(document.id)
        # Assert: Verify different index type handling
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.QA_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()
        # Verify the load method was called with correct parameters
        call_args = mock_external_service_dependencies["index_processor"].load.call_args
        assert call_args is not None
        documents = call_args[0][1]  # Second argument should be documents list
        assert len(documents) == 3
        # Verify database state changes
        db.session.refresh(document)
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is True
            assert segment.disabled_at is None
            assert segment.disabled_by is None
        # Verify Redis cache key was deleted
        assert redis_client.exists(indexing_cache_key) == 0
    def test_add_document_to_index_document_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of non-existent document.
        This test verifies:
        - Proper error handling for missing documents
        - Early return without processing
        - Database session cleanup
        - No unnecessary index processor calls
        - Redis cache key not affected (since it was never created)
        """
        # Arrange: Use non-existent document ID
        fake = Faker()
        non_existent_id = str(fake.uuid4())
        # Act: Execute the task with non-existent document
        add_document_to_index_task(non_existent_id)
        # Assert: Verify no processing occurred
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()
        # Note: redis_client.delete is not called when document is not found
        # because indexing_cache_key is not defined in that case
    def test_add_document_to_index_invalid_indexing_status(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of document with invalid indexing status.
        This test verifies:
        - Early return when indexing_status is not "completed"
        - No index processing for documents not ready for indexing
        - Proper database session cleanup
        - No unnecessary external service calls
        - Redis cache key not affected
        """
        # Arrange: Create test data with invalid indexing status
        _, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        # Set invalid indexing status
        document.indexing_status = "processing"
        db.session.commit()
        # Act: Execute the task
        add_document_to_index_task(document.id)
        # Assert: Verify no processing occurred
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()
    def test_add_document_to_index_dataset_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling when document's dataset doesn't exist.
        This test verifies:
        - Proper error handling when dataset is missing
        - Document status is set to error
        - Document is disabled
        - Error information is recorded
        - Redis cache is cleared despite error
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)
        # Delete the dataset to simulate dataset not found scenario
        db.session.delete(dataset)
        db.session.commit()
        # Act: Execute the task
        add_document_to_index_task(document.id)
        # Assert: Verify error handling
        db.session.refresh(document)
        assert document.enabled is False
        assert document.indexing_status == "error"
        assert document.error is not None
        assert "doesn't exist" in document.error
        assert document.disabled_at is not None
        # Verify no index processing occurred
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()
        # Verify redis cache was cleared despite error
        assert redis_client.exists(indexing_cache_key) == 0
    def test_add_document_to_index_with_parent_child_structure(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test document indexing with parent-child structure.
        This test verifies:
        - Proper handling of PARENT_CHILD_INDEX type
        - Child document creation from segments
        - Correct document structure for parent-child indexing
        - Index processor receives properly structured documents
        - Redis cache key deletion
        """
        # Arrange: Create test data with parent-child index type
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        # Update document to use parent-child index type
        document.doc_form = IndexType.PARENT_CHILD_INDEX
        db.session.commit()
        # Refresh dataset to ensure doc_form property reflects the updated document
        db.session.refresh(dataset)
        # Create segments with mock child chunks
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)
        # Mock the get_child_chunks method for each segment
        with patch.object(DocumentSegment, "get_child_chunks") as mock_get_child_chunks:
            # Setup mock to return child chunks for each segment
            mock_child_chunks = []
            for i in range(2):  # Each segment has 2 child chunks
                mock_child = MagicMock()
                mock_child.content = f"child_content_{i}"
                mock_child.index_node_id = f"child_node_{i}"
                mock_child.index_node_hash = f"child_hash_{i}"
                mock_child_chunks.append(mock_child)
            mock_get_child_chunks.return_value = mock_child_chunks
            # Act: Execute the task
            add_document_to_index_task(document.id)
            # Assert: Verify parent-child index processing
            mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
                IndexType.PARENT_CHILD_INDEX
            )
            mock_external_service_dependencies["index_processor"].load.assert_called_once()
            # Verify the load method was called with correct parameters
            call_args = mock_external_service_dependencies["index_processor"].load.call_args
            assert call_args is not None
            documents = call_args[0][1]  # Second argument should be documents list
            assert len(documents) == 3  # 3 segments
            # Verify each document has children
            for doc in documents:
                assert hasattr(doc, "children")
                assert len(doc.children) == 2  # Each document has 2 children
        # Verify database state changes
        db.session.refresh(document)
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is True
            assert segment.disabled_at is None
            assert segment.disabled_by is None
        # Verify redis cache was cleared
        assert redis_client.exists(indexing_cache_key) == 0
    def test_add_document_to_index_with_already_enabled_segments(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test document indexing when segments are already enabled.
        This test verifies:
        - Segments with status="completed" are processed regardless of enabled status
        - Index processing occurs with all completed segments
        - Auto disable log deletion still occurs
        - Redis cache is cleared
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        # Create segments that are already enabled
        fake = Faker()
        segments = []
        for i in range(3):
            segment = DocumentSegment(
                id=fake.uuid4(),
                tenant_id=document.tenant_id,
                dataset_id=dataset.id,
                document_id=document.id,
                position=i,
                content=fake.text(max_nb_chars=200),
                word_count=len(fake.text(max_nb_chars=200).split()),
                tokens=len(fake.text(max_nb_chars=200).split()) * 2,
                index_node_id=f"node_{i}",
                index_node_hash=f"hash_{i}",
                enabled=True,  # Already enabled
                status="completed",
                created_by=document.created_by,
            )
            db.session.add(segment)
            segments.append(segment)
        db.session.commit()
        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)
        # Act: Execute the task
        add_document_to_index_task(document.id)
        # Assert: Verify index processing occurred with all completed segments
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()
        # Verify the load method was called with all completed segments
        # (implementation doesn't filter by enabled status, only by status="completed")
        call_args = mock_external_service_dependencies["index_processor"].load.call_args
        assert call_args is not None
        documents = call_args[0][1]  # Second argument should be documents list
        assert len(documents) == 3  # All completed segments are processed
        # Verify redis cache was cleared
        assert redis_client.exists(indexing_cache_key) == 0
    def test_add_document_to_index_auto_disable_log_deletion(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test that auto disable logs are properly deleted during indexing.
        This test verifies:
        - Auto disable log entries are deleted for the document
        - Database state is properly managed
        - Index processing continues normally
        - Redis cache key deletion
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        # Create some auto disable log entries
        fake = Faker()
        auto_disable_logs = []
        for _ in range(2):
            log_entry = DatasetAutoDisableLog(
                tenant_id=document.tenant_id,
                dataset_id=dataset.id,
                document_id=document.id,
            )
            log_entry.id = str(fake.uuid4())
            db.session.add(log_entry)
            auto_disable_logs.append(log_entry)
        db.session.commit()
        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)
        # Verify logs exist before processing
        existing_logs = (
            db.session.query(DatasetAutoDisableLog).where(DatasetAutoDisableLog.document_id == document.id).all()
        )
        assert len(existing_logs) == 2
        # Act: Execute the task
        add_document_to_index_task(document.id)
        # Assert: Verify auto disable logs were deleted
        remaining_logs = (
            db.session.query(DatasetAutoDisableLog).where(DatasetAutoDisableLog.document_id == document.id).all()
        )
        assert len(remaining_logs) == 0
        # Verify index processing occurred normally
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()
        # Verify segments were enabled
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is True
        # Verify redis cache was cleared
        assert redis_client.exists(indexing_cache_key) == 0
    def test_add_document_to_index_general_exception_handling(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test general exception handling during indexing process.
        This test verifies:
        - Exceptions are properly caught and handled
        - Document status is set to error
        - Document is disabled
        - Error information is recorded
        - Redis cache is still cleared
        - Database session is properly closed
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)
        # Mock the index processor to raise an exception
        mock_external_service_dependencies["index_processor"].load.side_effect = Exception("Index processing failed")
        # Act: Execute the task
        add_document_to_index_task(document.id)
        # Assert: Verify error handling
        db.session.refresh(document)
        assert document.enabled is False
        assert document.indexing_status == "error"
        assert document.error is not None
        assert "Index processing failed" in document.error
        assert document.disabled_at is not None
        # Verify segments were not enabled due to error
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is False  # Should remain disabled due to error
        # Verify redis cache was still cleared despite error
        assert redis_client.exists(indexing_cache_key) == 0
    def test_add_document_to_index_segment_filtering_edge_cases(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test segment filtering with various edge cases.
        This test verifies:
        - Only segments with status="completed" are processed (regardless of enabled status)
        - Segments with status!="completed" are NOT processed
        - Segments are ordered by position correctly
        - Mixed segment states are handled properly
        - All segments are updated to enabled=True after processing
        - Redis cache key deletion
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        # Create segments with mixed states
        fake = Faker()
        segments = []
        # Segment 1: Should be processed (enabled=False, status="completed")
        segment1 = DocumentSegment(
            id=fake.uuid4(),
            tenant_id=document.tenant_id,
            dataset_id=dataset.id,
            document_id=document.id,
            position=0,
            content=fake.text(max_nb_chars=200),
            word_count=len(fake.text(max_nb_chars=200).split()),
            tokens=len(fake.text(max_nb_chars=200).split()) * 2,
            index_node_id="node_0",
            index_node_hash="hash_0",
            enabled=False,
            status="completed",
            created_by=document.created_by,
        )
        db.session.add(segment1)
        segments.append(segment1)
        # Segment 2: Should be processed (enabled=True, status="completed")
        # Note: Implementation doesn't filter by enabled status, only by status="completed"
        segment2 = DocumentSegment(
            id=fake.uuid4(),
            tenant_id=document.tenant_id,
            dataset_id=dataset.id,
            document_id=document.id,
            position=1,
            content=fake.text(max_nb_chars=200),
            word_count=len(fake.text(max_nb_chars=200).split()),
            tokens=len(fake.text(max_nb_chars=200).split()) * 2,
            index_node_id="node_1",
            index_node_hash="hash_1",
            enabled=True,  # Already enabled, but will still be processed
            status="completed",
            created_by=document.created_by,
        )
        db.session.add(segment2)
        segments.append(segment2)
        # Segment 3: Should NOT be processed (enabled=False, status="processing")
        segment3 = DocumentSegment(
            id=fake.uuid4(),
            tenant_id=document.tenant_id,
            dataset_id=dataset.id,
            document_id=document.id,
            position=2,
            content=fake.text(max_nb_chars=200),
            word_count=len(fake.text(max_nb_chars=200).split()),
            tokens=len(fake.text(max_nb_chars=200).split()) * 2,
            index_node_id="node_2",
            index_node_hash="hash_2",
            enabled=False,
            status="processing",  # Not completed
            created_by=document.created_by,
        )
        db.session.add(segment3)
        segments.append(segment3)
        # Segment 4: Should be processed (enabled=False, status="completed")
        segment4 = DocumentSegment(
            id=fake.uuid4(),
            tenant_id=document.tenant_id,
            dataset_id=dataset.id,
            document_id=document.id,
            position=3,
            content=fake.text(max_nb_chars=200),
            word_count=len(fake.text(max_nb_chars=200).split()),
            tokens=len(fake.text(max_nb_chars=200).split()) * 2,
            index_node_id="node_3",
            index_node_hash="hash_3",
            enabled=False,
            status="completed",
            created_by=document.created_by,
        )
        db.session.add(segment4)
        segments.append(segment4)
        db.session.commit()
        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)
        # Act: Execute the task
        add_document_to_index_task(document.id)
        # Assert: Verify only eligible segments were processed
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()
        # Verify the load method was called with correct parameters
        call_args = mock_external_service_dependencies["index_processor"].load.call_args
        assert call_args is not None
        documents = call_args[0][1]  # Second argument should be documents list
        assert len(documents) == 3  # 3 segments with status="completed" should be processed
        # Verify correct segments were processed (by position order)
        # Segments 1, 2, 4 should be processed (positions 0, 1, 3)
        # Segment 3 is skipped (position 2, status="processing")
        assert documents[0].metadata["doc_id"] == "node_0"  # segment1, position 0
        assert documents[1].metadata["doc_id"] == "node_1"  # segment2, position 1
        assert documents[2].metadata["doc_id"] == "node_3"  # segment4, position 3
        # Verify database state changes
        db.session.refresh(document)
        db.session.refresh(segment1)
        db.session.refresh(segment2)
        db.session.refresh(segment3)
        db.session.refresh(segment4)
        # All segments should be enabled because the task updates ALL segments for the document
        assert segment1.enabled is True
        assert segment2.enabled is True  # Was already enabled, stays True
        assert segment3.enabled is True  # Was not processed but still updated to True
        assert segment4.enabled is True
        # Verify redis cache was cleared
        assert redis_client.exists(indexing_cache_key) == 0
    def test_add_document_to_index_comprehensive_error_scenarios(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test comprehensive error scenarios and recovery.
        This test verifies:
        - Multiple types of exceptions are handled properly
        - Error state is consistently managed
        - Resource cleanup occurs in all error cases
        - Database session management is robust
        - Redis cache key deletion in all scenarios
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        # Test different exception types
        test_exceptions = [
            ("Database connection error", Exception("Database connection failed")),
            ("Index processor error", RuntimeError("Index processor initialization failed")),
            ("Memory error", MemoryError("Out of memory")),
            ("Value error", ValueError("Invalid index type")),
        ]
        for error_name, exception in test_exceptions:
            # Reset mocks for each test
            mock_external_service_dependencies["index_processor"].load.side_effect = exception
            # Reset document state
            document.enabled = True
            document.indexing_status = "completed"
            document.error = None
            document.disabled_at = None
            db.session.commit()
            # Set up Redis cache key
            indexing_cache_key = f"document_{document.id}_indexing"
            redis_client.set(indexing_cache_key, "processing", ex=300)
            # Act: Execute the task
            add_document_to_index_task(document.id)
            # Assert: Verify consistent error handling
            db.session.refresh(document)
            assert document.enabled is False, f"Document should be disabled for {error_name}"
            assert document.indexing_status == "error", f"Document status should be error for {error_name}"
            assert document.error is not None, f"Error should be recorded for {error_name}"
            assert str(exception) in document.error, f"Error message should contain exception for {error_name}"
            assert document.disabled_at is not None, f"Disabled timestamp should be set for {error_name}"
            # Verify segments remain disabled due to error
            for segment in segments:
                db.session.refresh(segment)
                assert segment.enabled is False, f"Segments should remain disabled for {error_name}"
            # Verify redis cache was still cleared despite error
            assert redis_client.exists(indexing_cache_key) == 0, f"Redis cache should be cleared for {error_name}"

View File

@@ -0,0 +1,720 @@
"""
Integration tests for batch_clean_document_task using testcontainers.
This module tests the batch document cleaning functionality with real database
and storage containers to ensure proper cleanup of documents, segments, and files.
"""
import json
import uuid
from unittest.mock import Mock, patch
import pytest
from faker import Faker
from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment
from models.model import UploadFile
from tasks.batch_clean_document_task import batch_clean_document_task
class TestBatchCleanDocumentTask:
"""Integration tests for batch_clean_document_task using testcontainers."""
@pytest.fixture
def mock_external_service_dependencies(self):
"""Mock setup for external service dependencies."""
with (
patch("extensions.ext_storage.storage") as mock_storage,
patch("core.rag.index_processor.index_processor_factory.IndexProcessorFactory") as mock_index_factory,
patch("core.tools.utils.web_reader_tool.get_image_upload_file_ids") as mock_get_image_ids,
):
# Setup default mock returns
mock_storage.delete.return_value = None
# Mock index processor
mock_index_processor = Mock()
mock_index_processor.clean.return_value = None
mock_index_factory.return_value.init_index_processor.return_value = mock_index_processor
# Mock image file ID extraction
mock_get_image_ids.return_value = []
yield {
"storage": mock_storage,
"index_factory": mock_index_factory,
"index_processor": mock_index_processor,
"get_image_ids": mock_get_image_ids,
}
def _create_test_account(self, db_session_with_containers):
"""
Helper method to create a test account for testing.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
Returns:
Account: Created account instance
"""
fake = Faker()
# Create account
account = Account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
status="active",
)
db.session.add(account)
db.session.commit()
# Create tenant for the account
tenant = Tenant(
name=fake.company(),
status="normal",
)
db.session.add(tenant)
db.session.commit()
# Create tenant-account join
join = TenantAccountJoin(
tenant_id=tenant.id,
account_id=account.id,
role=TenantAccountRole.OWNER,
current=True,
)
db.session.add(join)
db.session.commit()
# Set current tenant for account
account.current_tenant = tenant
return account
def _create_test_dataset(self, db_session_with_containers, account):
"""
Helper method to create a test dataset for testing.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
account: Account instance
Returns:
Dataset: Created dataset instance
"""
fake = Faker()
dataset = Dataset(
id=str(uuid.uuid4()),
tenant_id=account.current_tenant.id,
name=fake.word(),
description=fake.sentence(),
data_source_type="upload_file",
created_by=account.id,
embedding_model="text-embedding-ada-002",
embedding_model_provider="openai",
)
db.session.add(dataset)
db.session.commit()
return dataset
def _create_test_document(self, db_session_with_containers, dataset, account):
"""
Helper method to create a test document for testing.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
dataset: Dataset instance
account: Account instance
Returns:
Document: Created document instance
"""
fake = Faker()
document = Document(
id=str(uuid.uuid4()),
tenant_id=account.current_tenant.id,
dataset_id=dataset.id,
position=0,
name=fake.word(),
data_source_type="upload_file",
data_source_info=json.dumps({"upload_file_id": str(uuid.uuid4())}),
batch="test_batch",
created_from="test",
created_by=account.id,
indexing_status="completed",
doc_form="text_model",
)
db.session.add(document)
db.session.commit()
return document
def _create_test_document_segment(self, db_session_with_containers, document, account):
"""
Helper method to create a test document segment for testing.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
document: Document instance
account: Account instance
Returns:
DocumentSegment: Created document segment instance
"""
fake = Faker()
segment = DocumentSegment(
id=str(uuid.uuid4()),
tenant_id=account.current_tenant.id,
dataset_id=document.dataset_id,
document_id=document.id,
position=0,
content=fake.text(),
word_count=100,
tokens=50,
index_node_id=str(uuid.uuid4()),
created_by=account.id,
status="completed",
)
db.session.add(segment)
db.session.commit()
return segment
def _create_test_upload_file(self, db_session_with_containers, account):
"""
Helper method to create a test upload file for testing.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
account: Account instance
Returns:
UploadFile: Created upload file instance
"""
fake = Faker()
from models.enums import CreatorUserRole
upload_file = UploadFile(
tenant_id=account.current_tenant.id,
storage_type="local",
key=f"test_files/{fake.file_name()}",
name=fake.file_name(),
size=1024,
extension="txt",
mime_type="text/plain",
created_by_role=CreatorUserRole.ACCOUNT,
created_by=account.id,
created_at=naive_utc_now(),
used=False,
)
db.session.add(upload_file)
db.session.commit()
return upload_file
    def test_batch_clean_document_task_successful_cleanup(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful cleanup of documents with segments and files.
        This test verifies that the task properly cleans up:
        - Document segments from the index
        - Associated image files from storage
        - Upload files from storage and database
        """
        # Create test data
        account = self._create_test_account(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account)
        document = self._create_test_document(db_session_with_containers, dataset, account)
        segment = self._create_test_document_segment(db_session_with_containers, document, account)
        upload_file = self._create_test_upload_file(db_session_with_containers, account)
        # Update document to reference the upload file
        # (the helper stored a random upload_file_id; point it at the real row).
        document.data_source_info = json.dumps({"upload_file_id": upload_file.id})
        db.session.commit()
        # Store original IDs for verification
        document_id = document.id
        segment_id = segment.id
        file_id = upload_file.id
        # Execute the task
        batch_clean_document_task(
            document_ids=[document_id], dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=[file_id]
        )
        # Verify that the task completed successfully
        # The task should have processed the segment and cleaned up the database
        # Verify database cleanup
        db.session.commit()  # Ensure all changes are committed
        # Check that segment is deleted
        deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
        assert deleted_segment is None
        # Check that upload file is deleted
        deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
        assert deleted_file is None
def test_batch_clean_document_task_with_image_files(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test cleanup of documents containing image references.

    This test verifies that the task properly handles documents with
    image content and cleans up associated segments.
    """
    # Create test data
    account = self._create_test_account(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account)
    document = self._create_test_document(db_session_with_containers, dataset, account)
    # Create segment with simple content (no image references)
    # NOTE(review): despite the test name, the segment content contains no
    # image markup — presumably the image-extraction path is exercised
    # elsewhere; confirm the intended coverage.
    segment = DocumentSegment(
        id=str(uuid.uuid4()),
        tenant_id=account.current_tenant.id,
        dataset_id=document.dataset_id,
        document_id=document.id,
        position=0,
        content="Simple text content without images",
        word_count=100,
        tokens=50,
        index_node_id=str(uuid.uuid4()),
        created_by=account.id,
        status="completed",
    )
    db.session.add(segment)
    db.session.commit()
    # Store original IDs for verification
    segment_id = segment.id
    document_id = document.id
    # Execute the task
    batch_clean_document_task(
        document_ids=[document_id], dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=[]
    )
    # Verify database cleanup
    db.session.commit()
    # Check that segment is deleted
    deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
    assert deleted_segment is None
    # Verify that the task completed successfully by checking the log output
    # The task should have processed the segment and cleaned up the database
def test_batch_clean_document_task_no_segments(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test cleanup when document has no segments.

    This test verifies that the task handles documents without segments
    gracefully and still cleans up associated files.
    """
    # Create test data without segments
    account = self._create_test_account(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account)
    document = self._create_test_document(db_session_with_containers, dataset, account)
    upload_file = self._create_test_upload_file(db_session_with_containers, account)
    # Update document to reference the upload file
    document.data_source_info = json.dumps({"upload_file_id": upload_file.id})
    db.session.commit()
    # Store original IDs for verification
    document_id = document.id
    file_id = upload_file.id
    # Execute the task
    batch_clean_document_task(
        document_ids=[document_id], dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=[file_id]
    )
    # Verify database cleanup: even with no segments, the referenced upload
    # file must be removed. (The original test repeated this verification
    # block twice verbatim; the duplicate has been removed.)
    db.session.commit()
    deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
    assert deleted_file is None
def test_batch_clean_document_task_dataset_not_found(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test cleanup when dataset is not found.

    This test verifies that the task properly handles the case where
    the specified dataset does not exist in the database.
    """
    # Create test data
    account = self._create_test_account(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account)
    document = self._create_test_document(db_session_with_containers, dataset, account)
    # Store original IDs for verification
    document_id = document.id
    dataset_id = dataset.id
    # Delete the dataset to simulate not found scenario
    db.session.delete(dataset)
    db.session.commit()
    # Execute the task with non-existent dataset; the task is expected to
    # return early without raising.
    batch_clean_document_task(document_ids=[document_id], dataset_id=dataset_id, doc_form="text_model", file_ids=[])
    # Verify that no index processing occurred
    mock_external_service_dependencies["index_processor"].clean.assert_not_called()
    # Verify that no storage operations occurred
    mock_external_service_dependencies["storage"].delete.assert_not_called()
    # Verify that no database cleanup occurred
    db.session.commit()
    # Document should still exist since cleanup failed
    existing_document = db.session.query(Document).filter_by(id=document_id).first()
    assert existing_document is not None
def test_batch_clean_document_task_storage_cleanup_failure(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test cleanup when storage operations fail.

    This test verifies that the task continues processing even when
    storage cleanup operations fail, ensuring database cleanup still occurs.
    """
    # Create test data
    account = self._create_test_account(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account)
    document = self._create_test_document(db_session_with_containers, dataset, account)
    segment = self._create_test_document_segment(db_session_with_containers, document, account)
    upload_file = self._create_test_upload_file(db_session_with_containers, account)
    # Update document to reference the upload file
    document.data_source_info = json.dumps({"upload_file_id": upload_file.id})
    db.session.commit()
    # Store original IDs for verification
    document_id = document.id
    segment_id = segment.id
    file_id = upload_file.id
    # Mock storage.delete to raise an exception, simulating a backend outage.
    mock_external_service_dependencies["storage"].delete.side_effect = Exception("Storage error")
    # Execute the task
    batch_clean_document_task(
        document_ids=[document_id], dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=[file_id]
    )
    # Verify that the task completed successfully despite storage failure
    # The task should continue processing even when storage operations fail
    # Verify database cleanup still occurred despite storage failure
    db.session.commit()
    # Check that segment is deleted from database
    deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
    assert deleted_segment is None
    # Check that upload file is deleted from database
    deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
    assert deleted_file is None
def test_batch_clean_document_task_multiple_documents(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test cleanup of multiple documents in a single batch operation.

    This test verifies that the task can handle multiple documents
    efficiently and cleans up all associated resources.
    """
    # Create test data for multiple documents
    account = self._create_test_account(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account)
    documents = []
    segments = []
    upload_files = []
    # Create 3 documents with segments and files
    for i in range(3):
        document = self._create_test_document(db_session_with_containers, dataset, account)
        segment = self._create_test_document_segment(db_session_with_containers, document, account)
        upload_file = self._create_test_upload_file(db_session_with_containers, account)
        # Update document to reference the upload file
        document.data_source_info = json.dumps({"upload_file_id": upload_file.id})
        documents.append(document)
        segments.append(segment)
        upload_files.append(upload_file)
    # Single commit persists the data_source_info updates for all documents.
    db.session.commit()
    # Store original IDs for verification
    document_ids = [doc.id for doc in documents]
    segment_ids = [seg.id for seg in segments]
    file_ids = [file.id for file in upload_files]
    # Execute the task with multiple documents
    batch_clean_document_task(
        document_ids=document_ids, dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=file_ids
    )
    # Verify that the task completed successfully for all documents
    # The task should process all documents and clean up all associated resources
    # Verify database cleanup for all resources
    db.session.commit()
    # Check that all segments are deleted
    for segment_id in segment_ids:
        deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
        assert deleted_segment is None
    # Check that all upload files are deleted
    for file_id in file_ids:
        deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
        assert deleted_file is None
def test_batch_clean_document_task_different_doc_forms(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test cleanup with different document form types.

    This test verifies that the task properly handles different
    document form types and creates the appropriate index processor.
    """
    # Create test data
    account = self._create_test_account(db_session_with_containers)
    # Each supported doc_form should be routed to a matching index processor.
    doc_forms = ["text_model", "qa_model", "hierarchical_model"]
    for doc_form in doc_forms:
        dataset = self._create_test_dataset(db_session_with_containers, account)
        db.session.commit()
        document = self._create_test_document(db_session_with_containers, dataset, account)
        # Update document doc_form
        document.doc_form = doc_form
        db.session.commit()
        segment = self._create_test_document_segment(db_session_with_containers, document, account)
        # Store the ID before the object is deleted
        segment_id = segment.id
        try:
            # Execute the task
            batch_clean_document_task(
                document_ids=[document.id], dataset_id=dataset.id, doc_form=doc_form, file_ids=[]
            )
            # On success the segment must have been removed from the database.
            db.session.commit()
            deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
            assert deleted_segment is None
        except Exception:
            # External services (e.g. the plugin daemon) may be unavailable in
            # test environments, in which case the task can fail before the
            # segment is deleted. Both outcomes leave the database consistent,
            # so we only make sure the session is usable for the next doc_form.
            # (Original code bound the exception to an unused name and ran a
            # query feeding an if/else whose branches were both `pass`.)
            db.session.commit()
def test_batch_clean_document_task_large_batch_performance(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test cleanup performance with a large batch of documents.

    This test verifies that the task can handle large batches efficiently
    and maintains performance characteristics.
    """
    import time
    # Create test data for large batch
    account = self._create_test_account(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account)
    documents = []
    segments = []
    upload_files = []
    # Create 10 documents with segments and files (larger batch)
    batch_size = 10
    for i in range(batch_size):
        document = self._create_test_document(db_session_with_containers, dataset, account)
        segment = self._create_test_document_segment(db_session_with_containers, document, account)
        upload_file = self._create_test_upload_file(db_session_with_containers, account)
        # Update document to reference the upload file
        document.data_source_info = json.dumps({"upload_file_id": upload_file.id})
        documents.append(document)
        segments.append(segment)
        upload_files.append(upload_file)
    db.session.commit()
    # Store original IDs for verification
    document_ids = [doc.id for doc in documents]
    segment_ids = [seg.id for seg in segments]
    file_ids = [file.id for file in upload_files]
    # Measure execution time
    start_time = time.perf_counter()
    # Execute the task with large batch
    batch_clean_document_task(
        document_ids=document_ids, dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=file_ids
    )
    end_time = time.perf_counter()
    execution_time = end_time - start_time
    # Verify performance characteristics (should complete within reasonable time)
    # NOTE(review): wall-clock asserts can be flaky on loaded CI hosts —
    # consider a generous margin or marking this test as perf-only.
    assert execution_time < 5.0  # Should complete within 5 seconds
    # Verify that the task completed successfully for the large batch
    # The task should handle large batches efficiently
    # Verify database cleanup for all resources
    db.session.commit()
    # Check that all segments are deleted
    for segment_id in segment_ids:
        deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
        assert deleted_segment is None
    # Check that all upload files are deleted
    for file_id in file_ids:
        deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
        assert deleted_file is None
def test_batch_clean_document_task_integration_with_real_database(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test full integration with real database operations.

    This test verifies that the task integrates properly with the
    actual database and maintains data consistency throughout the process.
    """
    # Create test data
    account = self._create_test_account(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account)
    # Create document with complex structure
    document = self._create_test_document(db_session_with_containers, dataset, account)
    # Create multiple segments for the document (not yet added to the session;
    # they are added in a single batch further down).
    segments = []
    for i in range(3):
        segment = DocumentSegment(
            id=str(uuid.uuid4()),
            tenant_id=account.current_tenant.id,
            dataset_id=document.dataset_id,
            document_id=document.id,
            position=i,
            content=f"Segment content {i} with some text",
            word_count=50 + i * 10,
            tokens=25 + i * 5,
            index_node_id=str(uuid.uuid4()),
            created_by=account.id,
            status="completed",
        )
        segments.append(segment)
    # Create upload file
    upload_file = self._create_test_upload_file(db_session_with_containers, account)
    # Update document to reference the upload file
    document.data_source_info = json.dumps({"upload_file_id": upload_file.id})
    # Add all to database
    for segment in segments:
        db.session.add(segment)
    db.session.commit()
    # Verify initial state before running the task
    assert db.session.query(DocumentSegment).filter_by(document_id=document.id).count() == 3
    assert db.session.query(UploadFile).filter_by(id=upload_file.id).first() is not None
    # Store original IDs for verification
    document_id = document.id
    segment_ids = [seg.id for seg in segments]
    file_id = upload_file.id
    # Execute the task
    batch_clean_document_task(
        document_ids=[document_id], dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=[file_id]
    )
    # Verify that the task completed successfully
    # The task should process all segments and clean up all associated resources
    # Verify database cleanup
    db.session.commit()
    # Check that all segments are deleted
    for segment_id in segment_ids:
        deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
        assert deleted_segment is None
    # Check that upload file is deleted
    deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
    assert deleted_file is None
    # Verify final database state
    assert db.session.query(DocumentSegment).filter_by(document_id=document_id).count() == 0
    assert db.session.query(UploadFile).filter_by(id=file_id).first() is None

View File

@@ -0,0 +1,737 @@
"""
Integration tests for batch_create_segment_to_index_task using testcontainers.
This module provides comprehensive integration tests for the batch segment creation
and indexing task using TestContainers infrastructure. The tests ensure that the
task properly processes CSV files, creates document segments, and establishes
vector indexes in a real database environment.
All tests use the testcontainers infrastructure to ensure proper database isolation
and realistic testing scenarios with actual PostgreSQL and Redis instances.
"""
import uuid
from datetime import datetime
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment
from models.enums import CreatorUserRole
from models.model import UploadFile
from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task
class TestBatchCreateSegmentToIndexTask:
"""Integration tests for batch_create_segment_to_index_task using testcontainers."""
@pytest.fixture(autouse=True)
def cleanup_database(self, db_session_with_containers):
    """Clean up database before each test to ensure isolation."""
    from extensions.ext_database import db
    from extensions.ext_redis import redis_client

    # Delete child tables before their parents to avoid FK violations.
    for model in (
        DocumentSegment,
        Document,
        Dataset,
        UploadFile,
        TenantAccountJoin,
        Tenant,
        Account,
    ):
        db.session.query(model).delete()
    db.session.commit()
    # Drop any cached job-status keys left over from previous tests.
    redis_client.flushdb()
@pytest.fixture
def mock_external_service_dependencies(self):
    """Mock setup for external service dependencies.

    Patches the names as imported by the task module (storage, ModelManager,
    VectorService) so the task under test never touches real object storage,
    model providers, or the vector database.
    """
    with (
        patch("tasks.batch_create_segment_to_index_task.storage") as mock_storage,
        patch("tasks.batch_create_segment_to_index_task.ModelManager") as mock_model_manager,
        patch("tasks.batch_create_segment_to_index_task.VectorService") as mock_vector_service,
    ):
        # Setup default mock returns; individual tests override
        # storage.download with a side_effect that writes a CSV fixture.
        mock_storage.download.return_value = None
        # Mock embedding model for high quality indexing; token counts per
        # text are fixed so word/token accounting is deterministic.
        mock_embedding_model = MagicMock()
        mock_embedding_model.get_text_embedding_num_tokens.return_value = [10, 15, 20]
        mock_model_manager_instance = MagicMock()
        mock_model_manager_instance.get_model_instance.return_value = mock_embedding_model
        mock_model_manager.return_value = mock_model_manager_instance
        # Mock vector service
        mock_vector_service.create_segments_vector.return_value = None
        yield {
            "storage": mock_storage,
            "model_manager": mock_model_manager,
            "vector_service": mock_vector_service,
            "embedding_model": mock_embedding_model,
        }
def _create_test_account_and_tenant(self, db_session_with_containers):
    """
    Persist an Account plus a Tenant joined as OWNER, for test setup.

    Args:
        db_session_with_containers: Database session from testcontainers infrastructure

    Returns:
        tuple: (Account, Tenant) created instances
    """
    from extensions.ext_database import db

    faker = Faker()

    # Create the account first so the join row can reference it.
    account = Account(
        email=faker.email(),
        name=faker.name(),
        interface_language="en-US",
        status="active",
    )
    db.session.add(account)
    db.session.commit()

    # Create tenant for the account.
    tenant = Tenant(name=faker.company(), status="normal")
    db.session.add(tenant)
    db.session.commit()

    # Link the account to the tenant as its owner.
    membership = TenantAccountJoin(
        tenant_id=tenant.id,
        account_id=account.id,
        role=TenantAccountRole.OWNER,
        current=True,
    )
    db.session.add(membership)
    db.session.commit()

    # Make the new tenant the active one on the account object.
    account.current_tenant = tenant
    return account, tenant
def _create_test_dataset(self, db_session_with_containers, account, tenant):
    """
    Persist a high-quality-indexing Dataset owned by *account* in *tenant*.

    Args:
        db_session_with_containers: Database session from testcontainers infrastructure
        account: Account instance
        tenant: Tenant instance

    Returns:
        Dataset: Created dataset instance
    """
    from extensions.ext_database import db

    faker = Faker()
    new_dataset = Dataset(
        tenant_id=tenant.id,
        name=faker.company(),
        description=faker.text(),
        data_source_type="upload_file",
        indexing_technique="high_quality",
        embedding_model="text-embedding-ada-002",
        embedding_model_provider="openai",
        created_by=account.id,
    )
    db.session.add(new_dataset)
    db.session.commit()
    return new_dataset
def _create_test_document(self, db_session_with_containers, account, tenant, dataset):
    """
    Persist a completed, enabled Document attached to *dataset*.

    Args:
        db_session_with_containers: Database session from testcontainers infrastructure
        account: Account instance
        tenant: Tenant instance
        dataset: Dataset instance

    Returns:
        Document: Created document instance
    """
    from extensions.ext_database import db

    faker = Faker()
    # indexing_status must be "completed" (and the document enabled, not
    # archived) for the batch-create task to accept it.
    doc = Document(
        tenant_id=tenant.id,
        dataset_id=dataset.id,
        position=1,
        data_source_type="upload_file",
        batch="test_batch",
        name=faker.file_name(),
        created_from="upload_file",
        created_by=account.id,
        indexing_status="completed",
        enabled=True,
        archived=False,
        doc_form="text_model",
        word_count=0,
    )
    db.session.add(doc)
    db.session.commit()
    return doc
def _create_test_upload_file(self, db_session_with_containers, account, tenant):
    """
    Persist a CSV UploadFile record owned by *account* in *tenant*.

    Args:
        db_session_with_containers: Database session from testcontainers infrastructure
        account: Account instance
        tenant: Tenant instance

    Returns:
        UploadFile: Created upload file instance
    """
    from extensions.ext_database import db

    faker = Faker()
    # The storage key is fake; tests stub storage.download to write fixture
    # CSV content instead of reading a real object.
    record = UploadFile(
        tenant_id=tenant.id,
        storage_type="local",
        key=f"test_files/{faker.file_name()}",
        name=faker.file_name(),
        size=1024,
        extension=".csv",
        mime_type="text/csv",
        created_by_role=CreatorUserRole.ACCOUNT,
        created_by=account.id,
        created_at=datetime.now(),
        used=False,
    )
    db.session.add(record)
    db.session.commit()
    return record
def _create_test_csv_content(self, content_type="text_model"):
"""
Helper method to create test CSV content.
Args:
content_type: Type of content to create ("text_model" or "qa_model")
Returns:
str: CSV content as string
"""
if content_type == "qa_model":
csv_content = "content,answer\n"
csv_content += "This is the first segment content,This is the first answer\n"
csv_content += "This is the second segment content,This is the second answer\n"
csv_content += "This is the third segment content,This is the third answer\n"
else:
csv_content = "content\n"
csv_content += "This is the first segment content\n"
csv_content += "This is the second segment content\n"
csv_content += "This is the third segment content\n"
return csv_content
def test_batch_create_segment_to_index_task_success_text_model(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test successful batch creation of segments for text model documents.

    This test verifies that the task can successfully:
    1. Process a CSV file with text content
    2. Create document segments with proper metadata
    3. Update document word count
    4. Create vector indexes
    5. Set Redis cache status
    """
    # Create test data
    account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
    document = self._create_test_document(db_session_with_containers, account, tenant, dataset)
    upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)
    # Create CSV content (3 rows, content column only)
    csv_content = self._create_test_csv_content("text_model")
    # Mock storage to return our CSV content: the stub writes the fixture
    # CSV to whatever temp path the task asks storage to download into.
    mock_storage = mock_external_service_dependencies["storage"]
    def mock_download(key, file_path):
        Path(file_path).write_text(csv_content, encoding="utf-8")
    mock_storage.download.side_effect = mock_download
    # Execute the task
    job_id = str(uuid.uuid4())
    batch_create_segment_to_index_task(
        job_id=job_id,
        upload_file_id=upload_file.id,
        dataset_id=dataset.id,
        document_id=document.id,
        tenant_id=tenant.id,
        user_id=account.id,
    )
    # Verify results
    from extensions.ext_database import db
    # Check that segments were created, in CSV row order
    segments = (
        db.session.query(DocumentSegment)
        .filter_by(document_id=document.id)
        .order_by(DocumentSegment.position)
        .all()
    )
    assert len(segments) == 3
    # Verify segment content and metadata
    for i, segment in enumerate(segments):
        assert segment.tenant_id == tenant.id
        assert segment.dataset_id == dataset.id
        assert segment.document_id == document.id
        assert segment.position == i + 1  # positions are 1-based
        assert segment.status == "completed"
        assert segment.indexing_at is not None
        assert segment.completed_at is not None
        assert segment.answer is None  # text_model doesn't have answers
    # Check that document word count was updated
    db.session.refresh(document)
    assert document.word_count > 0
    # Verify vector service was called
    mock_vector_service = mock_external_service_dependencies["vector_service"]
    mock_vector_service.create_segments_vector.assert_called_once()
    # Check Redis cache was set (redis returns bytes, hence b"completed")
    from extensions.ext_redis import redis_client
    cache_key = f"segment_batch_import_{job_id}"
    cache_value = redis_client.get(cache_key)
    assert cache_value == b"completed"
def test_batch_create_segment_to_index_task_dataset_not_found(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test task failure when dataset does not exist.

    This test verifies that the task properly handles error cases:
    1. Fails gracefully when dataset is not found
    2. Sets appropriate Redis cache status
    3. Logs error information
    4. Maintains database integrity
    """
    # Create test data
    account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
    upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)
    # Use non-existent IDs (random UUIDs not present in the DB)
    non_existent_dataset_id = str(uuid.uuid4())
    non_existent_document_id = str(uuid.uuid4())
    # Execute the task with non-existent dataset; expected not to raise.
    job_id = str(uuid.uuid4())
    batch_create_segment_to_index_task(
        job_id=job_id,
        upload_file_id=upload_file.id,
        dataset_id=non_existent_dataset_id,
        document_id=non_existent_document_id,
        tenant_id=tenant.id,
        user_id=account.id,
    )
    # Verify error handling
    # Check Redis cache was set to error status
    from extensions.ext_redis import redis_client
    cache_key = f"segment_batch_import_{job_id}"
    cache_value = redis_client.get(cache_key)
    assert cache_value == b"error"
    # Verify no segments were created (since dataset doesn't exist)
    from extensions.ext_database import db
    segments = db.session.query(DocumentSegment).all()
    assert len(segments) == 0
    # Verify no documents were modified (the fixture created none, so the
    # table must still be empty)
    documents = db.session.query(Document).all()
    assert len(documents) == 0
def test_batch_create_segment_to_index_task_document_not_found(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test task failure when document does not exist.

    This test verifies that the task properly handles error cases:
    1. Fails gracefully when document is not found
    2. Sets appropriate Redis cache status
    3. Maintains database integrity
    4. Logs appropriate error information
    """
    # Create test data (dataset exists, document does not)
    account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
    upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)
    # Use non-existent document ID
    non_existent_document_id = str(uuid.uuid4())
    # Execute the task with non-existent document; expected not to raise.
    job_id = str(uuid.uuid4())
    batch_create_segment_to_index_task(
        job_id=job_id,
        upload_file_id=upload_file.id,
        dataset_id=dataset.id,
        document_id=non_existent_document_id,
        tenant_id=tenant.id,
        user_id=account.id,
    )
    # Verify error handling
    # Check Redis cache was set to error status
    from extensions.ext_redis import redis_client
    cache_key = f"segment_batch_import_{job_id}"
    cache_value = redis_client.get(cache_key)
    assert cache_value == b"error"
    # Verify no segments were created
    from extensions.ext_database import db
    segments = db.session.query(DocumentSegment).all()
    assert len(segments) == 0
    # Verify dataset remains unchanged (no segments were added to the dataset)
    db.session.refresh(dataset)
    segments_for_dataset = db.session.query(DocumentSegment).filter_by(dataset_id=dataset.id).all()
    assert len(segments_for_dataset) == 0
def test_batch_create_segment_to_index_task_document_not_available(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test task failure when document is not available for indexing.

    This test verifies that the task properly handles error cases:
    1. Fails when document is disabled
    2. Fails when document is archived
    3. Fails when document indexing status is not completed
    4. Sets appropriate Redis cache status
    """
    # Create test data
    account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
    upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)
    # Create document with various unavailable states; each one violates a
    # different availability precondition of the task.
    test_cases = [
        # Disabled document
        Document(
            tenant_id=tenant.id,
            dataset_id=dataset.id,
            position=1,
            data_source_type="upload_file",
            batch="test_batch",
            name="disabled_document",
            created_from="upload_file",
            created_by=account.id,
            indexing_status="completed",
            enabled=False,  # Document is disabled
            archived=False,
            doc_form="text_model",
            word_count=0,
        ),
        # Archived document
        Document(
            tenant_id=tenant.id,
            dataset_id=dataset.id,
            position=2,
            data_source_type="upload_file",
            batch="test_batch",
            name="archived_document",
            created_from="upload_file",
            created_by=account.id,
            indexing_status="completed",
            enabled=True,
            archived=True,  # Document is archived
            doc_form="text_model",
            word_count=0,
        ),
        # Document with incomplete indexing
        Document(
            tenant_id=tenant.id,
            dataset_id=dataset.id,
            position=3,
            data_source_type="upload_file",
            batch="test_batch",
            name="incomplete_document",
            created_from="upload_file",
            created_by=account.id,
            indexing_status="indexing",  # Not completed
            enabled=True,
            archived=False,
            doc_form="text_model",
            word_count=0,
        ),
    ]
    from extensions.ext_database import db
    for document in test_cases:
        db.session.add(document)
    db.session.commit()
    # Test each unavailable document with its own job id
    for document in test_cases:
        job_id = str(uuid.uuid4())
        batch_create_segment_to_index_task(
            job_id=job_id,
            upload_file_id=upload_file.id,
            dataset_id=dataset.id,
            document_id=document.id,
            tenant_id=tenant.id,
            user_id=account.id,
        )
        # Verify error handling for each case
        from extensions.ext_redis import redis_client
        cache_key = f"segment_batch_import_{job_id}"
        cache_value = redis_client.get(cache_key)
        assert cache_value == b"error"
        # Verify no segments were created
        segments = db.session.query(DocumentSegment).filter_by(document_id=document.id).all()
        assert len(segments) == 0
def test_batch_create_segment_to_index_task_upload_file_not_found(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test task failure when upload file does not exist.

    This test verifies that the task properly handles error cases:
    1. Fails gracefully when upload file is not found
    2. Sets appropriate Redis cache status
    3. Maintains database integrity
    4. Logs appropriate error information
    """
    # Create test data (dataset and document exist, upload file does not)
    account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
    document = self._create_test_document(db_session_with_containers, account, tenant, dataset)
    # Use non-existent upload file ID
    non_existent_upload_file_id = str(uuid.uuid4())
    # Execute the task with non-existent upload file; expected not to raise.
    job_id = str(uuid.uuid4())
    batch_create_segment_to_index_task(
        job_id=job_id,
        upload_file_id=non_existent_upload_file_id,
        dataset_id=dataset.id,
        document_id=document.id,
        tenant_id=tenant.id,
        user_id=account.id,
    )
    # Verify error handling
    # Check Redis cache was set to error status
    from extensions.ext_redis import redis_client
    cache_key = f"segment_batch_import_{job_id}"
    cache_value = redis_client.get(cache_key)
    assert cache_value == b"error"
    # Verify no segments were created
    from extensions.ext_database import db
    segments = db.session.query(DocumentSegment).all()
    assert len(segments) == 0
    # Verify document remains unchanged (word count untouched)
    db.session.refresh(document)
    assert document.word_count == 0
def test_batch_create_segment_to_index_task_empty_csv_file(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test task failure when CSV file is empty.

    This test verifies that the task properly handles error cases:
    1. Fails when CSV file contains no data
    2. Sets appropriate Redis cache status
    3. Maintains database integrity
    4. Logs appropriate error information
    """
    # Create test data
    account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
    dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
    document = self._create_test_document(db_session_with_containers, account, tenant, dataset)
    upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)
    # Create empty CSV content
    empty_csv_content = "content\n"  # Only header, no data rows
    # Mock storage to return empty CSV content
    mock_storage = mock_external_service_dependencies["storage"]
    def mock_download(key, file_path):
        Path(file_path).write_text(empty_csv_content, encoding="utf-8")
    mock_storage.download.side_effect = mock_download
    # Execute the task; a header-only CSV is treated as an error by the task.
    job_id = str(uuid.uuid4())
    batch_create_segment_to_index_task(
        job_id=job_id,
        upload_file_id=upload_file.id,
        dataset_id=dataset.id,
        document_id=document.id,
        tenant_id=tenant.id,
        user_id=account.id,
    )
    # Verify error handling
    # Check Redis cache was set to error status
    from extensions.ext_redis import redis_client
    cache_key = f"segment_batch_import_{job_id}"
    cache_value = redis_client.get(cache_key)
    assert cache_value == b"error"
    # Verify no segments were created
    from extensions.ext_database import db
    segments = db.session.query(DocumentSegment).all()
    assert len(segments) == 0
    # Verify document remains unchanged
    db.session.refresh(document)
    assert document.word_count == 0
def test_batch_create_segment_to_index_task_position_calculation(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test proper position calculation for segments when existing segments exist.
This test verifies that the task correctly:
1. Calculates positions for new segments based on existing ones
2. Handles position increment logic properly
3. Maintains proper segment ordering
4. Works with existing segment data
"""
# Create test data
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
document = self._create_test_document(db_session_with_containers, account, tenant, dataset)
upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)
# Create existing segments to test position calculation
existing_segments = []
for i in range(3):
segment = DocumentSegment(
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
position=i + 1,
content=f"Existing segment {i + 1}",
word_count=len(f"Existing segment {i + 1}"),
tokens=10,
created_by=account.id,
status="completed",
index_node_id=str(uuid.uuid4()),
index_node_hash=f"hash_{i}",
)
existing_segments.append(segment)
from extensions.ext_database import db
for segment in existing_segments:
db.session.add(segment)
db.session.commit()
# Create CSV content
csv_content = self._create_test_csv_content("text_model")
# Mock storage to return our CSV content
mock_storage = mock_external_service_dependencies["storage"]
def mock_download(key, file_path):
Path(file_path).write_text(csv_content, encoding="utf-8")
mock_storage.download.side_effect = mock_download
# Execute the task
job_id = str(uuid.uuid4())
batch_create_segment_to_index_task(
job_id=job_id,
upload_file_id=upload_file.id,
dataset_id=dataset.id,
document_id=document.id,
tenant_id=tenant.id,
user_id=account.id,
)
# Verify results
# Check that new segments were created with correct positions
all_segments = (
db.session.query(DocumentSegment)
.filter_by(document_id=document.id)
.order_by(DocumentSegment.position)
.all()
)
assert len(all_segments) == 6 # 3 existing + 3 new
# Verify position ordering
for i, segment in enumerate(all_segments):
assert segment.position == i + 1
# Verify new segments have correct positions (4, 5, 6)
new_segments = all_segments[3:]
for i, segment in enumerate(new_segments):
expected_position = 4 + i # Should start at position 4
assert segment.position == expected_position
assert segment.status == "completed"
assert segment.indexing_at is not None
assert segment.completed_at is not None
# Check that document word count was updated
db.session.refresh(document)
assert document.word_count > 0
# Verify vector service was called
mock_vector_service = mock_external_service_dependencies["vector_service"]
mock_vector_service.create_segments_vector.assert_called_once()
# Check Redis cache was set
from extensions.ext_redis import redis_client
cache_key = f"segment_batch_import_{job_id}"
cache_value = redis_client.get(cache_key)
assert cache_value == b"completed"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,578 @@
"""
TestContainers-based integration tests for delete_segment_from_index_task.
This module provides comprehensive integration testing for the delete_segment_from_index_task
using TestContainers to ensure realistic database interactions and proper isolation.
The task is responsible for removing document segments from the vector index when segments
are deleted from the dataset.
"""
import logging
from unittest.mock import MagicMock, patch
from faker import Faker
from core.rag.index_processor.constant.index_type import IndexType
from models import Account, Dataset, Document, DocumentSegment, Tenant
from tasks.delete_segment_from_index_task import delete_segment_from_index_task
logger = logging.getLogger(__name__)
class TestDeleteSegmentFromIndexTask:
    """
    Comprehensive integration tests for delete_segment_from_index_task using testcontainers.

    This test class covers all major functionality of the delete_segment_from_index_task:
    - Successful segment deletion from index
    - Dataset not found scenarios
    - Document not found scenarios
    - Document status validation (disabled, archived, not completed)
    - Index processor integration and cleanup
    - Exception handling and error scenarios
    - Performance and timing verification

    All tests use the testcontainers infrastructure to ensure proper database isolation
    and realistic testing environment with actual database interactions.
    """

    def _create_test_tenant(self, db_session_with_containers, fake=None) -> Tenant:
        """
        Helper method to create a test tenant with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            fake: Faker instance for generating test data

        Returns:
            Tenant: Created test tenant instance
        """
        fake = fake or Faker()
        tenant = Tenant(name=f"Test Tenant {fake.company()}", plan="basic", status="active")
        # IDs and timestamps are set explicitly so tests do not depend on DB defaults.
        tenant.id = fake.uuid4()
        tenant.created_at = fake.date_time_this_year()
        tenant.updated_at = tenant.created_at
        db_session_with_containers.add(tenant)
        db_session_with_containers.commit()
        return tenant

    def _create_test_account(self, db_session_with_containers, tenant, fake=None) -> Account:
        """
        Helper method to create a test account with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            tenant: Tenant instance for the account
            fake: Faker instance for generating test data

        Returns:
            Account: Created test account instance
        """
        fake = fake or Faker()
        account = Account(
            name=fake.name(),
            email=fake.email(),
            avatar=fake.url(),
            status="active",
            interface_language="en-US",
        )
        account.id = fake.uuid4()
        account.created_at = fake.date_time_this_year()
        account.updated_at = account.created_at
        db_session_with_containers.add(account)
        db_session_with_containers.commit()
        return account

    def _create_test_dataset(self, db_session_with_containers, tenant, account, fake=None) -> Dataset:
        """
        Helper method to create a test dataset with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            tenant: Tenant instance for the dataset
            account: Account instance for the dataset
            fake: Faker instance for generating test data

        Returns:
            Dataset: Created test dataset instance
        """
        fake = fake or Faker()
        dataset = Dataset()
        dataset.id = fake.uuid4()
        dataset.tenant_id = tenant.id
        dataset.name = f"Test Dataset {fake.word()}"
        dataset.description = fake.text(max_nb_chars=200)
        dataset.provider = "vendor"
        dataset.permission = "only_me"
        dataset.data_source_type = "upload_file"
        dataset.indexing_technique = "high_quality"
        dataset.index_struct = '{"type": "paragraph"}'
        dataset.created_by = account.id
        dataset.created_at = fake.date_time_this_year()
        dataset.updated_by = account.id
        dataset.updated_at = dataset.created_at
        dataset.embedding_model = "text-embedding-ada-002"
        dataset.embedding_model_provider = "openai"
        dataset.built_in_field_enabled = False
        db_session_with_containers.add(dataset)
        db_session_with_containers.commit()
        return dataset

    def _create_test_document(self, db_session_with_containers, dataset, account, fake=None, **kwargs) -> Document:
        """
        Helper method to create a test document with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            dataset: Dataset instance for the document
            account: Account instance for the document
            fake: Faker instance for generating test data
            **kwargs: Additional document attributes to override defaults

        Returns:
            Document: Created test document instance
        """
        fake = fake or Faker()
        document = Document()
        document.id = fake.uuid4()
        document.tenant_id = dataset.tenant_id
        document.dataset_id = dataset.id
        # Each attribute can be overridden via kwargs; the defaults describe a
        # fully processed, enabled, non-archived document.
        document.position = kwargs.get("position", 1)
        document.data_source_type = kwargs.get("data_source_type", "upload_file")
        document.data_source_info = kwargs.get("data_source_info", "{}")
        document.batch = kwargs.get("batch", fake.uuid4())
        document.name = kwargs.get("name", f"Test Document {fake.word()}")
        document.created_from = kwargs.get("created_from", "api")
        document.created_by = account.id
        document.created_at = fake.date_time_this_year()
        document.processing_started_at = kwargs.get("processing_started_at", fake.date_time_this_year())
        document.file_id = kwargs.get("file_id", fake.uuid4())
        document.word_count = kwargs.get("word_count", fake.random_int(min=100, max=1000))
        document.parsing_completed_at = kwargs.get("parsing_completed_at", fake.date_time_this_year())
        document.cleaning_completed_at = kwargs.get("cleaning_completed_at", fake.date_time_this_year())
        document.splitting_completed_at = kwargs.get("splitting_completed_at", fake.date_time_this_year())
        document.tokens = kwargs.get("tokens", fake.random_int(min=50, max=500))
        document.indexing_latency = kwargs.get("indexing_latency", fake.random_number(digits=3))
        document.completed_at = kwargs.get("completed_at", fake.date_time_this_year())
        document.is_paused = kwargs.get("is_paused", False)
        document.indexing_status = kwargs.get("indexing_status", "completed")
        document.enabled = kwargs.get("enabled", True)
        document.archived = kwargs.get("archived", False)
        document.updated_at = fake.date_time_this_year()
        document.doc_type = kwargs.get("doc_type", "text")
        document.doc_metadata = kwargs.get("doc_metadata", {})
        document.doc_form = kwargs.get("doc_form", IndexType.PARAGRAPH_INDEX)
        document.doc_language = kwargs.get("doc_language", "en")
        db_session_with_containers.add(document)
        db_session_with_containers.commit()
        return document

    def _create_test_document_segments(
        self, db_session_with_containers, document, account, count=3, fake=None
    ) -> list[DocumentSegment]:
        """
        Helper method to create test document segments with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            document: Document instance for the segments
            account: Account instance for the segments
            count: Number of segments to create
            fake: Faker instance for generating test data

        Returns:
            list[DocumentSegment]: List of created test document segment instances
        """
        fake = fake or Faker()
        segments = []
        for i in range(count):
            segment = DocumentSegment()
            segment.id = fake.uuid4()
            segment.tenant_id = document.tenant_id
            segment.dataset_id = document.dataset_id
            segment.document_id = document.id
            segment.position = i + 1
            segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}"
            segment.answer = f"Test segment answer {i + 1}: {fake.text(max_nb_chars=100)}"
            segment.word_count = fake.random_int(min=10, max=100)
            segment.tokens = fake.random_int(min=5, max=50)
            segment.keywords = [fake.word() for _ in range(3)]
            # index_node_id is what the task under test receives to locate the
            # segment in the vector index.
            segment.index_node_id = f"node_{fake.uuid4()}"
            segment.index_node_hash = fake.sha256()
            segment.hit_count = 0
            segment.enabled = True
            segment.status = "completed"
            segment.created_by = account.id
            segment.created_at = fake.date_time_this_year()
            segment.updated_by = account.id
            segment.updated_at = segment.created_at
            db_session_with_containers.add(segment)
            segments.append(segment)
        # Single commit after the loop keeps segment creation atomic.
        db_session_with_containers.commit()
        return segments

    @patch("tasks.delete_segment_from_index_task.IndexProcessorFactory")
    def test_delete_segment_from_index_task_success(self, mock_index_processor_factory, db_session_with_containers):
        """
        Test successful segment deletion from index with comprehensive verification.

        This test verifies:
        - Proper task execution with valid dataset and document
        - Index processor factory initialization with correct document form
        - Index processor clean method called with correct parameters
        - Database session properly closed after execution
        - Task completes without exceptions
        """
        fake = Faker()
        # Create test data
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)
        # Extract index node IDs for the task
        index_node_ids = [segment.index_node_id for segment in segments]
        # Mock the index processor
        mock_processor = MagicMock()
        mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
        # Execute the task
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
        # Verify the task completed successfully
        assert result is None  # Task should return None on success
        # Verify index processor factory was called with correct document form
        mock_index_processor_factory.assert_called_once_with(document.doc_form)
        # Verify index processor clean method was called with correct parameters
        # Note: We can't directly compare Dataset objects as they are different instances
        # from database queries, so we verify the call was made and check the parameters
        assert mock_processor.clean.call_count == 1
        call_args = mock_processor.clean.call_args
        assert call_args[0][0].id == dataset.id  # Verify dataset ID matches
        assert call_args[0][1] == index_node_ids  # Verify index node IDs match
        assert call_args[1]["with_keywords"] is True
        assert call_args[1]["delete_child_chunks"] is True

    def test_delete_segment_from_index_task_dataset_not_found(self, db_session_with_containers):
        """
        Test task behavior when dataset is not found.

        This test verifies:
        - Task handles missing dataset gracefully
        - No index processor operations are attempted
        - Task returns early without exceptions
        - Database session is properly closed
        """
        fake = Faker()
        non_existent_dataset_id = fake.uuid4()
        non_existent_document_id = fake.uuid4()
        index_node_ids = [f"node_{fake.uuid4()}" for _ in range(3)]
        # Execute the task with non-existent dataset
        result = delete_segment_from_index_task(index_node_ids, non_existent_dataset_id, non_existent_document_id)
        # Verify the task completed without exceptions
        assert result is None  # Task should return None when dataset not found

    def test_delete_segment_from_index_task_document_not_found(self, db_session_with_containers):
        """
        Test task behavior when document is not found.

        This test verifies:
        - Task handles missing document gracefully
        - No index processor operations are attempted
        - Task returns early without exceptions
        - Database session is properly closed
        """
        fake = Faker()
        # Create test data
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        non_existent_document_id = fake.uuid4()
        index_node_ids = [f"node_{fake.uuid4()}" for _ in range(3)]
        # Execute the task with non-existent document
        result = delete_segment_from_index_task(index_node_ids, dataset.id, non_existent_document_id)
        # Verify the task completed without exceptions
        assert result is None  # Task should return None when document not found

    def test_delete_segment_from_index_task_document_disabled(self, db_session_with_containers):
        """
        Test task behavior when document is disabled.

        This test verifies:
        - Task handles disabled document gracefully
        - No index processor operations are attempted
        - Task returns early without exceptions
        - Database session is properly closed
        """
        fake = Faker()
        # Create test data with disabled document
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake, enabled=False)
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)
        index_node_ids = [segment.index_node_id for segment in segments]
        # Execute the task with disabled document
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
        # Verify the task completed without exceptions
        assert result is None  # Task should return None when document is disabled

    def test_delete_segment_from_index_task_document_archived(self, db_session_with_containers):
        """
        Test task behavior when document is archived.

        This test verifies:
        - Task handles archived document gracefully
        - No index processor operations are attempted
        - Task returns early without exceptions
        - Database session is properly closed
        """
        fake = Faker()
        # Create test data with archived document
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake, archived=True)
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)
        index_node_ids = [segment.index_node_id for segment in segments]
        # Execute the task with archived document
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
        # Verify the task completed without exceptions
        assert result is None  # Task should return None when document is archived

    def test_delete_segment_from_index_task_document_not_completed(self, db_session_with_containers):
        """
        Test task behavior when document indexing is not completed.

        This test verifies:
        - Task handles incomplete indexing status gracefully
        - No index processor operations are attempted
        - Task returns early without exceptions
        - Database session is properly closed
        """
        fake = Faker()
        # Create test data with incomplete indexing
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(
            db_session_with_containers, dataset, account, fake, indexing_status="indexing"
        )
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)
        index_node_ids = [segment.index_node_id for segment in segments]
        # Execute the task with incomplete indexing
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
        # Verify the task completed without exceptions
        assert result is None  # Task should return None when indexing is not completed

    @patch("tasks.delete_segment_from_index_task.IndexProcessorFactory")
    def test_delete_segment_from_index_task_index_processor_clean(
        self, mock_index_processor_factory, db_session_with_containers
    ):
        """
        Test index processor clean method integration with different document forms.

        This test verifies:
        - Index processor factory creates correct processor for different document forms
        - Clean method is called with proper parameters for each document form
        - Task handles different index types correctly
        - Database session is properly managed
        """
        fake = Faker()
        # Test different document forms
        document_forms = [IndexType.PARAGRAPH_INDEX, IndexType.QA_INDEX, IndexType.PARENT_CHILD_INDEX]
        for doc_form in document_forms:
            # Create test data for each document form
            tenant = self._create_test_tenant(db_session_with_containers, fake)
            account = self._create_test_account(db_session_with_containers, tenant, fake)
            dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
            document = self._create_test_document(db_session_with_containers, dataset, account, fake, doc_form=doc_form)
            segments = self._create_test_document_segments(db_session_with_containers, document, account, 2, fake)
            index_node_ids = [segment.index_node_id for segment in segments]
            # Mock the index processor
            mock_processor = MagicMock()
            mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
            # Execute the task
            result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
            # Verify the task completed successfully
            assert result is None
            # Verify index processor factory was called with correct document form
            mock_index_processor_factory.assert_called_with(doc_form)
            # Verify index processor clean method was called with correct parameters
            assert mock_processor.clean.call_count == 1
            call_args = mock_processor.clean.call_args
            assert call_args[0][0].id == dataset.id  # Verify dataset ID matches
            assert call_args[0][1] == index_node_ids  # Verify index node IDs match
            assert call_args[1]["with_keywords"] is True
            assert call_args[1]["delete_child_chunks"] is True
            # Reset mocks for next iteration
            mock_index_processor_factory.reset_mock()
            mock_processor.reset_mock()

    @patch("tasks.delete_segment_from_index_task.IndexProcessorFactory")
    def test_delete_segment_from_index_task_exception_handling(
        self, mock_index_processor_factory, db_session_with_containers
    ):
        """
        Test exception handling in the task.

        This test verifies:
        - Task handles index processor exceptions gracefully
        - Database session is properly closed even when exceptions occur
        - Task logs exceptions appropriately
        - No unhandled exceptions are raised
        """
        fake = Faker()
        # Create test data
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)
        index_node_ids = [segment.index_node_id for segment in segments]
        # Mock the index processor to raise an exception
        mock_processor = MagicMock()
        mock_processor.clean.side_effect = Exception("Index processor error")
        mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
        # Execute the task - should not raise exception
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
        # Verify the task completed without raising exceptions
        assert result is None  # Task should return None even when exceptions occur
        # Verify index processor clean method was called
        assert mock_processor.clean.call_count == 1
        call_args = mock_processor.clean.call_args
        assert call_args[0][0].id == dataset.id  # Verify dataset ID matches
        assert call_args[0][1] == index_node_ids  # Verify index node IDs match
        assert call_args[1]["with_keywords"] is True
        assert call_args[1]["delete_child_chunks"] is True

    @patch("tasks.delete_segment_from_index_task.IndexProcessorFactory")
    def test_delete_segment_from_index_task_empty_index_node_ids(
        self, mock_index_processor_factory, db_session_with_containers
    ):
        """
        Test task behavior with empty index node IDs list.

        This test verifies:
        - Task handles empty index node IDs gracefully
        - Index processor clean method is called with empty list
        - Task completes successfully
        - Database session is properly managed
        """
        fake = Faker()
        # Create test data
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        # Use empty index node IDs
        index_node_ids = []
        # Mock the index processor
        mock_processor = MagicMock()
        mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
        # Execute the task
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
        # Verify the task completed successfully
        assert result is None
        # Verify index processor clean method was called with empty list
        assert mock_processor.clean.call_count == 1
        call_args = mock_processor.clean.call_args
        assert call_args[0][0].id == dataset.id  # Verify dataset ID matches
        assert call_args[0][1] == index_node_ids  # Verify index node IDs match (empty list)
        assert call_args[1]["with_keywords"] is True
        assert call_args[1]["delete_child_chunks"] is True

    @patch("tasks.delete_segment_from_index_task.IndexProcessorFactory")
    def test_delete_segment_from_index_task_large_index_node_ids(
        self, mock_index_processor_factory, db_session_with_containers
    ):
        """
        Test task behavior with large number of index node IDs.

        This test verifies:
        - Task handles large lists of index node IDs efficiently
        - Index processor clean method is called with all node IDs
        - Task completes successfully with large datasets
        - Database session is properly managed
        """
        fake = Faker()
        # Create test data
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        # Create large number of segments
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 50, fake)
        index_node_ids = [segment.index_node_id for segment in segments]
        # Mock the index processor
        mock_processor = MagicMock()
        mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
        # Execute the task
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)
        # Verify the task completed successfully
        assert result is None
        # Verify index processor clean method was called with all node IDs
        assert mock_processor.clean.call_count == 1
        call_args = mock_processor.clean.call_args
        assert call_args[0][0].id == dataset.id  # Verify dataset ID matches
        assert call_args[0][1] == index_node_ids  # Verify index node IDs match
        assert call_args[1]["with_keywords"] is True
        assert call_args[1]["delete_child_chunks"] is True
        # Verify all node IDs were passed
        assert len(call_args[0][1]) == 50

View File

@@ -0,0 +1,615 @@
"""
Integration tests for disable_segment_from_index_task using TestContainers.
This module provides comprehensive integration tests for the disable_segment_from_index_task
using real database and Redis containers to ensure the task works correctly with actual
data and external dependencies.
"""
import logging
import time
from datetime import UTC, datetime
from unittest.mock import patch
import pytest
from faker import Faker
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment
from tasks.disable_segment_from_index_task import disable_segment_from_index_task
logger = logging.getLogger(__name__)
class TestDisableSegmentFromIndexTask:
"""Integration tests for disable_segment_from_index_task using testcontainers."""
@pytest.fixture
def mock_index_processor(self):
"""Mock IndexProcessorFactory and its clean method."""
with patch("tasks.disable_segment_from_index_task.IndexProcessorFactory") as mock_factory:
mock_processor = mock_factory.return_value.init_index_processor.return_value
mock_processor.clean.return_value = None
yield mock_processor
def _create_test_account_and_tenant(self, db_session_with_containers) -> tuple[Account, Tenant]:
"""
Helper method to create a test account and tenant for testing.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
Returns:
tuple: (account, tenant) - Created account and tenant instances
"""
fake = Faker()
# Create account
account = Account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
status="active",
)
db.session.add(account)
db.session.commit()
# Create tenant
tenant = Tenant(
name=fake.company(),
status="normal",
plan="basic",
)
db.session.add(tenant)
db.session.commit()
# Create tenant-account join with owner role
join = TenantAccountJoin(
tenant_id=tenant.id,
account_id=account.id,
role=TenantAccountRole.OWNER,
current=True,
)
db.session.add(join)
db.session.commit()
# Set current tenant for account
account.current_tenant = tenant
return account, tenant
def _create_test_dataset(self, tenant: Tenant, account: Account) -> Dataset:
"""
Helper method to create a test dataset.
Args:
tenant: Tenant instance
account: Account instance
Returns:
Dataset: Created dataset instance
"""
fake = Faker()
dataset = Dataset(
tenant_id=tenant.id,
name=fake.sentence(nb_words=3),
description=fake.text(max_nb_chars=200),
data_source_type="upload_file",
indexing_technique="high_quality",
created_by=account.id,
)
db.session.add(dataset)
db.session.commit()
return dataset
def _create_test_document(
self, dataset: Dataset, tenant: Tenant, account: Account, doc_form: str = "text_model"
) -> Document:
"""
Helper method to create a test document.
Args:
dataset: Dataset instance
tenant: Tenant instance
account: Account instance
doc_form: Document form type
Returns:
Document: Created document instance
"""
fake = Faker()
document = Document(
tenant_id=tenant.id,
dataset_id=dataset.id,
position=1,
data_source_type="upload_file",
batch=fake.uuid4(),
name=fake.file_name(),
created_from="api",
created_by=account.id,
indexing_status="completed",
enabled=True,
archived=False,
doc_form=doc_form,
word_count=1000,
tokens=500,
completed_at=datetime.now(UTC),
)
db.session.add(document)
db.session.commit()
return document
def _create_test_segment(
self,
document: Document,
dataset: Dataset,
tenant: Tenant,
account: Account,
status: str = "completed",
enabled: bool = True,
) -> DocumentSegment:
"""
Helper method to create a test document segment.
Args:
document: Document instance
dataset: Dataset instance
tenant: Tenant instance
account: Account instance
status: Segment status
enabled: Whether segment is enabled
Returns:
DocumentSegment: Created segment instance
"""
fake = Faker()
segment = DocumentSegment(
tenant_id=tenant.id,
dataset_id=dataset.id,
document_id=document.id,
position=1,
content=fake.text(max_nb_chars=500),
word_count=100,
tokens=50,
index_node_id=fake.uuid4(),
index_node_hash=fake.sha256(),
status=status,
enabled=enabled,
created_by=account.id,
completed_at=datetime.now(UTC) if status == "completed" else None,
)
db.session.add(segment)
db.session.commit()
return segment
def test_disable_segment_success(self, db_session_with_containers, mock_index_processor):
"""
Test successful segment disabling from index.
This test verifies:
- Segment is found and validated
- Index processor clean method is called with correct parameters
- Redis cache is cleared
- Task completes successfully
"""
# Arrange: Create test data
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
segment = self._create_test_segment(document, dataset, tenant, account)
# Set up Redis cache
indexing_cache_key = f"segment_{segment.id}_indexing"
redis_client.setex(indexing_cache_key, 600, 1)
# Act: Execute the task
result = disable_segment_from_index_task(segment.id)
# Assert: Verify the task completed successfully
assert result is None # Task returns None on success
# Verify index processor was called correctly
mock_index_processor.clean.assert_called_once()
call_args = mock_index_processor.clean.call_args
assert call_args[0][0].id == dataset.id # Check dataset ID
assert call_args[0][1] == [segment.index_node_id] # Check index node IDs
# Verify Redis cache was cleared
assert redis_client.get(indexing_cache_key) is None
# Verify segment is still in database
db.session.refresh(segment)
assert segment.id is not None
def test_disable_segment_not_found(self, db_session_with_containers, mock_index_processor):
"""
Test handling when segment is not found.
This test verifies:
- Task handles non-existent segment gracefully
- No index processor operations are performed
- Task returns early without errors
"""
# Arrange: Use a non-existent segment ID
fake = Faker()
non_existent_segment_id = fake.uuid4()
# Act: Execute the task with non-existent segment
result = disable_segment_from_index_task(non_existent_segment_id)
# Assert: Verify the task handled the error gracefully
assert result is None
# Verify index processor was not called
mock_index_processor.clean.assert_not_called()
def test_disable_segment_not_completed(self, db_session_with_containers, mock_index_processor):
"""
Test handling when segment is not in completed status.
This test verifies:
- Task rejects segments that are not completed
- No index processor operations are performed
- Task returns early without errors
"""
# Arrange: Create test data with non-completed segment
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
segment = self._create_test_segment(document, dataset, tenant, account, status="indexing", enabled=True)
# Act: Execute the task
result = disable_segment_from_index_task(segment.id)
# Assert: Verify the task handled the invalid status gracefully
assert result is None
# Verify index processor was not called
mock_index_processor.clean.assert_not_called()
def test_disable_segment_no_dataset(self, db_session_with_containers, mock_index_processor):
"""
Test handling when segment has no associated dataset.
This test verifies:
- Task handles segments without dataset gracefully
- No index processor operations are performed
- Task returns early without errors
"""
# Arrange: Create test data
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
segment = self._create_test_segment(document, dataset, tenant, account)
# Manually remove dataset association
segment.dataset_id = "00000000-0000-0000-0000-000000000000"
db.session.commit()
# Act: Execute the task
result = disable_segment_from_index_task(segment.id)
# Assert: Verify the task handled the missing dataset gracefully
assert result is None
# Verify index processor was not called
mock_index_processor.clean.assert_not_called()
def test_disable_segment_no_document(self, db_session_with_containers, mock_index_processor):
"""
Test handling when segment has no associated document.
This test verifies:
- Task handles segments without document gracefully
- No index processor operations are performed
- Task returns early without errors
"""
# Arrange: Create test data
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
segment = self._create_test_segment(document, dataset, tenant, account)
# Manually remove document association
segment.document_id = "00000000-0000-0000-0000-000000000000"
db.session.commit()
# Act: Execute the task
result = disable_segment_from_index_task(segment.id)
# Assert: Verify the task handled the missing document gracefully
assert result is None
# Verify index processor was not called
mock_index_processor.clean.assert_not_called()
def test_disable_segment_document_disabled(self, db_session_with_containers, mock_index_processor):
"""
Test handling when document is disabled.
This test verifies:
- Task handles disabled documents gracefully
- No index processor operations are performed
- Task returns early without errors
"""
# Arrange: Create test data with disabled document
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
document.enabled = False
db.session.commit()
segment = self._create_test_segment(document, dataset, tenant, account)
# Act: Execute the task
result = disable_segment_from_index_task(segment.id)
# Assert: Verify the task handled the disabled document gracefully
assert result is None
# Verify index processor was not called
mock_index_processor.clean.assert_not_called()
def test_disable_segment_document_archived(self, db_session_with_containers, mock_index_processor):
"""
Test handling when document is archived.
This test verifies:
- Task handles archived documents gracefully
- No index processor operations are performed
- Task returns early without errors
"""
# Arrange: Create test data with archived document
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
document.archived = True
db.session.commit()
segment = self._create_test_segment(document, dataset, tenant, account)
# Act: Execute the task
result = disable_segment_from_index_task(segment.id)
# Assert: Verify the task handled the archived document gracefully
assert result is None
# Verify index processor was not called
mock_index_processor.clean.assert_not_called()
def test_disable_segment_document_indexing_not_completed(self, db_session_with_containers, mock_index_processor):
"""
Test handling when document indexing is not completed.
This test verifies:
- Task handles documents with incomplete indexing gracefully
- No index processor operations are performed
- Task returns early without errors
"""
# Arrange: Create test data with incomplete indexing
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
document.indexing_status = "indexing"
db.session.commit()
segment = self._create_test_segment(document, dataset, tenant, account)
# Act: Execute the task
result = disable_segment_from_index_task(segment.id)
# Assert: Verify the task handled the incomplete indexing gracefully
assert result is None
# Verify index processor was not called
mock_index_processor.clean.assert_not_called()
    def test_disable_segment_index_processor_exception(self, db_session_with_containers, mock_index_processor):
        """
        Test handling when index processor raises an exception.

        This test verifies:
        - Task handles index processor exceptions gracefully
        - Segment is re-enabled on failure
        - Redis cache is still cleared
        - Database changes are committed
        """
        # Arrange: Create test data (full chain: account/tenant -> dataset -> document -> segment)
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        segment = self._create_test_segment(document, dataset, tenant, account)
        # Set up Redis cache flag that the task is expected to clear even on failure
        indexing_cache_key = f"segment_{segment.id}_indexing"
        redis_client.setex(indexing_cache_key, 600, 1)
        # Configure mock to raise exception when the task asks the processor to clean
        mock_index_processor.clean.side_effect = Exception("Index processor error")
        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)
        # Assert: Verify the task handled the exception gracefully (no re-raise)
        assert result is None
        # Verify index processor was called before the failure
        mock_index_processor.clean.assert_called_once()
        call_args = mock_index_processor.clean.call_args
        # Check that the call was made with the correct parameters
        assert len(call_args[0]) == 2  # Check two arguments were passed
        assert call_args[0][1] == [segment.index_node_id]  # Check index node IDs
        # Verify segment was re-enabled (error path rolls back the disable)
        db.session.refresh(segment)
        assert segment.enabled is True
        # Verify Redis cache was still cleared despite the processor error
        assert redis_client.get(indexing_cache_key) is None
def test_disable_segment_different_doc_forms(self, db_session_with_containers, mock_index_processor):
"""
Test disabling segments with different document forms.
This test verifies:
- Task works with different document form types
- Correct index processor is initialized for each form
- Index processor clean method is called correctly
"""
# Test different document forms
doc_forms = ["text_model", "qa_model", "table_model"]
for doc_form in doc_forms:
# Arrange: Create test data for each form
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account, doc_form=doc_form)
segment = self._create_test_segment(document, dataset, tenant, account)
# Reset mock for each iteration
mock_index_processor.reset_mock()
# Act: Execute the task
result = disable_segment_from_index_task(segment.id)
# Assert: Verify the task completed successfully
assert result is None
# Verify correct index processor was initialized
mock_index_processor.clean.assert_called_once()
call_args = mock_index_processor.clean.call_args
assert call_args[0][0].id == dataset.id # Check dataset ID
assert call_args[0][1] == [segment.index_node_id] # Check index node IDs
def test_disable_segment_redis_cache_handling(self, db_session_with_containers, mock_index_processor):
"""
Test Redis cache handling during segment disabling.
This test verifies:
- Redis cache is properly set before task execution
- Cache is cleared after task completion
- Cache handling works with different scenarios
"""
# Arrange: Create test data
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
segment = self._create_test_segment(document, dataset, tenant, account)
# Test with cache present
indexing_cache_key = f"segment_{segment.id}_indexing"
redis_client.setex(indexing_cache_key, 600, 1)
assert redis_client.get(indexing_cache_key) is not None
# Act: Execute the task
result = disable_segment_from_index_task(segment.id)
# Assert: Verify cache was cleared
assert result is None
assert redis_client.get(indexing_cache_key) is None
# Test with no cache present
segment2 = self._create_test_segment(document, dataset, tenant, account)
result2 = disable_segment_from_index_task(segment2.id)
# Assert: Verify task still works without cache
assert result2 is None
def test_disable_segment_performance_timing(self, db_session_with_containers, mock_index_processor):
"""
Test performance timing of segment disabling task.
This test verifies:
- Task execution time is reasonable
- Performance logging works correctly
- Task completes within expected time bounds
"""
# Arrange: Create test data
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
segment = self._create_test_segment(document, dataset, tenant, account)
# Act: Execute the task and measure time
start_time = time.perf_counter()
result = disable_segment_from_index_task(segment.id)
end_time = time.perf_counter()
# Assert: Verify task completed successfully and timing is reasonable
assert result is None
execution_time = end_time - start_time
assert execution_time < 5.0 # Should complete within 5 seconds
def test_disable_segment_database_session_management(self, db_session_with_containers, mock_index_processor):
"""
Test database session management during task execution.
This test verifies:
- Database sessions are properly managed
- Sessions are closed after task completion
- No session leaks occur
"""
# Arrange: Create test data
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
segment = self._create_test_segment(document, dataset, tenant, account)
# Act: Execute the task
result = disable_segment_from_index_task(segment.id)
# Assert: Verify task completed and session management worked
assert result is None
# Verify segment is still accessible (session was properly managed)
db.session.refresh(segment)
assert segment.id is not None
def test_disable_segment_concurrent_execution(self, db_session_with_containers, mock_index_processor):
"""
Test concurrent execution of segment disabling tasks.
This test verifies:
- Multiple tasks can run concurrently
- Each task processes its own segment correctly
- No interference between concurrent tasks
"""
# Arrange: Create multiple test segments
account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
dataset = self._create_test_dataset(tenant, account)
document = self._create_test_document(dataset, tenant, account)
segments = []
for i in range(3):
segment = self._create_test_segment(document, dataset, tenant, account)
segments.append(segment)
# Act: Execute tasks concurrently (simulated)
results = []
for segment in segments:
result = disable_segment_from_index_task(segment.id)
results.append(result)
# Assert: Verify all tasks completed successfully
assert all(result is None for result in results)
# Verify all segments were processed
assert mock_index_processor.clean.call_count == len(segments)
# Verify each segment was processed with correct parameters
for segment in segments:
# Check that clean was called with this segment's dataset and index_node_id
found = False
for call in mock_index_processor.clean.call_args_list:
if call[0][0].id == dataset.id and call[0][1] == [segment.index_node_id]:
found = True
break
assert found, f"Segment {segment.id} was not processed correctly"

View File

@@ -0,0 +1,733 @@
"""
TestContainers-based integration tests for disable_segments_from_index_task.
This module provides comprehensive integration testing for the disable_segments_from_index_task
using TestContainers to ensure realistic database interactions and proper isolation.
The task is responsible for removing document segments from the search index when they are disabled.
"""
from unittest.mock import MagicMock, patch
from faker import Faker
from models import Account, Dataset, DocumentSegment
from models import Document as DatasetDocument
from models.dataset import DatasetProcessRule
from tasks.disable_segments_from_index_task import disable_segments_from_index_task
class TestDisableSegmentsFromIndexTask:
"""
Comprehensive integration tests for disable_segments_from_index_task using testcontainers.
This test class covers all major functionality of the disable_segments_from_index_task:
- Successful segment disabling with proper index cleanup
- Error handling for various edge cases
- Database state validation after task execution
- Redis cache cleanup verification
- Index processor integration testing
All tests use the testcontainers infrastructure to ensure proper database isolation
and realistic testing environment with actual database interactions.
"""
def _create_test_account(self, db_session_with_containers, fake=None):
"""
Helper method to create a test account with realistic data.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
fake: Faker instance for generating test data
Returns:
Account: Created test account instance
"""
fake = fake or Faker()
account = Account(
email=fake.email(),
name=fake.name(),
avatar=fake.url(),
status="active",
interface_language="en-US",
)
account.id = fake.uuid4()
# monkey-patch attributes for test setup
account.tenant_id = fake.uuid4()
account.type = "normal"
account.role = "owner"
account.created_at = fake.date_time_this_year()
account.updated_at = account.created_at
# Create a tenant for the account
from models.account import Tenant
tenant = Tenant(
name=f"Test Tenant {fake.company()}",
plan="basic",
status="active",
)
tenant.id = account.tenant_id
tenant.created_at = fake.date_time_this_year()
tenant.updated_at = tenant.created_at
from extensions.ext_database import db
db.session.add(tenant)
db.session.add(account)
db.session.commit()
# Set the current tenant for the account
account.current_tenant = tenant
return account
def _create_test_dataset(self, db_session_with_containers, account, fake=None):
"""
Helper method to create a test dataset with realistic data.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
account: The account creating the dataset
fake: Faker instance for generating test data
Returns:
Dataset: Created test dataset instance
"""
fake = fake or Faker()
dataset = Dataset(
id=fake.uuid4(),
tenant_id=account.tenant_id,
name=f"Test Dataset {fake.word()}",
description=fake.text(max_nb_chars=200),
provider="vendor",
permission="only_me",
data_source_type="upload_file",
indexing_technique="high_quality",
created_by=account.id,
updated_by=account.id,
embedding_model="text-embedding-ada-002",
embedding_model_provider="openai",
built_in_field_enabled=False,
)
from extensions.ext_database import db
db.session.add(dataset)
db.session.commit()
return dataset
def _create_test_document(self, db_session_with_containers, dataset, account, fake=None):
"""
Helper method to create a test document with realistic data.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
dataset: The dataset containing the document
account: The account creating the document
fake: Faker instance for generating test data
Returns:
DatasetDocument: Created test document instance
"""
fake = fake or Faker()
document = DatasetDocument()
document.id = fake.uuid4()
document.tenant_id = dataset.tenant_id
document.dataset_id = dataset.id
document.position = 1
document.data_source_type = "upload_file"
document.data_source_info = '{"upload_file_id": "test_file_id"}'
document.batch = fake.uuid4()
document.name = f"Test Document {fake.word()}.txt"
document.created_from = "upload_file"
document.created_by = account.id
document.created_api_request_id = fake.uuid4()
document.processing_started_at = fake.date_time_this_year()
document.file_id = fake.uuid4()
document.word_count = fake.random_int(min=100, max=1000)
document.parsing_completed_at = fake.date_time_this_year()
document.cleaning_completed_at = fake.date_time_this_year()
document.splitting_completed_at = fake.date_time_this_year()
document.tokens = fake.random_int(min=50, max=500)
document.indexing_started_at = fake.date_time_this_year()
document.indexing_completed_at = fake.date_time_this_year()
document.indexing_status = "completed"
document.enabled = True
document.archived = False
document.doc_form = "text_model" # Use text_model form for testing
document.doc_language = "en"
from extensions.ext_database import db
db.session.add(document)
db.session.commit()
return document
def _create_test_segments(self, db_session_with_containers, document, dataset, account, count=3, fake=None):
"""
Helper method to create test document segments with realistic data.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
document: The document containing the segments
dataset: The dataset containing the document
account: The account creating the segments
count: Number of segments to create
fake: Faker instance for generating test data
Returns:
List[DocumentSegment]: Created test segment instances
"""
fake = fake or Faker()
segments = []
for i in range(count):
segment = DocumentSegment()
segment.id = fake.uuid4()
segment.tenant_id = dataset.tenant_id
segment.dataset_id = dataset.id
segment.document_id = document.id
segment.position = i + 1
segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}"
segment.answer = f"Test answer {i + 1}" if i % 2 == 0 else None
segment.word_count = fake.random_int(min=10, max=100)
segment.tokens = fake.random_int(min=5, max=50)
segment.keywords = [fake.word() for _ in range(3)]
segment.index_node_id = f"node_{segment.id}"
segment.index_node_hash = fake.sha256()
segment.hit_count = 0
segment.enabled = True
segment.disabled_at = None
segment.disabled_by = None
segment.status = "completed"
segment.created_by = account.id
segment.updated_by = account.id
segment.indexing_at = fake.date_time_this_year()
segment.completed_at = fake.date_time_this_year()
segment.error = None
segment.stopped_at = None
segments.append(segment)
from extensions.ext_database import db
for segment in segments:
db.session.add(segment)
db.session.commit()
return segments
def _create_dataset_process_rule(self, db_session_with_containers, dataset, fake=None):
"""
Helper method to create a dataset process rule.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
dataset: The dataset for the process rule
fake: Faker instance for generating test data
Returns:
DatasetProcessRule: Created process rule instance
"""
fake = fake or Faker()
process_rule = DatasetProcessRule()
process_rule.id = fake.uuid4()
process_rule.tenant_id = dataset.tenant_id
process_rule.dataset_id = dataset.id
process_rule.mode = "automatic"
process_rule.rules = (
"{"
'"mode": "automatic", '
'"rules": {'
'"pre_processing_rules": [], "segmentation": '
'{"separator": "\\n\\n", "max_tokens": 1000, "chunk_overlap": 50}}'
"}"
)
process_rule.created_by = dataset.created_by
process_rule.updated_by = dataset.updated_by
from extensions.ext_database import db
db.session.add(process_rule)
db.session.commit()
return process_rule
def test_disable_segments_success(self, db_session_with_containers):
"""
Test successful disabling of segments from index.
This test verifies that the task can correctly disable segments from the index
when all conditions are met, including proper index cleanup and database state updates.
"""
# Arrange
fake = Faker()
account = self._create_test_account(db_session_with_containers, fake)
dataset = self._create_test_dataset(db_session_with_containers, account, fake)
document = self._create_test_document(db_session_with_containers, dataset, account, fake)
segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 3, fake)
self._create_dataset_process_rule(db_session_with_containers, dataset, fake)
segment_ids = [segment.id for segment in segments]
# Mock the index processor to avoid external dependencies
with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
mock_processor = MagicMock()
mock_factory.return_value.init_index_processor.return_value = mock_processor
# Mock Redis client
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
mock_redis.delete.return_value = True
# Act
result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)
# Assert
assert result is None # Task should complete without returning a value
# Verify index processor was called correctly
mock_factory.assert_called_once_with(document.doc_form)
mock_processor.clean.assert_called_once()
# Verify the call arguments (checking by attributes rather than object identity)
call_args = mock_processor.clean.call_args
assert call_args[0][0].id == dataset.id # First argument should be the dataset
assert sorted(call_args[0][1]) == sorted(
[segment.index_node_id for segment in segments]
) # Compare sorted lists to handle any order while preserving duplicates
assert call_args[1]["with_keywords"] is True
assert call_args[1]["delete_child_chunks"] is False
# Verify Redis cache cleanup was called for each segment
assert mock_redis.delete.call_count == len(segments)
for segment in segments:
expected_key = f"segment_{segment.id}_indexing"
mock_redis.delete.assert_any_call(expected_key)
def test_disable_segments_dataset_not_found(self, db_session_with_containers):
"""
Test handling when dataset is not found.
This test ensures that the task correctly handles cases where the specified
dataset doesn't exist, logging appropriate messages and returning early.
"""
# Arrange
fake = Faker()
non_existent_dataset_id = fake.uuid4()
non_existent_document_id = fake.uuid4()
segment_ids = [fake.uuid4()]
# Mock Redis client
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
# Act
result = disable_segments_from_index_task(segment_ids, non_existent_dataset_id, non_existent_document_id)
# Assert
assert result is None # Task should complete without returning a value
# Redis should not be called when dataset is not found
mock_redis.delete.assert_not_called()
def test_disable_segments_document_not_found(self, db_session_with_containers):
"""
Test handling when document is not found.
This test ensures that the task correctly handles cases where the specified
document doesn't exist, logging appropriate messages and returning early.
"""
# Arrange
fake = Faker()
account = self._create_test_account(db_session_with_containers, fake)
dataset = self._create_test_dataset(db_session_with_containers, account, fake)
non_existent_document_id = fake.uuid4()
segment_ids = [fake.uuid4()]
# Mock Redis client
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
# Act
result = disable_segments_from_index_task(segment_ids, dataset.id, non_existent_document_id)
# Assert
assert result is None # Task should complete without returning a value
# Redis should not be called when document is not found
mock_redis.delete.assert_not_called()
def test_disable_segments_document_invalid_status(self, db_session_with_containers):
"""
Test handling when document has invalid status for disabling.
This test ensures that the task correctly handles cases where the document
is not enabled, archived, or not completed, preventing invalid operations.
"""
# Arrange
fake = Faker()
account = self._create_test_account(db_session_with_containers, fake)
dataset = self._create_test_dataset(db_session_with_containers, account, fake)
document = self._create_test_document(db_session_with_containers, dataset, account, fake)
segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 2, fake)
# Test case 1: Document not enabled
document.enabled = False
from extensions.ext_database import db
db.session.commit()
segment_ids = [segment.id for segment in segments]
# Mock Redis client
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
# Act
result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)
# Assert
assert result is None # Task should complete without returning a value
# Redis should not be called when document status is invalid
mock_redis.delete.assert_not_called()
# Test case 2: Document archived
document.enabled = True
document.archived = True
db.session.commit()
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
# Act
result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)
# Assert
assert result is None # Task should complete without returning a value
mock_redis.delete.assert_not_called()
# Test case 3: Document indexing not completed
document.enabled = True
document.archived = False
document.indexing_status = "indexing"
db.session.commit()
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
# Act
result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)
# Assert
assert result is None # Task should complete without returning a value
mock_redis.delete.assert_not_called()
def test_disable_segments_no_segments_found(self, db_session_with_containers):
"""
Test handling when no segments are found for the given IDs.
This test ensures that the task correctly handles cases where the specified
segment IDs don't exist or don't match the dataset/document criteria.
"""
# Arrange
fake = Faker()
account = self._create_test_account(db_session_with_containers, fake)
dataset = self._create_test_dataset(db_session_with_containers, account, fake)
document = self._create_test_document(db_session_with_containers, dataset, account, fake)
self._create_dataset_process_rule(db_session_with_containers, dataset, fake)
# Use non-existent segment IDs
non_existent_segment_ids = [fake.uuid4() for _ in range(3)]
# Mock Redis client
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
# Act
result = disable_segments_from_index_task(non_existent_segment_ids, dataset.id, document.id)
# Assert
assert result is None # Task should complete without returning a value
# Redis should not be called when no segments are found
mock_redis.delete.assert_not_called()
    def test_disable_segments_index_processor_error(self, db_session_with_containers):
        """
        Test handling when index processor encounters an error.

        This test verifies that the task correctly handles index processor errors
        by rolling back segment states and ensuring proper cleanup.
        """
        # Arrange: build the full fixture chain with two segments to disable
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 2, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)
        segment_ids = [segment.id for segment in segments]
        # Mock the index processor to raise an exception from clean()
        with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
            mock_processor = MagicMock()
            mock_processor.clean.side_effect = Exception("Index processor error")
            mock_factory.return_value.init_index_processor.return_value = mock_processor
            # Mock Redis client so cache cleanup can be asserted without a server
            with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
                mock_redis.delete.return_value = True
                # Act
                result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)
                # Assert: the exception is swallowed by the task, not re-raised
                assert result is None  # Task should complete without returning a value
                # Verify segments were rolled back to enabled state
                from extensions.ext_database import db

                # Refresh to pick up state written by the task's error handler
                db.session.refresh(segments[0])
                db.session.refresh(segments[1])
                # Check that segments are re-enabled after error
                updated_segments = db.session.query(DocumentSegment).where(DocumentSegment.id.in_(segment_ids)).all()
                for segment in updated_segments:
                    assert segment.enabled is True
                    assert segment.disabled_at is None
                    assert segment.disabled_by is None
                # Verify Redis cache cleanup was still called
                assert mock_redis.delete.call_count == len(segments)
def test_disable_segments_with_different_doc_forms(self, db_session_with_containers):
"""
Test disabling segments with different document forms.
This test verifies that the task correctly handles different document forms
(paragraph, qa, parent_child) and initializes the appropriate index processor.
"""
# Arrange
fake = Faker()
account = self._create_test_account(db_session_with_containers, fake)
dataset = self._create_test_dataset(db_session_with_containers, account, fake)
document = self._create_test_document(db_session_with_containers, dataset, account, fake)
segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 2, fake)
self._create_dataset_process_rule(db_session_with_containers, dataset, fake)
segment_ids = [segment.id for segment in segments]
# Test different document forms
doc_forms = ["text_model", "qa_model", "hierarchical_model"]
for doc_form in doc_forms:
# Update document form
document.doc_form = doc_form
from extensions.ext_database import db
db.session.commit()
# Mock the index processor factory
with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
mock_processor = MagicMock()
mock_factory.return_value.init_index_processor.return_value = mock_processor
# Mock Redis client
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
mock_redis.delete.return_value = True
# Act
result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)
# Assert
assert result is None # Task should complete without returning a value
mock_factory.assert_called_with(doc_form)
    def test_disable_segments_performance_timing(self, db_session_with_containers):
        """
        Test that the task properly measures and logs performance timing.

        This test verifies that the task correctly measures execution time
        and logs performance metrics for monitoring purposes.
        """
        # Arrange: build a dataset with three segments to disable.
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 3, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)
        segment_ids = [segment.id for segment in segments]
        # Mock the index processor
        with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
            mock_processor = MagicMock()
            mock_factory.return_value.init_index_processor.return_value = mock_processor
            # Mock Redis client
            with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
                mock_redis.delete.return_value = True
                # Mock time.perf_counter to control timing; the side_effect list
                # assumes the task reads the clock exactly twice (start/end).
                with patch("tasks.disable_segments_from_index_task.time.perf_counter") as mock_perf_counter:
                    mock_perf_counter.side_effect = [1000.0, 1000.5]  # 0.5 seconds execution time
                    # Mock logger to capture log messages
                    with patch("tasks.disable_segments_from_index_task.logger") as mock_logger:
                        # Act
                        result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)
                        # Assert
                        assert result is None  # Task should complete without returning a value
                        # Verify performance logging: at least one info message must
                        # mention "latency" and include the mocked 0.5s duration.
                        mock_logger.info.assert_called()
                        log_calls = [call[0][0] for call in mock_logger.info.call_args_list]
                        performance_log = next((call for call in log_calls if "latency" in call), None)
                        assert performance_log is not None
                        assert "0.5" in performance_log  # Should log the execution time
def test_disable_segments_redis_cache_cleanup(self, db_session_with_containers):
"""
Test that Redis cache is properly cleaned up for all segments.
This test verifies that the task correctly removes indexing cache entries
from Redis for all processed segments, preventing stale cache issues.
"""
# Arrange
fake = Faker()
account = self._create_test_account(db_session_with_containers, fake)
dataset = self._create_test_dataset(db_session_with_containers, account, fake)
document = self._create_test_document(db_session_with_containers, dataset, account, fake)
segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 5, fake)
self._create_dataset_process_rule(db_session_with_containers, dataset, fake)
segment_ids = [segment.id for segment in segments]
# Mock the index processor
with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
mock_processor = MagicMock()
mock_factory.return_value.init_index_processor.return_value = mock_processor
# Mock Redis client to track delete calls
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
mock_redis.delete.return_value = True
# Act
result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)
# Assert
assert result is None # Task should complete without returning a value
# Verify Redis delete was called for each segment
assert mock_redis.delete.call_count == len(segments)
# Verify correct cache keys were used
expected_keys = [f"segment_{segment.id}_indexing" for segment in segments]
actual_calls = [call[0][0] for call in mock_redis.delete.call_args_list]
for expected_key in expected_keys:
assert expected_key in actual_calls
def test_disable_segments_database_session_cleanup(self, db_session_with_containers):
"""
Test that database session is properly closed after task execution.
This test verifies that the task correctly manages database sessions
and ensures proper cleanup to prevent connection leaks.
"""
# Arrange
fake = Faker()
account = self._create_test_account(db_session_with_containers, fake)
dataset = self._create_test_dataset(db_session_with_containers, account, fake)
document = self._create_test_document(db_session_with_containers, dataset, account, fake)
segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 2, fake)
self._create_dataset_process_rule(db_session_with_containers, dataset, fake)
segment_ids = [segment.id for segment in segments]
# Mock the index processor
with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
mock_processor = MagicMock()
mock_factory.return_value.init_index_processor.return_value = mock_processor
# Mock Redis client
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
mock_redis.delete.return_value = True
# Mock db.session.close to verify it's called
with patch("tasks.disable_segments_from_index_task.db.session.close") as mock_close:
# Act
result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)
# Assert
assert result is None # Task should complete without returning a value
# Verify session was closed
mock_close.assert_called()
def test_disable_segments_empty_segment_ids(self, db_session_with_containers):
"""
Test handling when empty segment IDs list is provided.
This test ensures that the task correctly handles edge cases where
an empty list of segment IDs is provided.
"""
# Arrange
fake = Faker()
account = self._create_test_account(db_session_with_containers, fake)
dataset = self._create_test_dataset(db_session_with_containers, account, fake)
document = self._create_test_document(db_session_with_containers, dataset, account, fake)
self._create_dataset_process_rule(db_session_with_containers, dataset, fake)
empty_segment_ids = []
# Mock Redis client
with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
# Act
result = disable_segments_from_index_task(empty_segment_ids, dataset.id, document.id)
# Assert
assert result is None # Task should complete without returning a value
# Redis should not be called when no segments are provided
mock_redis.delete.assert_not_called()
    def test_disable_segments_mixed_valid_invalid_ids(self, db_session_with_containers):
        """
        Test handling when some segment IDs are valid and others are invalid.

        This test verifies that the task correctly processes only the valid
        segment IDs and ignores invalid ones.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 2, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)
        # Mix valid and invalid segment IDs (the invalid ones are random UUIDs
        # that match no database row).
        valid_segment_ids = [segment.id for segment in segments]
        invalid_segment_ids = [fake.uuid4() for _ in range(2)]
        mixed_segment_ids = valid_segment_ids + invalid_segment_ids
        # Mock the index processor
        with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
            mock_processor = MagicMock()
            mock_factory.return_value.init_index_processor.return_value = mock_processor
            # Mock Redis client
            with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
                mock_redis.delete.return_value = True
                # Act
                result = disable_segments_from_index_task(mixed_segment_ids, dataset.id, document.id)
                # Assert
                assert result is None  # Task should complete without returning a value
                # Verify index processor was called with only valid segment node IDs
                expected_node_ids = [segment.index_node_id for segment in segments]
                mock_processor.clean.assert_called_once()
                # Verify the call arguments: positional args are (dataset, node_ids),
                # keyword args control keyword index and child-chunk handling.
                call_args = mock_processor.clean.call_args
                assert call_args[0][0].id == dataset.id  # First argument should be the dataset
                assert sorted(call_args[0][1]) == sorted(
                    expected_node_ids
                )  # Compare sorted lists to handle any order while preserving duplicates
                assert call_args[1]["with_keywords"] is True
                assert call_args[1]["delete_child_chunks"] is False
                # Verify Redis cleanup was called only for valid segments
                assert mock_redis.delete.call_count == len(segments)

View File

@@ -0,0 +1,887 @@
from dataclasses import asdict
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from core.entities.document_task import DocumentTask
from enums.cloud_plan import CloudPlan
from extensions.ext_database import db
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document
from tasks.document_indexing_task import (
_document_indexing, # Core function
_document_indexing_with_tenant_queue, # Tenant queue wrapper function
document_indexing_task, # Deprecated old interface
normal_document_indexing_task, # New normal task
priority_document_indexing_task, # New priority task
)
class TestDocumentIndexingTasks:
"""Integration tests for document indexing tasks using testcontainers.
This test class covers:
- Core _document_indexing function
- Deprecated document_indexing_task function
- New normal_document_indexing_task function
- New priority_document_indexing_task function
- Tenant queue wrapper _document_indexing_with_tenant_queue function
"""
    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies.

        Patches IndexingRunner and FeatureService inside the task module and
        yields the mocks so tests can assert on indexing calls or reconfigure
        billing features. Billing is disabled by default.
        """
        with (
            patch("tasks.document_indexing_task.IndexingRunner") as mock_indexing_runner,
            patch("tasks.document_indexing_task.FeatureService") as mock_feature_service,
        ):
            # Setup mock indexing runner: instantiating IndexingRunner() inside
            # the task returns this shared instance, so tests can assert on .run().
            mock_runner_instance = MagicMock()
            mock_indexing_runner.return_value = mock_runner_instance
            # Setup mock feature service with billing disabled by default.
            mock_features = MagicMock()
            mock_features.billing.enabled = False
            mock_feature_service.get_features.return_value = mock_features
            yield {
                "indexing_runner": mock_indexing_runner,
                "indexing_runner_instance": mock_runner_instance,
                "feature_service": mock_feature_service,
                "features": mock_features,
            }
    def _create_test_dataset_and_documents(
        self, db_session_with_containers, mock_external_service_dependencies, document_count=3
    ):
        """
        Helper method to create a test dataset and documents for testing.

        Builds the full ownership chain (account -> tenant -> tenant/account
        join) before creating the dataset, then adds `document_count` enabled
        documents in the "waiting" indexing state.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            mock_external_service_dependencies: Mock dependencies
            document_count: Number of documents to create
        Returns:
            tuple: (dataset, documents) - Created dataset and document instances
        """
        fake = Faker()
        # Create account and tenant
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()
        # Create tenant-account join (owner role, marked as the current tenant)
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()
        # Create dataset
        dataset = Dataset(
            id=fake.uuid4(),
            tenant_id=tenant.id,
            name=fake.company(),
            description=fake.text(max_nb_chars=100),
            data_source_type="upload_file",
            indexing_technique="high_quality",
            created_by=account.id,
        )
        db.session.add(dataset)
        db.session.commit()
        # Create documents; all start enabled and waiting for indexing
        documents = []
        for i in range(document_count):
            document = Document(
                id=fake.uuid4(),
                tenant_id=tenant.id,
                dataset_id=dataset.id,
                position=i,
                data_source_type="upload_file",
                batch="test_batch",
                name=fake.file_name(),
                created_from="upload_file",
                created_by=account.id,
                indexing_status="waiting",
                enabled=True,
            )
            db.session.add(document)
            documents.append(document)
        db.session.commit()
        # Refresh dataset to ensure it's properly loaded
        db.session.refresh(dataset)
        return dataset, documents
def _create_test_dataset_with_billing_features(
self, db_session_with_containers, mock_external_service_dependencies, billing_enabled=True
):
"""
Helper method to create a test dataset with billing features configured.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
mock_external_service_dependencies: Mock dependencies
billing_enabled: Whether billing is enabled
Returns:
tuple: (dataset, documents) - Created dataset and document instances
"""
fake = Faker()
# Create account and tenant
account = Account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
status="active",
)
db.session.add(account)
db.session.commit()
tenant = Tenant(
name=fake.company(),
status="normal",
)
db.session.add(tenant)
db.session.commit()
# Create tenant-account join
join = TenantAccountJoin(
tenant_id=tenant.id,
account_id=account.id,
role=TenantAccountRole.OWNER,
current=True,
)
db.session.add(join)
db.session.commit()
# Create dataset
dataset = Dataset(
id=fake.uuid4(),
tenant_id=tenant.id,
name=fake.company(),
description=fake.text(max_nb_chars=100),
data_source_type="upload_file",
indexing_technique="high_quality",
created_by=account.id,
)
db.session.add(dataset)
db.session.commit()
# Create documents
documents = []
for i in range(3):
document = Document(
id=fake.uuid4(),
tenant_id=tenant.id,
dataset_id=dataset.id,
position=i,
data_source_type="upload_file",
batch="test_batch",
name=fake.file_name(),
created_from="upload_file",
created_by=account.id,
indexing_status="waiting",
enabled=True,
)
db.session.add(document)
documents.append(document)
db.session.commit()
# Configure billing features
mock_external_service_dependencies["features"].billing.enabled = billing_enabled
if billing_enabled:
mock_external_service_dependencies["features"].billing.subscription.plan = CloudPlan.SANDBOX
mock_external_service_dependencies["features"].vector_space.limit = 100
mock_external_service_dependencies["features"].vector_space.size = 50
# Refresh dataset to ensure it's properly loaded
db.session.refresh(dataset)
return dataset, documents
def test_document_indexing_task_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test successful document indexing with multiple documents.
This test verifies:
- Proper dataset retrieval from database
- Correct document processing and status updates
- IndexingRunner integration
- Database state updates
"""
# Arrange: Create test data
dataset, documents = self._create_test_dataset_and_documents(
db_session_with_containers, mock_external_service_dependencies, document_count=3
)
document_ids = [doc.id for doc in documents]
# Act: Execute the task
_document_indexing(dataset.id, document_ids)
# Assert: Verify the expected outcomes
# Verify indexing runner was called correctly
mock_external_service_dependencies["indexing_runner"].assert_called_once()
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
# Verify documents were updated to parsing status
# Re-query documents from database since _document_indexing uses a different session
for doc_id in document_ids:
updated_document = db.session.query(Document).where(Document.id == doc_id).first()
assert updated_document.indexing_status == "parsing"
assert updated_document.processing_started_at is not None
# Verify the run method was called with correct documents
call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args
assert call_args is not None
processed_documents = call_args[0][0] # First argument should be documents list
assert len(processed_documents) == 3
def test_document_indexing_task_dataset_not_found(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test handling of non-existent dataset.
This test verifies:
- Proper error handling for missing datasets
- Early return without processing
- Database session cleanup
- No unnecessary indexing runner calls
"""
# Arrange: Use non-existent dataset ID
fake = Faker()
non_existent_dataset_id = fake.uuid4()
document_ids = [fake.uuid4() for _ in range(3)]
# Act: Execute the task with non-existent dataset
_document_indexing(non_existent_dataset_id, document_ids)
# Assert: Verify no processing occurred
mock_external_service_dependencies["indexing_runner"].assert_not_called()
mock_external_service_dependencies["indexing_runner_instance"].run.assert_not_called()
    def test_document_indexing_task_document_not_found_in_dataset(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling when some documents don't exist in the dataset.

        This test verifies:
        - Only existing documents are processed
        - Non-existent documents are ignored
        - Indexing runner receives only valid documents
        - Database state updates correctly
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=2
        )
        # Mix existing and non-existent document IDs (random UUIDs match no row)
        fake = Faker()
        existing_document_ids = [doc.id for doc in documents]
        non_existent_document_ids = [fake.uuid4() for _ in range(2)]
        all_document_ids = existing_document_ids + non_existent_document_ids
        # Act: Execute the task with mixed document IDs
        _document_indexing(dataset.id, all_document_ids)
        # Assert: Verify only existing documents were processed
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
        # Verify only existing documents were updated
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in existing_document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None
        # Verify the run method was called with only existing documents
        call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args
        assert call_args is not None
        processed_documents = call_args[0][0]  # First argument should be documents list
        assert len(processed_documents) == 2  # Only existing documents
def test_document_indexing_task_indexing_runner_exception(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test handling of IndexingRunner exceptions.
This test verifies:
- Exceptions from IndexingRunner are properly caught
- Task completes without raising exceptions
- Database session is properly closed
- Error logging occurs
"""
# Arrange: Create test data
dataset, documents = self._create_test_dataset_and_documents(
db_session_with_containers, mock_external_service_dependencies, document_count=2
)
document_ids = [doc.id for doc in documents]
# Mock IndexingRunner to raise an exception
mock_external_service_dependencies["indexing_runner_instance"].run.side_effect = Exception(
"Indexing runner failed"
)
# Act: Execute the task
_document_indexing(dataset.id, document_ids)
# Assert: Verify exception was handled gracefully
# The task should complete without raising exceptions
mock_external_service_dependencies["indexing_runner"].assert_called_once()
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
# Verify documents were still updated to parsing status before the exception
# Re-query documents from database since _document_indexing close the session
for doc_id in document_ids:
updated_document = db.session.query(Document).where(Document.id == doc_id).first()
assert updated_document.indexing_status == "parsing"
assert updated_document.processing_started_at is not None
    def test_document_indexing_task_mixed_document_states(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test processing documents with mixed initial states.

        This test verifies:
        - Documents with different initial states are handled correctly
        - Only valid documents are processed
        - Database state updates are consistent
        - IndexingRunner receives correct documents
        """
        # Arrange: Create test data
        dataset, base_documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=2
        )
        # Create additional documents with different states
        fake = Faker()
        extra_documents = []
        # Document with different indexing status
        doc1 = Document(
            id=fake.uuid4(),
            tenant_id=dataset.tenant_id,
            dataset_id=dataset.id,
            position=2,
            data_source_type="upload_file",
            batch="test_batch",
            name=fake.file_name(),
            created_from="upload_file",
            created_by=dataset.created_by,
            indexing_status="completed",  # Already completed
            enabled=True,
        )
        db.session.add(doc1)
        extra_documents.append(doc1)
        # Document with disabled status
        doc2 = Document(
            id=fake.uuid4(),
            tenant_id=dataset.tenant_id,
            dataset_id=dataset.id,
            position=3,
            data_source_type="upload_file",
            batch="test_batch",
            name=fake.file_name(),
            created_from="upload_file",
            created_by=dataset.created_by,
            indexing_status="waiting",
            enabled=False,  # Disabled
        )
        db.session.add(doc2)
        extra_documents.append(doc2)
        db.session.commit()
        all_documents = base_documents + extra_documents
        document_ids = [doc.id for doc in all_documents]
        # Act: Execute the task with mixed document states
        _document_indexing(dataset.id, document_ids)
        # Assert: Verify processing
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
        # Verify all documents were updated to parsing status — the task does
        # not filter on prior status or enabled flag for requested IDs.
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None
        # Verify the run method was called with all documents
        call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args
        assert call_args is not None
        processed_documents = call_args[0][0]  # First argument should be documents list
        assert len(processed_documents) == 4
    def test_document_indexing_task_billing_sandbox_plan_batch_limit(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test billing validation for sandbox plan batch upload limit.

        This test verifies:
        - Sandbox plan batch upload limit enforcement
        - Error handling for batch upload limit exceeded
        - Document status updates to error state
        - Proper error message recording
        """
        # Arrange: Create test data with billing enabled
        dataset, documents = self._create_test_dataset_with_billing_features(
            db_session_with_containers, mock_external_service_dependencies, billing_enabled=True
        )
        # Configure sandbox plan with batch limit
        mock_external_service_dependencies["features"].billing.subscription.plan = CloudPlan.SANDBOX
        # Create more documents than sandbox plan allows (limit is 1)
        fake = Faker()
        extra_documents = []
        for i in range(2):  # Total will be 5 documents (3 existing + 2 new)
            document = Document(
                id=fake.uuid4(),
                tenant_id=dataset.tenant_id,
                dataset_id=dataset.id,
                position=i + 3,
                data_source_type="upload_file",
                batch="test_batch",
                name=fake.file_name(),
                created_from="upload_file",
                created_by=dataset.created_by,
                indexing_status="waiting",
                enabled=True,
            )
            db.session.add(document)
            extra_documents.append(document)
        db.session.commit()
        all_documents = documents + extra_documents
        document_ids = [doc.id for doc in all_documents]
        # Act: Execute the task with too many documents for sandbox plan
        _document_indexing(dataset.id, document_ids)
        # Assert: Verify error handling — every requested document is marked
        # as errored with a batch-upload message and a stop timestamp.
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "error"
            assert updated_document.error is not None
            assert "batch upload" in updated_document.error
            assert updated_document.stopped_at is not None
        # Verify no indexing runner was called
        mock_external_service_dependencies["indexing_runner"].assert_not_called()
def test_document_indexing_task_billing_disabled_success(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test successful processing when billing is disabled.
This test verifies:
- Processing continues normally when billing is disabled
- No billing validation occurs
- Documents are processed successfully
- IndexingRunner is called correctly
"""
# Arrange: Create test data with billing disabled
dataset, documents = self._create_test_dataset_with_billing_features(
db_session_with_containers, mock_external_service_dependencies, billing_enabled=False
)
document_ids = [doc.id for doc in documents]
# Act: Execute the task with billing disabled
_document_indexing(dataset.id, document_ids)
# Assert: Verify successful processing
mock_external_service_dependencies["indexing_runner"].assert_called_once()
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
# Verify documents were updated to parsing status
# Re-query documents from database since _document_indexing uses a different session
for doc_id in document_ids:
updated_document = db.session.query(Document).where(Document.id == doc_id).first()
assert updated_document.indexing_status == "parsing"
assert updated_document.processing_started_at is not None
    def test_document_indexing_task_document_is_paused_error(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of DocumentIsPausedError from IndexingRunner.

        This test verifies:
        - DocumentIsPausedError is properly caught and handled
        - Task completes without raising exceptions
        - Appropriate logging occurs
        - Database session is properly closed
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=2
        )
        document_ids = [doc.id for doc in documents]
        # Mock IndexingRunner to raise DocumentIsPausedError
        from core.indexing_runner import DocumentIsPausedError
        mock_external_service_dependencies["indexing_runner_instance"].run.side_effect = DocumentIsPausedError(
            "Document indexing is paused"
        )
        # Act
        _document_indexing(dataset.id, document_ids)
        # Assert: Verify exception was handled gracefully
        # The task should complete without raising exceptions
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
        # Verify documents were still updated to parsing status before the exception
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None
# ==================== NEW TESTS FOR REFACTORED FUNCTIONS ====================
def test_old_document_indexing_task_success(self, db_session_with_containers, mock_external_service_dependencies):
"""
Test document_indexing_task basic functionality.
This test verifies:
- Task function calls the wrapper correctly
- Basic parameter passing works
"""
# Arrange: Create test data
dataset, documents = self._create_test_dataset_and_documents(
db_session_with_containers, mock_external_service_dependencies, document_count=1
)
document_ids = [doc.id for doc in documents]
# Act: Execute the deprecated task (it only takes 2 parameters)
document_indexing_task(dataset.id, document_ids)
# Assert: Verify processing occurred (core logic is tested in _document_indexing tests)
mock_external_service_dependencies["indexing_runner"].assert_called_once()
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
def test_normal_document_indexing_task_success(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test normal_document_indexing_task basic functionality.
This test verifies:
- Task function calls the wrapper correctly
- Basic parameter passing works
"""
# Arrange: Create test data
dataset, documents = self._create_test_dataset_and_documents(
db_session_with_containers, mock_external_service_dependencies, document_count=1
)
document_ids = [doc.id for doc in documents]
tenant_id = dataset.tenant_id
# Act: Execute the new normal task
normal_document_indexing_task(tenant_id, dataset.id, document_ids)
# Assert: Verify processing occurred (core logic is tested in _document_indexing tests)
mock_external_service_dependencies["indexing_runner"].assert_called_once()
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
def test_priority_document_indexing_task_success(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test priority_document_indexing_task basic functionality.
This test verifies:
- Task function calls the wrapper correctly
- Basic parameter passing works
"""
# Arrange: Create test data
dataset, documents = self._create_test_dataset_and_documents(
db_session_with_containers, mock_external_service_dependencies, document_count=1
)
document_ids = [doc.id for doc in documents]
tenant_id = dataset.tenant_id
# Act: Execute the new priority task
priority_document_indexing_task(tenant_id, dataset.id, document_ids)
# Assert: Verify processing occurred (core logic is tested in _document_indexing tests)
mock_external_service_dependencies["indexing_runner"].assert_called_once()
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
def test_document_indexing_with_tenant_queue_success(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test _document_indexing_with_tenant_queue function with no waiting tasks.
This test verifies:
- Core indexing logic execution (same as _document_indexing)
- Tenant queue cleanup when no waiting tasks
- Task function parameter passing
- Queue management after processing
"""
# Arrange: Create test data
dataset, documents = self._create_test_dataset_and_documents(
db_session_with_containers, mock_external_service_dependencies, document_count=2
)
document_ids = [doc.id for doc in documents]
tenant_id = dataset.tenant_id
# Mock the task function
from unittest.mock import MagicMock
mock_task_func = MagicMock()
# Act: Execute the wrapper function
_document_indexing_with_tenant_queue(tenant_id, dataset.id, document_ids, mock_task_func)
# Assert: Verify core processing occurred (same as _document_indexing)
mock_external_service_dependencies["indexing_runner"].assert_called_once()
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
# Verify documents were updated (same as _document_indexing)
# Re-query documents from database since _document_indexing uses a different session
for doc_id in document_ids:
updated_document = db.session.query(Document).where(Document.id == doc_id).first()
assert updated_document.indexing_status == "parsing"
assert updated_document.processing_started_at is not None
# Verify the run method was called with correct documents
call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args
assert call_args is not None
processed_documents = call_args[0][0]
assert len(processed_documents) == 2
# Verify task function was not called (no waiting tasks)
mock_task_func.delay.assert_not_called()
    def test_document_indexing_with_tenant_queue_with_waiting_tasks(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test _document_indexing_with_tenant_queue function with waiting tasks in queue using real Redis.
        This test verifies:
        - Core indexing logic execution
        - Real Redis-based tenant queue processing of waiting tasks
        - Task function calls for waiting tasks
        - Queue management with multiple tasks using actual Redis operations
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )
        document_ids = [doc.id for doc in documents]
        tenant_id = dataset.tenant_id
        dataset_id = dataset.id
        # Mock the task function
        from unittest.mock import MagicMock
        mock_task_func = MagicMock()
        # Use real Redis for TenantIsolatedTaskQueue
        from core.rag.pipeline.queue import TenantIsolatedTaskQueue
        # Create real queue instance
        queue = TenantIsolatedTaskQueue(tenant_id, "document_indexing")
        # Add waiting tasks to the real Redis queue
        waiting_tasks = [
            DocumentTask(tenant_id=tenant_id, dataset_id=dataset.id, document_ids=["waiting-doc-1"]),
            DocumentTask(tenant_id=tenant_id, dataset_id=dataset.id, document_ids=["waiting-doc-2"]),
        ]
        # Convert DocumentTask objects to dictionaries for serialization
        waiting_task_dicts = [asdict(task) for task in waiting_tasks]
        queue.push_tasks(waiting_task_dicts)
        # Act: Execute the wrapper function
        _document_indexing_with_tenant_queue(tenant_id, dataset.id, document_ids, mock_task_func)
        # Assert: Verify core processing occurred
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
        # Verify the task function was dispatched exactly once: only the first
        # waiting task ("waiting-doc-1") is handed off per completed run
        assert mock_task_func.delay.call_count == 1
        # Verify correct parameters for the dispatched call
        calls = mock_task_func.delay.call_args_list
        assert calls[0][1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
        # Verify exactly one task ("waiting-doc-2") is still queued, since only
        # the first waiting task was pulled and dispatched
        remaining_tasks = queue.pull_tasks(count=10)  # Pull more than we added
        assert len(remaining_tasks) == 1
    def test_document_indexing_with_tenant_queue_error_handling(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test error handling in _document_indexing_with_tenant_queue using real Redis.
        This test verifies:
        - Exception handling during core processing
        - Tenant queue cleanup even on errors using real Redis
        - Proper error logging
        - Function completes without raising exceptions
        - Queue management continues despite core processing errors
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )
        document_ids = [doc.id for doc in documents]
        tenant_id = dataset.tenant_id
        dataset_id = dataset.id
        # Mock IndexingRunner to raise an exception
        mock_external_service_dependencies["indexing_runner_instance"].run.side_effect = Exception("Test error")
        # Mock the task function
        from unittest.mock import MagicMock
        mock_task_func = MagicMock()
        # Use real Redis for TenantIsolatedTaskQueue
        from core.rag.pipeline.queue import TenantIsolatedTaskQueue
        # Create real queue instance
        queue = TenantIsolatedTaskQueue(tenant_id, "document_indexing")
        # Add waiting task to the real Redis queue
        waiting_task = DocumentTask(tenant_id=tenant_id, dataset_id=dataset.id, document_ids=["waiting-doc-1"])
        queue.push_tasks([asdict(waiting_task)])
        # Act: Execute the wrapper function
        _document_indexing_with_tenant_queue(tenant_id, dataset.id, document_ids, mock_task_func)
        # Assert: Verify error was handled gracefully
        # The function should not raise exceptions
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
        # Verify documents were still updated to parsing status before the exception
        # (implies the status update is committed before the runner is invoked)
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None
        # Verify waiting task was still processed despite core processing error
        mock_task_func.delay.assert_called_once()
        # Verify correct parameters for the call
        call = mock_task_func.delay.call_args
        assert call[1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
        # Verify queue is empty after processing (the single task was pulled)
        remaining_tasks = queue.pull_tasks(count=10)
        assert len(remaining_tasks) == 0
    def test_document_indexing_with_tenant_queue_tenant_isolation(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test tenant isolation in _document_indexing_with_tenant_queue using real Redis.
        This test verifies:
        - Different tenants have isolated queues
        - Tasks from one tenant don't affect another tenant's queue
        - Queue operations are properly scoped to tenant
        """
        # Arrange: Create test data for two different tenants
        dataset1, documents1 = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )
        dataset2, documents2 = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )
        tenant1_id = dataset1.tenant_id
        tenant2_id = dataset2.tenant_id
        dataset1_id = dataset1.id
        dataset2_id = dataset2.id
        document_ids1 = [doc.id for doc in documents1]
        # document_ids2 is intentionally never indexed; tenant2 only receives a
        # queued task so we can prove it is untouched by tenant1's run
        document_ids2 = [doc.id for doc in documents2]
        # Mock the task function
        from unittest.mock import MagicMock
        mock_task_func = MagicMock()
        # Use real Redis for TenantIsolatedTaskQueue
        from core.rag.pipeline.queue import TenantIsolatedTaskQueue
        # Create queue instances for both tenants
        queue1 = TenantIsolatedTaskQueue(tenant1_id, "document_indexing")
        queue2 = TenantIsolatedTaskQueue(tenant2_id, "document_indexing")
        # Add waiting tasks to both queues
        waiting_task1 = DocumentTask(tenant_id=tenant1_id, dataset_id=dataset1.id, document_ids=["tenant1-doc-1"])
        waiting_task2 = DocumentTask(tenant_id=tenant2_id, dataset_id=dataset2.id, document_ids=["tenant2-doc-1"])
        queue1.push_tasks([asdict(waiting_task1)])
        queue2.push_tasks([asdict(waiting_task2)])
        # Act: Execute the wrapper function for tenant1 only
        _document_indexing_with_tenant_queue(tenant1_id, dataset1.id, document_ids1, mock_task_func)
        # Assert: Verify core processing occurred for tenant1
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
        # Verify only tenant1's waiting task was processed
        mock_task_func.delay.assert_called_once()
        call = mock_task_func.delay.call_args
        assert call[1] == {"tenant_id": tenant1_id, "dataset_id": dataset1_id, "document_ids": ["tenant1-doc-1"]}
        # Verify tenant1's queue is empty
        remaining_tasks1 = queue1.pull_tasks(count=10)
        assert len(remaining_tasks1) == 0
        # Verify tenant2's queue still has its task (isolation)
        remaining_tasks2 = queue2.pull_tasks(count=10)
        assert len(remaining_tasks2) == 1
        # Verify queue keys are different
        # NOTE(review): reaches into private attributes (_queue/_task_key) to
        # prove key separation -- brittle if the queue's internals change
        assert queue1._queue != queue2._queue
        assert queue1._task_key != queue2._task_key

View File

@@ -0,0 +1,450 @@
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from core.rag.index_processor.constant.index_type import IndexType
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment
from tasks.enable_segments_to_index_task import enable_segments_to_index_task
class TestEnableSegmentsToIndexTask:
    """Integration tests for enable_segments_to_index_task using testcontainers."""
    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        # Only the IndexProcessorFactory boundary is mocked; the database and
        # Redis used by the task are real services from testcontainers.
        with (
            patch("tasks.enable_segments_to_index_task.IndexProcessorFactory") as mock_index_processor_factory,
        ):
            # Setup mock index processor
            mock_processor = MagicMock()
            mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor
            yield {
                "index_processor_factory": mock_index_processor_factory,
                "index_processor": mock_processor,
            }
    def _create_test_dataset_and_document(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Helper method to create a test dataset and document for testing.
        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            mock_external_service_dependencies: Mock dependencies
        Returns:
            tuple: (dataset, document) - Created dataset and document instances
        """
        fake = Faker()
        # Create account and tenant
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()
        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()
        # Create dataset
        dataset = Dataset(
            id=fake.uuid4(),
            tenant_id=tenant.id,
            name=fake.company(),
            description=fake.text(max_nb_chars=100),
            data_source_type="upload_file",
            indexing_technique="high_quality",
            created_by=account.id,
        )
        db.session.add(dataset)
        db.session.commit()
        # Create document
        document = Document(
            id=fake.uuid4(),
            tenant_id=tenant.id,
            dataset_id=dataset.id,
            position=1,
            data_source_type="upload_file",
            batch="test_batch",
            name=fake.file_name(),
            created_from="upload_file",
            created_by=account.id,
            indexing_status="completed",
            enabled=True,
            doc_form=IndexType.PARAGRAPH_INDEX,
        )
        db.session.add(document)
        db.session.commit()
        # Refresh dataset to ensure doc_form property works correctly
        db.session.refresh(dataset)
        return dataset, document
    def _create_test_segments(
        self, db_session_with_containers, document, dataset, count=3, enabled=False, status="completed"
    ):
        """
        Helper method to create test document segments.
        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            document: Document instance
            dataset: Dataset instance
            count: Number of segments to create
            enabled: Whether segments should be enabled
            status: Status of the segments
        Returns:
            list: List of created DocumentSegment instances
        """
        fake = Faker()
        segments = []
        # Defaults (enabled=False, status="completed") model segments that are
        # ready to be re-enabled; individual tests override as needed.
        for i in range(count):
            text = fake.text(max_nb_chars=200)
            segment = DocumentSegment(
                id=fake.uuid4(),
                tenant_id=document.tenant_id,
                dataset_id=dataset.id,
                document_id=document.id,
                position=i,
                content=text,
                word_count=len(text.split()),
                tokens=len(text.split()) * 2,  # rough estimate: 2x word count
                index_node_id=f"node_{i}",
                index_node_hash=f"hash_{i}",
                enabled=enabled,
                status=status,
                created_by=document.created_by,
            )
            db.session.add(segment)
            segments.append(segment)
        db.session.commit()
        return segments
    def test_enable_segments_to_index_with_different_index_type(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test segments indexing with different index types.
        This test verifies:
        - Proper handling of different index types
        - Index processor factory integration
        - Document processing with various configurations
        - Redis cache key deletion
        """
        # Arrange: Create test data with different index type
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        # Update document to use different index type
        document.doc_form = IndexType.QA_INDEX
        db.session.commit()
        # Refresh dataset to ensure doc_form property reflects the updated document
        db.session.refresh(dataset)
        # Create segments
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        # Set up Redis cache keys
        segment_ids = [segment.id for segment in segments]
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            redis_client.set(indexing_cache_key, "processing", ex=300)
        # Act: Execute the task (invoked synchronously, not via Celery)
        enable_segments_to_index_task(segment_ids, dataset.id, document.id)
        # Assert: Verify different index type handling
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.QA_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()
        # Verify the load method was called with correct parameters
        call_args = mock_external_service_dependencies["index_processor"].load.call_args
        assert call_args is not None
        documents = call_args[0][1]  # Second argument should be documents list
        assert len(documents) == 3
        # Verify Redis cache keys were deleted
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            assert redis_client.exists(indexing_cache_key) == 0
    def test_enable_segments_to_index_dataset_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of non-existent dataset.
        This test verifies:
        - Proper error handling for missing datasets
        - Early return without processing
        - Database session cleanup
        - No unnecessary index processor calls
        """
        # Arrange: Use non-existent dataset ID
        fake = Faker()
        non_existent_dataset_id = fake.uuid4()
        non_existent_document_id = fake.uuid4()
        segment_ids = [fake.uuid4()]
        # Act: Execute the task with non-existent dataset
        enable_segments_to_index_task(segment_ids, non_existent_dataset_id, non_existent_document_id)
        # Assert: Verify no processing occurred
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()
    def test_enable_segments_to_index_document_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of non-existent document.
        This test verifies:
        - Proper error handling for missing documents
        - Early return without processing
        - Database session cleanup
        - No unnecessary index processor calls
        """
        # Arrange: Create dataset but use non-existent document ID
        dataset, _ = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        fake = Faker()
        non_existent_document_id = fake.uuid4()
        segment_ids = [fake.uuid4()]
        # Act: Execute the task with non-existent document
        enable_segments_to_index_task(segment_ids, dataset.id, non_existent_document_id)
        # Assert: Verify no processing occurred
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()
    def test_enable_segments_to_index_invalid_document_status(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of document with invalid status.
        This test verifies:
        - Early return when document is disabled, archived, or not completed
        - No index processing for documents not ready for indexing
        - Proper database session cleanup
        - No unnecessary external service calls
        """
        # Arrange: Create test data with invalid document status
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        # Test different invalid statuses
        invalid_statuses = [
            ("disabled", {"enabled": False}),
            ("archived", {"archived": True}),
            ("not_completed", {"indexing_status": "processing"}),
        ]
        for _, status_attrs in invalid_statuses:
            # Reset document status so each scenario starts from a valid state
            document.enabled = True
            document.archived = False
            document.indexing_status = "completed"
            db.session.commit()
            # Set invalid status
            for attr, value in status_attrs.items():
                setattr(document, attr, value)
            db.session.commit()
            # Create segments
            segments = self._create_test_segments(db_session_with_containers, document, dataset)
            segment_ids = [segment.id for segment in segments]
            # Act: Execute the task
            enable_segments_to_index_task(segment_ids, dataset.id, document.id)
            # Assert: Verify no processing occurred
            mock_external_service_dependencies["index_processor_factory"].assert_not_called()
            mock_external_service_dependencies["index_processor"].load.assert_not_called()
            # Clean up segments for next iteration
            for segment in segments:
                db.session.delete(segment)
            db.session.commit()
    def test_enable_segments_to_index_segments_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling when no segments are found.
        This test verifies:
        - Proper handling when segments don't exist
        - Early return without processing
        - Database session cleanup
        - Index processor is created but load is not called
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        # Use non-existent segment IDs
        fake = Faker()
        non_existent_segment_ids = [fake.uuid4() for _ in range(3)]
        # Act: Execute the task with non-existent segments
        enable_segments_to_index_task(non_existent_segment_ids, dataset.id, document.id)
        # Assert: Verify index processor was created but load was not called
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_not_called()
    def test_enable_segments_to_index_with_parent_child_structure(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test segments indexing with parent-child structure.
        This test verifies:
        - Proper handling of PARENT_CHILD_INDEX type
        - Child document creation from segments
        - Correct document structure for parent-child indexing
        - Index processor receives properly structured documents
        - Redis cache key deletion
        """
        # Arrange: Create test data with parent-child index type
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        # Update document to use parent-child index type
        document.doc_form = IndexType.PARENT_CHILD_INDEX
        db.session.commit()
        # Refresh dataset to ensure doc_form property reflects the updated document
        db.session.refresh(dataset)
        # Create segments with mock child chunks
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        # Set up Redis cache keys
        segment_ids = [segment.id for segment in segments]
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            redis_client.set(indexing_cache_key, "processing", ex=300)
        # Mock the get_child_chunks method for each segment
        with patch.object(DocumentSegment, "get_child_chunks") as mock_get_child_chunks:
            # Setup mock to return child chunks for each segment
            mock_child_chunks = []
            for i in range(2):  # Each segment has 2 child chunks
                mock_child = MagicMock()
                mock_child.content = f"child_content_{i}"
                mock_child.index_node_id = f"child_node_{i}"
                mock_child.index_node_hash = f"child_hash_{i}"
                mock_child_chunks.append(mock_child)
            mock_get_child_chunks.return_value = mock_child_chunks
            # Act: Execute the task
            enable_segments_to_index_task(segment_ids, dataset.id, document.id)
            # Assert: Verify parent-child index processing
            mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
                IndexType.PARENT_CHILD_INDEX
            )
            mock_external_service_dependencies["index_processor"].load.assert_called_once()
            # Verify the load method was called with correct parameters
            call_args = mock_external_service_dependencies["index_processor"].load.call_args
            assert call_args is not None
            documents = call_args[0][1]  # Second argument should be documents list
            assert len(documents) == 3  # 3 segments
            # Verify each document has children
            for doc in documents:
                assert hasattr(doc, "children")
                assert len(doc.children) == 2  # Each document has 2 children
        # Verify Redis cache keys were deleted
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            assert redis_client.exists(indexing_cache_key) == 0
    def test_enable_segments_to_index_general_exception_handling(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test general exception handling during indexing process.
        This test verifies:
        - Exceptions are properly caught and handled
        - Segment status is set to error
        - Segments are disabled
        - Error information is recorded
        - Redis cache is still cleared
        - Database session is properly closed
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)
        # Set up Redis cache keys
        segment_ids = [segment.id for segment in segments]
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            redis_client.set(indexing_cache_key, "processing", ex=300)
        # Mock the index processor to raise an exception
        mock_external_service_dependencies["index_processor"].load.side_effect = Exception("Index processing failed")
        # Act: Execute the task
        enable_segments_to_index_task(segment_ids, dataset.id, document.id)
        # Assert: Verify error handling -- each segment is disabled and marked
        # "error" with the exception message recorded on it
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is False
            assert segment.status == "error"
            assert segment.error is not None
            assert "Index processing failed" in segment.error
            assert segment.disabled_at is not None
        # Verify Redis cache keys were still cleared despite error
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            assert redis_client.exists(indexing_cache_key) == 0

View File

@@ -0,0 +1,242 @@
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from extensions.ext_database import db
from libs.email_i18n import EmailType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from tasks.mail_account_deletion_task import send_account_deletion_verification_code, send_deletion_success_task
class TestMailAccountDeletionTask:
    """Integration tests for mail account deletion tasks using testcontainers."""
    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        # Both the low-level mail subsystem and the i18n email service are
        # mocked; the tasks under test are invoked synchronously.
        with (
            patch("tasks.mail_account_deletion_task.mail") as mock_mail,
            patch("tasks.mail_account_deletion_task.get_email_i18n_service") as mock_get_email_service,
        ):
            # Setup mock mail service
            mock_mail.is_inited.return_value = True
            # Setup mock email service
            mock_email_service = MagicMock()
            mock_get_email_service.return_value = mock_email_service
            yield {
                "mail": mock_mail,
                "get_email_service": mock_get_email_service,
                "email_service": mock_email_service,
            }
    def _create_test_account(self, db_session_with_containers):
        """
        Helper method to create a test account for testing.
        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
        Returns:
            Account: Created account instance
        """
        fake = Faker()
        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()
        # Create tenant
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()
        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()
        return account
    def test_send_deletion_success_task_success(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Test successful account deletion success email sending.
        This test verifies:
        - Proper email service initialization check
        - Correct email service method calls
        - Template context is properly formatted
        - Email type is correctly specified
        """
        # Arrange: Create test data
        account = self._create_test_account(db_session_with_containers)
        test_email = account.email
        test_language = "en-US"
        # Act: Execute the task
        send_deletion_success_task(test_email, test_language)
        # Assert: Verify the expected outcomes
        # Verify mail service was checked
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()
        # Verify email service was retrieved
        mock_external_service_dependencies["get_email_service"].assert_called_once()
        # Verify email was sent with correct parameters
        mock_external_service_dependencies["email_service"].send_email.assert_called_once_with(
            email_type=EmailType.ACCOUNT_DELETION_SUCCESS,
            language_code=test_language,
            to=test_email,
            template_context={
                "to": test_email,
                "email": test_email,
            },
        )
    def test_send_deletion_success_task_mail_not_initialized(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test account deletion success email when mail service is not initialized.
        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls are made
        - No exceptions are raised
        """
        # Arrange: Setup mail service to return not initialized
        mock_external_service_dependencies["mail"].is_inited.return_value = False
        account = self._create_test_account(db_session_with_containers)
        test_email = account.email
        # Act: Execute the task (no language argument: exercises the default)
        send_deletion_success_task(test_email)
        # Assert: Verify no email service calls were made
        mock_external_service_dependencies["get_email_service"].assert_not_called()
        mock_external_service_dependencies["email_service"].send_email.assert_not_called()
    def test_send_deletion_success_task_email_service_exception(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test account deletion success email when email service raises exception.
        This test verifies:
        - Exception is properly caught and logged
        - Task completes without raising exception
        - Error logging is recorded
        """
        # Arrange: Setup email service to raise exception
        mock_external_service_dependencies["email_service"].send_email.side_effect = Exception("Email service failed")
        account = self._create_test_account(db_session_with_containers)
        test_email = account.email
        # Act: Execute the task (should not raise exception)
        send_deletion_success_task(test_email)
        # Assert: Verify email service was called but exception was handled
        mock_external_service_dependencies["email_service"].send_email.assert_called_once()
    def test_send_account_deletion_verification_code_success(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful account deletion verification code email sending.
        This test verifies:
        - Proper email service initialization check
        - Correct email service method calls
        - Template context includes verification code
        - Email type is correctly specified
        """
        # Arrange: Create test data
        account = self._create_test_account(db_session_with_containers)
        test_email = account.email
        test_code = "123456"
        test_language = "en-US"
        # Act: Execute the task
        send_account_deletion_verification_code(test_email, test_code, test_language)
        # Assert: Verify the expected outcomes
        # Verify mail service was checked
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()
        # Verify email service was retrieved
        mock_external_service_dependencies["get_email_service"].assert_called_once()
        # Verify email was sent with correct parameters
        mock_external_service_dependencies["email_service"].send_email.assert_called_once_with(
            email_type=EmailType.ACCOUNT_DELETION_VERIFICATION,
            language_code=test_language,
            to=test_email,
            template_context={
                "to": test_email,
                "code": test_code,
            },
        )
    def test_send_account_deletion_verification_code_mail_not_initialized(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test account deletion verification code email when mail service is not initialized.
        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls are made
        - No exceptions are raised
        """
        # Arrange: Setup mail service to return not initialized
        mock_external_service_dependencies["mail"].is_inited.return_value = False
        account = self._create_test_account(db_session_with_containers)
        test_email = account.email
        test_code = "123456"
        # Act: Execute the task
        send_account_deletion_verification_code(test_email, test_code)
        # Assert: Verify no email service calls were made
        mock_external_service_dependencies["get_email_service"].assert_not_called()
        mock_external_service_dependencies["email_service"].send_email.assert_not_called()
    def test_send_account_deletion_verification_code_email_service_exception(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test account deletion verification code email when email service raises exception.
        This test verifies:
        - Exception is properly caught and logged
        - Task completes without raising exception
        - Error logging is recorded
        """
        # Arrange: Setup email service to raise exception
        mock_external_service_dependencies["email_service"].send_email.side_effect = Exception("Email service failed")
        account = self._create_test_account(db_session_with_containers)
        test_email = account.email
        test_code = "123456"
        # Act: Execute the task (should not raise exception)
        send_account_deletion_verification_code(test_email, test_code)
        # Assert: Verify email service was called but exception was handled
        mock_external_service_dependencies["email_service"].send_email.assert_called_once()

View File

@@ -0,0 +1,282 @@
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from libs.email_i18n import EmailType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from tasks.mail_change_mail_task import send_change_mail_completed_notification_task, send_change_mail_task
class TestMailChangeMailTask:
"""Integration tests for mail_change_mail_task using testcontainers."""
@pytest.fixture
def mock_external_service_dependencies(self):
"""Mock setup for external service dependencies."""
with (
patch("tasks.mail_change_mail_task.mail") as mock_mail,
patch("tasks.mail_change_mail_task.get_email_i18n_service") as mock_get_email_i18n_service,
):
# Setup mock mail service
mock_mail.is_inited.return_value = True
# Setup mock email i18n service
mock_email_service = MagicMock()
mock_get_email_i18n_service.return_value = mock_email_service
yield {
"mail": mock_mail,
"email_i18n_service": mock_email_service,
"get_email_i18n_service": mock_get_email_i18n_service,
}
def _create_test_account(self, db_session_with_containers):
"""
Helper method to create a test account for testing.
Args:
db_session_with_containers: Database session from testcontainers infrastructure
Returns:
Account: Created account instance
"""
fake = Faker()
# Create account
account = Account(
email=fake.email(),
name=fake.name(),
interface_language="en-US",
status="active",
)
db_session_with_containers.add(account)
db_session_with_containers.commit()
# Create tenant
tenant = Tenant(
name=fake.company(),
status="normal",
)
db_session_with_containers.add(tenant)
db_session_with_containers.commit()
# Create tenant-account join
join = TenantAccountJoin(
tenant_id=tenant.id,
account_id=account.id,
role=TenantAccountRole.OWNER,
current=True,
)
db_session_with_containers.add(join)
db_session_with_containers.commit()
return account
def test_send_change_mail_task_success_old_email_phase(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test successful change email task execution for old_email phase.
This test verifies:
- Proper mail service initialization check
- Correct email service method call with old_email phase
- Successful task completion
"""
# Arrange: Create test data
account = self._create_test_account(db_session_with_containers)
test_language = "en-US"
test_email = account.email
test_code = "123456"
test_phase = "old_email"
# Act: Execute the task
send_change_mail_task(test_language, test_email, test_code, test_phase)
# Assert: Verify the expected outcomes
mock_external_service_dependencies["mail"].is_inited.assert_called_once()
mock_external_service_dependencies["get_email_i18n_service"].assert_called_once()
mock_external_service_dependencies["email_i18n_service"].send_change_email.assert_called_once_with(
language_code=test_language,
to=test_email,
code=test_code,
phase=test_phase,
)
def test_send_change_mail_task_success_new_email_phase(
self, db_session_with_containers, mock_external_service_dependencies
):
"""
Test successful change email task execution for new_email phase.
This test verifies:
- Proper mail service initialization check
- Correct email service method call with new_email phase
- Successful task completion
"""
# Arrange: Create test data
account = self._create_test_account(db_session_with_containers)
test_language = "zh-Hans"
test_email = "new@example.com"
test_code = "789012"
test_phase = "new_email"
# Act: Execute the task
send_change_mail_task(test_language, test_email, test_code, test_phase)
# Assert: Verify the expected outcomes
mock_external_service_dependencies["mail"].is_inited.assert_called_once()
mock_external_service_dependencies["get_email_i18n_service"].assert_called_once()
mock_external_service_dependencies["email_i18n_service"].send_change_email.assert_called_once_with(
language_code=test_language,
to=test_email,
code=test_code,
phase=test_phase,
)
def test_send_change_mail_task_mail_not_initialized(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test change email task when the mail service is not initialized.

    Verifies the task returns early and never touches the email service
    when the mail subsystem reports itself as unavailable.
    """
    mocks = mock_external_service_dependencies
    # Arrange: simulate an uninitialized mail subsystem.
    mocks["mail"].is_inited.return_value = False

    # Act: the task should bail out immediately.
    send_change_mail_task("en-US", "test@example.com", "123456", "old_email")

    # Assert: only the initialization probe happened; nothing was sent.
    mocks["mail"].is_inited.assert_called_once()
    mocks["get_email_i18n_service"].assert_not_called()
    mocks["email_i18n_service"].send_change_email.assert_not_called()
def test_send_change_mail_task_email_service_exception(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test change email task when email service raises an exception.

    This test verifies:
    - Exception is properly caught and logged
    - Task completes without raising exception
    """
    # Arrange: Setup email service to raise exception
    mock_external_service_dependencies["email_i18n_service"].send_change_email.side_effect = Exception(
        "Email service failed"
    )
    test_language = "en-US"
    test_email = "test@example.com"
    test_code = "123456"
    test_phase = "old_email"
    # Act: Execute the task (should not raise exception) — the task is
    # expected to swallow and log the failure internally.
    send_change_mail_task(test_language, test_email, test_code, test_phase)
    # Assert: Verify email service was called despite exception; the
    # exception fires only after the full call chain was exercised, so
    # every step up to the send must still have happened exactly once.
    mock_external_service_dependencies["mail"].is_inited.assert_called_once()
    mock_external_service_dependencies["get_email_i18n_service"].assert_called_once()
    mock_external_service_dependencies["email_i18n_service"].send_change_email.assert_called_once_with(
        language_code=test_language,
        to=test_email,
        code=test_code,
        phase=test_phase,
    )
def test_send_change_mail_completed_notification_task_success(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test successful change email completed notification task execution.

    This test verifies:
    - Proper mail service initialization check
    - Correct email service method call with CHANGE_EMAIL_COMPLETED type
    - Template context is properly constructed
    - Successful task completion
    """
    # Arrange: Create test data — the persisted account supplies a
    # realistic recipient address.
    account = self._create_test_account(db_session_with_containers)
    test_language = "en-US"
    test_email = account.email
    # Act: Execute the task
    send_change_mail_completed_notification_task(test_language, test_email)
    # Assert: Verify the expected outcomes
    mock_external_service_dependencies["mail"].is_inited.assert_called_once()
    mock_external_service_dependencies["get_email_i18n_service"].assert_called_once()
    mock_external_service_dependencies["email_i18n_service"].send_email.assert_called_once_with(
        email_type=EmailType.CHANGE_EMAIL_COMPLETED,
        language_code=test_language,
        to=test_email,
        # Both keys carry the recipient: templates may reference either.
        template_context={
            "to": test_email,
            "email": test_email,
        },
    )
def test_send_change_mail_completed_notification_task_mail_not_initialized(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test the change-email-completed notification task when the mail
    service is not initialized.

    Verifies the task returns early without touching the email service.
    """
    mocks = mock_external_service_dependencies
    # Arrange: pretend the mail subsystem never came up.
    mocks["mail"].is_inited.return_value = False

    # Act: the task should return before reaching the email service.
    send_change_mail_completed_notification_task("en-US", "test@example.com")

    # Assert: only the initialization probe happened.
    mocks["mail"].is_inited.assert_called_once()
    mocks["get_email_i18n_service"].assert_not_called()
    mocks["email_i18n_service"].send_email.assert_not_called()
def test_send_change_mail_completed_notification_task_email_service_exception(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test change email completed notification task when email service raises an exception.

    This test verifies:
    - Exception is properly caught and logged
    - Task completes without raising exception
    """
    # Arrange: Setup email service to raise exception
    mock_external_service_dependencies["email_i18n_service"].send_email.side_effect = Exception(
        "Email service failed"
    )
    test_language = "en-US"
    test_email = "test@example.com"
    # Act: Execute the task (should not raise exception) — the failure is
    # expected to be handled inside the task.
    send_change_mail_completed_notification_task(test_language, test_email)
    # Assert: Verify email service was called despite exception — the
    # whole pipeline up to the send must still have run exactly once.
    mock_external_service_dependencies["mail"].is_inited.assert_called_once()
    mock_external_service_dependencies["get_email_i18n_service"].assert_called_once()
    mock_external_service_dependencies["email_i18n_service"].send_email.assert_called_once_with(
        email_type=EmailType.CHANGE_EMAIL_COMPLETED,
        language_code=test_language,
        to=test_email,
        # Both keys carry the recipient: templates may reference either.
        template_context={
            "to": test_email,
            "email": test_email,
        },
    )

View File

@@ -0,0 +1,598 @@
"""
TestContainers-based integration tests for send_email_code_login_mail_task.
This module provides comprehensive integration tests for the email code login mail task
using TestContainers infrastructure. The tests ensure that the task properly sends
email verification codes for login with internationalization support and handles
various error scenarios in a real database environment.
All tests use the testcontainers infrastructure to ensure proper database isolation
and realistic testing scenarios with actual PostgreSQL and Redis instances.
"""
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from libs.email_i18n import EmailType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from tasks.mail_email_code_login import send_email_code_login_mail_task
class TestSendEmailCodeLoginMailTask:
"""
Comprehensive integration tests for send_email_code_login_mail_task using testcontainers.
This test class covers all major functionality of the email code login mail task:
- Successful email sending with different languages
- Email service integration and template rendering
- Error handling for various failure scenarios
- Performance metrics and logging verification
- Edge cases and boundary conditions
All tests use the testcontainers infrastructure to ensure proper database isolation
and realistic testing environment with actual database interactions.
"""
@pytest.fixture(autouse=True)
def cleanup_database(self, db_session_with_containers):
    """Clean up database before each test to ensure isolation."""
    # Imported locally: this module does not import redis_client at top level.
    from extensions.ext_redis import redis_client

    # Clear all test data — delete joins before their parent rows so
    # foreign-key constraints are satisfied.
    db_session_with_containers.query(TenantAccountJoin).delete()
    db_session_with_containers.query(Tenant).delete()
    db_session_with_containers.query(Account).delete()
    db_session_with_containers.commit()
    # Clear Redis cache so codes/tokens never leak between tests.
    redis_client.flushdb()
@pytest.fixture
def mock_external_service_dependencies(self):
    """Mock setup for external service dependencies.

    Patches the mail client and the i18n email-service factory at the
    task-module level so the task under test sees the mocks.
    """
    with (
        patch("tasks.mail_email_code_login.mail") as mock_mail,
        patch("tasks.mail_email_code_login.get_email_i18n_service") as mock_email_service,
    ):
        # Setup default mock returns — mail reports initialized so the
        # task proceeds past its guard.
        mock_mail.is_inited.return_value = True
        # Mock email service: the factory returns a MagicMock instance
        # whose send_email succeeds silently by default.
        mock_email_service_instance = MagicMock()
        mock_email_service_instance.send_email.return_value = None
        mock_email_service.return_value = mock_email_service_instance
        yield {
            "mail": mock_mail,
            "email_service": mock_email_service,
            "email_service_instance": mock_email_service_instance,
        }
def _create_test_account(self, db_session_with_containers, fake=None):
    """
    Create and persist a test account.

    Args:
        db_session_with_containers: Database session from testcontainers infrastructure
        fake: Optional Faker instance; a fresh one is created when omitted

    Returns:
        Account: Created account instance
    """
    if fake is None:
        fake = Faker()

    # Persist a minimal active account with generated identity fields.
    new_account = Account(
        email=fake.email(),
        name=fake.name(),
        interface_language="en-US",
        status="active",
    )
    session = db_session_with_containers
    session.add(new_account)
    session.commit()
    return new_account
def _create_test_tenant_and_account(self, db_session_with_containers, fake=None):
    """
    Helper method to create a test tenant and account for testing.

    Args:
        db_session_with_containers: Database session from testcontainers infrastructure
        fake: Faker instance for generating test data

    Returns:
        tuple: (Account, Tenant) created instances
    """
    if fake is None:
        fake = Faker()
    # Create account using the existing helper method
    account = self._create_test_account(db_session_with_containers, fake)
    # Create tenant
    # NOTE(review): other test modules create tenants with status="normal"
    # and no plan — confirm plan="basic"/status="active" are valid values.
    tenant = Tenant(
        name=fake.company(),
        plan="basic",
        status="active",
    )
    db_session_with_containers.add(tenant)
    db_session_with_containers.commit()
    # Create tenant-account relationship — OWNER role gives the account
    # full permissions within the tenant.
    tenant_account_join = TenantAccountJoin(
        tenant_id=tenant.id,
        account_id=account.id,
        role=TenantAccountRole.OWNER,
    )
    db_session_with_containers.add(tenant_account_join)
    db_session_with_containers.commit()
    return account, tenant
def test_send_email_code_login_mail_task_success_english(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test successful email code login mail sending in English.

    This test verifies that the task can successfully:
    1. Send email code login mail with English language
    2. Use proper email service integration
    3. Pass correct template context to email service
    4. Complete task execution without errors
    """
    # Arrange: Setup test data
    fake = Faker()
    test_email = fake.email()
    test_code = "123456"
    test_language = "en-US"
    # Act: Execute the task
    send_email_code_login_mail_task(
        language=test_language,
        to=test_email,
        code=test_code,
    )
    # Assert: Verify expected outcomes
    mock_mail = mock_external_service_dependencies["mail"]
    mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
    # Verify mail service was checked for initialization
    mock_mail.is_inited.assert_called_once()
    # Verify email service was called with correct parameters — the
    # template context mirrors the task's arguments exactly.
    mock_email_service_instance.send_email.assert_called_once_with(
        email_type=EmailType.EMAIL_CODE_LOGIN,
        language_code=test_language,
        to=test_email,
        template_context={
            "to": test_email,
            "code": test_code,
        },
    )
def test_send_email_code_login_mail_task_success_chinese(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test successful email code login mail sending in Chinese.

    Verifies the task forwards a Chinese language code and the correct
    template context straight through to the i18n email service.
    """
    # Arrange
    fake = Faker()
    recipient = fake.email()
    verification_code = "789012"
    language = "zh-Hans"

    # Act
    send_email_code_login_mail_task(
        language=language,
        to=recipient,
        code=verification_code,
    )

    # Assert: exactly one send, tagged with the Chinese locale.
    service = mock_external_service_dependencies["email_service_instance"]
    service.send_email.assert_called_once_with(
        email_type=EmailType.EMAIL_CODE_LOGIN,
        language_code=language,
        to=recipient,
        template_context={
            "to": recipient,
            "code": verification_code,
        },
    )
def test_send_email_code_login_mail_task_success_multiple_languages(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test successful email code login mail sending with multiple languages.

    Runs the task once per locale and verifies every recorded call to the
    i18n email service carried the matching recipient, code and language,
    with no cross-contamination between invocations.
    """
    # Arrange: one (locale, recipient, code) triple per supported locale.
    fake = Faker()
    locales = ["en-US", "zh-Hans", "zh-CN", "ja-JP", "ko-KR"]
    recipients = [fake.email() for _ in locales]
    codes = [fake.numerify("######") for _ in locales]

    # Act: fire the task once per locale.
    for locale, recipient, code in zip(locales, recipients, codes):
        send_email_code_login_mail_task(language=locale, to=recipient, code=code)

    # Assert: one send per locale, each with matching parameters.
    service = mock_external_service_dependencies["email_service_instance"]
    assert service.send_email.call_count == len(locales)
    recorded_calls = service.send_email.call_args_list
    for recorded, locale, recipient, code in zip(recorded_calls, locales, recipients, codes):
        kwargs = recorded[1]
        assert kwargs["email_type"] == EmailType.EMAIL_CODE_LOGIN
        assert kwargs["language_code"] == locale
        assert kwargs["to"] == recipient
        assert kwargs["template_context"]["code"] == code
def test_send_email_code_login_mail_task_mail_not_initialized(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test email code login mail task when mail service is not initialized.

    This test verifies that the task can properly:
    1. Check mail service initialization status
    2. Return early when mail is not initialized
    3. Not attempt to send email when service is unavailable
    4. Handle gracefully without errors
    """
    # Arrange: Setup test data
    fake = Faker()
    test_email = fake.email()
    test_code = "123456"
    test_language = "en-US"
    # Mock mail service as not initialized
    mock_mail = mock_external_service_dependencies["mail"]
    mock_mail.is_inited.return_value = False
    # Act: Execute the task — it should return before building any email.
    send_email_code_login_mail_task(
        language=test_language,
        to=test_email,
        code=test_code,
    )
    # Assert: Verify expected outcomes
    mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
    # Verify mail service was checked for initialization
    mock_mail.is_inited.assert_called_once()
    # Verify email service was not called — the guard short-circuited.
    mock_email_service_instance.send_email.assert_not_called()
def test_send_email_code_login_mail_task_email_service_exception(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test email code login mail task when email service raises an exception.

    This test verifies that the task can properly:
    1. Handle email service exceptions gracefully
    2. Log appropriate error messages
    3. Continue execution without crashing
    4. Maintain proper error handling
    """
    # Arrange: Setup test data
    fake = Faker()
    test_email = fake.email()
    test_code = "123456"
    test_language = "en-US"
    # Mock email service to raise an exception
    mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
    mock_email_service_instance.send_email.side_effect = Exception("Email service unavailable")
    # Act: Execute the task - it should handle the exception gracefully
    # (no exception must propagate out of this call).
    send_email_code_login_mail_task(
        language=test_language,
        to=test_email,
        code=test_code,
    )
    # Assert: Verify expected outcomes
    mock_mail = mock_external_service_dependencies["mail"]
    mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
    # Verify mail service was checked for initialization
    mock_mail.is_inited.assert_called_once()
    # Verify email service was called (and failed) with the exact payload
    # the task was given.
    mock_email_service_instance.send_email.assert_called_once_with(
        email_type=EmailType.EMAIL_CODE_LOGIN,
        language_code=test_language,
        to=test_email,
        template_context={
            "to": test_email,
            "code": test_code,
        },
    )
def test_send_email_code_login_mail_task_invalid_parameters(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test email code login mail task with invalid parameters.

    This test verifies that the task can properly:
    1. Handle empty or None email addresses
    2. Process empty or None verification codes
    3. Handle invalid language codes
    4. Maintain proper error handling for invalid inputs

    The task performs no validation itself: it forwards parameters
    unchanged and relies on the email service to reject bad input.
    """
    # Arrange: Setup test data
    fake = Faker()
    test_language = "en-US"
    # Hoist the mock lookup out of the loop; only the reset is per-case.
    mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]

    # Test cases for invalid parameters
    invalid_test_cases = [
        {"email": "", "code": "123456", "description": "empty email"},
        {"email": None, "code": "123456", "description": "None email"},
        {"email": fake.email(), "code": "", "description": "empty code"},
        {"email": fake.email(), "code": None, "description": "None code"},
        {"email": "invalid-email", "code": "123456", "description": "invalid email format"},
    ]
    for test_case in invalid_test_cases:
        # Reset call records so each case is asserted in isolation.
        mock_email_service_instance.reset_mock()

        # Act: Execute the task with invalid parameters
        send_email_code_login_mail_task(
            language=test_language,
            to=test_case["email"],
            code=test_case["code"],
        )

        # Assert: parameters are passed through as-is. Use the case
        # description so a failure identifies the offending input
        # (previously the "description" field was never used).
        assert mock_email_service_instance.send_email.call_count == 1, (
            f"email service should be called exactly once for case: {test_case['description']}"
        )
def test_send_email_code_login_mail_task_edge_cases(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test email code login mail task with edge cases and boundary conditions.

    This test verifies that the task can properly:
    1. Handle very long email addresses
    2. Process very long verification codes
    3. Handle special characters in parameters
    4. Process extreme language codes
    """
    # Arrange: Setup test data
    fake = Faker()
    test_language = "en-US"
    # Edge case test data — each entry stresses a different boundary of
    # the pass-through behavior; "description" documents the intent.
    edge_cases = [
        {
            "email": "a" * 100 + "@example.com",  # Very long email
            "code": "1" * 20,  # Very long code
            "description": "very long email and code",
        },
        {
            "email": "test+tag@example.com",  # Email with special characters
            "code": "123-456",  # Code with special characters
            "description": "special characters",
        },
        {
            "email": "test@sub.domain.example.com",  # Complex domain
            "code": "000000",  # All zeros
            "description": "complex domain and all zeros code",
        },
        {
            "email": "test@example.co.uk",  # International domain
            "code": "999999",  # All nines
            "description": "international domain and all nines code",
        },
    ]
    for test_case in edge_cases:
        # Reset mocks for each test case so assert_called_once_with holds
        # per iteration.
        mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
        mock_email_service_instance.reset_mock()
        # Act: Execute the task with edge case data
        send_email_code_login_mail_task(
            language=test_language,
            to=test_case["email"],
            code=test_case["code"],
        )
        # Assert: Verify that email service was called with edge case data
        # unchanged — the task must not normalize or truncate inputs.
        mock_email_service_instance.send_email.assert_called_once_with(
            email_type=EmailType.EMAIL_CODE_LOGIN,
            language_code=test_language,
            to=test_case["email"],
            template_context={
                "to": test_case["email"],
                "code": test_case["code"],
            },
        )
def test_send_email_code_login_mail_task_database_integration(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test email code login mail task with database integration.

    This test verifies that the task can properly:
    1. Work with real database connections
    2. Handle database session management
    3. Maintain proper database state
    4. Complete without database-related errors
    """
    # Arrange: Setup test data with database — recipient comes from a
    # persisted account rather than a synthetic address.
    fake = Faker()
    account, tenant = self._create_test_tenant_and_account(db_session_with_containers, fake)
    test_email = account.email
    test_code = "123456"
    test_language = "en-US"
    # Act: Execute the task
    send_email_code_login_mail_task(
        language=test_language,
        to=test_email,
        code=test_code,
    )
    # Assert: Verify expected outcomes
    mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
    # Verify email service was called with database account email
    mock_email_service_instance.send_email.assert_called_once_with(
        email_type=EmailType.EMAIL_CODE_LOGIN,
        language_code=test_language,
        to=test_email,
        template_context={
            "to": test_email,
            "code": test_code,
        },
    )
    # Verify database state is maintained — refresh re-reads the row to
    # prove the task did not mutate the account.
    db_session_with_containers.refresh(account)
    assert account.email == test_email
    assert account.status == "active"
def test_send_email_code_login_mail_task_redis_integration(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test email code login mail task with Redis integration.

    This test verifies that the task can properly:
    1. Work with Redis cache connections
    2. Handle Redis operations without errors
    3. Maintain proper cache state
    4. Complete without Redis-related errors
    """
    # Arrange: Setup test data
    fake = Faker()
    test_email = fake.email()
    test_code = "123456"
    test_language = "en-US"
    # Setup Redis cache data — local import because this module does not
    # import redis_client at top level.
    from extensions.ext_redis import redis_client
    cache_key = f"email_code_login_test_{test_email}"
    redis_client.set(cache_key, "test_value", ex=300)
    # Act: Execute the task
    send_email_code_login_mail_task(
        language=test_language,
        to=test_email,
        code=test_code,
    )
    # Assert: Verify expected outcomes
    mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
    # Verify email service was called
    mock_email_service_instance.send_email.assert_called_once()
    # Verify Redis cache is still accessible and untouched by the task.
    # The b"test_value" comparison assumes the client returns raw bytes
    # (decode_responses not enabled) — TODO confirm client configuration.
    assert redis_client.exists(cache_key) == 1
    assert redis_client.get(cache_key) == b"test_value"
    # Clean up Redis cache
    redis_client.delete(cache_key)
def test_send_email_code_login_mail_task_error_handling_comprehensive(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test comprehensive error handling for email code login mail task.

    This test verifies that the task can properly:
    1. Handle various types of exceptions
    2. Log appropriate error messages
    3. Continue execution despite errors
    4. Maintain proper error reporting
    """
    # Arrange: Setup test data
    fake = Faker()
    test_email = fake.email()
    test_code = "123456"
    test_language = "en-US"

    # A representative spread of exception types the email service might
    # raise; the name is used only in assertion diagnostics.
    exception_types = [
        ("ValueError", ValueError("Invalid email format")),
        ("RuntimeError", RuntimeError("Service unavailable")),
        ("ConnectionError", ConnectionError("Network error")),
        ("TimeoutError", TimeoutError("Request timeout")),
        ("Exception", Exception("Generic error")),
    ]
    for error_name, exception in exception_types:
        # Reset mocks for each test case so call counts stay per-iteration.
        mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
        mock_email_service_instance.reset_mock()
        mock_email_service_instance.send_email.side_effect = exception

        # Mock logging to capture error messages
        with patch("tasks.mail_email_code_login.logger") as mock_logger:
            # Act: the task must swallow the exception rather than
            # propagate it out of this call.
            send_email_code_login_mail_task(
                language=test_language,
                to=test_email,
                code=test_code,
            )

            # Assert: email service was invoked (and raised) ...
            mock_email_service_instance.send_email.assert_called_once()
            # ... and the failure was reported via logger.exception.
            # (The previous version also built an unused `error_calls`
            # list filtering on the exact message — dead code, removed.)
            assert mock_logger.exception.call_count >= 1, f"Error should be logged for {error_name}"

        # Clear the side effect before the next iteration.
        mock_email_service_instance.send_email.side_effect = None

View File

@@ -0,0 +1,261 @@
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from tasks.mail_inner_task import send_inner_email_task
class TestMailInnerTask:
"""Integration tests for send_inner_email_task using testcontainers."""
@pytest.fixture
def mock_external_service_dependencies(self):
    """Mock setup for external service dependencies.

    Patches the mail client, the i18n email-service factory and the
    template renderer at the task-module level.
    """
    with (
        patch("tasks.mail_inner_task.mail") as mock_mail,
        patch("tasks.mail_inner_task.get_email_i18n_service") as mock_get_email_i18n_service,
        patch("tasks.mail_inner_task._render_template_with_strategy") as mock_render_template,
    ):
        # Setup mock mail service — report initialized so the task proceeds.
        mock_mail.is_inited.return_value = True
        # Setup mock email i18n service
        mock_email_service = MagicMock()
        mock_get_email_i18n_service.return_value = mock_email_service
        # Setup mock template rendering — a fixed HTML payload the tests
        # assert against verbatim.
        mock_render_template.return_value = "<html>Test email content</html>"
        yield {
            "mail": mock_mail,
            "email_service": mock_email_service,
            "render_template": mock_render_template,
        }
def _create_test_email_data(self, fake: Faker) -> dict:
    """
    Build a test email payload: recipients, subject, templated body and
    the substitution variables the template references.

    Args:
        fake: Faker instance for generating test data

    Returns:
        dict: keys "to", "subject", "body", "substitutions"
    """
    recipients = [fake.email() for _ in range(3)]
    template_vars = {
        "name": fake.name(),
        "company": fake.company(),
        "date": fake.date(),
    }
    return {
        "to": recipients,
        "subject": fake.sentence(nb_words=4),
        "body": "Hello {{name}}, this is a test email from {{company}}.",
        "substitutions": template_vars,
    }
def test_send_inner_email_success(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Test successful email sending with valid data.

    This test verifies:
    - Proper email service initialization check
    - Template rendering with substitutions
    - Email service integration
    - Multiple recipient handling
    """
    # Arrange: Create test data (three recipients, templated body).
    fake = Faker()
    email_data = self._create_test_email_data(fake)
    # Act: Execute the task
    send_inner_email_task(
        to=email_data["to"],
        subject=email_data["subject"],
        body=email_data["body"],
        substitutions=email_data["substitutions"],
    )
    # Assert: Verify the expected outcomes
    # Verify mail service was checked for initialization
    mock_external_service_dependencies["mail"].is_inited.assert_called_once()
    # Verify template rendering was called with correct parameters
    mock_external_service_dependencies["render_template"].assert_called_once_with(
        email_data["body"], email_data["substitutions"]
    )
    # Verify email service was called once with the full recipient list
    # (the task does not fan out per recipient) and the canned rendered
    # HTML from the mocked renderer.
    mock_email_service = mock_external_service_dependencies["email_service"]
    mock_email_service.send_raw_email.assert_called_once_with(
        to=email_data["to"],
        subject=email_data["subject"],
        html_content="<html>Test email content</html>",
    )
def test_send_inner_email_single_recipient(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Test email sending with a single recipient.

    Verifies template rendering and that the email service receives a
    one-element recipient list unchanged.
    """
    # Arrange: one recipient, one substitution variable.
    fake = Faker()
    recipients = [fake.email()]
    subject = fake.sentence(nb_words=3)
    body = "Welcome {{user_name}}!"
    substitutions = {"user_name": fake.name()}

    # Act
    send_inner_email_task(
        to=recipients,
        subject=subject,
        body=body,
        substitutions=substitutions,
    )

    # Assert: a single raw send with the canned rendered HTML.
    email_service = mock_external_service_dependencies["email_service"]
    email_service.send_raw_email.assert_called_once_with(
        to=recipients,
        subject=subject,
        html_content="<html>Test email content</html>",
    )
def test_send_inner_email_empty_substitutions(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Test email sending with empty substitutions.

    Verifies the renderer is invoked with an empty context and the send
    still proceeds normally for a variable-free body.
    """
    # Arrange: a body with no template variables and an empty context.
    fake = Faker()
    recipients = [fake.email()]
    subject = fake.sentence(nb_words=3)
    body = "This is a simple email without variables."

    # Act
    send_inner_email_task(
        to=recipients,
        subject=subject,
        body=body,
        substitutions={},
    )

    # Assert: renderer saw the empty context; one raw send went out.
    mocks = mock_external_service_dependencies
    mocks["render_template"].assert_called_once_with(body, {})
    mocks["email_service"].send_raw_email.assert_called_once_with(
        to=recipients,
        subject=subject,
        html_content="<html>Test email content</html>",
    )
def test_send_inner_email_mail_not_initialized(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test email sending when mail service is not initialized.

    This test verifies:
    - Early return when mail service is not initialized
    - No template rendering occurs
    - No email service calls
    - No exceptions raised
    """
    # Arrange: Setup mail service as not initialized
    mock_external_service_dependencies["mail"].is_inited.return_value = False
    fake = Faker()
    email_data = self._create_test_email_data(fake)
    # Act: Execute the task — it should short-circuit on the init guard.
    send_inner_email_task(
        to=email_data["to"],
        subject=email_data["subject"],
        body=email_data["body"],
        substitutions=email_data["substitutions"],
    )
    # Assert: Verify no processing occurred past the guard.
    mock_external_service_dependencies["render_template"].assert_not_called()
    mock_external_service_dependencies["email_service"].send_raw_email.assert_not_called()
def test_send_inner_email_template_rendering_error(
    self, db_session_with_containers, mock_external_service_dependencies
):
    """
    Test email sending when template rendering fails.

    Verifies the task attempts rendering, then aborts the send when the
    renderer raises, without propagating the exception.
    """
    mocks = mock_external_service_dependencies
    # Arrange: make the renderer fail.
    mocks["render_template"].side_effect = Exception("Template rendering failed")
    payload = self._create_test_email_data(Faker())

    # Act: must not raise despite the rendering failure.
    send_inner_email_task(
        to=payload["to"],
        subject=payload["subject"],
        body=payload["body"],
        substitutions=payload["substitutions"],
    )

    # Assert: rendering was attempted, but nothing was sent.
    mocks["render_template"].assert_called_once()
    mocks["email_service"].send_raw_email.assert_not_called()
def test_send_inner_email_service_error(self, db_session_with_containers, mock_external_service_dependencies):
    """
    Test email sending when email service fails.

    This test verifies:
    - Exception handling during email sending
    - Graceful error handling
    """
    # Arrange: Setup email service to raise an exception
    mock_external_service_dependencies["email_service"].send_raw_email.side_effect = Exception(
        "Email service failed"
    )
    fake = Faker()
    email_data = self._create_test_email_data(fake)
    # Act: Execute the task — the send failure must not propagate.
    send_inner_email_task(
        to=email_data["to"],
        subject=email_data["subject"],
        body=email_data["body"],
        substitutions=email_data["substitutions"],
    )
    # Assert: Verify template rendering occurred before the failure
    mock_external_service_dependencies["render_template"].assert_called_once()
    # Verify email service was called (and failed) with the rendered HTML
    mock_email_service = mock_external_service_dependencies["email_service"]
    mock_email_service.send_raw_email.assert_called_once_with(
        to=email_data["to"],
        subject=email_data["subject"],
        html_content="<html>Test email content</html>",
    )

View File

@@ -0,0 +1,544 @@
"""
Integration tests for mail_invite_member_task using testcontainers.
This module provides integration tests for the invite member email task
using TestContainers infrastructure. The tests ensure that the task properly sends
invitation emails with internationalization support, handles error scenarios,
and integrates correctly with the database and Redis for token management.
All tests use the testcontainers infrastructure to ensure proper database isolation
and realistic testing scenarios with actual PostgreSQL and Redis instances.
"""
import json
import uuid
from datetime import UTC, datetime
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from extensions.ext_redis import redis_client
from libs.email_i18n import EmailType
from models.account import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAccountRole
from tasks.mail_invite_member_task import send_invite_member_mail_task
class TestMailInviteMemberTask:
    """
    Integration tests for send_invite_member_mail_task using testcontainers.
    This test class covers the core functionality of the invite member email task:
    - Email sending with proper internationalization
    - Template context generation and URL construction
    - Error handling for failure scenarios
    - Integration with Redis for token validation
    - Mail service initialization checks
    - Real database integration with actual invitation flow
    All tests use the testcontainers infrastructure to ensure proper database isolation
    and realistic testing environment with actual database and Redis interactions.
    """
    @pytest.fixture(autouse=True)
    def cleanup_database(self, db_session_with_containers):
        """Clean up database before each test to ensure isolation."""
        # Clear all test data
        # NOTE(review): join rows are deleted before tenants/accounts, presumably
        # to satisfy foreign-key constraints -- confirm against the schema.
        db_session_with_containers.query(TenantAccountJoin).delete()
        db_session_with_containers.query(Tenant).delete()
        db_session_with_containers.query(Account).delete()
        db_session_with_containers.commit()
        # Clear Redis cache
        # flushdb() wipes the ENTIRE current Redis database, not just invite tokens.
        redis_client.flushdb()
    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("tasks.mail_invite_member_task.mail") as mock_mail,
            patch("tasks.mail_invite_member_task.get_email_i18n_service") as mock_email_service,
            patch("tasks.mail_invite_member_task.dify_config") as mock_config,
        ):
            # Setup mail service mock
            mock_mail.is_inited.return_value = True
            # Setup email service mock
            mock_email_service_instance = MagicMock()
            mock_email_service_instance.send_email.return_value = None
            mock_email_service.return_value = mock_email_service_instance
            # Setup config mock
            # CONSOLE_WEB_URL is the base the task uses to build the activation URL
            # asserted in the tests below.
            mock_config.CONSOLE_WEB_URL = "https://console.dify.ai"
            yield {
                "mail": mock_mail,
                "email_service": mock_email_service_instance,
                "config": mock_config,
            }
    def _create_test_account_and_tenant(self, db_session_with_containers):
        """
        Helper method to create a test account and tenant for testing.
        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
        Returns:
            tuple: (Account, Tenant) created instances
        """
        fake = Faker()
        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            password=fake.password(),
            interface_language="en-US",
            status=AccountStatus.ACTIVE,
        )
        account.created_at = datetime.now(UTC)
        account.updated_at = datetime.now(UTC)
        db_session_with_containers.add(account)
        db_session_with_containers.commit()
        # refresh() pulls DB-generated defaults (e.g. the primary key) back
        # onto the instance before it is used below.
        db_session_with_containers.refresh(account)
        # Create tenant
        tenant = Tenant(
            name=fake.company(),
        )
        tenant.created_at = datetime.now(UTC)
        tenant.updated_at = datetime.now(UTC)
        db_session_with_containers.add(tenant)
        db_session_with_containers.commit()
        db_session_with_containers.refresh(tenant)
        # Create tenant member relationship
        tenant_join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
        )
        tenant_join.created_at = datetime.now(UTC)
        db_session_with_containers.add(tenant_join)
        db_session_with_containers.commit()
        return account, tenant
    def _create_invitation_token(self, tenant, account):
        """
        Helper method to create a valid invitation token in Redis.
        Args:
            tenant: Tenant instance
            account: Account instance
        Returns:
            str: Generated invitation token
        """
        token = str(uuid.uuid4())
        invitation_data = {
            "account_id": account.id,
            "email": account.email,
            "workspace_id": tenant.id,
        }
        # NOTE(review): the key format and 24h TTL are assumed to mirror the
        # production member-invite flow -- confirm against the invite service.
        cache_key = f"member_invite:token:{token}"
        redis_client.setex(cache_key, 24 * 60 * 60, json.dumps(invitation_data))  # 24 hours
        return token
    def _create_pending_account_for_invitation(self, db_session_with_containers, email, tenant):
        """
        Helper method to create a pending account for invitation testing.
        Args:
            db_session_with_containers: Database session
            email: Email address for the account
            tenant: Tenant instance
        Returns:
            Account: Created pending account
        """
        account = Account(
            email=email,
            # Default the display name to the email's local part.
            name=email.split("@")[0],
            password="",
            interface_language="en-US",
            status=AccountStatus.PENDING,
        )
        account.created_at = datetime.now(UTC)
        account.updated_at = datetime.now(UTC)
        db_session_with_containers.add(account)
        db_session_with_containers.commit()
        db_session_with_containers.refresh(account)
        # Create tenant member relationship
        tenant_join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.NORMAL,
        )
        tenant_join.created_at = datetime.now(UTC)
        db_session_with_containers.add(tenant_join)
        db_session_with_containers.commit()
        return account
    def test_send_invite_member_mail_success(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Test successful invitation email sending with all parameters.
        This test verifies:
        - Email service is called with correct parameters
        - Template context includes all required fields
        - URL is constructed correctly with token
        - Performance logging is recorded
        - No exceptions are raised
        """
        # Arrange: Create test data
        inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        invitee_email = "test@example.com"
        language = "en-US"
        token = self._create_invitation_token(tenant, inviter)
        inviter_name = inviter.name
        workspace_name = tenant.name
        # Act: Execute the task
        send_invite_member_mail_task(
            language=language,
            to=invitee_email,
            token=token,
            inviter_name=inviter_name,
            workspace_name=workspace_name,
        )
        # Assert: Verify email service was called correctly
        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_email.assert_called_once()
        # Verify call arguments
        # call_args[1] is the kwargs dict the task passed to send_email.
        call_args = mock_email_service.send_email.call_args
        assert call_args[1]["email_type"] == EmailType.INVITE_MEMBER
        assert call_args[1]["language_code"] == language
        assert call_args[1]["to"] == invitee_email
        # Verify template context
        template_context = call_args[1]["template_context"]
        assert template_context["to"] == invitee_email
        assert template_context["inviter_name"] == inviter_name
        assert template_context["workspace_name"] == workspace_name
        assert template_context["url"] == f"https://console.dify.ai/activate?token={token}"
    def test_send_invite_member_mail_different_languages(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test invitation email sending with different language codes.
        This test verifies:
        - Email service handles different language codes correctly
        - Template context is passed correctly for each language
        - No language-specific errors occur
        """
        # Arrange: Create test data
        inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        token = self._create_invitation_token(tenant, inviter)
        test_languages = ["en-US", "zh-CN", "ja-JP", "fr-FR", "de-DE", "es-ES"]
        for language in test_languages:
            # Act: Execute the task with different language
            send_invite_member_mail_task(
                language=language,
                to="test@example.com",
                token=token,
                inviter_name=inviter.name,
                workspace_name=tenant.name,
            )
            # Assert: Verify language code was passed correctly
            # call_args always reflects the most recent call, so asserting inside
            # the loop checks each language individually.
            mock_email_service = mock_external_service_dependencies["email_service"]
            call_args = mock_email_service.send_email.call_args
            assert call_args[1]["language_code"] == language
    def test_send_invite_member_mail_mail_not_initialized(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test behavior when mail service is not initialized.
        This test verifies:
        - Task returns early when mail is not initialized
        - Email service is not called
        - No exceptions are raised
        """
        # Arrange: Setup mail service as not initialized
        mock_mail = mock_external_service_dependencies["mail"]
        mock_mail.is_inited.return_value = False
        # Act: Execute the task
        result = send_invite_member_mail_task(
            language="en-US",
            to="test@example.com",
            token="test-token",
            inviter_name="Test User",
            workspace_name="Test Workspace",
        )
        # Assert: Verify early return
        assert result is None
        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_email.assert_not_called()
    def test_send_invite_member_mail_email_service_exception(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test error handling when email service raises an exception.
        This test verifies:
        - Exception is caught and logged
        - Task completes without raising exception
        - Error logging is performed
        """
        # Arrange: Setup email service to raise exception
        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_email.side_effect = Exception("Email service failed")
        # Act & Assert: Execute task and verify exception is handled
        with patch("tasks.mail_invite_member_task.logger") as mock_logger:
            send_invite_member_mail_task(
                language="en-US",
                to="test@example.com",
                token="test-token",
                inviter_name="Test User",
                workspace_name="Test Workspace",
            )
            # Verify error was logged
            # The %s placeholder is checked literally: the task is expected to use
            # lazy logging args, not a pre-formatted string.
            mock_logger.exception.assert_called_once()
            error_call = mock_logger.exception.call_args[0][0]
            assert "Send invite member mail to %s failed" in error_call
    def test_send_invite_member_mail_template_context_validation(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test template context contains all required fields for email rendering.
        This test verifies:
        - All required template context fields are present
        - Field values match expected data
        - URL construction is correct
        - No missing or None values in context
        """
        # Arrange: Create test data with specific values
        inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        token = "test-token-123"
        invitee_email = "invitee@example.com"
        inviter_name = "John Doe"
        workspace_name = "Acme Corp"
        # Act: Execute the task
        send_invite_member_mail_task(
            language="en-US",
            to=invitee_email,
            token=token,
            inviter_name=inviter_name,
            workspace_name=workspace_name,
        )
        # Assert: Verify template context
        mock_email_service = mock_external_service_dependencies["email_service"]
        call_args = mock_email_service.send_email.call_args
        template_context = call_args[1]["template_context"]
        # Verify all required fields are present
        required_fields = ["to", "inviter_name", "workspace_name", "url"]
        for field in required_fields:
            assert field in template_context
            assert template_context[field] is not None
            assert template_context[field] != ""
        # Verify specific values
        assert template_context["to"] == invitee_email
        assert template_context["inviter_name"] == inviter_name
        assert template_context["workspace_name"] == workspace_name
        assert template_context["url"] == f"https://console.dify.ai/activate?token={token}"
    def test_send_invite_member_mail_integration_with_redis_token(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test integration with Redis token validation.
        This test verifies:
        - Task works with real Redis token data
        - Token validation can be performed after email sending
        - Redis data integrity is maintained
        """
        # Arrange: Create test data and store token in Redis
        inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        token = self._create_invitation_token(tenant, inviter)
        # Verify token exists in Redis before sending email
        cache_key = f"member_invite:token:{token}"
        assert redis_client.exists(cache_key) == 1
        # Act: Execute the task
        send_invite_member_mail_task(
            language="en-US",
            to=inviter.email,
            token=token,
            inviter_name=inviter.name,
            workspace_name=tenant.name,
        )
        # Assert: Verify token still exists after email sending
        # Sending the mail must NOT consume or invalidate the invitation token.
        assert redis_client.exists(cache_key) == 1
        # Verify token data integrity
        token_data = redis_client.get(cache_key)
        assert token_data is not None
        invitation_data = json.loads(token_data)
        assert invitation_data["account_id"] == inviter.id
        assert invitation_data["email"] == inviter.email
        assert invitation_data["workspace_id"] == tenant.id
    def test_send_invite_member_mail_with_special_characters(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test email sending with special characters in names and workspace names.
        This test verifies:
        - Special characters are handled correctly in template context
        - Email service receives properly formatted data
        - No encoding issues occur
        """
        # Arrange: Create test data with special characters
        inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        token = self._create_invitation_token(tenant, inviter)
        # Covers apostrophes/ampersands, accented Latin, CJK, Cyrillic and Arabic.
        special_cases = [
            ("John O'Connor", "Acme & Co."),
            ("José María", "Café & Restaurant"),
            ("李小明", "北京科技有限公司"),
            ("François & Marie", "L'École Internationale"),
            ("Александр", "ООО Технологии"),
            ("محمد أحمد", "شركة التقنية المتقدمة"),
        ]
        for inviter_name, workspace_name in special_cases:
            # Act: Execute the task
            send_invite_member_mail_task(
                language="en-US",
                to="test@example.com",
                token=token,
                inviter_name=inviter_name,
                workspace_name=workspace_name,
            )
            # Assert: Verify special characters are preserved
            mock_email_service = mock_external_service_dependencies["email_service"]
            call_args = mock_email_service.send_email.call_args
            template_context = call_args[1]["template_context"]
            assert template_context["inviter_name"] == inviter_name
            assert template_context["workspace_name"] == workspace_name
    def test_send_invite_member_mail_real_database_integration(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test real database integration with actual invitation flow.
        This test verifies:
        - Task works with real database entities
        - Account and tenant relationships are properly maintained
        - Database state is consistent after email sending
        - Real invitation data flow is tested
        """
        # Arrange: Create real database entities
        inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        invitee_email = "newmember@example.com"
        # Create a pending account for invitation (simulating real invitation flow)
        pending_account = self._create_pending_account_for_invitation(db_session_with_containers, invitee_email, tenant)
        # Create invitation token with real account data
        token = self._create_invitation_token(tenant, pending_account)
        # Act: Execute the task with real data
        send_invite_member_mail_task(
            language="en-US",
            to=invitee_email,
            token=token,
            inviter_name=inviter.name,
            workspace_name=tenant.name,
        )
        # Assert: Verify email service was called with real data
        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_email.assert_called_once()
        # Verify database state is maintained
        # refresh() re-reads from the DB to prove the task did not mutate rows.
        db_session_with_containers.refresh(pending_account)
        db_session_with_containers.refresh(tenant)
        assert pending_account.status == AccountStatus.PENDING
        assert pending_account.email == invitee_email
        assert tenant.name is not None
        # Verify tenant relationship exists
        tenant_join = (
            db_session_with_containers.query(TenantAccountJoin)
            .filter_by(tenant_id=tenant.id, account_id=pending_account.id)
            .first()
        )
        assert tenant_join is not None
        assert tenant_join.role == TenantAccountRole.NORMAL
    def test_send_invite_member_mail_token_lifecycle_management(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test token lifecycle management and validation.
        This test verifies:
        - Token is properly stored in Redis with correct TTL
        - Token data structure is correct
        - Token can be retrieved and validated after email sending
        - Token expiration is handled correctly
        """
        # Arrange: Create test data
        inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        token = self._create_invitation_token(tenant, inviter)
        # Act: Execute the task
        send_invite_member_mail_task(
            language="en-US",
            to=inviter.email,
            token=token,
            inviter_name=inviter.name,
            workspace_name=tenant.name,
        )
        # Assert: Verify token lifecycle
        cache_key = f"member_invite:token:{token}"
        # Token should still exist
        assert redis_client.exists(cache_key) == 1
        # Token should have correct TTL (approximately 24 hours)
        # A one-hour lower bound tolerates test wall-clock time elapsing.
        ttl = redis_client.ttl(cache_key)
        assert 23 * 60 * 60 <= ttl <= 24 * 60 * 60  # Allow some tolerance
        # Token data should be valid
        token_data = redis_client.get(cache_key)
        assert token_data is not None
        invitation_data = json.loads(token_data)
        assert invitation_data["account_id"] == inviter.id
        assert invitation_data["email"] == inviter.email
        assert invitation_data["workspace_id"] == tenant.id

View File

@@ -0,0 +1,401 @@
"""
TestContainers-based integration tests for mail_owner_transfer_task.
This module provides comprehensive integration tests for the mail owner transfer tasks
using TestContainers to ensure real email service integration and proper functionality
testing with actual database and service dependencies.
"""
import logging
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from libs.email_i18n import EmailType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from tasks.mail_owner_transfer_task import (
send_new_owner_transfer_notify_email_task,
send_old_owner_transfer_notify_email_task,
send_owner_transfer_confirm_task,
)
logger = logging.getLogger(__name__)
class TestMailOwnerTransferTask:
    """Integration tests for mail owner transfer tasks using testcontainers."""
    @pytest.fixture
    def mock_mail_dependencies(self):
        """Mock setup for mail service dependencies."""
        # Patch the module-level mail extension and the i18n service factory as
        # seen by the task module, so no real email is sent.
        with (
            patch("tasks.mail_owner_transfer_task.mail") as mock_mail,
            patch("tasks.mail_owner_transfer_task.get_email_i18n_service") as mock_get_email_service,
        ):
            # Setup mock mail service
            mock_mail.is_inited.return_value = True
            # Setup mock email service
            mock_email_service = MagicMock()
            mock_get_email_service.return_value = mock_email_service
            yield {
                "mail": mock_mail,
                "email_service": mock_email_service,
                "get_email_service": mock_get_email_service,
            }
    def _create_test_account_and_tenant(self, db_session_with_containers):
        """
        Helper method to create test account and tenant for testing.
        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
        Returns:
            tuple: (account, tenant) - Created account and tenant instances
        """
        fake = Faker()
        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db_session_with_containers.add(account)
        db_session_with_containers.commit()
        # Create tenant
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db_session_with_containers.add(tenant)
        db_session_with_containers.commit()
        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            # NOTE(review): .value is used here while sibling test modules pass
            # the enum directly -- presumably both coerce to the same stored
            # string; confirm against the model definition.
            role=TenantAccountRole.OWNER.value,
            current=True,
        )
        db_session_with_containers.add(join)
        db_session_with_containers.commit()
        return account, tenant
    def test_send_owner_transfer_confirm_task_success(self, db_session_with_containers, mock_mail_dependencies):
        """
        Test successful owner transfer confirmation email sending.
        This test verifies:
        - Proper email service initialization check
        - Correct email service method calls with right parameters
        - Email template context is properly constructed
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        test_language = "en-US"
        test_email = account.email
        test_code = "123456"
        test_workspace = tenant.name
        # Act: Execute the task
        send_owner_transfer_confirm_task(
            language=test_language,
            to=test_email,
            code=test_code,
            workspace=test_workspace,
        )
        # Assert: Verify the expected outcomes
        mock_mail_dependencies["mail"].is_inited.assert_called_once()
        mock_mail_dependencies["get_email_service"].assert_called_once()
        # Verify email service was called with correct parameters
        mock_mail_dependencies["email_service"].send_email.assert_called_once()
        # call_args[1] is the kwargs dict passed to the mocked send_email.
        call_args = mock_mail_dependencies["email_service"].send_email.call_args
        assert call_args[1]["email_type"] == EmailType.OWNER_TRANSFER_CONFIRM
        assert call_args[1]["language_code"] == test_language
        assert call_args[1]["to"] == test_email
        assert call_args[1]["template_context"]["to"] == test_email
        assert call_args[1]["template_context"]["code"] == test_code
        assert call_args[1]["template_context"]["WorkspaceName"] == test_workspace
    def test_send_owner_transfer_confirm_task_mail_not_initialized(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test owner transfer confirmation email when mail service is not initialized.
        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls are made
        - No exceptions are raised
        """
        # Arrange: Set mail service as not initialized
        mock_mail_dependencies["mail"].is_inited.return_value = False
        test_language = "en-US"
        test_email = "test@example.com"
        test_code = "123456"
        test_workspace = "Test Workspace"
        # Act: Execute the task
        send_owner_transfer_confirm_task(
            language=test_language,
            to=test_email,
            code=test_code,
            workspace=test_workspace,
        )
        # Assert: Verify no email service calls were made
        mock_mail_dependencies["get_email_service"].assert_not_called()
        mock_mail_dependencies["email_service"].send_email.assert_not_called()
    def test_send_owner_transfer_confirm_task_exception_handling(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test exception handling in owner transfer confirmation email.
        This test verifies:
        - Exceptions are properly caught and logged
        - No exceptions are propagated to caller
        - Email service calls are attempted
        - Error logging works correctly
        """
        # Arrange: Setup email service to raise exception
        mock_mail_dependencies["email_service"].send_email.side_effect = Exception("Email service error")
        test_language = "en-US"
        test_email = "test@example.com"
        test_code = "123456"
        test_workspace = "Test Workspace"
        # Act & Assert: Verify no exception is raised
        # The task is expected to swallow send failures internally.
        try:
            send_owner_transfer_confirm_task(
                language=test_language,
                to=test_email,
                code=test_code,
                workspace=test_workspace,
            )
        except Exception as e:
            pytest.fail(f"Task should not raise exceptions, but raised: {e}")
        # Verify email service was called despite the exception
        mock_mail_dependencies["email_service"].send_email.assert_called_once()
    def test_send_old_owner_transfer_notify_email_task_success(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test successful old owner transfer notification email sending.
        This test verifies:
        - Proper email service initialization check
        - Correct email service method calls with right parameters
        - Email template context includes new owner email
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        test_language = "en-US"
        test_email = account.email
        test_workspace = tenant.name
        test_new_owner_email = "newowner@example.com"
        # Act: Execute the task
        send_old_owner_transfer_notify_email_task(
            language=test_language,
            to=test_email,
            workspace=test_workspace,
            new_owner_email=test_new_owner_email,
        )
        # Assert: Verify the expected outcomes
        mock_mail_dependencies["mail"].is_inited.assert_called_once()
        mock_mail_dependencies["get_email_service"].assert_called_once()
        # Verify email service was called with correct parameters
        mock_mail_dependencies["email_service"].send_email.assert_called_once()
        call_args = mock_mail_dependencies["email_service"].send_email.call_args
        assert call_args[1]["email_type"] == EmailType.OWNER_TRANSFER_OLD_NOTIFY
        assert call_args[1]["language_code"] == test_language
        assert call_args[1]["to"] == test_email
        assert call_args[1]["template_context"]["to"] == test_email
        assert call_args[1]["template_context"]["WorkspaceName"] == test_workspace
        assert call_args[1]["template_context"]["NewOwnerEmail"] == test_new_owner_email
    def test_send_old_owner_transfer_notify_email_task_mail_not_initialized(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test old owner transfer notification email when mail service is not initialized.
        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls are made
        - No exceptions are raised
        """
        # Arrange: Set mail service as not initialized
        mock_mail_dependencies["mail"].is_inited.return_value = False
        test_language = "en-US"
        test_email = "test@example.com"
        test_workspace = "Test Workspace"
        test_new_owner_email = "newowner@example.com"
        # Act: Execute the task
        send_old_owner_transfer_notify_email_task(
            language=test_language,
            to=test_email,
            workspace=test_workspace,
            new_owner_email=test_new_owner_email,
        )
        # Assert: Verify no email service calls were made
        mock_mail_dependencies["get_email_service"].assert_not_called()
        mock_mail_dependencies["email_service"].send_email.assert_not_called()
    def test_send_old_owner_transfer_notify_email_task_exception_handling(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test exception handling in old owner transfer notification email.
        This test verifies:
        - Exceptions are properly caught and logged
        - No exceptions are propagated to caller
        - Email service calls are attempted
        - Error logging works correctly
        """
        # Arrange: Setup email service to raise exception
        mock_mail_dependencies["email_service"].send_email.side_effect = Exception("Email service error")
        test_language = "en-US"
        test_email = "test@example.com"
        test_workspace = "Test Workspace"
        test_new_owner_email = "newowner@example.com"
        # Act & Assert: Verify no exception is raised
        try:
            send_old_owner_transfer_notify_email_task(
                language=test_language,
                to=test_email,
                workspace=test_workspace,
                new_owner_email=test_new_owner_email,
            )
        except Exception as e:
            pytest.fail(f"Task should not raise exceptions, but raised: {e}")
        # Verify email service was called despite the exception
        mock_mail_dependencies["email_service"].send_email.assert_called_once()
    def test_send_new_owner_transfer_notify_email_task_success(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test successful new owner transfer notification email sending.
        This test verifies:
        - Proper email service initialization check
        - Correct email service method calls with right parameters
        - Email template context is properly constructed
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        test_language = "en-US"
        test_email = account.email
        test_workspace = tenant.name
        # Act: Execute the task
        send_new_owner_transfer_notify_email_task(
            language=test_language,
            to=test_email,
            workspace=test_workspace,
        )
        # Assert: Verify the expected outcomes
        mock_mail_dependencies["mail"].is_inited.assert_called_once()
        mock_mail_dependencies["get_email_service"].assert_called_once()
        # Verify email service was called with correct parameters
        mock_mail_dependencies["email_service"].send_email.assert_called_once()
        call_args = mock_mail_dependencies["email_service"].send_email.call_args
        assert call_args[1]["email_type"] == EmailType.OWNER_TRANSFER_NEW_NOTIFY
        assert call_args[1]["language_code"] == test_language
        assert call_args[1]["to"] == test_email
        assert call_args[1]["template_context"]["to"] == test_email
        assert call_args[1]["template_context"]["WorkspaceName"] == test_workspace
    def test_send_new_owner_transfer_notify_email_task_mail_not_initialized(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test new owner transfer notification email when mail service is not initialized.
        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls are made
        - No exceptions are raised
        """
        # Arrange: Set mail service as not initialized
        mock_mail_dependencies["mail"].is_inited.return_value = False
        test_language = "en-US"
        test_email = "test@example.com"
        test_workspace = "Test Workspace"
        # Act: Execute the task
        send_new_owner_transfer_notify_email_task(
            language=test_language,
            to=test_email,
            workspace=test_workspace,
        )
        # Assert: Verify no email service calls were made
        mock_mail_dependencies["get_email_service"].assert_not_called()
        mock_mail_dependencies["email_service"].send_email.assert_not_called()
    def test_send_new_owner_transfer_notify_email_task_exception_handling(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test exception handling in new owner transfer notification email.
        This test verifies:
        - Exceptions are properly caught and logged
        - No exceptions are propagated to caller
        - Email service calls are attempted
        - Error logging works correctly
        """
        # Arrange: Setup email service to raise exception
        mock_mail_dependencies["email_service"].send_email.side_effect = Exception("Email service error")
        test_language = "en-US"
        test_email = "test@example.com"
        test_workspace = "Test Workspace"
        # Act & Assert: Verify no exception is raised
        try:
            send_new_owner_transfer_notify_email_task(
                language=test_language,
                to=test_email,
                workspace=test_workspace,
            )
        except Exception as e:
            pytest.fail(f"Task should not raise exceptions, but raised: {e}")
        # Verify email service was called despite the exception
        mock_mail_dependencies["email_service"].send_email.assert_called_once()

View File

@@ -0,0 +1,134 @@
"""
TestContainers-based integration tests for mail_register_task.py
This module provides integration tests for email registration tasks
using TestContainers to ensure real database and service interactions.
"""
from unittest.mock import MagicMock, patch
import pytest
from faker import Faker
from libs.email_i18n import EmailType
from tasks.mail_register_task import send_email_register_mail_task, send_email_register_mail_task_when_account_exist
class TestMailRegisterTask:
    """Integration tests for mail_register_task using testcontainers."""
    @pytest.fixture
    def mock_mail_dependencies(self):
        """Patch the mail extension and the i18n email service used by the task module."""
        with (
            patch("tasks.mail_register_task.mail") as mock_mail,
            patch("tasks.mail_register_task.get_email_i18n_service") as mock_get_email_service,
        ):
            # By default the mail subsystem reports itself as initialized.
            mock_mail.is_inited.return_value = True
            # The i18n service is a plain MagicMock whose send_email calls we inspect.
            email_service = MagicMock()
            mock_get_email_service.return_value = email_service
            yield {
                "mail": mock_mail,
                "email_service": email_service,
                "get_email_service": mock_get_email_service,
            }
    def test_send_email_register_mail_task_success(self, db_session_with_containers, mock_mail_dependencies):
        """Test successful email registration mail sending."""
        faker = Faker()
        language = "en-US"
        recipient = faker.email()
        verification_code = faker.numerify("######")
        send_email_register_mail_task(language=language, to=recipient, code=verification_code)
        # The task must check initialization and then send exactly one email
        # with the registration template context.
        mock_mail_dependencies["mail"].is_inited.assert_called_once()
        mock_mail_dependencies["email_service"].send_email.assert_called_once_with(
            email_type=EmailType.EMAIL_REGISTER,
            language_code=language,
            to=recipient,
            template_context={"to": recipient, "code": verification_code},
        )
    def test_send_email_register_mail_task_mail_not_initialized(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """Test email registration task when mail service is not initialized."""
        deps = mock_mail_dependencies
        deps["mail"].is_inited.return_value = False
        send_email_register_mail_task(language="en-US", to="test@example.com", code="123456")
        # The task must bail out before ever touching the email service.
        deps["get_email_service"].assert_not_called()
        deps["email_service"].send_email.assert_not_called()
    def test_send_email_register_mail_task_exception_handling(self, db_session_with_containers, mock_mail_dependencies):
        """Test email registration task exception handling."""
        mock_mail_dependencies["email_service"].send_email.side_effect = Exception("Email service error")
        faker = Faker()
        recipient = faker.email()
        verification_code = faker.numerify("######")
        with patch("tasks.mail_register_task.logger") as mock_logger:
            send_email_register_mail_task(language="en-US", to=recipient, code=verification_code)
            # The failure is logged lazily (%s args), never re-raised.
            mock_logger.exception.assert_called_once_with("Send email register mail to %s failed", recipient)
    def test_send_email_register_mail_task_when_account_exist_success(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """Test successful email registration mail sending when account exists."""
        faker = Faker()
        language = "en-US"
        recipient = faker.email()
        account_name = faker.name()
        with patch("tasks.mail_register_task.dify_config") as mock_config:
            # The console base URL drives the login / reset-password links below.
            mock_config.CONSOLE_WEB_URL = "https://console.dify.ai"
            send_email_register_mail_task_when_account_exist(language=language, to=recipient, account_name=account_name)
        mock_mail_dependencies["email_service"].send_email.assert_called_once_with(
            email_type=EmailType.EMAIL_REGISTER_WHEN_ACCOUNT_EXIST,
            language_code=language,
            to=recipient,
            template_context={
                "to": recipient,
                "login_url": "https://console.dify.ai/signin",
                "reset_password_url": "https://console.dify.ai/reset-password",
                "account_name": account_name,
            },
        )
    def test_send_email_register_mail_task_when_account_exist_mail_not_initialized(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """Test account exist email task when mail service is not initialized."""
        deps = mock_mail_dependencies
        deps["mail"].is_inited.return_value = False
        send_email_register_mail_task_when_account_exist(
            language="en-US", to="test@example.com", account_name="Test User"
        )
        deps["get_email_service"].assert_not_called()
        deps["email_service"].send_email.assert_not_called()
    def test_send_email_register_mail_task_when_account_exist_exception_handling(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """Test account exist email task exception handling."""
        mock_mail_dependencies["email_service"].send_email.side_effect = Exception("Email service error")
        faker = Faker()
        recipient = faker.email()
        account_name = faker.name()
        with patch("tasks.mail_register_task.logger") as mock_logger:
            send_email_register_mail_task_when_account_exist(language="en-US", to=recipient, account_name=account_name)
            mock_logger.exception.assert_called_once_with("Send email register mail to %s failed", recipient)

View File

@@ -0,0 +1,936 @@
import json
import uuid
from unittest.mock import patch
import pytest
from faker import Faker
from core.app.entities.app_invoke_entities import InvokeFrom, RagPipelineGenerateEntity
from core.app.entities.rag_pipeline_invoke_entities import RagPipelineInvokeEntity
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from extensions.ext_database import db
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Pipeline
from models.workflow import Workflow
from tasks.rag_pipeline.priority_rag_pipeline_run_task import (
priority_rag_pipeline_run_task,
run_single_rag_pipeline_task,
)
from tasks.rag_pipeline.rag_pipeline_run_task import rag_pipeline_run_task
class TestRagPipelineRunTasks:
"""Integration tests for RAG pipeline run tasks using testcontainers.
This test class covers:
- priority_rag_pipeline_run_task function
- rag_pipeline_run_task function
- run_single_rag_pipeline_task function
- Real Redis-based TenantIsolatedTaskQueue operations
- PipelineGenerator._generate method mocking and parameter validation
- File operations and cleanup
- Error handling and queue management
"""
@pytest.fixture
def mock_pipeline_generator(self):
"""Mock PipelineGenerator._generate method."""
with patch("core.app.apps.pipeline.pipeline_generator.PipelineGenerator._generate") as mock_generate:
# Mock the _generate method to return a simple response
mock_generate.return_value = {"answer": "Test response", "metadata": {"test": "data"}}
yield mock_generate
@pytest.fixture
def mock_file_service(self):
"""Mock FileService for file operations."""
with (
patch("services.file_service.FileService.get_file_content") as mock_get_content,
patch("services.file_service.FileService.delete_file") as mock_delete_file,
):
yield {
"get_content": mock_get_content,
"delete_file": mock_delete_file,
}
    def _create_test_pipeline_and_workflow(self, db_session_with_containers):
        """
        Helper method to create test pipeline and workflow for testing.
        Builds the dependency chain Account -> Tenant -> TenantAccountJoin ->
        Workflow -> Pipeline with faker-generated data, committing after each
        insert so database-assigned IDs are available to the next object.
        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
        Returns:
            tuple: (account, tenant, pipeline, workflow) - Created entities
        """
        fake = Faker()
        # Create account and tenant
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()
        # Create tenant-account join: the account owns the tenant and it is
        # their current workspace.
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()
        # Create workflow: minimal draft workflow; graph/features are empty JSON strings.
        workflow = Workflow(
            id=str(uuid.uuid4()),
            tenant_id=tenant.id,
            app_id=str(uuid.uuid4()),
            type="workflow",
            version="draft",
            graph="{}",
            features="{}",
            marked_name=fake.company(),
            marked_comment=fake.text(max_nb_chars=100),
            created_by=account.id,
            environment_variables=[],
            conversation_variables=[],
            rag_pipeline_variables=[],
        )
        db.session.add(workflow)
        db.session.commit()
        # Create pipeline
        pipeline = Pipeline(
            tenant_id=tenant.id,
            workflow_id=workflow.id,
            name=fake.company(),
            description=fake.text(max_nb_chars=100),
            created_by=account.id,
        )
        # Pipeline id is assigned explicitly before insert rather than DB-generated.
        pipeline.id = str(uuid.uuid4())
        db.session.add(pipeline)
        db.session.commit()
        # Refresh entities to ensure they're properly loaded
        db.session.refresh(account)
        db.session.refresh(tenant)
        db.session.refresh(workflow)
        db.session.refresh(pipeline)
        return account, tenant, pipeline, workflow
    def _create_rag_pipeline_invoke_entities(self, account, tenant, pipeline, workflow, count=2):
        """
        Helper method to create RAG pipeline invoke entities for testing.
        Each entity wraps a dict-form application generate entity that references
        the given workflow/tenant/account; the dict keys mirror what the pipeline
        tasks deserialize, so they must stay in sync with production code.
        Args:
            account: Account instance
            tenant: Tenant instance
            pipeline: Pipeline instance
            workflow: Workflow instance
            count: Number of entities to create
        Returns:
            list: List of RagPipelineInvokeEntity instances
        """
        fake = Faker()
        entities = []
        for i in range(count):
            # Create application generate entity
            app_config = {
                "app_id": str(uuid.uuid4()),
                "app_name": fake.company(),
                "mode": "workflow",
                "workflow_id": workflow.id,
                "tenant_id": tenant.id,
                "app_mode": "workflow",
            }
            application_generate_entity = {
                "task_id": str(uuid.uuid4()),
                "app_config": app_config,
                # Query is indexed so each entity is distinguishable in assertions.
                "inputs": {"query": f"Test query {i}"},
                "files": [],
                "user_id": account.id,
                "stream": False,
                "invoke_from": "published",
                "workflow_execution_id": str(uuid.uuid4()),
                "pipeline_config": {
                    "app_id": str(uuid.uuid4()),
                    "app_name": fake.company(),
                    "mode": "workflow",
                    "workflow_id": workflow.id,
                    "tenant_id": tenant.id,
                    "app_mode": "workflow",
                },
                "datasource_type": "upload_file",
                "datasource_info": {},
                "dataset_id": str(uuid.uuid4()),
                "batch": "test_batch",
            }
            entity = RagPipelineInvokeEntity(
                pipeline_id=pipeline.id,
                application_generate_entity=application_generate_entity,
                user_id=account.id,
                tenant_id=tenant.id,
                workflow_id=workflow.id,
                streaming=False,
                workflow_execution_id=str(uuid.uuid4()),
                workflow_thread_pool_id=str(uuid.uuid4()),
            )
            entities.append(entity)
        return entities
def _create_file_content_for_entities(self, entities):
"""
Helper method to create file content for RAG pipeline invoke entities.
Args:
entities: List of RagPipelineInvokeEntity instances
Returns:
str: JSON string containing serialized entities
"""
entities_data = [entity.model_dump() for entity in entities]
return json.dumps(entities_data)
def test_priority_rag_pipeline_run_task_success(
self, db_session_with_containers, mock_pipeline_generator, mock_file_service
):
"""
Test successful priority RAG pipeline run task execution.
This test verifies:
- Task execution with multiple RAG pipeline invoke entities
- File content retrieval and parsing
- PipelineGenerator._generate method calls with correct parameters
- Thread pool execution
- File cleanup after execution
- Queue management with no waiting tasks
"""
# Arrange: Create test data
account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=2)
file_content = self._create_file_content_for_entities(entities)
# Mock file service
file_id = str(uuid.uuid4())
mock_file_service["get_content"].return_value = file_content
# Act: Execute the priority task
priority_rag_pipeline_run_task(file_id, tenant.id)
# Assert: Verify expected outcomes
# Verify file operations
mock_file_service["get_content"].assert_called_once_with(file_id)
mock_file_service["delete_file"].assert_called_once_with(file_id)
# Verify PipelineGenerator._generate was called for each entity
assert mock_pipeline_generator.call_count == 2
# Verify call parameters for each entity
calls = mock_pipeline_generator.call_args_list
for call in calls:
call_kwargs = call[1] # Get keyword arguments
assert call_kwargs["pipeline"].id == pipeline.id
assert call_kwargs["workflow_id"] == workflow.id
assert call_kwargs["user"].id == account.id
assert call_kwargs["invoke_from"] == InvokeFrom.PUBLISHED
assert call_kwargs["streaming"] == False
assert isinstance(call_kwargs["application_generate_entity"], RagPipelineGenerateEntity)
def test_rag_pipeline_run_task_success(
self, db_session_with_containers, mock_pipeline_generator, mock_file_service
):
"""
Test successful regular RAG pipeline run task execution.
This test verifies:
- Task execution with multiple RAG pipeline invoke entities
- File content retrieval and parsing
- PipelineGenerator._generate method calls with correct parameters
- Thread pool execution
- File cleanup after execution
- Queue management with no waiting tasks
"""
# Arrange: Create test data
account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=3)
file_content = self._create_file_content_for_entities(entities)
# Mock file service
file_id = str(uuid.uuid4())
mock_file_service["get_content"].return_value = file_content
# Act: Execute the regular task
rag_pipeline_run_task(file_id, tenant.id)
# Assert: Verify expected outcomes
# Verify file operations
mock_file_service["get_content"].assert_called_once_with(file_id)
mock_file_service["delete_file"].assert_called_once_with(file_id)
# Verify PipelineGenerator._generate was called for each entity
assert mock_pipeline_generator.call_count == 3
# Verify call parameters for each entity
calls = mock_pipeline_generator.call_args_list
for call in calls:
call_kwargs = call[1] # Get keyword arguments
assert call_kwargs["pipeline"].id == pipeline.id
assert call_kwargs["workflow_id"] == workflow.id
assert call_kwargs["user"].id == account.id
assert call_kwargs["invoke_from"] == InvokeFrom.PUBLISHED
assert call_kwargs["streaming"] == False
assert isinstance(call_kwargs["application_generate_entity"], RagPipelineGenerateEntity)
    def test_priority_rag_pipeline_run_task_with_waiting_tasks(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test priority RAG pipeline run task with waiting tasks in queue using real Redis.
        This test verifies:
        - Core task execution
        - Real Redis-based tenant queue processing of waiting tasks
        - Task function calls for waiting tasks
        - Queue management with multiple tasks using actual Redis operations
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
        file_content = self._create_file_content_for_entities(entities)
        # Mock file service
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].return_value = file_content
        # Use real Redis for TenantIsolatedTaskQueue
        queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
        # Add waiting tasks to the real Redis queue BEFORE running the task, so the
        # task's post-processing finds them and re-dispatches one.
        waiting_file_ids = [str(uuid.uuid4()) for _ in range(2)]
        queue.push_tasks(waiting_file_ids)
        # Mock the task function calls (patch .delay so no real Celery dispatch happens)
        with patch(
            "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay"
        ) as mock_delay:
            # Act: Execute the priority task
            priority_rag_pipeline_run_task(file_id, tenant.id)
            # Assert: Verify core processing occurred
            mock_file_service["get_content"].assert_called_once_with(file_id)
            mock_file_service["delete_file"].assert_called_once_with(file_id)
            assert mock_pipeline_generator.call_count == 1
            # Verify waiting tasks were processed, pull 1 task a time by default
            assert mock_delay.call_count == 1
            # Verify correct parameters for the call
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            # First-pushed id is dispatched first (FIFO relative to push order)
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0]
            assert call_kwargs.get("tenant_id") == tenant.id
            # Verify queue still has remaining tasks (only 1 was pulled)
            remaining_tasks = queue.pull_tasks(count=10)
            assert len(remaining_tasks) == 1  # 2 original - 1 pulled = 1 remaining
    def test_rag_pipeline_run_task_legacy_compatibility(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test regular RAG pipeline run task with legacy Redis queue format for backward compatibility.
        This test simulates the scenario where:
        - Old code writes file IDs directly to Redis list using lpush
        - New worker processes these legacy queue entries
        - Ensures backward compatibility during deployment transition
        Legacy format: redis_client.lpush(tenant_self_pipeline_task_queue, upload_file.id)
        New format: TenantIsolatedTaskQueue.push_tasks([file_id])
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
        file_content = self._create_file_content_for_entities(entities)
        # Mock file service
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].return_value = file_content
        # Simulate legacy Redis queue format - direct file IDs in Redis list
        from extensions.ext_redis import redis_client
        # Legacy queue key format (old code); must match the keys the new queue reads
        legacy_queue_key = f"tenant_self_pipeline_task_queue:{tenant.id}"
        legacy_task_key = f"tenant_pipeline_task:{tenant.id}"
        # Add legacy format data to Redis (simulating old code behavior)
        legacy_file_ids = [str(uuid.uuid4()) for _ in range(3)]
        for file_id_legacy in legacy_file_ids:
            redis_client.lpush(legacy_queue_key, file_id_legacy)
        # Set the task key to indicate there are waiting tasks (legacy behavior)
        redis_client.set(legacy_task_key, 1, ex=60 * 60)
        # Mock the task function calls (patch .delay so no real Celery dispatch happens)
        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
            # Act: Execute the priority task with new code but legacy queue data
            rag_pipeline_run_task(file_id, tenant.id)
            # Assert: Verify core processing occurred
            mock_file_service["get_content"].assert_called_once_with(file_id)
            mock_file_service["delete_file"].assert_called_once_with(file_id)
            assert mock_pipeline_generator.call_count == 1
            # Verify waiting tasks were processed, pull 1 task a time by default
            assert mock_delay.call_count == 1
            # Verify correct parameters for the call
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            # First-pushed legacy id is dispatched first (FIFO relative to lpush order)
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0]
            assert call_kwargs.get("tenant_id") == tenant.id
            # Verify that new code can process legacy queue entries
            # The new TenantIsolatedTaskQueue should be able to read from the legacy format
            queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
            # Verify queue still has remaining tasks (only 1 was pulled)
            remaining_tasks = queue.pull_tasks(count=10)
            assert len(remaining_tasks) == 2  # 3 original - 1 pulled = 2 remaining
            # Cleanup: Remove legacy test data
            redis_client.delete(legacy_queue_key)
            redis_client.delete(legacy_task_key)
    def test_rag_pipeline_run_task_with_waiting_tasks(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test regular RAG pipeline run task with waiting tasks in queue using real Redis.
        This test verifies:
        - Core task execution
        - Real Redis-based tenant queue processing of waiting tasks
        - Task function calls for waiting tasks
        - Queue management with multiple tasks using actual Redis operations
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
        file_content = self._create_file_content_for_entities(entities)
        # Mock file service
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].return_value = file_content
        # Use real Redis for TenantIsolatedTaskQueue
        queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
        # Add waiting tasks to the real Redis queue BEFORE running, so the task's
        # post-processing finds them and re-dispatches one.
        waiting_file_ids = [str(uuid.uuid4()) for _ in range(3)]
        queue.push_tasks(waiting_file_ids)
        # Mock the task function calls (patch .delay so no real Celery dispatch happens)
        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
            # Act: Execute the regular task
            rag_pipeline_run_task(file_id, tenant.id)
            # Assert: Verify core processing occurred
            mock_file_service["get_content"].assert_called_once_with(file_id)
            mock_file_service["delete_file"].assert_called_once_with(file_id)
            assert mock_pipeline_generator.call_count == 1
            # Verify waiting tasks were processed, pull 1 task a time by default
            assert mock_delay.call_count == 1
            # Verify correct parameters for the call
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            # First-pushed id is dispatched first (FIFO relative to push order)
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0]
            assert call_kwargs.get("tenant_id") == tenant.id
            # Verify queue still has remaining tasks (only 1 was pulled)
            remaining_tasks = queue.pull_tasks(count=10)
            assert len(remaining_tasks) == 2  # 3 original - 1 pulled = 2 remaining
    def test_priority_rag_pipeline_run_task_error_handling(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test error handling in priority RAG pipeline run task using real Redis.
        This test verifies:
        - Exception handling during core processing
        - Tenant queue cleanup even on errors using real Redis
        - Proper error logging
        - Function completes without raising exceptions
        - Queue management continues despite core processing errors
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
        file_content = self._create_file_content_for_entities(entities)
        # Mock file service
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].return_value = file_content
        # Mock PipelineGenerator to raise an exception during core processing
        mock_pipeline_generator.side_effect = Exception("Pipeline generation failed")
        # Use real Redis for TenantIsolatedTaskQueue
        queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
        # Add waiting task to the real Redis queue
        waiting_file_id = str(uuid.uuid4())
        queue.push_tasks([waiting_file_id])
        # Mock the task function calls (patch .delay so no real Celery dispatch happens)
        with patch(
            "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay"
        ) as mock_delay:
            # Act: Execute the priority task (should not raise exception)
            priority_rag_pipeline_run_task(file_id, tenant.id)
            # Assert: Verify error was handled gracefully
            # The function should not raise exceptions; file cleanup still runs
            mock_file_service["get_content"].assert_called_once_with(file_id)
            mock_file_service["delete_file"].assert_called_once_with(file_id)
            assert mock_pipeline_generator.call_count == 1
            # Verify waiting task was still processed despite core processing error
            mock_delay.assert_called_once()
            # Verify correct parameters for the call
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
            assert call_kwargs.get("tenant_id") == tenant.id
            # Verify queue is empty after processing (task was pulled)
            remaining_tasks = queue.pull_tasks(count=10)
            assert len(remaining_tasks) == 0
    def test_rag_pipeline_run_task_error_handling(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test error handling in regular RAG pipeline run task using real Redis.
        This test verifies:
        - Exception handling during core processing
        - Tenant queue cleanup even on errors using real Redis
        - Proper error logging
        - Function completes without raising exceptions
        - Queue management continues despite core processing errors
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
        file_content = self._create_file_content_for_entities(entities)
        # Mock file service
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].return_value = file_content
        # Mock PipelineGenerator to raise an exception during core processing
        mock_pipeline_generator.side_effect = Exception("Pipeline generation failed")
        # Use real Redis for TenantIsolatedTaskQueue
        queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
        # Add waiting task to the real Redis queue
        waiting_file_id = str(uuid.uuid4())
        queue.push_tasks([waiting_file_id])
        # Mock the task function calls (patch .delay so no real Celery dispatch happens)
        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
            # Act: Execute the regular task (should not raise exception)
            rag_pipeline_run_task(file_id, tenant.id)
            # Assert: Verify error was handled gracefully
            # The function should not raise exceptions; file cleanup still runs
            mock_file_service["get_content"].assert_called_once_with(file_id)
            mock_file_service["delete_file"].assert_called_once_with(file_id)
            assert mock_pipeline_generator.call_count == 1
            # Verify waiting task was still processed despite core processing error
            mock_delay.assert_called_once()
            # Verify correct parameters for the call
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
            assert call_kwargs.get("tenant_id") == tenant.id
            # Verify queue is empty after processing (task was pulled)
            remaining_tasks = queue.pull_tasks(count=10)
            assert len(remaining_tasks) == 0
    def test_priority_rag_pipeline_run_task_tenant_isolation(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test tenant isolation in priority RAG pipeline run task using real Redis.
        This test verifies:
        - Different tenants have isolated queues
        - Tasks from one tenant don't affect another tenant's queue
        - Queue operations are properly scoped to tenant
        """
        # Arrange: Create test data for two different tenants
        account1, tenant1, pipeline1, workflow1 = self._create_test_pipeline_and_workflow(db_session_with_containers)
        account2, tenant2, pipeline2, workflow2 = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities1 = self._create_rag_pipeline_invoke_entities(account1, tenant1, pipeline1, workflow1, count=1)
        entities2 = self._create_rag_pipeline_invoke_entities(account2, tenant2, pipeline2, workflow2, count=1)
        file_content1 = self._create_file_content_for_entities(entities1)
        file_content2 = self._create_file_content_for_entities(entities2)
        # Mock file service (side_effect list: first read returns tenant1's payload)
        file_id1 = str(uuid.uuid4())
        file_id2 = str(uuid.uuid4())
        mock_file_service["get_content"].side_effect = [file_content1, file_content2]
        # Use real Redis for TenantIsolatedTaskQueue
        queue1 = TenantIsolatedTaskQueue(tenant1.id, "pipeline")
        queue2 = TenantIsolatedTaskQueue(tenant2.id, "pipeline")
        # Add waiting tasks to both queues
        waiting_file_id1 = str(uuid.uuid4())
        waiting_file_id2 = str(uuid.uuid4())
        queue1.push_tasks([waiting_file_id1])
        queue2.push_tasks([waiting_file_id2])
        # Mock the task function calls (patch .delay so no real Celery dispatch happens)
        with patch(
            "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay"
        ) as mock_delay:
            # Act: Execute the priority task for tenant1 only
            priority_rag_pipeline_run_task(file_id1, tenant1.id)
            # Assert: Verify core processing occurred for tenant1
            assert mock_file_service["get_content"].call_count == 1
            assert mock_file_service["delete_file"].call_count == 1
            assert mock_pipeline_generator.call_count == 1
            # Verify only tenant1's waiting task was processed
            mock_delay.assert_called_once()
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1
            assert call_kwargs.get("tenant_id") == tenant1.id
            # Verify tenant1's queue is empty
            remaining_tasks1 = queue1.pull_tasks(count=10)
            assert len(remaining_tasks1) == 0
            # Verify tenant2's queue still has its task (isolation)
            remaining_tasks2 = queue2.pull_tasks(count=10)
            assert len(remaining_tasks2) == 1
            # Verify queue keys are different (reaches into private attrs deliberately)
            assert queue1._queue != queue2._queue
            assert queue1._task_key != queue2._task_key
    def test_rag_pipeline_run_task_tenant_isolation(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test tenant isolation in regular RAG pipeline run task using real Redis.
        This test verifies:
        - Different tenants have isolated queues
        - Tasks from one tenant don't affect another tenant's queue
        - Queue operations are properly scoped to tenant
        """
        # Arrange: Create test data for two different tenants
        account1, tenant1, pipeline1, workflow1 = self._create_test_pipeline_and_workflow(db_session_with_containers)
        account2, tenant2, pipeline2, workflow2 = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities1 = self._create_rag_pipeline_invoke_entities(account1, tenant1, pipeline1, workflow1, count=1)
        entities2 = self._create_rag_pipeline_invoke_entities(account2, tenant2, pipeline2, workflow2, count=1)
        file_content1 = self._create_file_content_for_entities(entities1)
        file_content2 = self._create_file_content_for_entities(entities2)
        # Mock file service (side_effect list: first read returns tenant1's payload)
        file_id1 = str(uuid.uuid4())
        file_id2 = str(uuid.uuid4())
        mock_file_service["get_content"].side_effect = [file_content1, file_content2]
        # Use real Redis for TenantIsolatedTaskQueue
        queue1 = TenantIsolatedTaskQueue(tenant1.id, "pipeline")
        queue2 = TenantIsolatedTaskQueue(tenant2.id, "pipeline")
        # Add waiting tasks to both queues
        waiting_file_id1 = str(uuid.uuid4())
        waiting_file_id2 = str(uuid.uuid4())
        queue1.push_tasks([waiting_file_id1])
        queue2.push_tasks([waiting_file_id2])
        # Mock the task function calls (patch .delay so no real Celery dispatch happens)
        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
            # Act: Execute the regular task for tenant1 only
            rag_pipeline_run_task(file_id1, tenant1.id)
            # Assert: Verify core processing occurred for tenant1
            assert mock_file_service["get_content"].call_count == 1
            assert mock_file_service["delete_file"].call_count == 1
            assert mock_pipeline_generator.call_count == 1
            # Verify only tenant1's waiting task was processed
            mock_delay.assert_called_once()
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1
            assert call_kwargs.get("tenant_id") == tenant1.id
            # Verify tenant1's queue is empty
            remaining_tasks1 = queue1.pull_tasks(count=10)
            assert len(remaining_tasks1) == 0
            # Verify tenant2's queue still has its task (isolation)
            remaining_tasks2 = queue2.pull_tasks(count=10)
            assert len(remaining_tasks2) == 1
            # Verify queue keys are different (reaches into private attrs deliberately)
            assert queue1._queue != queue2._queue
            assert queue1._task_key != queue2._task_key
def test_run_single_rag_pipeline_task_success(
self, db_session_with_containers, mock_pipeline_generator, flask_app_with_containers
):
"""
Test successful run_single_rag_pipeline_task execution.
This test verifies:
- Single RAG pipeline task execution within Flask app context
- Entity validation and database queries
- PipelineGenerator._generate method call with correct parameters
- Proper Flask context handling
"""
# Arrange: Create test data
account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
entity_data = entities[0].model_dump()
# Act: Execute the single task
with flask_app_with_containers.app_context():
run_single_rag_pipeline_task(entity_data, flask_app_with_containers)
# Assert: Verify expected outcomes
# Verify PipelineGenerator._generate was called
assert mock_pipeline_generator.call_count == 1
# Verify call parameters
call = mock_pipeline_generator.call_args
call_kwargs = call[1] # Get keyword arguments
assert call_kwargs["pipeline"].id == pipeline.id
assert call_kwargs["workflow_id"] == workflow.id
assert call_kwargs["user"].id == account.id
assert call_kwargs["invoke_from"] == InvokeFrom.PUBLISHED
assert call_kwargs["streaming"] == False
assert isinstance(call_kwargs["application_generate_entity"], RagPipelineGenerateEntity)
def test_run_single_rag_pipeline_task_entity_validation_error(
self, db_session_with_containers, mock_pipeline_generator, flask_app_with_containers
):
"""
Test run_single_rag_pipeline_task with invalid entity data.
This test verifies:
- Proper error handling for invalid entity data
- Exception logging
- Function raises ValueError for missing entities
"""
# Arrange: Create entity data with valid UUIDs but non-existent entities
fake = Faker()
invalid_entity_data = {
"pipeline_id": str(uuid.uuid4()),
"application_generate_entity": {
"app_config": {
"app_id": str(uuid.uuid4()),
"app_name": "Test App",
"mode": "workflow",
"workflow_id": str(uuid.uuid4()),
},
"inputs": {"query": "Test query"},
"query": "Test query",
"response_mode": "blocking",
"user": str(uuid.uuid4()),
"files": [],
"conversation_id": str(uuid.uuid4()),
},
"user_id": str(uuid.uuid4()),
"tenant_id": str(uuid.uuid4()),
"workflow_id": str(uuid.uuid4()),
"streaming": False,
"workflow_execution_id": str(uuid.uuid4()),
"workflow_thread_pool_id": str(uuid.uuid4()),
}
# Act & Assert: Execute the single task with non-existent entities (should raise ValueError)
with flask_app_with_containers.app_context():
with pytest.raises(ValueError, match="Account .* not found"):
run_single_rag_pipeline_task(invalid_entity_data, flask_app_with_containers)
# Assert: Pipeline generator should not be called
mock_pipeline_generator.assert_not_called()
def test_run_single_rag_pipeline_task_database_entity_not_found(
self, db_session_with_containers, mock_pipeline_generator, flask_app_with_containers
):
"""
Test run_single_rag_pipeline_task with non-existent database entities.
This test verifies:
- Proper error handling for missing database entities
- Exception logging
- Function raises ValueError for missing entities
"""
# Arrange: Create test data with non-existent IDs
fake = Faker()
entity_data = {
"pipeline_id": str(uuid.uuid4()),
"application_generate_entity": {
"app_config": {
"app_id": str(uuid.uuid4()),
"app_name": "Test App",
"mode": "workflow",
"workflow_id": str(uuid.uuid4()),
},
"inputs": {"query": "Test query"},
"query": "Test query",
"response_mode": "blocking",
"user": str(uuid.uuid4()),
"files": [],
"conversation_id": str(uuid.uuid4()),
},
"user_id": str(uuid.uuid4()),
"tenant_id": str(uuid.uuid4()),
"workflow_id": str(uuid.uuid4()),
"streaming": False,
"workflow_execution_id": str(uuid.uuid4()),
"workflow_thread_pool_id": str(uuid.uuid4()),
}
# Act & Assert: Execute the single task with non-existent entities (should raise ValueError)
with flask_app_with_containers.app_context():
with pytest.raises(ValueError, match="Account .* not found"):
run_single_rag_pipeline_task(entity_data, flask_app_with_containers)
# Assert: Pipeline generator should not be called
mock_pipeline_generator.assert_not_called()
    def test_priority_rag_pipeline_run_task_file_not_found(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test priority RAG pipeline run task with non-existent file.
        This test verifies:
        - Proper error handling for missing files
        - Exception logging
        - Function raises Exception for file errors
        - Queue management continues despite file errors
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        # Mock file service to raise exception when the payload file is read
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].side_effect = Exception("File not found")
        # Use real Redis for TenantIsolatedTaskQueue
        queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
        # Add waiting task to the real Redis queue
        waiting_file_id = str(uuid.uuid4())
        queue.push_tasks([waiting_file_id])
        # Mock the task function calls (patch .delay so no real Celery dispatch happens)
        with patch(
            "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay"
        ) as mock_delay:
            # Act & Assert: Execute the priority task (should raise Exception)
            with pytest.raises(Exception, match="File not found"):
                priority_rag_pipeline_run_task(file_id, tenant.id)
            # Assert: the file was read but no entity was ever generated
            mock_file_service["get_content"].assert_called_once_with(file_id)
            mock_pipeline_generator.assert_not_called()
            # Verify waiting task was still processed despite file error
            mock_delay.assert_called_once()
            # Verify correct parameters for the call
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
            assert call_kwargs.get("tenant_id") == tenant.id
            # Verify queue is empty after processing (task was pulled)
            remaining_tasks = queue.pull_tasks(count=10)
            assert len(remaining_tasks) == 0
    def test_rag_pipeline_run_task_file_not_found(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test regular RAG pipeline run task with non-existent file.
        This test verifies:
        - Proper error handling for missing files
        - Exception logging
        - Function raises Exception for file errors
        - Queue management continues despite file errors
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        # Mock file service to raise exception when the payload file is read
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].side_effect = Exception("File not found")
        # Use real Redis for TenantIsolatedTaskQueue
        queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
        # Add waiting task to the real Redis queue
        waiting_file_id = str(uuid.uuid4())
        queue.push_tasks([waiting_file_id])
        # Mock the task function calls (patch .delay so no real Celery dispatch happens)
        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
            # Act & Assert: Execute the regular task (should raise Exception)
            with pytest.raises(Exception, match="File not found"):
                rag_pipeline_run_task(file_id, tenant.id)
            # Assert: the file was read but no entity was ever generated
            mock_file_service["get_content"].assert_called_once_with(file_id)
            mock_pipeline_generator.assert_not_called()
            # Verify waiting task was still processed despite file error
            mock_delay.assert_called_once()
            # Verify correct parameters for the call
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
            assert call_kwargs.get("tenant_id") == tenant.id
            # Verify queue is empty after processing (task was pulled)
            remaining_tasks = queue.pull_tasks(count=10)
            assert len(remaining_tasks) == 0