@@ -0,0 +1,793 @@
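"""
Integration tests for add_document_to_index_task using testcontainers.

This module tests the document indexing task against real database and Redis
instances from the testcontainers infrastructure, with the index processor
mocked out, covering the success path, alternate index types, error handling,
and segment filtering behavior.
"""
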
from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from core.rag.index_processor.constant.index_type import IndexType
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, DatasetAutoDisableLog, Document, DocumentSegment
from tasks.add_document_to_index_task import add_document_to_index_task
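
# Expected behavior of add_document_to_index_task, as exercised by the tests
# below: load the document; return early unless indexing_status == "completed";
# build index documents from every DocumentSegment with status == "completed",
# ordered by position; hand them to the index processor for the document's
# doc_form; enable all of the document's segments; delete DatasetAutoDisableLog
# rows for the document; and finally clear the Redis key
# f"document_{document_id}_indexing".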


class TestAddDocumentToIndexTask:
    """Integration tests for add_document_to_index_task using testcontainers."""

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("tasks.add_document_to_index_task.IndexProcessorFactory") as mock_index_processor_factory,
        ):
            # Setup mock index processor
            mock_processor = MagicMock()
            mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor

            yield {
                "index_processor_factory": mock_index_processor_factory,
                "index_processor": mock_processor,
            }

    def _create_test_dataset_and_document(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Helper method to create a test dataset and document for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            mock_external_service_dependencies: Mock dependencies

        Returns:
            tuple: (dataset, document) - Created dataset and document instances
        """
        fake = Faker()

        # Create account and tenant
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()

        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()

        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()

        # Create dataset
        dataset = Dataset(
            id=fake.uuid4(),
            tenant_id=tenant.id,
            name=fake.company(),
            description=fake.text(max_nb_chars=100),
            data_source_type="upload_file",
            indexing_technique="high_quality",
            created_by=account.id,
        )
        db.session.add(dataset)
        db.session.commit()

        # Create document
        document = Document(
            id=fake.uuid4(),
            tenant_id=tenant.id,
            dataset_id=dataset.id,
            position=1,
            data_source_type="upload_file",
            batch="test_batch",
            name=fake.file_name(),
            created_from="upload_file",
            created_by=account.id,
            indexing_status="completed",
            enabled=True,
            doc_form=IndexType.PARAGRAPH_INDEX,
        )
        db.session.add(document)
        db.session.commit()

        # Refresh dataset to ensure doc_form property works correctly
        db.session.refresh(dataset)

        return dataset, document

    def _create_test_segments(self, db_session_with_containers, document, dataset):
        """
        Helper method to create test document segments.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            document: Document instance
            dataset: Dataset instance

        Returns:
            list: List of created DocumentSegment instances
        """
        fake = Faker()
        segments = []

        for i in range(3):
            segment = DocumentSegment(
                id=fake.uuid4(),
                tenant_id=document.tenant_id,
                dataset_id=dataset.id,
                document_id=document.id,
                position=i,
                content=fake.text(max_nb_chars=200),
                word_count=len(fake.text(max_nb_chars=200).split()),
                tokens=len(fake.text(max_nb_chars=200).split()) * 2,
                index_node_id=f"node_{i}",
                index_node_hash=f"hash_{i}",
                enabled=False,
                status="completed",
                created_by=document.created_by,
            )
            db.session.add(segment)
            segments.append(segment)

        db.session.commit()
        return segments

    def test_add_document_to_index_success(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Test successful document indexing with paragraph index type.

        This test verifies:
        - Proper document retrieval from database
        - Correct segment processing and document creation
        - Index processor integration
        - Database state updates
        - Segment status changes
        - Redis cache key deletion
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)

        # Set up Redis cache key to simulate indexing in progress
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)  # 5 minutes expiry

        # Verify cache key exists
        assert redis_client.exists(indexing_cache_key) == 1

        # Act: Execute the task
        add_document_to_index_task(document.id)

        # Assert: Verify the expected outcomes
        # Verify index processor was called correctly
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()

        # Verify database state changes
        db.session.refresh(document)
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is True
            assert segment.disabled_at is None
            assert segment.disabled_by is None

        # Verify Redis cache key was deleted
        assert redis_client.exists(indexing_cache_key) == 0

    def test_add_document_to_index_with_different_index_type(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test document indexing with different index types.

        This test verifies:
        - Proper handling of different index types
        - Index processor factory integration
        - Document processing with various configurations
        - Redis cache key deletion
        """
        # Arrange: Create test data with different index type
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Update document to use different index type
        document.doc_form = IndexType.QA_INDEX
        db.session.commit()

        # Refresh dataset to ensure doc_form property reflects the updated document
        db.session.refresh(dataset)

        # Create segments
        segments = self._create_test_segments(db_session_with_containers, document, dataset)

        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)

        # Act: Execute the task
        add_document_to_index_task(document.id)

        # Assert: Verify different index type handling
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.QA_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()

        # Verify the load method was called with correct parameters
        call_args = mock_external_service_dependencies["index_processor"].load.call_args
        assert call_args is not None
        documents = call_args[0][1]  # Second argument should be documents list
        assert len(documents) == 3

        # Verify database state changes
        db.session.refresh(document)
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is True
            assert segment.disabled_at is None
            assert segment.disabled_by is None

        # Verify Redis cache key was deleted
        assert redis_client.exists(indexing_cache_key) == 0

    def test_add_document_to_index_document_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of non-existent document.

        This test verifies:
        - Proper error handling for missing documents
        - Early return without processing
        - Database session cleanup
        - No unnecessary index processor calls
        - Redis cache key not affected (since it was never created)
        """
        # Arrange: Use non-existent document ID
        fake = Faker()
        non_existent_id = str(fake.uuid4())

        # Act: Execute the task with non-existent document
        add_document_to_index_task(non_existent_id)

        # Assert: Verify no processing occurred
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()

        # Note: redis_client.delete is not called when document is not found
        # because indexing_cache_key is not defined in that case

    def test_add_document_to_index_invalid_indexing_status(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of document with invalid indexing status.

        This test verifies:
        - Early return when indexing_status is not "completed"
        - No index processing for documents not ready for indexing
        - Proper database session cleanup
        - No unnecessary external service calls
        - Redis cache key not affected
        """
        # Arrange: Create test data with invalid indexing status
        _, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Set invalid indexing status
        document.indexing_status = "processing"
        db.session.commit()

        # Act: Execute the task
        add_document_to_index_task(document.id)

        # Assert: Verify no processing occurred
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()

    def test_add_document_to_index_dataset_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling when document's dataset doesn't exist.

        This test verifies:
        - Proper error handling when dataset is missing
        - Document status is set to error
        - Document is disabled
        - Error information is recorded
        - Redis cache is cleared despite error
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)

        # Delete the dataset to simulate dataset not found scenario
        db.session.delete(dataset)
        db.session.commit()

        # Act: Execute the task
        add_document_to_index_task(document.id)

        # Assert: Verify error handling
        db.session.refresh(document)
        assert document.enabled is False
        assert document.indexing_status == "error"
        assert document.error is not None
        assert "doesn't exist" in document.error
        assert document.disabled_at is not None

        # Verify no index processing occurred
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()

        # Verify redis cache was cleared despite error
        assert redis_client.exists(indexing_cache_key) == 0

    def test_add_document_to_index_with_parent_child_structure(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test document indexing with parent-child structure.

        This test verifies:
        - Proper handling of PARENT_CHILD_INDEX type
        - Child document creation from segments
        - Correct document structure for parent-child indexing
        - Index processor receives properly structured documents
        - Redis cache key deletion
        """
        # Arrange: Create test data with parent-child index type
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Update document to use parent-child index type
        document.doc_form = IndexType.PARENT_CHILD_INDEX
        db.session.commit()

        # Refresh dataset to ensure doc_form property reflects the updated document
        db.session.refresh(dataset)

        # Create segments with mock child chunks
        segments = self._create_test_segments(db_session_with_containers, document, dataset)

        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)

        # Mock the get_child_chunks method for each segment
        with patch.object(DocumentSegment, "get_child_chunks") as mock_get_child_chunks:
            # Setup mock to return child chunks for each segment
            mock_child_chunks = []
            for i in range(2):  # Each segment has 2 child chunks
                mock_child = MagicMock()
                mock_child.content = f"child_content_{i}"
                mock_child.index_node_id = f"child_node_{i}"
                mock_child.index_node_hash = f"child_hash_{i}"
                mock_child_chunks.append(mock_child)

            mock_get_child_chunks.return_value = mock_child_chunks
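
            # The task under test is expected to call segment.get_child_chunks()
            # for each segment and attach the results as child documents; the
            # assertions on doc.children below rely on that behavior.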

            # Act: Execute the task
            add_document_to_index_task(document.id)

            # Assert: Verify parent-child index processing
            mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
                IndexType.PARENT_CHILD_INDEX
            )
            mock_external_service_dependencies["index_processor"].load.assert_called_once()

            # Verify the load method was called with correct parameters
            call_args = mock_external_service_dependencies["index_processor"].load.call_args
            assert call_args is not None
            documents = call_args[0][1]  # Second argument should be documents list
            assert len(documents) == 3  # 3 segments

            # Verify each document has children
            for doc in documents:
                assert hasattr(doc, "children")
                assert len(doc.children) == 2  # Each document has 2 children

            # Verify database state changes
            db.session.refresh(document)
            for segment in segments:
                db.session.refresh(segment)
                assert segment.enabled is True
                assert segment.disabled_at is None
                assert segment.disabled_by is None

            # Verify redis cache was cleared
            assert redis_client.exists(indexing_cache_key) == 0

    def test_add_document_to_index_with_already_enabled_segments(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test document indexing when segments are already enabled.

        This test verifies:
        - Segments with status="completed" are processed regardless of enabled status
        - Index processing occurs with all completed segments
        - Auto disable log deletion still occurs
        - Redis cache is cleared
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Create segments that are already enabled
        fake = Faker()
        segments = []
        for i in range(3):
            segment = DocumentSegment(
                id=fake.uuid4(),
                tenant_id=document.tenant_id,
                dataset_id=dataset.id,
                document_id=document.id,
                position=i,
                content=fake.text(max_nb_chars=200),
                word_count=len(fake.text(max_nb_chars=200).split()),
                tokens=len(fake.text(max_nb_chars=200).split()) * 2,
                index_node_id=f"node_{i}",
                index_node_hash=f"hash_{i}",
                enabled=True,  # Already enabled
                status="completed",
                created_by=document.created_by,
            )
            db.session.add(segment)
            segments.append(segment)

        db.session.commit()

        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)

        # Act: Execute the task
        add_document_to_index_task(document.id)

        # Assert: Verify index processing occurred with all completed segments
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()

        # Verify the load method was called with all completed segments
        # (implementation doesn't filter by enabled status, only by status="completed")
        call_args = mock_external_service_dependencies["index_processor"].load.call_args
        assert call_args is not None
        documents = call_args[0][1]  # Second argument should be documents list
        assert len(documents) == 3  # All completed segments are processed

        # Verify redis cache was cleared
        assert redis_client.exists(indexing_cache_key) == 0

    def test_add_document_to_index_auto_disable_log_deletion(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test that auto disable logs are properly deleted during indexing.

        This test verifies:
        - Auto disable log entries are deleted for the document
        - Database state is properly managed
        - Index processing continues normally
        - Redis cache key deletion
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)

        # Create some auto disable log entries
        fake = Faker()
        auto_disable_logs = []
        for _ in range(2):
            log_entry = DatasetAutoDisableLog(
                tenant_id=document.tenant_id,
                dataset_id=dataset.id,
                document_id=document.id,
            )
            log_entry.id = str(fake.uuid4())
            db.session.add(log_entry)
            auto_disable_logs.append(log_entry)

        db.session.commit()

        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)

        # Verify logs exist before processing
        existing_logs = (
            db.session.query(DatasetAutoDisableLog).where(DatasetAutoDisableLog.document_id == document.id).all()
        )
        assert len(existing_logs) == 2

        # Act: Execute the task
        add_document_to_index_task(document.id)

        # Assert: Verify auto disable logs were deleted
        remaining_logs = (
            db.session.query(DatasetAutoDisableLog).where(DatasetAutoDisableLog.document_id == document.id).all()
        )
        assert len(remaining_logs) == 0

        # Verify index processing occurred normally
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()

        # Verify segments were enabled
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is True

        # Verify redis cache was cleared
        assert redis_client.exists(indexing_cache_key) == 0

    def test_add_document_to_index_general_exception_handling(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test general exception handling during indexing process.

        This test verifies:
        - Exceptions are properly caught and handled
        - Document status is set to error
        - Document is disabled
        - Error information is recorded
        - Redis cache is still cleared
        - Database session is properly closed
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)

        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)

        # Mock the index processor to raise an exception
        mock_external_service_dependencies["index_processor"].load.side_effect = Exception("Index processing failed")

        # Act: Execute the task
        add_document_to_index_task(document.id)

        # Assert: Verify error handling
        db.session.refresh(document)
        assert document.enabled is False
        assert document.indexing_status == "error"
        assert document.error is not None
        assert "Index processing failed" in document.error
        assert document.disabled_at is not None

        # Verify segments were not enabled due to error
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is False  # Should remain disabled due to error

        # Verify redis cache was still cleared despite error
        assert redis_client.exists(indexing_cache_key) == 0

    def test_add_document_to_index_segment_filtering_edge_cases(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test segment filtering with various edge cases.

        This test verifies:
        - Only segments with status="completed" are processed (regardless of enabled status)
        - Segments with status!="completed" are NOT processed
        - Segments are ordered by position correctly
        - Mixed segment states are handled properly
        - All segments are updated to enabled=True after processing
        - Redis cache key deletion
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Create segments with mixed states
        fake = Faker()
        segments = []

        # Segment 1: Should be processed (enabled=False, status="completed")
        segment1 = DocumentSegment(
            id=fake.uuid4(),
            tenant_id=document.tenant_id,
            dataset_id=dataset.id,
            document_id=document.id,
            position=0,
            content=fake.text(max_nb_chars=200),
            word_count=len(fake.text(max_nb_chars=200).split()),
            tokens=len(fake.text(max_nb_chars=200).split()) * 2,
            index_node_id="node_0",
            index_node_hash="hash_0",
            enabled=False,
            status="completed",
            created_by=document.created_by,
        )
        db.session.add(segment1)
        segments.append(segment1)

        # Segment 2: Should be processed (enabled=True, status="completed")
        # Note: Implementation doesn't filter by enabled status, only by status="completed"
        segment2 = DocumentSegment(
            id=fake.uuid4(),
            tenant_id=document.tenant_id,
            dataset_id=dataset.id,
            document_id=document.id,
            position=1,
            content=fake.text(max_nb_chars=200),
            word_count=len(fake.text(max_nb_chars=200).split()),
            tokens=len(fake.text(max_nb_chars=200).split()) * 2,
            index_node_id="node_1",
            index_node_hash="hash_1",
            enabled=True,  # Already enabled, but will still be processed
            status="completed",
            created_by=document.created_by,
        )
        db.session.add(segment2)
        segments.append(segment2)

        # Segment 3: Should NOT be processed (enabled=False, status="processing")
        segment3 = DocumentSegment(
            id=fake.uuid4(),
            tenant_id=document.tenant_id,
            dataset_id=dataset.id,
            document_id=document.id,
            position=2,
            content=fake.text(max_nb_chars=200),
            word_count=len(fake.text(max_nb_chars=200).split()),
            tokens=len(fake.text(max_nb_chars=200).split()) * 2,
            index_node_id="node_2",
            index_node_hash="hash_2",
            enabled=False,
            status="processing",  # Not completed
            created_by=document.created_by,
        )
        db.session.add(segment3)
        segments.append(segment3)

        # Segment 4: Should be processed (enabled=False, status="completed")
        segment4 = DocumentSegment(
            id=fake.uuid4(),
            tenant_id=document.tenant_id,
            dataset_id=dataset.id,
            document_id=document.id,
            position=3,
            content=fake.text(max_nb_chars=200),
            word_count=len(fake.text(max_nb_chars=200).split()),
            tokens=len(fake.text(max_nb_chars=200).split()) * 2,
            index_node_id="node_3",
            index_node_hash="hash_3",
            enabled=False,
            status="completed",
            created_by=document.created_by,
        )
        db.session.add(segment4)
        segments.append(segment4)

        db.session.commit()

        # Set up Redis cache key
        indexing_cache_key = f"document_{document.id}_indexing"
        redis_client.set(indexing_cache_key, "processing", ex=300)

        # Act: Execute the task
        add_document_to_index_task(document.id)

        # Assert: Verify only eligible segments were processed
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()

        # Verify the load method was called with correct parameters
        call_args = mock_external_service_dependencies["index_processor"].load.call_args
        assert call_args is not None
        documents = call_args[0][1]  # Second argument should be documents list
        assert len(documents) == 3  # 3 segments with status="completed" should be processed

        # Verify correct segments were processed (by position order)
        # Segments 1, 2, 4 should be processed (positions 0, 1, 3)
        # Segment 3 is skipped (position 2, status="processing")
        assert documents[0].metadata["doc_id"] == "node_0"  # segment1, position 0
        assert documents[1].metadata["doc_id"] == "node_1"  # segment2, position 1
        assert documents[2].metadata["doc_id"] == "node_3"  # segment4, position 3

        # Verify database state changes
        db.session.refresh(document)
        db.session.refresh(segment1)
        db.session.refresh(segment2)
        db.session.refresh(segment3)
        db.session.refresh(segment4)

        # All segments should be enabled because the task updates ALL segments for the document
        assert segment1.enabled is True
        assert segment2.enabled is True  # Was already enabled, stays True
        assert segment3.enabled is True  # Was not processed but still updated to True
        assert segment4.enabled is True

        # Verify redis cache was cleared
        assert redis_client.exists(indexing_cache_key) == 0

    def test_add_document_to_index_comprehensive_error_scenarios(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test comprehensive error scenarios and recovery.

        This test verifies:
        - Multiple types of exceptions are handled properly
        - Error state is consistently managed
        - Resource cleanup occurs in all error cases
        - Database session management is robust
        - Redis cache key deletion in all scenarios
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)

        # Test different exception types
        test_exceptions = [
            ("Database connection error", Exception("Database connection failed")),
            ("Index processor error", RuntimeError("Index processor initialization failed")),
            ("Memory error", MemoryError("Out of memory")),
            ("Value error", ValueError("Invalid index type")),
        ]

        for error_name, exception in test_exceptions:
            # Reset mocks for each test
            mock_external_service_dependencies["index_processor"].load.side_effect = exception

            # Reset document state
            document.enabled = True
            document.indexing_status = "completed"
            document.error = None
            document.disabled_at = None
            db.session.commit()

            # Set up Redis cache key
            indexing_cache_key = f"document_{document.id}_indexing"
            redis_client.set(indexing_cache_key, "processing", ex=300)

            # Act: Execute the task
            add_document_to_index_task(document.id)

            # Assert: Verify consistent error handling
            db.session.refresh(document)
            assert document.enabled is False, f"Document should be disabled for {error_name}"
            assert document.indexing_status == "error", f"Document status should be error for {error_name}"
            assert document.error is not None, f"Error should be recorded for {error_name}"
            assert str(exception) in document.error, f"Error message should contain exception for {error_name}"
            assert document.disabled_at is not None, f"Disabled timestamp should be set for {error_name}"

            # Verify segments remain disabled due to error
            for segment in segments:
                db.session.refresh(segment)
                assert segment.enabled is False, f"Segments should remain disabled for {error_name}"

            # Verify redis cache was still cleared despite error
            assert redis_client.exists(indexing_cache_key) == 0, f"Redis cache should be cleared for {error_name}"

@@ -0,0 +1,720 @@
"""
Integration tests for batch_clean_document_task using testcontainers.

This module tests the batch document cleaning functionality with real database
and storage containers to ensure proper cleanup of documents, segments, and files.
"""

import json
import uuid
from unittest.mock import Mock, patch

import pytest
from faker import Faker

from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment
from models.model import UploadFile
from tasks.batch_clean_document_task import batch_clean_document_task
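
# Expected behavior of batch_clean_document_task, as exercised by the tests
# below: load the dataset (log an error and stop if it is missing); clean the
# segments' index nodes via the index processor for the given doc_form; delete
# image files referenced in segment content and the passed-in upload files from
# storage; and remove the segment and upload-file rows from the database,
# continuing even if a storage deletion fails.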


class TestBatchCleanDocumentTask:
    """Integration tests for batch_clean_document_task using testcontainers."""

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("extensions.ext_storage.storage") as mock_storage,
            patch("core.rag.index_processor.index_processor_factory.IndexProcessorFactory") as mock_index_factory,
            patch("core.tools.utils.web_reader_tool.get_image_upload_file_ids") as mock_get_image_ids,
        ):
            # Setup default mock returns
            mock_storage.delete.return_value = None

            # Mock index processor
            mock_index_processor = Mock()
            mock_index_processor.clean.return_value = None
            mock_index_factory.return_value.init_index_processor.return_value = mock_index_processor

            # Mock image file ID extraction
            mock_get_image_ids.return_value = []

            yield {
                "storage": mock_storage,
                "index_factory": mock_index_factory,
                "index_processor": mock_index_processor,
                "get_image_ids": mock_get_image_ids,
            }

    def _create_test_account(self, db_session_with_containers):
        """
        Helper method to create a test account for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure

        Returns:
            Account: Created account instance
        """
        fake = Faker()

        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )

        db.session.add(account)
        db.session.commit()

        # Create tenant for the account
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()

        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()

        # Set current tenant for account
        account.current_tenant = tenant

        return account

    def _create_test_dataset(self, db_session_with_containers, account):
        """
        Helper method to create a test dataset for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            account: Account instance

        Returns:
            Dataset: Created dataset instance
        """
        fake = Faker()

        dataset = Dataset(
            id=str(uuid.uuid4()),
            tenant_id=account.current_tenant.id,
            name=fake.word(),
            description=fake.sentence(),
            data_source_type="upload_file",
            created_by=account.id,
            embedding_model="text-embedding-ada-002",
            embedding_model_provider="openai",
        )

        db.session.add(dataset)
        db.session.commit()

        return dataset

    def _create_test_document(self, db_session_with_containers, dataset, account):
        """
        Helper method to create a test document for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            dataset: Dataset instance
            account: Account instance

        Returns:
            Document: Created document instance
        """
        fake = Faker()

        document = Document(
            id=str(uuid.uuid4()),
            tenant_id=account.current_tenant.id,
            dataset_id=dataset.id,
            position=0,
            name=fake.word(),
            data_source_type="upload_file",
            data_source_info=json.dumps({"upload_file_id": str(uuid.uuid4())}),
            batch="test_batch",
            created_from="test",
            created_by=account.id,
            indexing_status="completed",
            doc_form="text_model",
        )

        db.session.add(document)
        db.session.commit()

        return document

    def _create_test_document_segment(self, db_session_with_containers, document, account):
        """
        Helper method to create a test document segment for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            document: Document instance
            account: Account instance

        Returns:
            DocumentSegment: Created document segment instance
        """
        fake = Faker()

        segment = DocumentSegment(
            id=str(uuid.uuid4()),
            tenant_id=account.current_tenant.id,
            dataset_id=document.dataset_id,
            document_id=document.id,
            position=0,
            content=fake.text(),
            word_count=100,
            tokens=50,
            index_node_id=str(uuid.uuid4()),
            created_by=account.id,
            status="completed",
        )

        db.session.add(segment)
        db.session.commit()

        return segment

    def _create_test_upload_file(self, db_session_with_containers, account):
        """
        Helper method to create a test upload file for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            account: Account instance

        Returns:
            UploadFile: Created upload file instance
        """
        fake = Faker()

        from models.enums import CreatorUserRole

        upload_file = UploadFile(
            tenant_id=account.current_tenant.id,
            storage_type="local",
            key=f"test_files/{fake.file_name()}",
            name=fake.file_name(),
            size=1024,
            extension="txt",
            mime_type="text/plain",
            created_by_role=CreatorUserRole.ACCOUNT,
            created_by=account.id,
            created_at=naive_utc_now(),
            used=False,
        )

        db.session.add(upload_file)
        db.session.commit()

        return upload_file

    def test_batch_clean_document_task_successful_cleanup(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful cleanup of documents with segments and files.

        This test verifies that the task properly cleans up:
        - Document segments from the index
        - Associated image files from storage
        - Upload files from storage and database
        """
        # Create test data
        account = self._create_test_account(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account)
        document = self._create_test_document(db_session_with_containers, dataset, account)
        segment = self._create_test_document_segment(db_session_with_containers, document, account)
        upload_file = self._create_test_upload_file(db_session_with_containers, account)

        # Update document to reference the upload file
        document.data_source_info = json.dumps({"upload_file_id": upload_file.id})
        db.session.commit()

        # Store original IDs for verification
        document_id = document.id
        segment_id = segment.id
        file_id = upload_file.id

        # Execute the task
        batch_clean_document_task(
            document_ids=[document_id], dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=[file_id]
        )

        # Verify that the task completed successfully
        # The task should have processed the segment and cleaned up the database

        # Verify database cleanup
        db.session.commit()  # Ensure all changes are committed

        # Check that segment is deleted
        deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
        assert deleted_segment is None

        # Check that upload file is deleted
        deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
        assert deleted_file is None

    def test_batch_clean_document_task_with_image_files(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test cleanup of documents containing image references.

        This test verifies that the task properly handles documents with
        image content and cleans up associated segments.
        """
        # Create test data
        account = self._create_test_account(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account)
        document = self._create_test_document(db_session_with_containers, dataset, account)

        # Create segment with simple content (no image references)
        segment = DocumentSegment(
            id=str(uuid.uuid4()),
            tenant_id=account.current_tenant.id,
            dataset_id=document.dataset_id,
            document_id=document.id,
            position=0,
            content="Simple text content without images",
            word_count=100,
            tokens=50,
            index_node_id=str(uuid.uuid4()),
            created_by=account.id,
            status="completed",
        )

        db.session.add(segment)
        db.session.commit()

        # Store original IDs for verification
        segment_id = segment.id
        document_id = document.id

        # Execute the task
        batch_clean_document_task(
            document_ids=[document_id], dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=[]
        )

        # Verify database cleanup
        db.session.commit()

        # Check that segment is deleted
        deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
        assert deleted_segment is None

        # Verify that the task completed successfully by checking the log output
        # The task should have processed the segment and cleaned up the database

    def test_batch_clean_document_task_no_segments(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test cleanup when document has no segments.

        This test verifies that the task handles documents without segments
        gracefully and still cleans up associated files.
        """
        # Create test data without segments
        account = self._create_test_account(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account)
        document = self._create_test_document(db_session_with_containers, dataset, account)
        upload_file = self._create_test_upload_file(db_session_with_containers, account)

        # Update document to reference the upload file
        document.data_source_info = json.dumps({"upload_file_id": upload_file.id})
        db.session.commit()

        # Store original IDs for verification
        document_id = document.id
        file_id = upload_file.id

        # Execute the task
        batch_clean_document_task(
            document_ids=[document_id], dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=[file_id]
        )

        # Verify that the task completed successfully
        # Since there are no segments, the task should handle this gracefully

        # Verify database cleanup
        db.session.commit()

        # Check that upload file is deleted
        deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
        assert deleted_file is None

    def test_batch_clean_document_task_dataset_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test cleanup when dataset is not found.

        This test verifies that the task properly handles the case where
        the specified dataset does not exist in the database.
        """
        # Create test data
        account = self._create_test_account(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account)
        document = self._create_test_document(db_session_with_containers, dataset, account)

        # Store original IDs for verification
        document_id = document.id
        dataset_id = dataset.id

        # Delete the dataset to simulate not found scenario
        db.session.delete(dataset)
        db.session.commit()

        # Execute the task with non-existent dataset
        batch_clean_document_task(document_ids=[document_id], dataset_id=dataset_id, doc_form="text_model", file_ids=[])

        # Verify that no index processing occurred
        mock_external_service_dependencies["index_processor"].clean.assert_not_called()

        # Verify that no storage operations occurred
        mock_external_service_dependencies["storage"].delete.assert_not_called()

        # Verify that no database cleanup occurred
        db.session.commit()

        # Document should still exist since cleanup failed
        existing_document = db.session.query(Document).filter_by(id=document_id).first()
        assert existing_document is not None

    def test_batch_clean_document_task_storage_cleanup_failure(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test cleanup when storage operations fail.

        This test verifies that the task continues processing even when
        storage cleanup operations fail, ensuring database cleanup still occurs.
        """
        # Create test data
        account = self._create_test_account(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account)
        document = self._create_test_document(db_session_with_containers, dataset, account)
        segment = self._create_test_document_segment(db_session_with_containers, document, account)
        upload_file = self._create_test_upload_file(db_session_with_containers, account)

        # Update document to reference the upload file
        document.data_source_info = json.dumps({"upload_file_id": upload_file.id})
        db.session.commit()

        # Store original IDs for verification
        document_id = document.id
        segment_id = segment.id
        file_id = upload_file.id

        # Mock storage.delete to raise an exception
        mock_external_service_dependencies["storage"].delete.side_effect = Exception("Storage error")

        # Execute the task
        batch_clean_document_task(
            document_ids=[document_id], dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=[file_id]
        )

        # Verify that the task completed successfully despite storage failure
        # The task should continue processing even when storage operations fail

        # Verify database cleanup still occurred despite storage failure
        db.session.commit()

        # Check that segment is deleted from database
        deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
        assert deleted_segment is None

        # Check that upload file is deleted from database
        deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
        assert deleted_file is None

    def test_batch_clean_document_task_multiple_documents(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test cleanup of multiple documents in a single batch operation.

        This test verifies that the task can handle multiple documents
        efficiently and cleans up all associated resources.
        """
        # Create test data for multiple documents
        account = self._create_test_account(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account)

        documents = []
        segments = []
        upload_files = []

        # Create 3 documents with segments and files
        for i in range(3):
            document = self._create_test_document(db_session_with_containers, dataset, account)
            segment = self._create_test_document_segment(db_session_with_containers, document, account)
            upload_file = self._create_test_upload_file(db_session_with_containers, account)

            # Update document to reference the upload file
            document.data_source_info = json.dumps({"upload_file_id": upload_file.id})

            documents.append(document)
            segments.append(segment)
            upload_files.append(upload_file)

        db.session.commit()

        # Store original IDs for verification
        document_ids = [doc.id for doc in documents]
        segment_ids = [seg.id for seg in segments]
        file_ids = [file.id for file in upload_files]

        # Execute the task with multiple documents
        batch_clean_document_task(
            document_ids=document_ids, dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=file_ids
        )

        # Verify that the task completed successfully for all documents
        # The task should process all documents and clean up all associated resources

        # Verify database cleanup for all resources
        db.session.commit()

        # Check that all segments are deleted
        for segment_id in segment_ids:
            deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
            assert deleted_segment is None

        # Check that all upload files are deleted
        for file_id in file_ids:
            deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
            assert deleted_file is None

    def test_batch_clean_document_task_different_doc_forms(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test cleanup with different document form types.

        This test verifies that the task properly handles different
        document form types and creates the appropriate index processor.
        """
        # Create test data
        account = self._create_test_account(db_session_with_containers)

        # Test different doc_form types
        doc_forms = ["text_model", "qa_model", "hierarchical_model"]

        for doc_form in doc_forms:
            dataset = self._create_test_dataset(db_session_with_containers, account)
            db.session.commit()

            document = self._create_test_document(db_session_with_containers, dataset, account)
            # Update document doc_form
            document.doc_form = doc_form
            db.session.commit()

            segment = self._create_test_document_segment(db_session_with_containers, document, account)

            # Store the ID before the object is deleted
            segment_id = segment.id

            try:
                # Execute the task
                batch_clean_document_task(
                    document_ids=[document.id], dataset_id=dataset.id, doc_form=doc_form, file_ids=[]
                )

                # Verify that the task completed successfully for this doc_form
                # The task should handle different document forms correctly

                # Verify database cleanup
                db.session.commit()

                # Check that segment is deleted
                deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
                assert deleted_segment is None

            except Exception:
                # If the task fails due to external service issues (e.g., an
                # unavailable plugin daemon, a common scenario in test
                # environments), the database should still be in a consistent
                # state: the segment may or may not have been deleted depending
                # on how far the task got before failing, and either outcome is
                # acceptable here.
                db.session.commit()

    def test_batch_clean_document_task_large_batch_performance(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test cleanup performance with a large batch of documents.

        This test verifies that the task can handle large batches efficiently
        and maintains performance characteristics.
        """
        import time

        # Create test data for large batch
        account = self._create_test_account(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account)

        documents = []
        segments = []
        upload_files = []

        # Create 10 documents with segments and files (larger batch)
        batch_size = 10
        for i in range(batch_size):
            document = self._create_test_document(db_session_with_containers, dataset, account)
            segment = self._create_test_document_segment(db_session_with_containers, document, account)
            upload_file = self._create_test_upload_file(db_session_with_containers, account)

            # Update document to reference the upload file
            document.data_source_info = json.dumps({"upload_file_id": upload_file.id})

            documents.append(document)
            segments.append(segment)
            upload_files.append(upload_file)

        db.session.commit()

        # Store original IDs for verification
        document_ids = [doc.id for doc in documents]
        segment_ids = [seg.id for seg in segments]
        file_ids = [file.id for file in upload_files]

        # Measure execution time
        start_time = time.perf_counter()

        # Execute the task with large batch
        batch_clean_document_task(
            document_ids=document_ids, dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=file_ids
        )

        end_time = time.perf_counter()
        execution_time = end_time - start_time

        # Verify performance characteristics (should complete within reasonable time)
        assert execution_time < 5.0  # Should complete within 5 seconds

        # Verify that the task completed successfully for the large batch
        # The task should handle large batches efficiently

        # Verify database cleanup for all resources
        db.session.commit()

        # Check that all segments are deleted
        for segment_id in segment_ids:
            deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
            assert deleted_segment is None

        # Check that all upload files are deleted
        for file_id in file_ids:
            deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
            assert deleted_file is None

    def test_batch_clean_document_task_integration_with_real_database(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test full integration with real database operations.

        This test verifies that the task integrates properly with the
        actual database and maintains data consistency throughout the process.
        """
        # Create test data
        account = self._create_test_account(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account)

        # Create document with complex structure
        document = self._create_test_document(db_session_with_containers, dataset, account)

        # Create multiple segments for the document
        segments = []
        for i in range(3):
            segment = DocumentSegment(
                id=str(uuid.uuid4()),
                tenant_id=account.current_tenant.id,
                dataset_id=document.dataset_id,
                document_id=document.id,
                position=i,
                content=f"Segment content {i} with some text",
                word_count=50 + i * 10,
                tokens=25 + i * 5,
                index_node_id=str(uuid.uuid4()),
                created_by=account.id,
                status="completed",
            )
            segments.append(segment)

        # Create upload file
        upload_file = self._create_test_upload_file(db_session_with_containers, account)

        # Update document to reference the upload file
        document.data_source_info = json.dumps({"upload_file_id": upload_file.id})

        # Add all to database
        for segment in segments:
            db.session.add(segment)
        db.session.commit()

        # Verify initial state
        assert db.session.query(DocumentSegment).filter_by(document_id=document.id).count() == 3
        assert db.session.query(UploadFile).filter_by(id=upload_file.id).first() is not None

        # Store original IDs for verification
        document_id = document.id
        segment_ids = [seg.id for seg in segments]
        file_id = upload_file.id

        # Execute the task
        batch_clean_document_task(
            document_ids=[document_id], dataset_id=dataset.id, doc_form=dataset.doc_form, file_ids=[file_id]
        )

        # Verify that the task completed successfully
        # The task should process all segments and clean up all associated resources

        # Verify database cleanup
        db.session.commit()

        # Check that all segments are deleted
        for segment_id in segment_ids:
            deleted_segment = db.session.query(DocumentSegment).filter_by(id=segment_id).first()
            assert deleted_segment is None

        # Check that upload file is deleted
        deleted_file = db.session.query(UploadFile).filter_by(id=file_id).first()
        assert deleted_file is None

        # Verify final database state
        assert db.session.query(DocumentSegment).filter_by(document_id=document_id).count() == 0
        assert db.session.query(UploadFile).filter_by(id=file_id).first() is None

@@ -0,0 +1,737 @@
"""
Integration tests for batch_create_segment_to_index_task using testcontainers.

This module provides comprehensive integration tests for the batch segment creation
and indexing task using TestContainers infrastructure. The tests ensure that the
task properly processes CSV files, creates document segments, and establishes
vector indexes in a real database environment.

All tests use the testcontainers infrastructure to ensure proper database isolation
and realistic testing scenarios with actual PostgreSQL and Redis instances.
"""

import uuid
from datetime import datetime
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment
from models.enums import CreatorUserRole
from models.model import UploadFile
from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task


class TestBatchCreateSegmentToIndexTask:
    """Integration tests for batch_create_segment_to_index_task using testcontainers."""

    @pytest.fixture(autouse=True)
    def cleanup_database(self, db_session_with_containers):
        """Clean up database before each test to ensure isolation."""
        from extensions.ext_database import db
        from extensions.ext_redis import redis_client

        # Clear all test data
        db.session.query(DocumentSegment).delete()
        db.session.query(Document).delete()
        db.session.query(Dataset).delete()
        db.session.query(UploadFile).delete()
        db.session.query(TenantAccountJoin).delete()
        db.session.query(Tenant).delete()
        db.session.query(Account).delete()
        db.session.commit()

        # Clear Redis cache
        redis_client.flushdb()
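        # Note (added for clarity): the deletion order above is deliberate — child
        # rows (segments) are removed before their parents (documents, datasets),
        # and joins before tenants/accounts, so any foreign-key constraints between
        # these tables are satisfied during cleanup.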

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("tasks.batch_create_segment_to_index_task.storage") as mock_storage,
            patch("tasks.batch_create_segment_to_index_task.ModelManager") as mock_model_manager,
            patch("tasks.batch_create_segment_to_index_task.VectorService") as mock_vector_service,
        ):
            # Setup default mock returns
            mock_storage.download.return_value = None

            # Mock embedding model for high quality indexing
            mock_embedding_model = MagicMock()
            mock_embedding_model.get_text_embedding_num_tokens.return_value = [10, 15, 20]
            mock_model_manager_instance = MagicMock()
            mock_model_manager_instance.get_model_instance.return_value = mock_embedding_model
            mock_model_manager.return_value = mock_model_manager_instance

            # Mock vector service
            mock_vector_service.create_segments_vector.return_value = None

            yield {
                "storage": mock_storage,
                "model_manager": mock_model_manager,
                "vector_service": mock_vector_service,
                "embedding_model": mock_embedding_model,
            }
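    # Implementation note (added): the parenthesized multi-manager `with (...)` form
    # used above is officially supported from Python 3.10; on older interpreters the
    # same patches would need nested `with` statements or `contextlib.ExitStack`.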

    def _create_test_account_and_tenant(self, db_session_with_containers):
        """
        Helper method to create a test account and tenant for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure

        Returns:
            tuple: (Account, Tenant) created instances
        """
        fake = Faker()

        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )

        from extensions.ext_database import db

        db.session.add(account)
        db.session.commit()

        # Create tenant for the account
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()

        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()

        # Set current tenant for account
        account.current_tenant = tenant

        return account, tenant

    def _create_test_dataset(self, db_session_with_containers, account, tenant):
        """
        Helper method to create a test dataset for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            account: Account instance
            tenant: Tenant instance

        Returns:
            Dataset: Created dataset instance
        """
        fake = Faker()

        dataset = Dataset(
            tenant_id=tenant.id,
            name=fake.company(),
            description=fake.text(),
            data_source_type="upload_file",
            indexing_technique="high_quality",
            embedding_model="text-embedding-ada-002",
            embedding_model_provider="openai",
            created_by=account.id,
        )

        from extensions.ext_database import db

        db.session.add(dataset)
        db.session.commit()

        return dataset

    def _create_test_document(self, db_session_with_containers, account, tenant, dataset):
        """
        Helper method to create a test document for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            account: Account instance
            tenant: Tenant instance
            dataset: Dataset instance

        Returns:
            Document: Created document instance
        """
        fake = Faker()

        document = Document(
            tenant_id=tenant.id,
            dataset_id=dataset.id,
            position=1,
            data_source_type="upload_file",
            batch="test_batch",
            name=fake.file_name(),
            created_from="upload_file",
            created_by=account.id,
            indexing_status="completed",
            enabled=True,
            archived=False,
            doc_form="text_model",
            word_count=0,
        )

        from extensions.ext_database import db

        db.session.add(document)
        db.session.commit()

        return document

    def _create_test_upload_file(self, db_session_with_containers, account, tenant):
        """
        Helper method to create a test upload file for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            account: Account instance
            tenant: Tenant instance

        Returns:
            UploadFile: Created upload file instance
        """
        fake = Faker()

        upload_file = UploadFile(
            tenant_id=tenant.id,
            storage_type="local",
            key=f"test_files/{fake.file_name()}",
            name=fake.file_name(),
            size=1024,
            extension=".csv",
            mime_type="text/csv",
            created_by_role=CreatorUserRole.ACCOUNT,
            created_by=account.id,
            created_at=datetime.now(),
            used=False,
        )

        from extensions.ext_database import db

        db.session.add(upload_file)
        db.session.commit()

        return upload_file

    def _create_test_csv_content(self, content_type="text_model"):
        """
        Helper method to create test CSV content.

        Args:
            content_type: Type of content to create ("text_model" or "qa_model")

        Returns:
            str: CSV content as string
        """
        if content_type == "qa_model":
            csv_content = "content,answer\n"
            csv_content += "This is the first segment content,This is the first answer\n"
            csv_content += "This is the second segment content,This is the second answer\n"
            csv_content += "This is the third segment content,This is the third answer\n"
        else:
            csv_content = "content\n"
            csv_content += "This is the first segment content\n"
            csv_content += "This is the second segment content\n"
            csv_content += "This is the third segment content\n"

        return csv_content
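
    # A compact view of the fixture pattern the tests below repeat: the mocked
    # storage download writes our in-memory CSV to whatever temp path the task asks
    # for, so the task ends up reading test-controlled data. (Sketch of the shared
    # pattern extracted from the tests themselves, not additional test logic.)
    #
    #     csv_content = self._create_test_csv_content("text_model")
    #
    #     def mock_download(key, file_path):
    #         Path(file_path).write_text(csv_content, encoding="utf-8")
    #
    #     mock_storage.download.side_effect = mock_download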

    def test_batch_create_segment_to_index_task_success_text_model(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful batch creation of segments for text model documents.

        This test verifies that the task can successfully:
        1. Process a CSV file with text content
        2. Create document segments with proper metadata
        3. Update document word count
        4. Create vector indexes
        5. Set Redis cache status
        """
        # Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
        document = self._create_test_document(db_session_with_containers, account, tenant, dataset)
        upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)

        # Create CSV content
        csv_content = self._create_test_csv_content("text_model")

        # Mock storage to return our CSV content
        mock_storage = mock_external_service_dependencies["storage"]

        def mock_download(key, file_path):
            Path(file_path).write_text(csv_content, encoding="utf-8")

        mock_storage.download.side_effect = mock_download

        # Execute the task
        job_id = str(uuid.uuid4())
        batch_create_segment_to_index_task(
            job_id=job_id,
            upload_file_id=upload_file.id,
            dataset_id=dataset.id,
            document_id=document.id,
            tenant_id=tenant.id,
            user_id=account.id,
        )

        # Verify results
        from extensions.ext_database import db

        # Check that segments were created
        segments = (
            db.session.query(DocumentSegment)
            .filter_by(document_id=document.id)
            .order_by(DocumentSegment.position)
            .all()
        )
        assert len(segments) == 3

        # Verify segment content and metadata
        for i, segment in enumerate(segments):
            assert segment.tenant_id == tenant.id
            assert segment.dataset_id == dataset.id
            assert segment.document_id == document.id
            assert segment.position == i + 1
            assert segment.status == "completed"
            assert segment.indexing_at is not None
            assert segment.completed_at is not None
            assert segment.answer is None  # text_model doesn't have answers

        # Check that document word count was updated
        db.session.refresh(document)
        assert document.word_count > 0

        # Verify vector service was called
        mock_vector_service = mock_external_service_dependencies["vector_service"]
        mock_vector_service.create_segments_vector.assert_called_once()

        # Check Redis cache was set
        from extensions.ext_redis import redis_client

        cache_key = f"segment_batch_import_{job_id}"
        cache_value = redis_client.get(cache_key)
        assert cache_value == b"completed"

    def test_batch_create_segment_to_index_task_dataset_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test task failure when dataset does not exist.

        This test verifies that the task properly handles error cases:
        1. Fails gracefully when dataset is not found
        2. Sets appropriate Redis cache status
        3. Logs error information
        4. Maintains database integrity
        """
        # Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)

        # Use non-existent IDs
        non_existent_dataset_id = str(uuid.uuid4())
        non_existent_document_id = str(uuid.uuid4())

        # Execute the task with non-existent dataset
        job_id = str(uuid.uuid4())
        batch_create_segment_to_index_task(
            job_id=job_id,
            upload_file_id=upload_file.id,
            dataset_id=non_existent_dataset_id,
            document_id=non_existent_document_id,
            tenant_id=tenant.id,
            user_id=account.id,
        )

        # Verify error handling
        # Check Redis cache was set to error status
        from extensions.ext_redis import redis_client

        cache_key = f"segment_batch_import_{job_id}"
        cache_value = redis_client.get(cache_key)
        assert cache_value == b"error"

        # Verify no segments were created (since the dataset doesn't exist)
        from extensions.ext_database import db

        segments = db.session.query(DocumentSegment).all()
        assert len(segments) == 0

        # Verify no documents were modified
        documents = db.session.query(Document).all()
        assert len(documents) == 0

    def test_batch_create_segment_to_index_task_document_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test task failure when document does not exist.

        This test verifies that the task properly handles error cases:
        1. Fails gracefully when document is not found
        2. Sets appropriate Redis cache status
        3. Maintains database integrity
        4. Logs appropriate error information
        """
        # Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
        upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)

        # Use non-existent document ID
        non_existent_document_id = str(uuid.uuid4())

        # Execute the task with non-existent document
        job_id = str(uuid.uuid4())
        batch_create_segment_to_index_task(
            job_id=job_id,
            upload_file_id=upload_file.id,
            dataset_id=dataset.id,
            document_id=non_existent_document_id,
            tenant_id=tenant.id,
            user_id=account.id,
        )

        # Verify error handling
        # Check Redis cache was set to error status
        from extensions.ext_redis import redis_client

        cache_key = f"segment_batch_import_{job_id}"
        cache_value = redis_client.get(cache_key)
        assert cache_value == b"error"

        # Verify no segments were created
        from extensions.ext_database import db

        segments = db.session.query(DocumentSegment).all()
        assert len(segments) == 0

        # Verify dataset remains unchanged (no segments were added to the dataset)
        db.session.refresh(dataset)
        segments_for_dataset = db.session.query(DocumentSegment).filter_by(dataset_id=dataset.id).all()
        assert len(segments_for_dataset) == 0

    def test_batch_create_segment_to_index_task_document_not_available(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test task failure when document is not available for indexing.

        This test verifies that the task properly handles error cases:
        1. Fails when document is disabled
        2. Fails when document is archived
        3. Fails when document indexing status is not completed
        4. Sets appropriate Redis cache status
        """
        # Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
        upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)

        # Create documents in various unavailable states
        test_cases = [
            # Disabled document
            Document(
                tenant_id=tenant.id,
                dataset_id=dataset.id,
                position=1,
                data_source_type="upload_file",
                batch="test_batch",
                name="disabled_document",
                created_from="upload_file",
                created_by=account.id,
                indexing_status="completed",
                enabled=False,  # Document is disabled
                archived=False,
                doc_form="text_model",
                word_count=0,
            ),
            # Archived document
            Document(
                tenant_id=tenant.id,
                dataset_id=dataset.id,
                position=2,
                data_source_type="upload_file",
                batch="test_batch",
                name="archived_document",
                created_from="upload_file",
                created_by=account.id,
                indexing_status="completed",
                enabled=True,
                archived=True,  # Document is archived
                doc_form="text_model",
                word_count=0,
            ),
            # Document with incomplete indexing
            Document(
                tenant_id=tenant.id,
                dataset_id=dataset.id,
                position=3,
                data_source_type="upload_file",
                batch="test_batch",
                name="incomplete_document",
                created_from="upload_file",
                created_by=account.id,
                indexing_status="indexing",  # Not completed
                enabled=True,
                archived=False,
                doc_form="text_model",
                word_count=0,
            ),
        ]

        from extensions.ext_database import db

        for document in test_cases:
            db.session.add(document)
        db.session.commit()

        # Test each unavailable document
        for document in test_cases:
            job_id = str(uuid.uuid4())
            batch_create_segment_to_index_task(
                job_id=job_id,
                upload_file_id=upload_file.id,
                dataset_id=dataset.id,
                document_id=document.id,
                tenant_id=tenant.id,
                user_id=account.id,
            )

            # Verify error handling for each case
            from extensions.ext_redis import redis_client

            cache_key = f"segment_batch_import_{job_id}"
            cache_value = redis_client.get(cache_key)
            assert cache_value == b"error"

            # Verify no segments were created
            segments = db.session.query(DocumentSegment).filter_by(document_id=document.id).all()
            assert len(segments) == 0

    def test_batch_create_segment_to_index_task_upload_file_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test task failure when upload file does not exist.

        This test verifies that the task properly handles error cases:
        1. Fails gracefully when upload file is not found
        2. Sets appropriate Redis cache status
        3. Maintains database integrity
        4. Logs appropriate error information
        """
        # Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
        document = self._create_test_document(db_session_with_containers, account, tenant, dataset)

        # Use non-existent upload file ID
        non_existent_upload_file_id = str(uuid.uuid4())

        # Execute the task with non-existent upload file
        job_id = str(uuid.uuid4())
        batch_create_segment_to_index_task(
            job_id=job_id,
            upload_file_id=non_existent_upload_file_id,
            dataset_id=dataset.id,
            document_id=document.id,
            tenant_id=tenant.id,
            user_id=account.id,
        )

        # Verify error handling
        # Check Redis cache was set to error status
        from extensions.ext_redis import redis_client

        cache_key = f"segment_batch_import_{job_id}"
        cache_value = redis_client.get(cache_key)
        assert cache_value == b"error"

        # Verify no segments were created
        from extensions.ext_database import db

        segments = db.session.query(DocumentSegment).all()
        assert len(segments) == 0

        # Verify document remains unchanged
        db.session.refresh(document)
        assert document.word_count == 0

    def test_batch_create_segment_to_index_task_empty_csv_file(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test task failure when the CSV file is empty.

        This test verifies that the task properly handles error cases:
        1. Fails when the CSV file contains no data
        2. Sets appropriate Redis cache status
        3. Maintains database integrity
        4. Logs appropriate error information
        """
        # Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
        document = self._create_test_document(db_session_with_containers, account, tenant, dataset)
        upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)

        # Create empty CSV content
        empty_csv_content = "content\n"  # Only header, no data rows

        # Mock storage to return empty CSV content
        mock_storage = mock_external_service_dependencies["storage"]

        def mock_download(key, file_path):
            Path(file_path).write_text(empty_csv_content, encoding="utf-8")

        mock_storage.download.side_effect = mock_download

        # Execute the task
        job_id = str(uuid.uuid4())
        batch_create_segment_to_index_task(
            job_id=job_id,
            upload_file_id=upload_file.id,
            dataset_id=dataset.id,
            document_id=document.id,
            tenant_id=tenant.id,
            user_id=account.id,
        )

        # Verify error handling
        # Check Redis cache was set to error status
        from extensions.ext_redis import redis_client

        cache_key = f"segment_batch_import_{job_id}"
        cache_value = redis_client.get(cache_key)
        assert cache_value == b"error"

        # Verify no segments were created
        from extensions.ext_database import db

        segments = db.session.query(DocumentSegment).all()
        assert len(segments) == 0

        # Verify document remains unchanged
        db.session.refresh(document)
        assert document.word_count == 0

    def test_batch_create_segment_to_index_task_position_calculation(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test proper position calculation for new segments when existing segments are present.

        This test verifies that the task correctly:
        1. Calculates positions for new segments based on existing ones
        2. Handles position increment logic properly
        3. Maintains proper segment ordering
        4. Works with existing segment data
        """
        # Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(db_session_with_containers, account, tenant)
        document = self._create_test_document(db_session_with_containers, account, tenant, dataset)
        upload_file = self._create_test_upload_file(db_session_with_containers, account, tenant)

        # Create existing segments to test position calculation
        existing_segments = []
        for i in range(3):
            segment = DocumentSegment(
                tenant_id=tenant.id,
                dataset_id=dataset.id,
                document_id=document.id,
                position=i + 1,
                content=f"Existing segment {i + 1}",
                word_count=len(f"Existing segment {i + 1}"),
                tokens=10,
                created_by=account.id,
                status="completed",
                index_node_id=str(uuid.uuid4()),
                index_node_hash=f"hash_{i}",
            )
            existing_segments.append(segment)

        from extensions.ext_database import db

        for segment in existing_segments:
            db.session.add(segment)
        db.session.commit()

        # Create CSV content
        csv_content = self._create_test_csv_content("text_model")

        # Mock storage to return our CSV content
        mock_storage = mock_external_service_dependencies["storage"]

        def mock_download(key, file_path):
            Path(file_path).write_text(csv_content, encoding="utf-8")

        mock_storage.download.side_effect = mock_download

        # Execute the task
        job_id = str(uuid.uuid4())
        batch_create_segment_to_index_task(
            job_id=job_id,
            upload_file_id=upload_file.id,
            dataset_id=dataset.id,
            document_id=document.id,
            tenant_id=tenant.id,
            user_id=account.id,
        )

        # Verify results
        # Check that new segments were created with correct positions
        all_segments = (
            db.session.query(DocumentSegment)
            .filter_by(document_id=document.id)
            .order_by(DocumentSegment.position)
            .all()
        )
        assert len(all_segments) == 6  # 3 existing + 3 new

        # Verify position ordering
        for i, segment in enumerate(all_segments):
            assert segment.position == i + 1

        # Verify new segments have correct positions (4, 5, 6)
        new_segments = all_segments[3:]
        for i, segment in enumerate(new_segments):
            expected_position = 4 + i  # Should start at position 4
            assert segment.position == expected_position
            assert segment.status == "completed"
            assert segment.indexing_at is not None
            assert segment.completed_at is not None

        # Check that document word count was updated
        db.session.refresh(document)
        assert document.word_count > 0

        # Verify vector service was called
        mock_vector_service = mock_external_service_dependencies["vector_service"]
        mock_vector_service.create_segments_vector.assert_called_once()

        # Check Redis cache was set
        from extensions.ext_redis import redis_client

        cache_key = f"segment_batch_import_{job_id}"
        cache_value = redis_client.get(cache_key)
        assert cache_value == b"completed"
@@ -0,0 +1,578 @@
"""
TestContainers-based integration tests for delete_segment_from_index_task.

This module provides comprehensive integration testing for the delete_segment_from_index_task
using TestContainers to ensure realistic database interactions and proper isolation.
The task is responsible for removing document segments from the vector index when segments
are deleted from the dataset.
"""

import logging
from unittest.mock import MagicMock, patch

from faker import Faker

from core.rag.index_processor.constant.index_type import IndexType
from models import Account, Dataset, Document, DocumentSegment, Tenant
from tasks.delete_segment_from_index_task import delete_segment_from_index_task

logger = logging.getLogger(__name__)


class TestDeleteSegmentFromIndexTask:
    """
    Comprehensive integration tests for delete_segment_from_index_task using testcontainers.

    This test class covers all major functionality of the delete_segment_from_index_task:
    - Successful segment deletion from index
    - Dataset not found scenarios
    - Document not found scenarios
    - Document status validation (disabled, archived, not completed)
    - Index processor integration and cleanup
    - Exception handling and error scenarios
    - Performance and timing verification

    All tests use the testcontainers infrastructure to ensure proper database isolation
    and a realistic testing environment with actual database interactions.
    """

    def _create_test_tenant(self, db_session_with_containers, fake=None):
        """
        Helper method to create a test tenant with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            fake: Faker instance for generating test data

        Returns:
            Tenant: Created test tenant instance
        """
        fake = fake or Faker()
        tenant = Tenant(name=f"Test Tenant {fake.company()}", plan="basic", status="active")
        tenant.id = fake.uuid4()
        tenant.created_at = fake.date_time_this_year()
        tenant.updated_at = tenant.created_at

        db_session_with_containers.add(tenant)
        db_session_with_containers.commit()
        return tenant

    def _create_test_account(self, db_session_with_containers, tenant, fake=None):
        """
        Helper method to create a test account with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            tenant: Tenant instance for the account
            fake: Faker instance for generating test data

        Returns:
            Account: Created test account instance
        """
        fake = fake or Faker()
        account = Account(
            name=fake.name(),
            email=fake.email(),
            avatar=fake.url(),
            status="active",
            interface_language="en-US",
        )
        account.id = fake.uuid4()
        account.created_at = fake.date_time_this_year()
        account.updated_at = account.created_at

        db_session_with_containers.add(account)
        db_session_with_containers.commit()
        return account

    def _create_test_dataset(self, db_session_with_containers, tenant, account, fake=None):
        """
        Helper method to create a test dataset with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            tenant: Tenant instance for the dataset
            account: Account instance for the dataset
            fake: Faker instance for generating test data

        Returns:
            Dataset: Created test dataset instance
        """
        fake = fake or Faker()
        dataset = Dataset()
        dataset.id = fake.uuid4()
        dataset.tenant_id = tenant.id
        dataset.name = f"Test Dataset {fake.word()}"
        dataset.description = fake.text(max_nb_chars=200)
        dataset.provider = "vendor"
        dataset.permission = "only_me"
        dataset.data_source_type = "upload_file"
        dataset.indexing_technique = "high_quality"
        dataset.index_struct = '{"type": "paragraph"}'
        dataset.created_by = account.id
        dataset.created_at = fake.date_time_this_year()
        dataset.updated_by = account.id
        dataset.updated_at = dataset.created_at
        dataset.embedding_model = "text-embedding-ada-002"
        dataset.embedding_model_provider = "openai"
        dataset.built_in_field_enabled = False

        db_session_with_containers.add(dataset)
        db_session_with_containers.commit()
        return dataset

    def _create_test_document(self, db_session_with_containers, dataset, account, fake=None, **kwargs):
        """
        Helper method to create a test document with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            dataset: Dataset instance for the document
            account: Account instance for the document
            fake: Faker instance for generating test data
            **kwargs: Additional document attributes to override defaults

        Returns:
            Document: Created test document instance
        """
        fake = fake or Faker()
        document = Document()
        document.id = fake.uuid4()
        document.tenant_id = dataset.tenant_id
        document.dataset_id = dataset.id
        document.position = kwargs.get("position", 1)
        document.data_source_type = kwargs.get("data_source_type", "upload_file")
        document.data_source_info = kwargs.get("data_source_info", "{}")
        document.batch = kwargs.get("batch", fake.uuid4())
        document.name = kwargs.get("name", f"Test Document {fake.word()}")
        document.created_from = kwargs.get("created_from", "api")
        document.created_by = account.id
        document.created_at = fake.date_time_this_year()
        document.processing_started_at = kwargs.get("processing_started_at", fake.date_time_this_year())
        document.file_id = kwargs.get("file_id", fake.uuid4())
        document.word_count = kwargs.get("word_count", fake.random_int(min=100, max=1000))
        document.parsing_completed_at = kwargs.get("parsing_completed_at", fake.date_time_this_year())
        document.cleaning_completed_at = kwargs.get("cleaning_completed_at", fake.date_time_this_year())
        document.splitting_completed_at = kwargs.get("splitting_completed_at", fake.date_time_this_year())
        document.tokens = kwargs.get("tokens", fake.random_int(min=50, max=500))
        document.indexing_latency = kwargs.get("indexing_latency", fake.random_number(digits=3))
        document.completed_at = kwargs.get("completed_at", fake.date_time_this_year())
        document.is_paused = kwargs.get("is_paused", False)
        document.indexing_status = kwargs.get("indexing_status", "completed")
        document.enabled = kwargs.get("enabled", True)
        document.archived = kwargs.get("archived", False)
        document.updated_at = fake.date_time_this_year()
        document.doc_type = kwargs.get("doc_type", "text")
        document.doc_metadata = kwargs.get("doc_metadata", {})
        document.doc_form = kwargs.get("doc_form", IndexType.PARAGRAPH_INDEX)
        document.doc_language = kwargs.get("doc_language", "en")

        db_session_with_containers.add(document)
        db_session_with_containers.commit()
        return document

    def _create_test_document_segments(self, db_session_with_containers, document, account, count=3, fake=None):
        """
        Helper method to create test document segments with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            document: Document instance for the segments
            account: Account instance for the segments
            count: Number of segments to create
            fake: Faker instance for generating test data

        Returns:
            list[DocumentSegment]: List of created test document segment instances
        """
        fake = fake or Faker()
        segments = []

        for i in range(count):
            segment = DocumentSegment()
            segment.id = fake.uuid4()
            segment.tenant_id = document.tenant_id
            segment.dataset_id = document.dataset_id
            segment.document_id = document.id
            segment.position = i + 1
            segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}"
            segment.answer = f"Test segment answer {i + 1}: {fake.text(max_nb_chars=100)}"
            segment.word_count = fake.random_int(min=10, max=100)
            segment.tokens = fake.random_int(min=5, max=50)
            segment.keywords = [fake.word() for _ in range(3)]
            segment.index_node_id = f"node_{fake.uuid4()}"
            segment.index_node_hash = fake.sha256()
            segment.hit_count = 0
            segment.enabled = True
            segment.status = "completed"
            segment.created_by = account.id
            segment.created_at = fake.date_time_this_year()
            segment.updated_by = account.id
            segment.updated_at = segment.created_at

            db_session_with_containers.add(segment)
            segments.append(segment)

        db_session_with_containers.commit()
        return segments

    @patch("tasks.delete_segment_from_index_task.IndexProcessorFactory")
    def test_delete_segment_from_index_task_success(self, mock_index_processor_factory, db_session_with_containers):
        """
        Test successful segment deletion from index with comprehensive verification.

        This test verifies:
        - Proper task execution with valid dataset and document
        - Index processor factory initialization with correct document form
        - Index processor clean method called with correct parameters
        - Database session properly closed after execution
        - Task completes without exceptions
        """
        fake = Faker()

        # Create test data
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)

        # Extract index node IDs for the task
        index_node_ids = [segment.index_node_id for segment in segments]

        # Mock the index processor
        mock_processor = MagicMock()
        mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor

        # Execute the task
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)

        # Verify the task completed successfully
        assert result is None  # Task should return None on success

        # Verify index processor factory was called with correct document form
        mock_index_processor_factory.assert_called_once_with(document.doc_form)

        # Verify index processor clean method was called with correct parameters.
        # Note: We can't directly compare Dataset objects as they are different instances
        # from database queries, so we verify the call was made and check the parameters.
        assert mock_processor.clean.call_count == 1
        call_args = mock_processor.clean.call_args
        assert call_args[0][0].id == dataset.id  # Verify dataset ID matches
        assert call_args[0][1] == index_node_ids  # Verify index node IDs match
        assert call_args[1]["with_keywords"] is True
        assert call_args[1]["delete_child_chunks"] is True

    def test_delete_segment_from_index_task_dataset_not_found(self, db_session_with_containers):
        """
        Test task behavior when the dataset is not found.

        This test verifies:
        - Task handles missing dataset gracefully
        - No index processor operations are attempted
        - Task returns early without exceptions
        - Database session is properly closed
        """
        fake = Faker()
        non_existent_dataset_id = fake.uuid4()
        non_existent_document_id = fake.uuid4()
        index_node_ids = [f"node_{fake.uuid4()}" for _ in range(3)]

        # Execute the task with a non-existent dataset
        result = delete_segment_from_index_task(index_node_ids, non_existent_dataset_id, non_existent_document_id)

        # Verify the task completed without exceptions
        assert result is None  # Task should return None when dataset not found

    def test_delete_segment_from_index_task_document_not_found(self, db_session_with_containers):
        """
        Test task behavior when the document is not found.

        This test verifies:
        - Task handles missing document gracefully
        - No index processor operations are attempted
        - Task returns early without exceptions
        - Database session is properly closed
        """
        fake = Faker()

        # Create test data
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)

        non_existent_document_id = fake.uuid4()
        index_node_ids = [f"node_{fake.uuid4()}" for _ in range(3)]

        # Execute the task with a non-existent document
        result = delete_segment_from_index_task(index_node_ids, dataset.id, non_existent_document_id)

        # Verify the task completed without exceptions
        assert result is None  # Task should return None when document not found

    def test_delete_segment_from_index_task_document_disabled(self, db_session_with_containers):
        """
        Test task behavior when the document is disabled.

        This test verifies:
        - Task handles disabled document gracefully
        - No index processor operations are attempted
        - Task returns early without exceptions
        - Database session is properly closed
        """
        fake = Faker()

        # Create test data with a disabled document
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake, enabled=False)
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)

        index_node_ids = [segment.index_node_id for segment in segments]

        # Execute the task with the disabled document
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)

        # Verify the task completed without exceptions
        assert result is None  # Task should return None when document is disabled

    def test_delete_segment_from_index_task_document_archived(self, db_session_with_containers):
        """
        Test task behavior when the document is archived.

        This test verifies:
        - Task handles archived document gracefully
        - No index processor operations are attempted
        - Task returns early without exceptions
        - Database session is properly closed
        """
        fake = Faker()

        # Create test data with an archived document
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake, archived=True)
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)

        index_node_ids = [segment.index_node_id for segment in segments]

        # Execute the task with the archived document
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)

        # Verify the task completed without exceptions
        assert result is None  # Task should return None when document is archived

    def test_delete_segment_from_index_task_document_not_completed(self, db_session_with_containers):
        """
        Test task behavior when document indexing is not completed.

        This test verifies:
        - Task handles incomplete indexing status gracefully
        - No index processor operations are attempted
        - Task returns early without exceptions
        - Database session is properly closed
        """
        fake = Faker()

        # Create test data with incomplete indexing
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(
            db_session_with_containers, dataset, account, fake, indexing_status="indexing"
        )
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)

        index_node_ids = [segment.index_node_id for segment in segments]

        # Execute the task with incomplete indexing
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)

        # Verify the task completed without exceptions
        assert result is None  # Task should return None when indexing is not completed

    @patch("tasks.delete_segment_from_index_task.IndexProcessorFactory")
    def test_delete_segment_from_index_task_index_processor_clean(
        self, mock_index_processor_factory, db_session_with_containers
    ):
        """
        Test index processor clean method integration with different document forms.

        This test verifies:
        - Index processor factory creates the correct processor for each document form
        - Clean method is called with proper parameters for each document form
        - Task handles different index types correctly
        - Database session is properly managed
        """
        fake = Faker()

        # Test different document forms
        document_forms = [IndexType.PARAGRAPH_INDEX, IndexType.QA_INDEX, IndexType.PARENT_CHILD_INDEX]

        for doc_form in document_forms:
            # Create test data for each document form
            tenant = self._create_test_tenant(db_session_with_containers, fake)
            account = self._create_test_account(db_session_with_containers, tenant, fake)
            dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
            document = self._create_test_document(db_session_with_containers, dataset, account, fake, doc_form=doc_form)
            segments = self._create_test_document_segments(db_session_with_containers, document, account, 2, fake)

            index_node_ids = [segment.index_node_id for segment in segments]

            # Mock the index processor
            mock_processor = MagicMock()
            mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor

            # Execute the task
            result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)

            # Verify the task completed successfully
            assert result is None

            # Verify index processor factory was called with the correct document form
            mock_index_processor_factory.assert_called_with(doc_form)

            # Verify index processor clean method was called with correct parameters
            assert mock_processor.clean.call_count == 1
            call_args = mock_processor.clean.call_args
            assert call_args[0][0].id == dataset.id  # Verify dataset ID matches
            assert call_args[0][1] == index_node_ids  # Verify index node IDs match
            assert call_args[1]["with_keywords"] is True
            assert call_args[1]["delete_child_chunks"] is True

            # Reset mocks for the next iteration
            mock_index_processor_factory.reset_mock()
            mock_processor.reset_mock()

    @patch("tasks.delete_segment_from_index_task.IndexProcessorFactory")
    def test_delete_segment_from_index_task_exception_handling(
        self, mock_index_processor_factory, db_session_with_containers
    ):
        """
        Test exception handling in the task.

        This test verifies:
        - Task handles index processor exceptions gracefully
        - Database session is properly closed even when exceptions occur
        - Task logs exceptions appropriately
        - No unhandled exceptions are raised
        """
        fake = Faker()

        # Create test data
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 3, fake)

        index_node_ids = [segment.index_node_id for segment in segments]

        # Mock the index processor to raise an exception
        mock_processor = MagicMock()
        mock_processor.clean.side_effect = Exception("Index processor error")
        mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor

        # Execute the task - should not raise an exception
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)

        # Verify the task completed without raising exceptions
        assert result is None  # Task should return None even when exceptions occur

        # Verify index processor clean method was called
        assert mock_processor.clean.call_count == 1
        call_args = mock_processor.clean.call_args
        assert call_args[0][0].id == dataset.id  # Verify dataset ID matches
        assert call_args[0][1] == index_node_ids  # Verify index node IDs match
        assert call_args[1]["with_keywords"] is True
        assert call_args[1]["delete_child_chunks"] is True

    @patch("tasks.delete_segment_from_index_task.IndexProcessorFactory")
    def test_delete_segment_from_index_task_empty_index_node_ids(
        self, mock_index_processor_factory, db_session_with_containers
    ):
        """
        Test task behavior with an empty index node IDs list.

        This test verifies:
        - Task handles empty index node IDs gracefully
        - Index processor clean method is called with the empty list
        - Task completes successfully
        - Database session is properly managed
        """
        fake = Faker()

        # Create test data
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)

        # Use empty index node IDs
        index_node_ids = []

        # Mock the index processor
        mock_processor = MagicMock()
        mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor

        # Execute the task
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)

        # Verify the task completed successfully
        assert result is None

        # Verify index processor clean method was called with the empty list
        assert mock_processor.clean.call_count == 1
        call_args = mock_processor.clean.call_args
        assert call_args[0][0].id == dataset.id  # Verify dataset ID matches
        assert call_args[0][1] == index_node_ids  # Verify index node IDs match (empty list)
        assert call_args[1]["with_keywords"] is True
        assert call_args[1]["delete_child_chunks"] is True

    @patch("tasks.delete_segment_from_index_task.IndexProcessorFactory")
    def test_delete_segment_from_index_task_large_index_node_ids(
        self, mock_index_processor_factory, db_session_with_containers
    ):
        """
        Test task behavior with a large number of index node IDs.

        This test verifies:
        - Task handles large lists of index node IDs efficiently
        - Index processor clean method is called with all node IDs
        - Task completes successfully with large datasets
        - Database session is properly managed
        """
        fake = Faker()

        # Create test data
        tenant = self._create_test_tenant(db_session_with_containers, fake)
        account = self._create_test_account(db_session_with_containers, tenant, fake)
        dataset = self._create_test_dataset(db_session_with_containers, tenant, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)

        # Create a large number of segments
        segments = self._create_test_document_segments(db_session_with_containers, document, account, 50, fake)
        index_node_ids = [segment.index_node_id for segment in segments]

        # Mock the index processor
        mock_processor = MagicMock()
        mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor

        # Execute the task
        result = delete_segment_from_index_task(index_node_ids, dataset.id, document.id)

        # Verify the task completed successfully
        assert result is None

        # Verify index processor clean method was called with all node IDs
        assert mock_processor.clean.call_count == 1
        call_args = mock_processor.clean.call_args
        assert call_args[0][0].id == dataset.id  # Verify dataset ID matches
        assert call_args[0][1] == index_node_ids  # Verify index node IDs match
        assert call_args[1]["with_keywords"] is True
        assert call_args[1]["delete_child_chunks"] is True

        # Verify all node IDs were passed
        assert len(call_args[0][1]) == 50
@@ -0,0 +1,615 @@
|
||||
"""
|
||||
Integration tests for disable_segment_from_index_task using TestContainers.
|
||||
|
||||
This module provides comprehensive integration tests for the disable_segment_from_index_task
|
||||
using real database and Redis containers to ensure the task works correctly with actual
|
||||
data and external dependencies.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from datetime import UTC, datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from faker import Faker
|
||||
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
|
||||
from models.dataset import Dataset, Document, DocumentSegment
|
||||
from tasks.disable_segment_from_index_task import disable_segment_from_index_task
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TestDisableSegmentFromIndexTask:
|
||||
"""Integration tests for disable_segment_from_index_task using testcontainers."""
|
||||
|
||||
@pytest.fixture
|
||||
def mock_index_processor(self):
|
||||
"""Mock IndexProcessorFactory and its clean method."""
|
||||
with patch("tasks.disable_segment_from_index_task.IndexProcessorFactory") as mock_factory:
|
||||
mock_processor = mock_factory.return_value.init_index_processor.return_value
|
||||
mock_processor.clean.return_value = None
|
||||
yield mock_processor

    def _create_test_account_and_tenant(self, db_session_with_containers) -> tuple[Account, Tenant]:
        """
        Helper method to create a test account and tenant for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure

        Returns:
            tuple: (account, tenant) - Created account and tenant instances
        """
        fake = Faker()

        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()

        # Create tenant
        tenant = Tenant(
            name=fake.company(),
            status="normal",
            plan="basic",
        )
        db.session.add(tenant)
        db.session.commit()

        # Create tenant-account join with owner role
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()

        # Set current tenant for account
        account.current_tenant = tenant

        return account, tenant

    def _create_test_dataset(self, tenant: Tenant, account: Account) -> Dataset:
        """
        Helper method to create a test dataset.

        Args:
            tenant: Tenant instance
            account: Account instance

        Returns:
            Dataset: Created dataset instance
        """
        fake = Faker()

        dataset = Dataset(
            tenant_id=tenant.id,
            name=fake.sentence(nb_words=3),
            description=fake.text(max_nb_chars=200),
            data_source_type="upload_file",
            indexing_technique="high_quality",
            created_by=account.id,
        )
        db.session.add(dataset)
        db.session.commit()

        return dataset

    def _create_test_document(
        self, dataset: Dataset, tenant: Tenant, account: Account, doc_form: str = "text_model"
    ) -> Document:
        """
        Helper method to create a test document.

        Args:
            dataset: Dataset instance
            tenant: Tenant instance
            account: Account instance
            doc_form: Document form type

        Returns:
            Document: Created document instance
        """
        fake = Faker()

        document = Document(
            tenant_id=tenant.id,
            dataset_id=dataset.id,
            position=1,
            data_source_type="upload_file",
            batch=fake.uuid4(),
            name=fake.file_name(),
            created_from="api",
            created_by=account.id,
            indexing_status="completed",
            enabled=True,
            archived=False,
            doc_form=doc_form,
            word_count=1000,
            tokens=500,
            completed_at=datetime.now(UTC),
        )
        db.session.add(document)
        db.session.commit()

        return document

    def _create_test_segment(
        self,
        document: Document,
        dataset: Dataset,
        tenant: Tenant,
        account: Account,
        status: str = "completed",
        enabled: bool = True,
    ) -> DocumentSegment:
        """
        Helper method to create a test document segment.

        Args:
            document: Document instance
            dataset: Dataset instance
            tenant: Tenant instance
            account: Account instance
            status: Segment status
            enabled: Whether segment is enabled

        Returns:
            DocumentSegment: Created segment instance
        """
        fake = Faker()

        segment = DocumentSegment(
            tenant_id=tenant.id,
            dataset_id=dataset.id,
            document_id=document.id,
            position=1,
            content=fake.text(max_nb_chars=500),
            word_count=100,
            tokens=50,
            index_node_id=fake.uuid4(),
            index_node_hash=fake.sha256(),
            status=status,
            enabled=enabled,
            created_by=account.id,
            completed_at=datetime.now(UTC) if status == "completed" else None,
        )
        db.session.add(segment)
        db.session.commit()

        return segment

    def test_disable_segment_success(self, db_session_with_containers, mock_index_processor):
        """
        Test successful segment disabling from index.

        This test verifies:
        - Segment is found and validated
        - Index processor clean method is called with correct parameters
        - Redis cache is cleared
        - Task completes successfully
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        segment = self._create_test_segment(document, dataset, tenant, account)

        # Set up Redis cache
        indexing_cache_key = f"segment_{segment.id}_indexing"
        redis_client.setex(indexing_cache_key, 600, 1)

        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)

        # Assert: Verify the task completed successfully
        assert result is None  # Task returns None on success

        # Verify index processor was called correctly
        mock_index_processor.clean.assert_called_once()
        call_args = mock_index_processor.clean.call_args
        assert call_args[0][0].id == dataset.id  # Check dataset ID
        assert call_args[0][1] == [segment.index_node_id]  # Check index node IDs

        # Verify Redis cache was cleared
        assert redis_client.get(indexing_cache_key) is None

        # Verify segment is still in database
        db.session.refresh(segment)
        assert segment.id is not None
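
    # Note on `call_args` above: unittest.mock records the last call as a
    # `call` object, where index [0] is the positional-argument tuple and
    # index [1] is the keyword-argument dict, i.e. roughly:
    #
    #     dataset_arg, node_ids = mock_index_processor.clean.call_args[0]
    #     kwargs = mock_index_processor.clean.call_args[1]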

    def test_disable_segment_not_found(self, db_session_with_containers, mock_index_processor):
        """
        Test handling when segment is not found.

        This test verifies:
        - Task handles non-existent segment gracefully
        - No index processor operations are performed
        - Task returns early without errors
        """
        # Arrange: Use a non-existent segment ID
        fake = Faker()
        non_existent_segment_id = fake.uuid4()

        # Act: Execute the task with non-existent segment
        result = disable_segment_from_index_task(non_existent_segment_id)

        # Assert: Verify the task handled the error gracefully
        assert result is None

        # Verify index processor was not called
        mock_index_processor.clean.assert_not_called()

    def test_disable_segment_not_completed(self, db_session_with_containers, mock_index_processor):
        """
        Test handling when segment is not in completed status.

        This test verifies:
        - Task rejects segments that are not completed
        - No index processor operations are performed
        - Task returns early without errors
        """
        # Arrange: Create test data with non-completed segment
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        segment = self._create_test_segment(document, dataset, tenant, account, status="indexing", enabled=True)

        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)

        # Assert: Verify the task handled the invalid status gracefully
        assert result is None

        # Verify index processor was not called
        mock_index_processor.clean.assert_not_called()

    def test_disable_segment_no_dataset(self, db_session_with_containers, mock_index_processor):
        """
        Test handling when segment has no associated dataset.

        This test verifies:
        - Task handles segments without dataset gracefully
        - No index processor operations are performed
        - Task returns early without errors
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        segment = self._create_test_segment(document, dataset, tenant, account)

        # Break the dataset association by pointing at a non-existent dataset ID
        segment.dataset_id = "00000000-0000-0000-0000-000000000000"
        db.session.commit()

        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)

        # Assert: Verify the task handled the missing dataset gracefully
        assert result is None

        # Verify index processor was not called
        mock_index_processor.clean.assert_not_called()

    def test_disable_segment_no_document(self, db_session_with_containers, mock_index_processor):
        """
        Test handling when segment has no associated document.

        This test verifies:
        - Task handles segments without document gracefully
        - No index processor operations are performed
        - Task returns early without errors
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        segment = self._create_test_segment(document, dataset, tenant, account)

        # Break the document association by pointing at a non-existent document ID
        segment.document_id = "00000000-0000-0000-0000-000000000000"
        db.session.commit()

        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)

        # Assert: Verify the task handled the missing document gracefully
        assert result is None

        # Verify index processor was not called
        mock_index_processor.clean.assert_not_called()

    def test_disable_segment_document_disabled(self, db_session_with_containers, mock_index_processor):
        """
        Test handling when document is disabled.

        This test verifies:
        - Task handles disabled documents gracefully
        - No index processor operations are performed
        - Task returns early without errors
        """
        # Arrange: Create test data with disabled document
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        document.enabled = False
        db.session.commit()

        segment = self._create_test_segment(document, dataset, tenant, account)

        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)

        # Assert: Verify the task handled the disabled document gracefully
        assert result is None

        # Verify index processor was not called
        mock_index_processor.clean.assert_not_called()

    def test_disable_segment_document_archived(self, db_session_with_containers, mock_index_processor):
        """
        Test handling when document is archived.

        This test verifies:
        - Task handles archived documents gracefully
        - No index processor operations are performed
        - Task returns early without errors
        """
        # Arrange: Create test data with archived document
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        document.archived = True
        db.session.commit()

        segment = self._create_test_segment(document, dataset, tenant, account)

        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)

        # Assert: Verify the task handled the archived document gracefully
        assert result is None

        # Verify index processor was not called
        mock_index_processor.clean.assert_not_called()

    def test_disable_segment_document_indexing_not_completed(self, db_session_with_containers, mock_index_processor):
        """
        Test handling when document indexing is not completed.

        This test verifies:
        - Task handles documents with incomplete indexing gracefully
        - No index processor operations are performed
        - Task returns early without errors
        """
        # Arrange: Create test data with incomplete indexing
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        document.indexing_status = "indexing"
        db.session.commit()

        segment = self._create_test_segment(document, dataset, tenant, account)

        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)

        # Assert: Verify the task handled the incomplete indexing gracefully
        assert result is None

        # Verify index processor was not called
        mock_index_processor.clean.assert_not_called()

    def test_disable_segment_index_processor_exception(self, db_session_with_containers, mock_index_processor):
        """
        Test handling when index processor raises an exception.

        This test verifies:
        - Task handles index processor exceptions gracefully
        - Segment is re-enabled on failure
        - Redis cache is still cleared
        - Database changes are committed
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        segment = self._create_test_segment(document, dataset, tenant, account)

        # Set up Redis cache
        indexing_cache_key = f"segment_{segment.id}_indexing"
        redis_client.setex(indexing_cache_key, 600, 1)

        # Configure mock to raise exception
        mock_index_processor.clean.side_effect = Exception("Index processor error")

        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)

        # Assert: Verify the task handled the exception gracefully
        assert result is None

        # Verify index processor was called
        mock_index_processor.clean.assert_called_once()
        call_args = mock_index_processor.clean.call_args
        # Check that the call was made with the correct parameters
        assert len(call_args[0]) == 2  # Two positional arguments were passed
        assert call_args[0][1] == [segment.index_node_id]  # Check index node IDs

        # Verify segment was re-enabled
        db.session.refresh(segment)
        assert segment.enabled is True

        # Verify Redis cache was still cleared
        assert redis_client.get(indexing_cache_key) is None
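
    # The behavior verified above suggests the task guards the index cleanup
    # roughly like this (an assumed sketch, not the actual implementation):
    #
    #     try:
    #         index_processor.clean(dataset, [segment.index_node_id], ...)
    #     except Exception:
    #         segment.enabled = True  # roll the segment back on failure
    #         db.session.commit()
    #     finally:
    #         redis_client.delete(indexing_cache_key)
    #         db.session.close()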

    def test_disable_segment_different_doc_forms(self, db_session_with_containers, mock_index_processor):
        """
        Test disabling segments with different document forms.

        This test verifies:
        - Task works with different document form types
        - Correct index processor is initialized for each form
        - Index processor clean method is called correctly
        """
        # Test different document forms
        doc_forms = ["text_model", "qa_model", "table_model"]

        for doc_form in doc_forms:
            # Arrange: Create test data for each form
            account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
            dataset = self._create_test_dataset(tenant, account)
            document = self._create_test_document(dataset, tenant, account, doc_form=doc_form)
            segment = self._create_test_segment(document, dataset, tenant, account)

            # Reset mock for each iteration
            mock_index_processor.reset_mock()

            # Act: Execute the task
            result = disable_segment_from_index_task(segment.id)

            # Assert: Verify the task completed successfully
            assert result is None

            # Verify correct index processor was initialized
            mock_index_processor.clean.assert_called_once()
            call_args = mock_index_processor.clean.call_args
            assert call_args[0][0].id == dataset.id  # Check dataset ID
            assert call_args[0][1] == [segment.index_node_id]  # Check index node IDs

    def test_disable_segment_redis_cache_handling(self, db_session_with_containers, mock_index_processor):
        """
        Test Redis cache handling during segment disabling.

        This test verifies:
        - Redis cache is properly set before task execution
        - Cache is cleared after task completion
        - Cache handling works with different scenarios
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        segment = self._create_test_segment(document, dataset, tenant, account)

        # Test with cache present
        indexing_cache_key = f"segment_{segment.id}_indexing"
        redis_client.setex(indexing_cache_key, 600, 1)
        assert redis_client.get(indexing_cache_key) is not None

        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)

        # Assert: Verify cache was cleared
        assert result is None
        assert redis_client.get(indexing_cache_key) is None

        # Test with no cache present
        segment2 = self._create_test_segment(document, dataset, tenant, account)
        result2 = disable_segment_from_index_task(segment2.id)

        # Assert: Verify task still works without cache
        assert result2 is None

    def test_disable_segment_performance_timing(self, db_session_with_containers, mock_index_processor):
        """
        Test performance timing of the segment disabling task.

        This test verifies:
        - Task execution time can be measured with time.perf_counter
        - Task completes within expected time bounds (under 5 seconds)
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        segment = self._create_test_segment(document, dataset, tenant, account)

        # Act: Execute the task and measure time
        start_time = time.perf_counter()
        result = disable_segment_from_index_task(segment.id)
        end_time = time.perf_counter()

        # Assert: Verify task completed successfully and timing is reasonable
        assert result is None
        execution_time = end_time - start_time
        assert execution_time < 5.0  # Should complete within 5 seconds

    def test_disable_segment_database_session_management(self, db_session_with_containers, mock_index_processor):
        """
        Test database session management during task execution.

        This test verifies:
        - The task leaves the database session in a usable state
        - The segment can still be refreshed after task completion
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)
        segment = self._create_test_segment(document, dataset, tenant, account)

        # Act: Execute the task
        result = disable_segment_from_index_task(segment.id)

        # Assert: Verify task completed and session management worked
        assert result is None

        # Verify segment is still accessible (session was properly managed)
        db.session.refresh(segment)
        assert segment.id is not None

    def test_disable_segment_concurrent_execution(self, db_session_with_containers, mock_index_processor):
        """
        Test concurrent execution of segment disabling tasks.

        The tasks are executed sequentially here to simulate concurrent
        submissions. This test verifies:
        - Each task processes its own segment correctly
        - No interference between the task invocations
        """
        # Arrange: Create multiple test segments
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        dataset = self._create_test_dataset(tenant, account)
        document = self._create_test_document(dataset, tenant, account)

        segments = []
        for _ in range(3):
            segment = self._create_test_segment(document, dataset, tenant, account)
            segments.append(segment)

        # Act: Execute tasks sequentially, simulating concurrent submissions
        results = []
        for segment in segments:
            result = disable_segment_from_index_task(segment.id)
            results.append(result)

        # Assert: Verify all tasks completed successfully
        assert all(result is None for result in results)

        # Verify all segments were processed
        assert mock_index_processor.clean.call_count == len(segments)

        # Verify each segment was processed with correct parameters
        for segment in segments:
            # Check that clean was called with this segment's dataset and index_node_id
            found = False
            for call in mock_index_processor.clean.call_args_list:
                if call[0][0].id == dataset.id and call[0][1] == [segment.index_node_id]:
                    found = True
                    break
            assert found, f"Segment {segment.id} was not processed correctly"
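
    # `call_args_list` keeps one `call` entry per invocation, in order, which
    # is why the loop above can match each segment's (dataset, [index_node_id])
    # pair against the recorded calls independently.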
@@ -0,0 +1,733 @@
"""
TestContainers-based integration tests for disable_segments_from_index_task.

This module provides comprehensive integration testing for the disable_segments_from_index_task
using TestContainers to ensure realistic database interactions and proper isolation.
The task is responsible for removing document segments from the search index when they are disabled.
"""

from unittest.mock import MagicMock, patch

from faker import Faker

from models import Account, Dataset, DocumentSegment
from models import Document as DatasetDocument
from models.dataset import DatasetProcessRule
from tasks.disable_segments_from_index_task import disable_segments_from_index_task


class TestDisableSegmentsFromIndexTask:
    """
    Comprehensive integration tests for disable_segments_from_index_task using testcontainers.

    This test class covers all major functionality of the disable_segments_from_index_task:
    - Successful segment disabling with proper index cleanup
    - Error handling for various edge cases
    - Database state validation after task execution
    - Redis cache cleanup verification
    - Index processor integration testing

    All tests use the testcontainers infrastructure to ensure proper database isolation
    and a realistic testing environment with actual database interactions.
    """

    def _create_test_account(self, db_session_with_containers, fake=None):
        """
        Helper method to create a test account with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            fake: Faker instance for generating test data

        Returns:
            Account: Created test account instance
        """
        fake = fake or Faker()
        account = Account(
            email=fake.email(),
            name=fake.name(),
            avatar=fake.url(),
            status="active",
            interface_language="en-US",
        )
        account.id = fake.uuid4()
        # Monkey-patch extra attributes used only for test setup
        account.tenant_id = fake.uuid4()
        account.type = "normal"
        account.role = "owner"
        account.created_at = fake.date_time_this_year()
        account.updated_at = account.created_at

        # Create a tenant for the account
        from models.account import Tenant

        tenant = Tenant(
            name=f"Test Tenant {fake.company()}",
            plan="basic",
            status="active",
        )
        tenant.id = account.tenant_id
        tenant.created_at = fake.date_time_this_year()
        tenant.updated_at = tenant.created_at

        from extensions.ext_database import db

        db.session.add(tenant)
        db.session.add(account)
        db.session.commit()

        # Set the current tenant for the account
        account.current_tenant = tenant

        return account

    def _create_test_dataset(self, db_session_with_containers, account, fake=None):
        """
        Helper method to create a test dataset with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            account: The account creating the dataset
            fake: Faker instance for generating test data

        Returns:
            Dataset: Created test dataset instance
        """
        fake = fake or Faker()
        dataset = Dataset(
            id=fake.uuid4(),
            tenant_id=account.tenant_id,
            name=f"Test Dataset {fake.word()}",
            description=fake.text(max_nb_chars=200),
            provider="vendor",
            permission="only_me",
            data_source_type="upload_file",
            indexing_technique="high_quality",
            created_by=account.id,
            updated_by=account.id,
            embedding_model="text-embedding-ada-002",
            embedding_model_provider="openai",
            built_in_field_enabled=False,
        )

        from extensions.ext_database import db

        db.session.add(dataset)
        db.session.commit()

        return dataset

    def _create_test_document(self, db_session_with_containers, dataset, account, fake=None):
        """
        Helper method to create a test document with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            dataset: The dataset containing the document
            account: The account creating the document
            fake: Faker instance for generating test data

        Returns:
            DatasetDocument: Created test document instance
        """
        fake = fake or Faker()
        document = DatasetDocument()

        document.id = fake.uuid4()
        document.tenant_id = dataset.tenant_id
        document.dataset_id = dataset.id
        document.position = 1
        document.data_source_type = "upload_file"
        document.data_source_info = '{"upload_file_id": "test_file_id"}'
        document.batch = fake.uuid4()
        document.name = f"Test Document {fake.word()}.txt"
        document.created_from = "upload_file"
        document.created_by = account.id
        document.created_api_request_id = fake.uuid4()
        document.processing_started_at = fake.date_time_this_year()
        document.file_id = fake.uuid4()
        document.word_count = fake.random_int(min=100, max=1000)
        document.parsing_completed_at = fake.date_time_this_year()
        document.cleaning_completed_at = fake.date_time_this_year()
        document.splitting_completed_at = fake.date_time_this_year()
        document.tokens = fake.random_int(min=50, max=500)
        document.indexing_started_at = fake.date_time_this_year()
        document.indexing_completed_at = fake.date_time_this_year()
        document.indexing_status = "completed"
        document.enabled = True
        document.archived = False
        document.doc_form = "text_model"  # Use text_model form for testing
        document.doc_language = "en"

        from extensions.ext_database import db

        db.session.add(document)
        db.session.commit()

        return document

    def _create_test_segments(self, db_session_with_containers, document, dataset, account, count=3, fake=None):
        """
        Helper method to create test document segments with realistic data.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            document: The document containing the segments
            dataset: The dataset containing the document
            account: The account creating the segments
            count: Number of segments to create
            fake: Faker instance for generating test data

        Returns:
            List[DocumentSegment]: Created test segment instances
        """
        fake = fake or Faker()
        segments = []

        for i in range(count):
            segment = DocumentSegment()
            segment.id = fake.uuid4()
            segment.tenant_id = dataset.tenant_id
            segment.dataset_id = dataset.id
            segment.document_id = document.id
            segment.position = i + 1
            segment.content = f"Test segment content {i + 1}: {fake.text(max_nb_chars=200)}"
            segment.answer = f"Test answer {i + 1}" if i % 2 == 0 else None
            segment.word_count = fake.random_int(min=10, max=100)
            segment.tokens = fake.random_int(min=5, max=50)
            segment.keywords = [fake.word() for _ in range(3)]
            segment.index_node_id = f"node_{segment.id}"
            segment.index_node_hash = fake.sha256()
            segment.hit_count = 0
            segment.enabled = True
            segment.disabled_at = None
            segment.disabled_by = None
            segment.status = "completed"
            segment.created_by = account.id
            segment.updated_by = account.id
            segment.indexing_at = fake.date_time_this_year()
            segment.completed_at = fake.date_time_this_year()
            segment.error = None
            segment.stopped_at = None

            segments.append(segment)

        from extensions.ext_database import db

        for segment in segments:
            db.session.add(segment)
        db.session.commit()

        return segments

    def _create_dataset_process_rule(self, db_session_with_containers, dataset, fake=None):
        """
        Helper method to create a dataset process rule.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            dataset: The dataset for the process rule
            fake: Faker instance for generating test data

        Returns:
            DatasetProcessRule: Created process rule instance
        """
        fake = fake or Faker()
        process_rule = DatasetProcessRule()
        process_rule.id = fake.uuid4()
        process_rule.tenant_id = dataset.tenant_id
        process_rule.dataset_id = dataset.id
        process_rule.mode = "automatic"
        process_rule.rules = (
            "{"
            '"mode": "automatic", '
            '"rules": {'
            '"pre_processing_rules": [], "segmentation": '
            '{"separator": "\\n\\n", "max_tokens": 1000, "chunk_overlap": 50}}'
            "}"
        )
        process_rule.created_by = dataset.created_by
        process_rule.updated_by = dataset.updated_by

        from extensions.ext_database import db

        db.session.add(process_rule)
        db.session.commit()

        return process_rule
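
    # For reference, the implicit string concatenation above yields this JSON:
    #
    #     {"mode": "automatic", "rules": {"pre_processing_rules": [],
    #      "segmentation": {"separator": "\n\n", "max_tokens": 1000,
    #      "chunk_overlap": 50}}}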

    def test_disable_segments_success(self, db_session_with_containers):
        """
        Test successful disabling of segments from index.

        This test verifies that the task can correctly disable segments from the index
        when all conditions are met, including proper index cleanup and database state updates.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 3, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)

        segment_ids = [segment.id for segment in segments]

        # Mock the index processor to avoid external dependencies
        with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
            mock_processor = MagicMock()
            mock_factory.return_value.init_index_processor.return_value = mock_processor

            # Mock Redis client
            with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
                mock_redis.delete.return_value = True

                # Act
                result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)

                # Assert
                assert result is None  # Task should complete without returning a value

                # Verify index processor was called correctly
                mock_factory.assert_called_once_with(document.doc_form)
                mock_processor.clean.assert_called_once()

                # Verify the call arguments (checking by attributes rather than object identity)
                call_args = mock_processor.clean.call_args
                assert call_args[0][0].id == dataset.id  # First argument should be the dataset
                assert sorted(call_args[0][1]) == sorted(
                    [segment.index_node_id for segment in segments]
                )  # Compare sorted lists to handle any order while preserving duplicates
                assert call_args[1]["with_keywords"] is True
                assert call_args[1]["delete_child_chunks"] is False

                # Verify Redis cache cleanup was called for each segment
                assert mock_redis.delete.call_count == len(segments)
                for segment in segments:
                    expected_key = f"segment_{segment.id}_indexing"
                    mock_redis.delete.assert_any_call(expected_key)
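
    # `assert_any_call` passes if the mock was called with the given arguments
    # at least once, in any position, so the loop above does not depend on the
    # order in which the task deletes the per-segment cache keys.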

    def test_disable_segments_dataset_not_found(self, db_session_with_containers):
        """
        Test handling when dataset is not found.

        This test ensures that the task correctly handles cases where the specified
        dataset doesn't exist, logging appropriate messages and returning early.
        """
        # Arrange
        fake = Faker()
        non_existent_dataset_id = fake.uuid4()
        non_existent_document_id = fake.uuid4()
        segment_ids = [fake.uuid4()]

        # Mock Redis client
        with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
            # Act
            result = disable_segments_from_index_task(segment_ids, non_existent_dataset_id, non_existent_document_id)

            # Assert
            assert result is None  # Task should complete without returning a value
            # Redis should not be called when dataset is not found
            mock_redis.delete.assert_not_called()

    def test_disable_segments_document_not_found(self, db_session_with_containers):
        """
        Test handling when document is not found.

        This test ensures that the task correctly handles cases where the specified
        document doesn't exist, logging appropriate messages and returning early.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        non_existent_document_id = fake.uuid4()
        segment_ids = [fake.uuid4()]

        # Mock Redis client
        with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
            # Act
            result = disable_segments_from_index_task(segment_ids, dataset.id, non_existent_document_id)

            # Assert
            assert result is None  # Task should complete without returning a value
            # Redis should not be called when document is not found
            mock_redis.delete.assert_not_called()

    def test_disable_segments_document_invalid_status(self, db_session_with_containers):
        """
        Test handling when the document has an invalid status for disabling.

        This test ensures that the task correctly handles cases where the document
        is not enabled, is archived, or is not completed, preventing invalid operations.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 2, fake)

        # Test case 1: Document not enabled
        document.enabled = False
        from extensions.ext_database import db

        db.session.commit()

        segment_ids = [segment.id for segment in segments]

        # Mock Redis client
        with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
            # Act
            result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)

            # Assert
            assert result is None  # Task should complete without returning a value
            # Redis should not be called when document status is invalid
            mock_redis.delete.assert_not_called()

        # Test case 2: Document archived
        document.enabled = True
        document.archived = True
        db.session.commit()

        with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
            # Act
            result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)

            # Assert
            assert result is None  # Task should complete without returning a value
            mock_redis.delete.assert_not_called()

        # Test case 3: Document indexing not completed
        document.enabled = True
        document.archived = False
        document.indexing_status = "indexing"
        db.session.commit()

        with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
            # Act
            result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)

            # Assert
            assert result is None  # Task should complete without returning a value
            mock_redis.delete.assert_not_called()

    def test_disable_segments_no_segments_found(self, db_session_with_containers):
        """
        Test handling when no segments are found for the given IDs.

        This test ensures that the task correctly handles cases where the specified
        segment IDs don't exist or don't match the dataset/document criteria.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)

        # Use non-existent segment IDs
        non_existent_segment_ids = [fake.uuid4() for _ in range(3)]

        # Mock Redis client
        with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
            # Act
            result = disable_segments_from_index_task(non_existent_segment_ids, dataset.id, document.id)

            # Assert
            assert result is None  # Task should complete without returning a value
            # Redis should not be called when no segments are found
            mock_redis.delete.assert_not_called()

    def test_disable_segments_index_processor_error(self, db_session_with_containers):
        """
        Test handling when index processor encounters an error.

        This test verifies that the task correctly handles index processor errors
        by rolling back segment states and ensuring proper cleanup.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 2, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)

        segment_ids = [segment.id for segment in segments]

        # Mock the index processor to raise an exception
        with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
            mock_processor = MagicMock()
            mock_processor.clean.side_effect = Exception("Index processor error")
            mock_factory.return_value.init_index_processor.return_value = mock_processor

            # Mock Redis client
            with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
                mock_redis.delete.return_value = True

                # Act
                result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)

                # Assert
                assert result is None  # Task should complete without returning a value

                # Verify segments were rolled back to enabled state
                from extensions.ext_database import db

                db.session.refresh(segments[0])
                db.session.refresh(segments[1])

                # Check that segments are re-enabled after error
                updated_segments = db.session.query(DocumentSegment).where(DocumentSegment.id.in_(segment_ids)).all()

                for segment in updated_segments:
                    assert segment.enabled is True
                    assert segment.disabled_at is None
                    assert segment.disabled_by is None

                # Verify Redis cache cleanup was still called
                assert mock_redis.delete.call_count == len(segments)

    def test_disable_segments_with_different_doc_forms(self, db_session_with_containers):
        """
        Test disabling segments with different document forms.

        This test verifies that the task correctly handles different document forms
        (text_model, qa_model, hierarchical_model) and initializes the appropriate
        index processor for each one.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 2, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)

        segment_ids = [segment.id for segment in segments]

        # Test different document forms
        doc_forms = ["text_model", "qa_model", "hierarchical_model"]

        for doc_form in doc_forms:
            # Update document form
            document.doc_form = doc_form
            from extensions.ext_database import db

            db.session.commit()

            # Mock the index processor factory
            with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
                mock_processor = MagicMock()
                mock_factory.return_value.init_index_processor.return_value = mock_processor

                # Mock Redis client
                with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
                    mock_redis.delete.return_value = True

                    # Act
                    result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)

                    # Assert
                    assert result is None  # Task should complete without returning a value
                    mock_factory.assert_called_with(doc_form)
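
    # `assert_called_with` (unlike `assert_called_once_with`) checks only the
    # most recent call. Because each loop iteration re-patches the factory with
    # a fresh mock, the last recorded call always carries the doc_form under
    # test, making this assertion safe inside the loop.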

    def test_disable_segments_performance_timing(self, db_session_with_containers):
        """
        Test that the task properly measures and logs performance timing.

        This test verifies that the task correctly measures execution time
        and logs performance metrics for monitoring purposes.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 3, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)

        segment_ids = [segment.id for segment in segments]

        # Mock the index processor
        with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
            mock_processor = MagicMock()
            mock_factory.return_value.init_index_processor.return_value = mock_processor

            # Mock Redis client
            with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
                mock_redis.delete.return_value = True

                # Mock time.perf_counter to control timing
                with patch("tasks.disable_segments_from_index_task.time.perf_counter") as mock_perf_counter:
                    mock_perf_counter.side_effect = [1000.0, 1000.5]  # 0.5 seconds execution time

                    # Mock logger to capture log messages
                    with patch("tasks.disable_segments_from_index_task.logger") as mock_logger:
                        # Act
                        result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)

                        # Assert
                        assert result is None  # Task should complete without returning a value

                        # Verify performance logging
                        mock_logger.info.assert_called()
                        log_calls = [call[0][0] for call in mock_logger.info.call_args_list]
                        performance_log = next((call for call in log_calls if "latency" in call), None)
                        assert performance_log is not None
                        assert "0.5" in performance_log  # Should log the execution time

    def test_disable_segments_redis_cache_cleanup(self, db_session_with_containers):
        """
        Test that Redis cache is properly cleaned up for all segments.

        This test verifies that the task correctly removes indexing cache entries
        from Redis for all processed segments, preventing stale cache issues.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 5, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)

        segment_ids = [segment.id for segment in segments]

        # Mock the index processor
        with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
            mock_processor = MagicMock()
            mock_factory.return_value.init_index_processor.return_value = mock_processor

            # Mock Redis client to track delete calls
            with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
                mock_redis.delete.return_value = True

                # Act
                result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)

                # Assert
                assert result is None  # Task should complete without returning a value

                # Verify Redis delete was called for each segment
                assert mock_redis.delete.call_count == len(segments)

                # Verify correct cache keys were used
                expected_keys = [f"segment_{segment.id}_indexing" for segment in segments]
                actual_calls = [call[0][0] for call in mock_redis.delete.call_args_list]

                for expected_key in expected_keys:
                    assert expected_key in actual_calls

    def test_disable_segments_database_session_cleanup(self, db_session_with_containers):
        """
        Test that database session is properly closed after task execution.

        This test verifies that the task correctly manages database sessions
        and ensures proper cleanup to prevent connection leaks.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 2, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)

        segment_ids = [segment.id for segment in segments]

        # Mock the index processor
        with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
            mock_processor = MagicMock()
            mock_factory.return_value.init_index_processor.return_value = mock_processor

            # Mock Redis client
            with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
                mock_redis.delete.return_value = True

                # Mock db.session.close to verify it's called
                with patch("tasks.disable_segments_from_index_task.db.session.close") as mock_close:
                    # Act
                    result = disable_segments_from_index_task(segment_ids, dataset.id, document.id)

                    # Assert
                    assert result is None  # Task should complete without returning a value
                    # Verify session was closed
                    mock_close.assert_called()
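
    # `assert_called()` only checks that close() ran at least once; the task
    # may legitimately close the session more than once (for example in a
    # finally block), so the stricter `assert_called_once()` would be too
    # brittle here.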

    def test_disable_segments_empty_segment_ids(self, db_session_with_containers):
        """
        Test handling when an empty segment IDs list is provided.

        This test ensures that the task correctly handles edge cases where
        an empty list of segment IDs is provided.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)

        empty_segment_ids = []

        # Mock Redis client
        with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
            # Act
            result = disable_segments_from_index_task(empty_segment_ids, dataset.id, document.id)

            # Assert
            assert result is None  # Task should complete without returning a value
            # Redis should not be called when no segments are provided
            mock_redis.delete.assert_not_called()

    def test_disable_segments_mixed_valid_invalid_ids(self, db_session_with_containers):
        """
        Test handling when some segment IDs are valid and others are invalid.

        This test verifies that the task correctly processes only the valid
        segment IDs and ignores invalid ones.
        """
        # Arrange
        fake = Faker()
        account = self._create_test_account(db_session_with_containers, fake)
        dataset = self._create_test_dataset(db_session_with_containers, account, fake)
        document = self._create_test_document(db_session_with_containers, dataset, account, fake)
        segments = self._create_test_segments(db_session_with_containers, document, dataset, account, 2, fake)
        self._create_dataset_process_rule(db_session_with_containers, dataset, fake)

        # Mix valid and invalid segment IDs
        valid_segment_ids = [segment.id for segment in segments]
        invalid_segment_ids = [fake.uuid4() for _ in range(2)]
        mixed_segment_ids = valid_segment_ids + invalid_segment_ids

        # Mock the index processor
        with patch("tasks.disable_segments_from_index_task.IndexProcessorFactory") as mock_factory:
            mock_processor = MagicMock()
            mock_factory.return_value.init_index_processor.return_value = mock_processor

            # Mock Redis client
            with patch("tasks.disable_segments_from_index_task.redis_client") as mock_redis:
                mock_redis.delete.return_value = True

                # Act
                result = disable_segments_from_index_task(mixed_segment_ids, dataset.id, document.id)

                # Assert
                assert result is None  # Task should complete without returning a value

                # Verify index processor was called with only valid segment node IDs
                expected_node_ids = [segment.index_node_id for segment in segments]
                mock_processor.clean.assert_called_once()

                # Verify the call arguments
                call_args = mock_processor.clean.call_args
                assert call_args[0][0].id == dataset.id  # First argument should be the dataset
                assert sorted(call_args[0][1]) == sorted(
                    expected_node_ids
                )  # Compare sorted lists to handle any order while preserving duplicates
                assert call_args[1]["with_keywords"] is True
                assert call_args[1]["delete_child_chunks"] is False

                # Verify Redis cleanup was called only for valid segments
                assert mock_redis.delete.call_count == len(segments)
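
    # The filtering behavior verified here implies the task looks segments up
    # with a query scoped to the dataset and document, roughly (an assumed
    # sketch, not the actual implementation):
    #
    #     segments = (
    #         db.session.query(DocumentSegment)
    #         .where(
    #             DocumentSegment.id.in_(segment_ids),
    #             DocumentSegment.dataset_id == dataset_id,
    #             DocumentSegment.document_id == document_id,
    #         )
    #         .all()
    #     )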
@@ -0,0 +1,887 @@
from dataclasses import asdict
from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from core.entities.document_task import DocumentTask
from enums.cloud_plan import CloudPlan
from extensions.ext_database import db
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document
from tasks.document_indexing_task import (
    _document_indexing,  # Core function
    _document_indexing_with_tenant_queue,  # Tenant queue wrapper function
    document_indexing_task,  # Deprecated old interface
    normal_document_indexing_task,  # New normal task
    priority_document_indexing_task,  # New priority task
)


class TestDocumentIndexingTasks:
    """Integration tests for document indexing tasks using testcontainers.

    This test class covers:
    - Core _document_indexing function
    - Deprecated document_indexing_task function
    - New normal_document_indexing_task function
    - New priority_document_indexing_task function
    - Tenant queue wrapper _document_indexing_with_tenant_queue function
    """

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("tasks.document_indexing_task.IndexingRunner") as mock_indexing_runner,
            patch("tasks.document_indexing_task.FeatureService") as mock_feature_service,
        ):
            # Setup mock indexing runner
            mock_runner_instance = MagicMock()
            mock_indexing_runner.return_value = mock_runner_instance

            # Setup mock feature service
            mock_features = MagicMock()
            mock_features.billing.enabled = False
            mock_feature_service.get_features.return_value = mock_features

            yield {
                "indexing_runner": mock_indexing_runner,
                "indexing_runner_instance": mock_runner_instance,
                "feature_service": mock_feature_service,
                "features": mock_features,
            }

    def _create_test_dataset_and_documents(
        self, db_session_with_containers, mock_external_service_dependencies, document_count=3
    ):
        """
        Helper method to create a test dataset and documents for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            mock_external_service_dependencies: Mock dependencies
            document_count: Number of documents to create

        Returns:
            tuple: (dataset, documents) - Created dataset and document instances
        """
        fake = Faker()

        # Create account and tenant
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()

        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()

        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()

        # Create dataset
        dataset = Dataset(
            id=fake.uuid4(),
            tenant_id=tenant.id,
            name=fake.company(),
            description=fake.text(max_nb_chars=100),
            data_source_type="upload_file",
            indexing_technique="high_quality",
            created_by=account.id,
        )
        db.session.add(dataset)
        db.session.commit()

        # Create documents
        documents = []
        for i in range(document_count):
            document = Document(
                id=fake.uuid4(),
                tenant_id=tenant.id,
                dataset_id=dataset.id,
                position=i,
                data_source_type="upload_file",
                batch="test_batch",
                name=fake.file_name(),
                created_from="upload_file",
                created_by=account.id,
                indexing_status="waiting",
                enabled=True,
            )
            db.session.add(document)
            documents.append(document)

        db.session.commit()

        # Refresh dataset to ensure it's properly loaded
        db.session.refresh(dataset)

        return dataset, documents
|
||||
|
||||
def _create_test_dataset_with_billing_features(
|
||||
self, db_session_with_containers, mock_external_service_dependencies, billing_enabled=True
|
||||
):
|
||||
"""
|
||||
Helper method to create a test dataset with billing features configured.
|
||||
|
||||
Args:
|
||||
db_session_with_containers: Database session from testcontainers infrastructure
|
||||
mock_external_service_dependencies: Mock dependencies
|
||||
billing_enabled: Whether billing is enabled
|
||||
|
||||
Returns:
|
||||
tuple: (dataset, documents) - Created dataset and document instances
|
||||
"""
|
||||
fake = Faker()
|
||||
|
||||
# Create account and tenant
|
||||
account = Account(
|
||||
email=fake.email(),
|
||||
name=fake.name(),
|
||||
interface_language="en-US",
|
||||
status="active",
|
||||
)
|
||||
db.session.add(account)
|
||||
db.session.commit()
|
||||
|
||||
tenant = Tenant(
|
||||
name=fake.company(),
|
||||
status="normal",
|
||||
)
|
||||
db.session.add(tenant)
|
||||
db.session.commit()
|
||||
|
||||
# Create tenant-account join
|
||||
join = TenantAccountJoin(
|
||||
tenant_id=tenant.id,
|
||||
account_id=account.id,
|
||||
role=TenantAccountRole.OWNER,
|
||||
current=True,
|
||||
)
|
||||
db.session.add(join)
|
||||
db.session.commit()
|
||||
|
||||
# Create dataset
|
||||
dataset = Dataset(
|
||||
id=fake.uuid4(),
|
||||
tenant_id=tenant.id,
|
||||
name=fake.company(),
|
||||
description=fake.text(max_nb_chars=100),
|
||||
data_source_type="upload_file",
|
||||
indexing_technique="high_quality",
|
||||
created_by=account.id,
|
||||
)
|
||||
db.session.add(dataset)
|
||||
db.session.commit()
|
||||
|
||||
# Create documents
|
||||
documents = []
|
||||
for i in range(3):
|
||||
document = Document(
|
||||
id=fake.uuid4(),
|
||||
tenant_id=tenant.id,
|
||||
dataset_id=dataset.id,
|
||||
position=i,
|
||||
data_source_type="upload_file",
|
||||
batch="test_batch",
|
||||
name=fake.file_name(),
|
||||
created_from="upload_file",
|
||||
created_by=account.id,
|
||||
indexing_status="waiting",
|
||||
enabled=True,
|
||||
)
|
||||
db.session.add(document)
|
||||
documents.append(document)
|
||||
|
||||
db.session.commit()
|
||||
|
||||
# Configure billing features
|
||||
mock_external_service_dependencies["features"].billing.enabled = billing_enabled
|
||||
if billing_enabled:
|
||||
mock_external_service_dependencies["features"].billing.subscription.plan = CloudPlan.SANDBOX
|
||||
mock_external_service_dependencies["features"].vector_space.limit = 100
|
||||
mock_external_service_dependencies["features"].vector_space.size = 50
|
||||
|
||||
# Refresh dataset to ensure it's properly loaded
|
||||
db.session.refresh(dataset)
|
||||
|
||||
return dataset, documents
|
||||
|
||||
def test_document_indexing_task_success(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test successful document indexing with multiple documents.
|
||||
|
||||
This test verifies:
|
||||
- Proper dataset retrieval from database
|
||||
- Correct document processing and status updates
|
||||
- IndexingRunner integration
|
||||
- Database state updates
|
||||
"""
|
||||
# Arrange: Create test data
|
||||
dataset, documents = self._create_test_dataset_and_documents(
|
||||
db_session_with_containers, mock_external_service_dependencies, document_count=3
|
||||
)
|
||||
document_ids = [doc.id for doc in documents]
|
||||
|
||||
# Act: Execute the task
|
||||
_document_indexing(dataset.id, document_ids)
|
||||
|
||||
# Assert: Verify the expected outcomes
|
||||
# Verify indexing runner was called correctly
|
||||
mock_external_service_dependencies["indexing_runner"].assert_called_once()
|
||||
mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
|
||||
|
||||
# Verify documents were updated to parsing status
|
||||
# Re-query documents from database since _document_indexing uses a different session
|
||||
for doc_id in document_ids:
|
||||
updated_document = db.session.query(Document).where(Document.id == doc_id).first()
|
||||
assert updated_document.indexing_status == "parsing"
|
||||
assert updated_document.processing_started_at is not None
|
||||
|
||||
# Verify the run method was called with correct documents
|
||||
call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args
|
||||
assert call_args is not None
|
||||
processed_documents = call_args[0][0] # First argument should be documents list
|
||||
assert len(processed_documents) == 3
|
||||
|
||||
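    # Sketch of the core flow the assertions above and below track (an
    # assumption inferred from observed behavior, not the production code):
    #
    #     def _document_indexing(dataset_id, document_ids):
    #         dataset = session.query(Dataset).filter_by(id=dataset_id).first()
    #         if not dataset:
    #             return  # early exit: IndexingRunner is never constructed
    #         documents = [...existing docs in this dataset...]
    #         # billing validation may mark documents as "error" and return early
    #         for doc in documents:
    #             doc.indexing_status = "parsing"
    #             doc.processing_started_at = now()
    #         IndexingRunner().run(documents)  # exceptions are caught and logged
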
    def test_document_indexing_task_dataset_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of non-existent dataset.

        This test verifies:
        - Proper error handling for missing datasets
        - Early return without processing
        - Database session cleanup
        - No unnecessary indexing runner calls
        """
        # Arrange: Use non-existent dataset ID
        fake = Faker()
        non_existent_dataset_id = fake.uuid4()
        document_ids = [fake.uuid4() for _ in range(3)]

        # Act: Execute the task with non-existent dataset
        _document_indexing(non_existent_dataset_id, document_ids)

        # Assert: Verify no processing occurred
        mock_external_service_dependencies["indexing_runner"].assert_not_called()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_not_called()

    def test_document_indexing_task_document_not_found_in_dataset(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling when some documents don't exist in the dataset.

        This test verifies:
        - Only existing documents are processed
        - Non-existent documents are ignored
        - Indexing runner receives only valid documents
        - Database state updates correctly
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=2
        )

        # Mix existing and non-existent document IDs
        fake = Faker()
        existing_document_ids = [doc.id for doc in documents]
        non_existent_document_ids = [fake.uuid4() for _ in range(2)]
        all_document_ids = existing_document_ids + non_existent_document_ids

        # Act: Execute the task with mixed document IDs
        _document_indexing(dataset.id, all_document_ids)

        # Assert: Verify only existing documents were processed
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

        # Verify only existing documents were updated
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in existing_document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None

        # Verify the run method was called with only existing documents
        call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args
        assert call_args is not None
        processed_documents = call_args[0][0]  # First argument should be documents list
        assert len(processed_documents) == 2  # Only existing documents

    def test_document_indexing_task_indexing_runner_exception(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of IndexingRunner exceptions.

        This test verifies:
        - Exceptions from IndexingRunner are properly caught
        - Task completes without raising exceptions
        - Database session is properly closed
        - Error logging occurs
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=2
        )
        document_ids = [doc.id for doc in documents]

        # Mock IndexingRunner to raise an exception
        mock_external_service_dependencies["indexing_runner_instance"].run.side_effect = Exception(
            "Indexing runner failed"
        )

        # Act: Execute the task
        _document_indexing(dataset.id, document_ids)

        # Assert: Verify exception was handled gracefully
        # The task should complete without raising exceptions
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

        # Verify documents were still updated to parsing status before the exception
        # Re-query documents from database since _document_indexing closes the session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None

    def test_document_indexing_task_mixed_document_states(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test processing documents with mixed initial states.

        This test verifies:
        - Documents with different initial states are handled correctly
        - Only valid documents are processed
        - Database state updates are consistent
        - IndexingRunner receives correct documents
        """
        # Arrange: Create test data
        dataset, base_documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=2
        )

        # Create additional documents with different states
        fake = Faker()
        extra_documents = []

        # Document with different indexing status
        doc1 = Document(
            id=fake.uuid4(),
            tenant_id=dataset.tenant_id,
            dataset_id=dataset.id,
            position=2,
            data_source_type="upload_file",
            batch="test_batch",
            name=fake.file_name(),
            created_from="upload_file",
            created_by=dataset.created_by,
            indexing_status="completed",  # Already completed
            enabled=True,
        )
        db.session.add(doc1)
        extra_documents.append(doc1)

        # Document with disabled status
        doc2 = Document(
            id=fake.uuid4(),
            tenant_id=dataset.tenant_id,
            dataset_id=dataset.id,
            position=3,
            data_source_type="upload_file",
            batch="test_batch",
            name=fake.file_name(),
            created_from="upload_file",
            created_by=dataset.created_by,
            indexing_status="waiting",
            enabled=False,  # Disabled
        )
        db.session.add(doc2)
        extra_documents.append(doc2)

        db.session.commit()

        all_documents = base_documents + extra_documents
        document_ids = [doc.id for doc in all_documents]

        # Act: Execute the task with mixed document states
        _document_indexing(dataset.id, document_ids)

        # Assert: Verify processing
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

        # Verify all documents were updated to parsing status
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None

        # Verify the run method was called with all documents
        call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args
        assert call_args is not None
        processed_documents = call_args[0][0]  # First argument should be documents list
        assert len(processed_documents) == 4

    def test_document_indexing_task_billing_sandbox_plan_batch_limit(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test billing validation for sandbox plan batch upload limit.

        This test verifies:
        - Sandbox plan batch upload limit enforcement
        - Error handling for batch upload limit exceeded
        - Document status updates to error state
        - Proper error message recording
        """
        # Arrange: Create test data with billing enabled
        dataset, documents = self._create_test_dataset_with_billing_features(
            db_session_with_containers, mock_external_service_dependencies, billing_enabled=True
        )

        # Configure sandbox plan with batch limit
        mock_external_service_dependencies["features"].billing.subscription.plan = CloudPlan.SANDBOX

        # Create more documents than sandbox plan allows (limit is 1)
        fake = Faker()
        extra_documents = []
        for i in range(2):  # Total will be 5 documents (3 existing + 2 new)
            document = Document(
                id=fake.uuid4(),
                tenant_id=dataset.tenant_id,
                dataset_id=dataset.id,
                position=i + 3,
                data_source_type="upload_file",
                batch="test_batch",
                name=fake.file_name(),
                created_from="upload_file",
                created_by=dataset.created_by,
                indexing_status="waiting",
                enabled=True,
            )
            db.session.add(document)
            extra_documents.append(document)

        db.session.commit()
        all_documents = documents + extra_documents
        document_ids = [doc.id for doc in all_documents]

        # Act: Execute the task with too many documents for sandbox plan
        _document_indexing(dataset.id, document_ids)

        # Assert: Verify error handling
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "error"
            assert updated_document.error is not None
            assert "batch upload" in updated_document.error
            assert updated_document.stopped_at is not None

        # Verify no indexing runner was called
        mock_external_service_dependencies["indexing_runner"].assert_not_called()

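    # A hedged sketch of the billing validation the sandbox test above
    # exercises (names and the exact limit are assumptions, not the
    # production code):
    #
    #     features = FeatureService.get_features(dataset.tenant_id)
    #     if features.billing.enabled:
    #         if features.billing.subscription.plan == CloudPlan.SANDBOX and len(document_ids) > batch_limit:
    #             for document in documents:
    #                 document.indexing_status = "error"
    #                 document.error = "batch upload limit exceeded"
    #                 document.stopped_at = now()
    #             return  # IndexingRunner is never invoked
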
    def test_document_indexing_task_billing_disabled_success(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful processing when billing is disabled.

        This test verifies:
        - Processing continues normally when billing is disabled
        - No billing validation occurs
        - Documents are processed successfully
        - IndexingRunner is called correctly
        """
        # Arrange: Create test data with billing disabled
        dataset, documents = self._create_test_dataset_with_billing_features(
            db_session_with_containers, mock_external_service_dependencies, billing_enabled=False
        )

        document_ids = [doc.id for doc in documents]

        # Act: Execute the task with billing disabled
        _document_indexing(dataset.id, document_ids)

        # Assert: Verify successful processing
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

        # Verify documents were updated to parsing status
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None

    def test_document_indexing_task_document_is_paused_error(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of DocumentIsPausedError from IndexingRunner.

        This test verifies:
        - DocumentIsPausedError is properly caught and handled
        - Task completes without raising exceptions
        - Appropriate logging occurs
        - Database session is properly closed
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=2
        )
        document_ids = [doc.id for doc in documents]

        # Mock IndexingRunner to raise DocumentIsPausedError
        from core.indexing_runner import DocumentIsPausedError

        mock_external_service_dependencies["indexing_runner_instance"].run.side_effect = DocumentIsPausedError(
            "Document indexing is paused"
        )

        # Act: Execute the task
        _document_indexing(dataset.id, document_ids)

        # Assert: Verify exception was handled gracefully
        # The task should complete without raising exceptions
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

        # Verify documents were still updated to parsing status before the exception
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None

    # ==================== NEW TESTS FOR REFACTORED FUNCTIONS ====================
    def test_old_document_indexing_task_success(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Test document_indexing_task basic functionality.

        This test verifies:
        - Task function calls the wrapper correctly
        - Basic parameter passing works
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )
        document_ids = [doc.id for doc in documents]

        # Act: Execute the deprecated task (it only takes 2 parameters)
        document_indexing_task(dataset.id, document_ids)

        # Assert: Verify processing occurred (core logic is tested in _document_indexing tests)
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

    def test_normal_document_indexing_task_success(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test normal_document_indexing_task basic functionality.

        This test verifies:
        - Task function calls the wrapper correctly
        - Basic parameter passing works
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )
        document_ids = [doc.id for doc in documents]
        tenant_id = dataset.tenant_id

        # Act: Execute the new normal task
        normal_document_indexing_task(tenant_id, dataset.id, document_ids)

        # Assert: Verify processing occurred (core logic is tested in _document_indexing tests)
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

    def test_priority_document_indexing_task_success(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test priority_document_indexing_task basic functionality.

        This test verifies:
        - Task function calls the wrapper correctly
        - Basic parameter passing works
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )
        document_ids = [doc.id for doc in documents]
        tenant_id = dataset.tenant_id

        # Act: Execute the new priority task
        priority_document_indexing_task(tenant_id, dataset.id, document_ids)

        # Assert: Verify processing occurred (core logic is tested in _document_indexing tests)
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

    def test_document_indexing_with_tenant_queue_success(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test _document_indexing_with_tenant_queue function with no waiting tasks.

        This test verifies:
        - Core indexing logic execution (same as _document_indexing)
        - Tenant queue cleanup when no waiting tasks
        - Task function parameter passing
        - Queue management after processing
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=2
        )
        document_ids = [doc.id for doc in documents]
        tenant_id = dataset.tenant_id

        # Mock the task function
        mock_task_func = MagicMock()

        # Act: Execute the wrapper function
        _document_indexing_with_tenant_queue(tenant_id, dataset.id, document_ids, mock_task_func)

        # Assert: Verify core processing occurred (same as _document_indexing)
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

        # Verify documents were updated (same as _document_indexing)
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None

        # Verify the run method was called with correct documents
        call_args = mock_external_service_dependencies["indexing_runner_instance"].run.call_args
        assert call_args is not None
        processed_documents = call_args[0][0]
        assert len(processed_documents) == 2

        # Verify task function was not called (no waiting tasks)
        mock_task_func.delay.assert_not_called()

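    # The waiting-task payloads below rely on a DocumentTask dataclass
    # serialized via dataclasses.asdict (both imported at module level,
    # above this excerpt). A sketch of the presumed shape, for reference:
    #
    #     @dataclass
    #     class DocumentTask:
    #         tenant_id: str
    #         dataset_id: str
    #         document_ids: list[str]
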
    def test_document_indexing_with_tenant_queue_with_waiting_tasks(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test _document_indexing_with_tenant_queue function with waiting tasks in queue using real Redis.

        This test verifies:
        - Core indexing logic execution
        - Real Redis-based tenant queue processing of waiting tasks
        - Task function calls for waiting tasks
        - Queue management with multiple tasks using actual Redis operations
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )
        document_ids = [doc.id for doc in documents]
        tenant_id = dataset.tenant_id
        dataset_id = dataset.id

        # Mock the task function
        mock_task_func = MagicMock()

        # Use real Redis for TenantIsolatedTaskQueue
        from core.rag.pipeline.queue import TenantIsolatedTaskQueue

        # Create real queue instance
        queue = TenantIsolatedTaskQueue(tenant_id, "document_indexing")

        # Add waiting tasks to the real Redis queue
        waiting_tasks = [
            DocumentTask(tenant_id=tenant_id, dataset_id=dataset.id, document_ids=["waiting-doc-1"]),
            DocumentTask(tenant_id=tenant_id, dataset_id=dataset.id, document_ids=["waiting-doc-2"]),
        ]
        # Convert DocumentTask objects to dictionaries for serialization
        waiting_task_dicts = [asdict(task) for task in waiting_tasks]
        queue.push_tasks(waiting_task_dicts)

        # Act: Execute the wrapper function
        _document_indexing_with_tenant_queue(tenant_id, dataset.id, document_ids, mock_task_func)

        # Assert: Verify core processing occurred
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

        # Verify the task function was dispatched once, for the next waiting task
        # (the wrapper promotes one queued task at a time)
        assert mock_task_func.delay.call_count == 1

        # Verify correct parameters for the dispatched call
        calls = mock_task_func.delay.call_args_list
        assert calls[0][1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}

        # Verify one task remains in the queue (only the first waiting task was pulled)
        remaining_tasks = queue.pull_tasks(count=10)  # Pull more than we added
        assert len(remaining_tasks) == 1

    def test_document_indexing_with_tenant_queue_error_handling(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test error handling in _document_indexing_with_tenant_queue using real Redis.

        This test verifies:
        - Exception handling during core processing
        - Tenant queue cleanup even on errors using real Redis
        - Proper error logging
        - Function completes without raising exceptions
        - Queue management continues despite core processing errors
        """
        # Arrange: Create test data
        dataset, documents = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )
        document_ids = [doc.id for doc in documents]
        tenant_id = dataset.tenant_id
        dataset_id = dataset.id

        # Mock IndexingRunner to raise an exception
        mock_external_service_dependencies["indexing_runner_instance"].run.side_effect = Exception("Test error")

        # Mock the task function
        mock_task_func = MagicMock()

        # Use real Redis for TenantIsolatedTaskQueue
        from core.rag.pipeline.queue import TenantIsolatedTaskQueue

        # Create real queue instance
        queue = TenantIsolatedTaskQueue(tenant_id, "document_indexing")

        # Add waiting task to the real Redis queue
        waiting_task = DocumentTask(tenant_id=tenant_id, dataset_id=dataset.id, document_ids=["waiting-doc-1"])
        queue.push_tasks([asdict(waiting_task)])

        # Act: Execute the wrapper function
        _document_indexing_with_tenant_queue(tenant_id, dataset.id, document_ids, mock_task_func)

        # Assert: Verify error was handled gracefully
        # The function should not raise exceptions
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

        # Verify documents were still updated to parsing status before the exception
        # Re-query documents from database since _document_indexing uses a different session
        for doc_id in document_ids:
            updated_document = db.session.query(Document).where(Document.id == doc_id).first()
            assert updated_document.indexing_status == "parsing"
            assert updated_document.processing_started_at is not None

        # Verify waiting task was still processed despite core processing error
        mock_task_func.delay.assert_called_once()

        # Verify correct parameters for the call
        call = mock_task_func.delay.call_args
        assert call[1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}

        # Verify queue is empty after processing (task was pulled)
        remaining_tasks = queue.pull_tasks(count=10)
        assert len(remaining_tasks) == 0

    def test_document_indexing_with_tenant_queue_tenant_isolation(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test tenant isolation in _document_indexing_with_tenant_queue using real Redis.

        This test verifies:
        - Different tenants have isolated queues
        - Tasks from one tenant don't affect another tenant's queue
        - Queue operations are properly scoped to tenant
        """
        # Arrange: Create test data for two different tenants
        dataset1, documents1 = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )
        dataset2, documents2 = self._create_test_dataset_and_documents(
            db_session_with_containers, mock_external_service_dependencies, document_count=1
        )

        tenant1_id = dataset1.tenant_id
        tenant2_id = dataset2.tenant_id
        dataset1_id = dataset1.id
        dataset2_id = dataset2.id
        document_ids1 = [doc.id for doc in documents1]
        document_ids2 = [doc.id for doc in documents2]

        # Mock the task function
        mock_task_func = MagicMock()

        # Use real Redis for TenantIsolatedTaskQueue
        from core.rag.pipeline.queue import TenantIsolatedTaskQueue

        # Create queue instances for both tenants
        queue1 = TenantIsolatedTaskQueue(tenant1_id, "document_indexing")
        queue2 = TenantIsolatedTaskQueue(tenant2_id, "document_indexing")

        # Add waiting tasks to both queues
        waiting_task1 = DocumentTask(tenant_id=tenant1_id, dataset_id=dataset1.id, document_ids=["tenant1-doc-1"])
        waiting_task2 = DocumentTask(tenant_id=tenant2_id, dataset_id=dataset2.id, document_ids=["tenant2-doc-1"])

        queue1.push_tasks([asdict(waiting_task1)])
        queue2.push_tasks([asdict(waiting_task2)])

        # Act: Execute the wrapper function for tenant1 only
        _document_indexing_with_tenant_queue(tenant1_id, dataset1.id, document_ids1, mock_task_func)

        # Assert: Verify core processing occurred for tenant1
        mock_external_service_dependencies["indexing_runner"].assert_called_once()
        mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()

        # Verify only tenant1's waiting task was processed
        mock_task_func.delay.assert_called_once()
        call = mock_task_func.delay.call_args
        assert call[1] == {"tenant_id": tenant1_id, "dataset_id": dataset1_id, "document_ids": ["tenant1-doc-1"]}

        # Verify tenant1's queue is empty
        remaining_tasks1 = queue1.pull_tasks(count=10)
        assert len(remaining_tasks1) == 0

        # Verify tenant2's queue still has its task (isolation)
        remaining_tasks2 = queue2.pull_tasks(count=10)
        assert len(remaining_tasks2) == 1

        # Verify queue keys are different
        assert queue1._queue != queue2._queue
        assert queue1._task_key != queue2._task_key
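
    # Tenant isolation above relies on per-tenant Redis keys inside
    # TenantIsolatedTaskQueue. A plausible sketch of the key derivation
    # (an assumption only; the real key format may differ):
    #
    #     self._queue = f"tenant_isolated_task_queue:{tenant_id}:{queue_name}"
    #     self._task_key = f"{self._queue}:waiting_tasks"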
@@ -0,0 +1,450 @@
from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from core.rag.index_processor.constant.index_type import IndexType
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Dataset, Document, DocumentSegment
from tasks.enable_segments_to_index_task import enable_segments_to_index_task


class TestEnableSegmentsToIndexTask:
    """Integration tests for enable_segments_to_index_task using testcontainers."""

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("tasks.enable_segments_to_index_task.IndexProcessorFactory") as mock_index_processor_factory,
        ):
            # Setup mock index processor
            mock_processor = MagicMock()
            mock_index_processor_factory.return_value.init_index_processor.return_value = mock_processor

            yield {
                "index_processor_factory": mock_index_processor_factory,
                "index_processor": mock_processor,
            }

    def _create_test_dataset_and_document(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Helper method to create a test dataset and document for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            mock_external_service_dependencies: Mock dependencies

        Returns:
            tuple: (dataset, document) - Created dataset and document instances
        """
        fake = Faker()

        # Create account and tenant
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()

        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()

        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()

        # Create dataset
        dataset = Dataset(
            id=fake.uuid4(),
            tenant_id=tenant.id,
            name=fake.company(),
            description=fake.text(max_nb_chars=100),
            data_source_type="upload_file",
            indexing_technique="high_quality",
            created_by=account.id,
        )
        db.session.add(dataset)
        db.session.commit()

        # Create document
        document = Document(
            id=fake.uuid4(),
            tenant_id=tenant.id,
            dataset_id=dataset.id,
            position=1,
            data_source_type="upload_file",
            batch="test_batch",
            name=fake.file_name(),
            created_from="upload_file",
            created_by=account.id,
            indexing_status="completed",
            enabled=True,
            doc_form=IndexType.PARAGRAPH_INDEX,
        )
        db.session.add(document)
        db.session.commit()

        # Refresh dataset to ensure doc_form property works correctly
        db.session.refresh(dataset)

        return dataset, document

    def _create_test_segments(
        self, db_session_with_containers, document, dataset, count=3, enabled=False, status="completed"
    ):
        """
        Helper method to create test document segments.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            document: Document instance
            dataset: Dataset instance
            count: Number of segments to create
            enabled: Whether segments should be enabled
            status: Status of the segments

        Returns:
            list: List of created DocumentSegment instances
        """
        fake = Faker()
        segments = []

        for i in range(count):
            text = fake.text(max_nb_chars=200)
            segment = DocumentSegment(
                id=fake.uuid4(),
                tenant_id=document.tenant_id,
                dataset_id=dataset.id,
                document_id=document.id,
                position=i,
                content=text,
                word_count=len(text.split()),
                tokens=len(text.split()) * 2,
                index_node_id=f"node_{i}",
                index_node_hash=f"hash_{i}",
                enabled=enabled,
                status=status,
                created_by=document.created_by,
            )
            db.session.add(segment)
            segments.append(segment)

        db.session.commit()
        return segments

    def test_enable_segments_to_index_with_different_index_type(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test segments indexing with different index types.

        This test verifies:
        - Proper handling of different index types
        - Index processor factory integration
        - Document processing with various configurations
        - Redis cache key deletion
        """
        # Arrange: Create test data with different index type
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Update document to use different index type
        document.doc_form = IndexType.QA_INDEX
        db.session.commit()

        # Refresh dataset to ensure doc_form property reflects the updated document
        db.session.refresh(dataset)

        # Create segments
        segments = self._create_test_segments(db_session_with_containers, document, dataset)

        # Set up Redis cache keys
        segment_ids = [segment.id for segment in segments]
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            redis_client.set(indexing_cache_key, "processing", ex=300)

        # Act: Execute the task
        enable_segments_to_index_task(segment_ids, dataset.id, document.id)

        # Assert: Verify different index type handling
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.QA_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_called_once()

        # Verify the load method was called with correct parameters
        call_args = mock_external_service_dependencies["index_processor"].load.call_args
        assert call_args is not None
        documents = call_args[0][1]  # Second argument should be documents list
        assert len(documents) == 3

        # Verify Redis cache keys were deleted
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            assert redis_client.exists(indexing_cache_key) == 0

    def test_enable_segments_to_index_dataset_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of non-existent dataset.

        This test verifies:
        - Proper error handling for missing datasets
        - Early return without processing
        - Database session cleanup
        - No unnecessary index processor calls
        """
        # Arrange: Use non-existent dataset ID
        fake = Faker()
        non_existent_dataset_id = fake.uuid4()
        non_existent_document_id = fake.uuid4()
        segment_ids = [fake.uuid4()]

        # Act: Execute the task with non-existent dataset
        enable_segments_to_index_task(segment_ids, non_existent_dataset_id, non_existent_document_id)

        # Assert: Verify no processing occurred
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()

    def test_enable_segments_to_index_document_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of non-existent document.

        This test verifies:
        - Proper error handling for missing documents
        - Early return without processing
        - Database session cleanup
        - No unnecessary index processor calls
        """
        # Arrange: Create dataset but use non-existent document ID
        dataset, _ = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        fake = Faker()
        non_existent_document_id = fake.uuid4()
        segment_ids = [fake.uuid4()]

        # Act: Execute the task with non-existent document
        enable_segments_to_index_task(segment_ids, dataset.id, non_existent_document_id)

        # Assert: Verify no processing occurred
        mock_external_service_dependencies["index_processor_factory"].assert_not_called()
        mock_external_service_dependencies["index_processor"].load.assert_not_called()

    def test_enable_segments_to_index_invalid_document_status(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling of document with invalid status.

        This test verifies:
        - Early return when document is disabled, archived, or not completed
        - No index processing for documents not ready for indexing
        - Proper database session cleanup
        - No unnecessary external service calls
        """
        # Arrange: Create test data with invalid document status
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Test different invalid statuses
        invalid_statuses = [
            ("disabled", {"enabled": False}),
            ("archived", {"archived": True}),
            ("not_completed", {"indexing_status": "processing"}),
        ]

        for _, status_attrs in invalid_statuses:
            # Reset document status
            document.enabled = True
            document.archived = False
            document.indexing_status = "completed"
            db.session.commit()

            # Set invalid status
            for attr, value in status_attrs.items():
                setattr(document, attr, value)
            db.session.commit()

            # Create segments
            segments = self._create_test_segments(db_session_with_containers, document, dataset)
            segment_ids = [segment.id for segment in segments]

            # Act: Execute the task
            enable_segments_to_index_task(segment_ids, dataset.id, document.id)

            # Assert: Verify no processing occurred
            mock_external_service_dependencies["index_processor_factory"].assert_not_called()
            mock_external_service_dependencies["index_processor"].load.assert_not_called()

            # Clean up segments for next iteration
            for segment in segments:
                db.session.delete(segment)
            db.session.commit()

    def test_enable_segments_to_index_segments_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test handling when no segments are found.

        This test verifies:
        - Proper handling when segments don't exist
        - Early return without processing
        - Database session cleanup
        - Index processor is created but load is not called
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Use non-existent segment IDs
        fake = Faker()
        non_existent_segment_ids = [fake.uuid4() for _ in range(3)]

        # Act: Execute the task with non-existent segments
        enable_segments_to_index_task(non_existent_segment_ids, dataset.id, document.id)

        # Assert: Verify index processor was created but load was not called
        mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(IndexType.PARAGRAPH_INDEX)
        mock_external_service_dependencies["index_processor"].load.assert_not_called()

    def test_enable_segments_to_index_with_parent_child_structure(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test segments indexing with parent-child structure.

        This test verifies:
        - Proper handling of PARENT_CHILD_INDEX type
        - Child document creation from segments
        - Correct document structure for parent-child indexing
        - Index processor receives properly structured documents
        - Redis cache key deletion
        """
        # Arrange: Create test data with parent-child index type
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Update document to use parent-child index type
        document.doc_form = IndexType.PARENT_CHILD_INDEX
        db.session.commit()

        # Refresh dataset to ensure doc_form property reflects the updated document
        db.session.refresh(dataset)

        # Create segments with mock child chunks
        segments = self._create_test_segments(db_session_with_containers, document, dataset)

        # Set up Redis cache keys
        segment_ids = [segment.id for segment in segments]
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            redis_client.set(indexing_cache_key, "processing", ex=300)

        # Mock the get_child_chunks method for each segment
        with patch.object(DocumentSegment, "get_child_chunks") as mock_get_child_chunks:
            # Setup mock to return child chunks for each segment
            mock_child_chunks = []
            for i in range(2):  # Each segment has 2 child chunks
                mock_child = MagicMock()
                mock_child.content = f"child_content_{i}"
                mock_child.index_node_id = f"child_node_{i}"
                mock_child.index_node_hash = f"child_hash_{i}"
                mock_child_chunks.append(mock_child)

            mock_get_child_chunks.return_value = mock_child_chunks

            # Act: Execute the task
            enable_segments_to_index_task(segment_ids, dataset.id, document.id)

            # Assert: Verify parent-child index processing
            mock_external_service_dependencies["index_processor_factory"].assert_called_once_with(
                IndexType.PARENT_CHILD_INDEX
            )
            mock_external_service_dependencies["index_processor"].load.assert_called_once()

            # Verify the load method was called with correct parameters
            call_args = mock_external_service_dependencies["index_processor"].load.call_args
            assert call_args is not None
            documents = call_args[0][1]  # Second argument should be documents list
            assert len(documents) == 3  # 3 segments

            # Verify each document has children
            for doc in documents:
                assert hasattr(doc, "children")
                assert len(doc.children) == 2  # Each document has 2 children

            # Verify Redis cache keys were deleted
            for segment in segments:
                indexing_cache_key = f"segment_{segment.id}_indexing"
                assert redis_client.exists(indexing_cache_key) == 0

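    # A hedged sketch of how the task presumably assembles parent/child
    # documents for PARENT_CHILD_INDEX (class names here are assumptions
    # inferred from the assertions above, not the production code):
    #
    #     rag_doc = RAGDocument(page_content=segment.content,
    #                           metadata={"doc_id": segment.index_node_id})
    #     rag_doc.children = [
    #         ChildDocument(page_content=c.content,
    #                       metadata={"doc_id": c.index_node_id})
    #         for c in segment.get_child_chunks()
    #     ]
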
    def test_enable_segments_to_index_general_exception_handling(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test general exception handling during indexing process.

        This test verifies:
        - Exceptions are properly caught and handled
        - Segment status is set to error
        - Segments are disabled
        - Error information is recorded
        - Redis cache is still cleared
        - Database session is properly closed
        """
        # Arrange: Create test data
        dataset, document = self._create_test_dataset_and_document(
            db_session_with_containers, mock_external_service_dependencies
        )
        segments = self._create_test_segments(db_session_with_containers, document, dataset)

        # Set up Redis cache keys
        segment_ids = [segment.id for segment in segments]
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            redis_client.set(indexing_cache_key, "processing", ex=300)

        # Mock the index processor to raise an exception
        mock_external_service_dependencies["index_processor"].load.side_effect = Exception("Index processing failed")

        # Act: Execute the task
        enable_segments_to_index_task(segment_ids, dataset.id, document.id)

        # Assert: Verify error handling
        for segment in segments:
            db.session.refresh(segment)
            assert segment.enabled is False
            assert segment.status == "error"
            assert segment.error is not None
            assert "Index processing failed" in segment.error
            assert segment.disabled_at is not None

        # Verify Redis cache keys were still cleared despite error
        for segment in segments:
            indexing_cache_key = f"segment_{segment.id}_indexing"
            assert redis_client.exists(indexing_cache_key) == 0
@@ -0,0 +1,242 @@
from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from extensions.ext_database import db
from libs.email_i18n import EmailType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from tasks.mail_account_deletion_task import send_account_deletion_verification_code, send_deletion_success_task


class TestMailAccountDeletionTask:
    """Integration tests for mail account deletion tasks using testcontainers."""

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("tasks.mail_account_deletion_task.mail") as mock_mail,
            patch("tasks.mail_account_deletion_task.get_email_i18n_service") as mock_get_email_service,
        ):
            # Setup mock mail service
            mock_mail.is_inited.return_value = True

            # Setup mock email service
            mock_email_service = MagicMock()
            mock_get_email_service.return_value = mock_email_service

            yield {
                "mail": mock_mail,
                "get_email_service": mock_get_email_service,
                "email_service": mock_email_service,
            }

    def _create_test_account(self, db_session_with_containers):
        """
        Helper method to create a test account for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure

        Returns:
            Account: Created account instance
        """
        fake = Faker()

        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()

        # Create tenant
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()

        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()

        return account

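    # Presumed shape of the task under test (a sketch under stated
    # assumptions, not the production code): check mail.is_inited(),
    # then delegate to the i18n email service.
    #
    #     def send_deletion_success_task(to: str, language: str = "en-US"):
    #         if not mail.is_inited():
    #             return
    #         get_email_i18n_service().send_email(
    #             email_type=EmailType.ACCOUNT_DELETION_SUCCESS,
    #             language_code=language,
    #             to=to,
    #             template_context={"to": to, "email": to},
    #         )
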
def test_send_deletion_success_task_success(self, db_session_with_containers, mock_external_service_dependencies):
|
||||
"""
|
||||
Test successful account deletion success email sending.
|
||||
|
||||
This test verifies:
|
||||
- Proper email service initialization check
|
||||
- Correct email service method calls
|
||||
- Template context is properly formatted
|
||||
- Email type is correctly specified
|
||||
"""
|
||||
# Arrange: Create test data
|
||||
account = self._create_test_account(db_session_with_containers)
|
||||
test_email = account.email
|
||||
test_language = "en-US"
|
||||
|
||||
# Act: Execute the task
|
||||
send_deletion_success_task(test_email, test_language)
|
||||
|
||||
# Assert: Verify the expected outcomes
|
||||
# Verify mail service was checked
|
||||
mock_external_service_dependencies["mail"].is_inited.assert_called_once()
|
||||
|
||||
# Verify email service was retrieved
|
||||
mock_external_service_dependencies["get_email_service"].assert_called_once()
|
||||
|
||||
# Verify email was sent with correct parameters
|
||||
mock_external_service_dependencies["email_service"].send_email.assert_called_once_with(
|
||||
email_type=EmailType.ACCOUNT_DELETION_SUCCESS,
|
||||
language_code=test_language,
|
||||
to=test_email,
|
||||
template_context={
|
||||
"to": test_email,
|
||||
"email": test_email,
|
||||
},
|
||||
)
|
||||
|
||||
def test_send_deletion_success_task_mail_not_initialized(
|
||||
self, db_session_with_containers, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test account deletion success email when mail service is not initialized.
|
||||
|
||||
This test verifies:
|
||||
- Early return when mail service is not initialized
|
||||
- No email service calls are made
|
||||
- No exceptions are raised
|
||||
"""
|
||||
# Arrange: Setup mail service to return not initialized
|
||||
mock_external_service_dependencies["mail"].is_inited.return_value = False
|
||||
account = self._create_test_account(db_session_with_containers)
|
||||
test_email = account.email
|
||||
|
||||
# Act: Execute the task
|
||||
send_deletion_success_task(test_email)
|
||||
|
||||
# Assert: Verify no email service calls were made
|
||||
mock_external_service_dependencies["get_email_service"].assert_not_called()
|
||||
mock_external_service_dependencies["email_service"].send_email.assert_not_called()
|
||||
|
||||
    def test_send_deletion_success_task_email_service_exception(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test account deletion success email when email service raises exception.

        This test verifies:
        - Exception is properly caught and logged
        - Task completes without raising exception
        - Error logging is recorded
        """
        # Arrange: Setup email service to raise exception
        mock_external_service_dependencies["email_service"].send_email.side_effect = Exception("Email service failed")
        account = self._create_test_account(db_session_with_containers)
        test_email = account.email

        # Act: Execute the task (should not raise exception)
        send_deletion_success_task(test_email)

        # Assert: Verify email service was called but exception was handled
        mock_external_service_dependencies["email_service"].send_email.assert_called_once()

    def test_send_account_deletion_verification_code_success(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful account deletion verification code email sending.

        This test verifies:
        - Proper email service initialization check
        - Correct email service method calls
        - Template context includes verification code
        - Email type is correctly specified
        """
        # Arrange: Create test data
        account = self._create_test_account(db_session_with_containers)
        test_email = account.email
        test_code = "123456"
        test_language = "en-US"

        # Act: Execute the task
        send_account_deletion_verification_code(test_email, test_code, test_language)

        # Assert: Verify the expected outcomes
        # Verify mail service was checked
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()

        # Verify email service was retrieved
        mock_external_service_dependencies["get_email_service"].assert_called_once()

        # Verify email was sent with correct parameters
        mock_external_service_dependencies["email_service"].send_email.assert_called_once_with(
            email_type=EmailType.ACCOUNT_DELETION_VERIFICATION,
            language_code=test_language,
            to=test_email,
            template_context={
                "to": test_email,
                "code": test_code,
            },
        )

    def test_send_account_deletion_verification_code_mail_not_initialized(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test account deletion verification code email when mail service is not initialized.

        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls are made
        - No exceptions are raised
        """
        # Arrange: Setup mail service to return not initialized
        mock_external_service_dependencies["mail"].is_inited.return_value = False
        account = self._create_test_account(db_session_with_containers)
        test_email = account.email
        test_code = "123456"

        # Act: Execute the task
        send_account_deletion_verification_code(test_email, test_code)

        # Assert: Verify no email service calls were made
        mock_external_service_dependencies["get_email_service"].assert_not_called()
        mock_external_service_dependencies["email_service"].send_email.assert_not_called()

    def test_send_account_deletion_verification_code_email_service_exception(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test account deletion verification code email when email service raises exception.

        This test verifies:
        - Exception is properly caught and logged
        - Task completes without raising exception
        - Error logging is recorded
        """
        # Arrange: Setup email service to raise exception
        mock_external_service_dependencies["email_service"].send_email.side_effect = Exception("Email service failed")
        account = self._create_test_account(db_session_with_containers)
        test_email = account.email
        test_code = "123456"

        # Act: Execute the task (should not raise exception)
        send_account_deletion_verification_code(test_email, test_code)

        # Assert: Verify email service was called but exception was handled
        mock_external_service_dependencies["email_service"].send_email.assert_called_once()
@@ -0,0 +1,282 @@
from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from libs.email_i18n import EmailType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from tasks.mail_change_mail_task import send_change_mail_completed_notification_task, send_change_mail_task


class TestMailChangeMailTask:
    """Integration tests for mail_change_mail_task using testcontainers."""

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("tasks.mail_change_mail_task.mail") as mock_mail,
            patch("tasks.mail_change_mail_task.get_email_i18n_service") as mock_get_email_i18n_service,
        ):
            # Setup mock mail service
            mock_mail.is_inited.return_value = True

            # Setup mock email i18n service
            mock_email_service = MagicMock()
            mock_get_email_i18n_service.return_value = mock_email_service

            yield {
                "mail": mock_mail,
                "email_i18n_service": mock_email_service,
                "get_email_i18n_service": mock_get_email_i18n_service,
            }

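    # The patch targets name these symbols as they are imported inside
    # tasks.mail_change_mail_task; patching their defining modules instead
    # would leave the task holding the real objects.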
    def _create_test_account(self, db_session_with_containers):
        """
        Helper method to create a test account for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure

        Returns:
            Account: Created account instance
        """
        fake = Faker()

        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db_session_with_containers.add(account)
        db_session_with_containers.commit()

        # Create tenant
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db_session_with_containers.add(tenant)
        db_session_with_containers.commit()

        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db_session_with_containers.add(join)
        db_session_with_containers.commit()

        return account

    def test_send_change_mail_task_success_old_email_phase(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful change email task execution for old_email phase.

        This test verifies:
        - Proper mail service initialization check
        - Correct email service method call with old_email phase
        - Successful task completion
        """
        # Arrange: Create test data
        account = self._create_test_account(db_session_with_containers)
        test_language = "en-US"
        test_email = account.email
        test_code = "123456"
        test_phase = "old_email"

        # Act: Execute the task
        send_change_mail_task(test_language, test_email, test_code, test_phase)

        # Assert: Verify the expected outcomes
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()
        mock_external_service_dependencies["get_email_i18n_service"].assert_called_once()
        mock_external_service_dependencies["email_i18n_service"].send_change_email.assert_called_once_with(
            language_code=test_language,
            to=test_email,
            code=test_code,
            phase=test_phase,
        )

    def test_send_change_mail_task_success_new_email_phase(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful change email task execution for new_email phase.

        This test verifies:
        - Proper mail service initialization check
        - Correct email service method call with new_email phase
        - Successful task completion
        """
        # Arrange: Create test data
        account = self._create_test_account(db_session_with_containers)
        test_language = "zh-Hans"
        test_email = "new@example.com"
        test_code = "789012"
        test_phase = "new_email"

        # Act: Execute the task
        send_change_mail_task(test_language, test_email, test_code, test_phase)

        # Assert: Verify the expected outcomes
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()
        mock_external_service_dependencies["get_email_i18n_service"].assert_called_once()
        mock_external_service_dependencies["email_i18n_service"].send_change_email.assert_called_once_with(
            language_code=test_language,
            to=test_email,
            code=test_code,
            phase=test_phase,
        )

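    # The "old_email" and "new_email" phases presumably map to the two steps of the
    # change-email flow: first verifying the current address, then the new one; the
    # task itself just forwards the phase string to the i18n email service.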
    def test_send_change_mail_task_mail_not_initialized(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test change email task when mail service is not initialized.

        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls when mail is not available
        """
        # Arrange: Setup mail service as not initialized
        mock_external_service_dependencies["mail"].is_inited.return_value = False
        test_language = "en-US"
        test_email = "test@example.com"
        test_code = "123456"
        test_phase = "old_email"

        # Act: Execute the task
        send_change_mail_task(test_language, test_email, test_code, test_phase)

        # Assert: Verify no email service calls
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()
        mock_external_service_dependencies["get_email_i18n_service"].assert_not_called()
        mock_external_service_dependencies["email_i18n_service"].send_change_email.assert_not_called()

    def test_send_change_mail_task_email_service_exception(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test change email task when email service raises an exception.

        This test verifies:
        - Exception is properly caught and logged
        - Task completes without raising exception
        """
        # Arrange: Setup email service to raise exception
        mock_external_service_dependencies["email_i18n_service"].send_change_email.side_effect = Exception(
            "Email service failed"
        )
        test_language = "en-US"
        test_email = "test@example.com"
        test_code = "123456"
        test_phase = "old_email"

        # Act: Execute the task (should not raise exception)
        send_change_mail_task(test_language, test_email, test_code, test_phase)

        # Assert: Verify email service was called despite exception
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()
        mock_external_service_dependencies["get_email_i18n_service"].assert_called_once()
        mock_external_service_dependencies["email_i18n_service"].send_change_email.assert_called_once_with(
            language_code=test_language,
            to=test_email,
            code=test_code,
            phase=test_phase,
        )

    def test_send_change_mail_completed_notification_task_success(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful change email completed notification task execution.

        This test verifies:
        - Proper mail service initialization check
        - Correct email service method call with CHANGE_EMAIL_COMPLETED type
        - Template context is properly constructed
        - Successful task completion
        """
        # Arrange: Create test data
        account = self._create_test_account(db_session_with_containers)
        test_language = "en-US"
        test_email = account.email

        # Act: Execute the task
        send_change_mail_completed_notification_task(test_language, test_email)

        # Assert: Verify the expected outcomes
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()
        mock_external_service_dependencies["get_email_i18n_service"].assert_called_once()
        mock_external_service_dependencies["email_i18n_service"].send_email.assert_called_once_with(
            email_type=EmailType.CHANGE_EMAIL_COMPLETED,
            language_code=test_language,
            to=test_email,
            template_context={
                "to": test_email,
                "email": test_email,
            },
        )

    def test_send_change_mail_completed_notification_task_mail_not_initialized(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test change email completed notification task when mail service is not initialized.

        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls when mail is not available
        """
        # Arrange: Setup mail service as not initialized
        mock_external_service_dependencies["mail"].is_inited.return_value = False
        test_language = "en-US"
        test_email = "test@example.com"

        # Act: Execute the task
        send_change_mail_completed_notification_task(test_language, test_email)

        # Assert: Verify no email service calls
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()
        mock_external_service_dependencies["get_email_i18n_service"].assert_not_called()
        mock_external_service_dependencies["email_i18n_service"].send_email.assert_not_called()

    def test_send_change_mail_completed_notification_task_email_service_exception(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test change email completed notification task when email service raises an exception.

        This test verifies:
        - Exception is properly caught and logged
        - Task completes without raising exception
        """
        # Arrange: Setup email service to raise exception
        mock_external_service_dependencies["email_i18n_service"].send_email.side_effect = Exception(
            "Email service failed"
        )
        test_language = "en-US"
        test_email = "test@example.com"

        # Act: Execute the task (should not raise exception)
        send_change_mail_completed_notification_task(test_language, test_email)

        # Assert: Verify email service was called despite exception
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()
        mock_external_service_dependencies["get_email_i18n_service"].assert_called_once()
        mock_external_service_dependencies["email_i18n_service"].send_email.assert_called_once_with(
            email_type=EmailType.CHANGE_EMAIL_COMPLETED,
            language_code=test_language,
            to=test_email,
            template_context={
                "to": test_email,
                "email": test_email,
            },
        )
@@ -0,0 +1,598 @@
"""
TestContainers-based integration tests for send_email_code_login_mail_task.

This module provides comprehensive integration tests for the email code login mail task
using TestContainers infrastructure. The tests ensure that the task properly sends
email verification codes for login with internationalization support and handles
various error scenarios in a real database environment.

All tests use the testcontainers infrastructure to ensure proper database isolation
and realistic testing scenarios with actual PostgreSQL and Redis instances.
"""

from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from libs.email_i18n import EmailType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from tasks.mail_email_code_login import send_email_code_login_mail_task


class TestSendEmailCodeLoginMailTask:
    """
    Comprehensive integration tests for send_email_code_login_mail_task using testcontainers.

    This test class covers all major functionality of the email code login mail task:
    - Successful email sending with different languages
    - Email service integration and template rendering
    - Error handling for various failure scenarios
    - Performance metrics and logging verification
    - Edge cases and boundary conditions

    All tests use the testcontainers infrastructure to ensure proper database isolation
    and realistic testing environment with actual database interactions.
    """

    @pytest.fixture(autouse=True)
    def cleanup_database(self, db_session_with_containers):
        """Clean up database before each test to ensure isolation."""
        from extensions.ext_redis import redis_client

        # Clear all test data
        db_session_with_containers.query(TenantAccountJoin).delete()
        db_session_with_containers.query(Tenant).delete()
        db_session_with_containers.query(Account).delete()
        db_session_with_containers.commit()

        # Clear Redis cache
        redis_client.flushdb()

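    # flushdb wipes the whole Redis database the client points at; this assumes the
    # test Redis container is dedicated to this suite and holds no shared state.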
    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("tasks.mail_email_code_login.mail") as mock_mail,
            patch("tasks.mail_email_code_login.get_email_i18n_service") as mock_email_service,
        ):
            # Setup default mock returns
            mock_mail.is_inited.return_value = True

            # Mock email service
            mock_email_service_instance = MagicMock()
            mock_email_service_instance.send_email.return_value = None
            mock_email_service.return_value = mock_email_service_instance

            yield {
                "mail": mock_mail,
                "email_service": mock_email_service,
                "email_service_instance": mock_email_service_instance,
            }

    def _create_test_account(self, db_session_with_containers, fake=None):
        """
        Helper method to create a test account for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            fake: Faker instance for generating test data

        Returns:
            Account: Created account instance
        """
        if fake is None:
            fake = Faker()

        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )

        db_session_with_containers.add(account)
        db_session_with_containers.commit()

        return account

    def _create_test_tenant_and_account(self, db_session_with_containers, fake=None):
        """
        Helper method to create a test tenant and account for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            fake: Faker instance for generating test data

        Returns:
            tuple: (Account, Tenant) created instances
        """
        if fake is None:
            fake = Faker()

        # Create account using the existing helper method
        account = self._create_test_account(db_session_with_containers, fake)

        # Create tenant
        tenant = Tenant(
            name=fake.company(),
            plan="basic",
            status="active",
        )

        db_session_with_containers.add(tenant)
        db_session_with_containers.commit()

        # Create tenant-account relationship
        tenant_account_join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
        )

        db_session_with_containers.add(tenant_account_join)
        db_session_with_containers.commit()

        return account, tenant

    def test_send_email_code_login_mail_task_success_english(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful email code login mail sending in English.

        This test verifies that the task can successfully:
        1. Send email code login mail with English language
        2. Use proper email service integration
        3. Pass correct template context to email service
        4. Log performance metrics correctly
        5. Complete task execution without errors
        """
        # Arrange: Setup test data
        fake = Faker()
        test_email = fake.email()
        test_code = "123456"
        test_language = "en-US"

        # Act: Execute the task
        send_email_code_login_mail_task(
            language=test_language,
            to=test_email,
            code=test_code,
        )

        # Assert: Verify expected outcomes
        mock_mail = mock_external_service_dependencies["mail"]
        mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]

        # Verify mail service was checked for initialization
        mock_mail.is_inited.assert_called_once()

        # Verify email service was called with correct parameters
        mock_email_service_instance.send_email.assert_called_once_with(
            email_type=EmailType.EMAIL_CODE_LOGIN,
            language_code=test_language,
            to=test_email,
            template_context={
                "to": test_email,
                "code": test_code,
            },
        )

    def test_send_email_code_login_mail_task_success_chinese(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful email code login mail sending in Chinese.

        This test verifies that the task can successfully:
        1. Send email code login mail with Chinese language
        2. Handle different language codes properly
        3. Use correct template context for Chinese emails
        4. Complete task execution without errors
        """
        # Arrange: Setup test data
        fake = Faker()
        test_email = fake.email()
        test_code = "789012"
        test_language = "zh-Hans"

        # Act: Execute the task
        send_email_code_login_mail_task(
            language=test_language,
            to=test_email,
            code=test_code,
        )

        # Assert: Verify expected outcomes
        mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]

        # Verify email service was called with Chinese language
        mock_email_service_instance.send_email.assert_called_once_with(
            email_type=EmailType.EMAIL_CODE_LOGIN,
            language_code=test_language,
            to=test_email,
            template_context={
                "to": test_email,
                "code": test_code,
            },
        )

    def test_send_email_code_login_mail_task_success_multiple_languages(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful email code login mail sending with multiple languages.

        This test verifies that the task can successfully:
        1. Handle various language codes correctly
        2. Send emails with different language configurations
        3. Maintain proper template context for each language
        4. Complete multiple task executions without conflicts
        """
        # Arrange: Setup test data
        fake = Faker()
        test_languages = ["en-US", "zh-Hans", "zh-CN", "ja-JP", "ko-KR"]
        test_emails = [fake.email() for _ in test_languages]
        test_codes = [fake.numerify("######") for _ in test_languages]

        # Act: Execute the task for each language
        for i, language in enumerate(test_languages):
            send_email_code_login_mail_task(
                language=language,
                to=test_emails[i],
                code=test_codes[i],
            )

        # Assert: Verify expected outcomes
        mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]

        # Verify email service was called for each language
        assert mock_email_service_instance.send_email.call_count == len(test_languages)

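        # call_args_list stores each recorded call as an (args, kwargs) tuple, so
        # the [1] index below reads the keyword arguments of that call.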
        # Verify each call had correct parameters
        for i, language in enumerate(test_languages):
            call_args = mock_email_service_instance.send_email.call_args_list[i]
            assert call_args[1]["email_type"] == EmailType.EMAIL_CODE_LOGIN
            assert call_args[1]["language_code"] == language
            assert call_args[1]["to"] == test_emails[i]
            assert call_args[1]["template_context"]["code"] == test_codes[i]

    def test_send_email_code_login_mail_task_mail_not_initialized(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test email code login mail task when mail service is not initialized.

        This test verifies that the task can properly:
        1. Check mail service initialization status
        2. Return early when mail is not initialized
        3. Not attempt to send email when service is unavailable
        4. Handle gracefully without errors
        """
        # Arrange: Setup test data
        fake = Faker()
        test_email = fake.email()
        test_code = "123456"
        test_language = "en-US"

        # Mock mail service as not initialized
        mock_mail = mock_external_service_dependencies["mail"]
        mock_mail.is_inited.return_value = False

        # Act: Execute the task
        send_email_code_login_mail_task(
            language=test_language,
            to=test_email,
            code=test_code,
        )

        # Assert: Verify expected outcomes
        mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]

        # Verify mail service was checked for initialization
        mock_mail.is_inited.assert_called_once()

        # Verify email service was not called
        mock_email_service_instance.send_email.assert_not_called()

    def test_send_email_code_login_mail_task_email_service_exception(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test email code login mail task when email service raises an exception.

        This test verifies that the task can properly:
        1. Handle email service exceptions gracefully
        2. Log appropriate error messages
        3. Continue execution without crashing
        4. Maintain proper error handling
        """
        # Arrange: Setup test data
        fake = Faker()
        test_email = fake.email()
        test_code = "123456"
        test_language = "en-US"

        # Mock email service to raise an exception
        mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
        mock_email_service_instance.send_email.side_effect = Exception("Email service unavailable")

        # Act: Execute the task - it should handle the exception gracefully
        send_email_code_login_mail_task(
            language=test_language,
            to=test_email,
            code=test_code,
        )

        # Assert: Verify expected outcomes
        mock_mail = mock_external_service_dependencies["mail"]
        mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]

        # Verify mail service was checked for initialization
        mock_mail.is_inited.assert_called_once()

        # Verify email service was called (and failed)
        mock_email_service_instance.send_email.assert_called_once_with(
            email_type=EmailType.EMAIL_CODE_LOGIN,
            language_code=test_language,
            to=test_email,
            template_context={
                "to": test_email,
                "code": test_code,
            },
        )

    def test_send_email_code_login_mail_task_invalid_parameters(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test email code login mail task with invalid parameters.

        This test verifies that the task can properly:
        1. Handle empty or None email addresses
        2. Process empty or None verification codes
        3. Handle invalid language codes
        4. Maintain proper error handling for invalid inputs
        """
        # Arrange: Setup test data
        fake = Faker()
        test_language = "en-US"

        # Test cases for invalid parameters
        invalid_test_cases = [
            {"email": "", "code": "123456", "description": "empty email"},
            {"email": None, "code": "123456", "description": "None email"},
            {"email": fake.email(), "code": "", "description": "empty code"},
            {"email": fake.email(), "code": None, "description": "None code"},
            {"email": "invalid-email", "code": "123456", "description": "invalid email format"},
        ]

        for test_case in invalid_test_cases:
            # Reset mocks for each test case
            mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
            mock_email_service_instance.reset_mock()

            # Act: Execute the task with invalid parameters
            send_email_code_login_mail_task(
                language=test_language,
                to=test_case["email"],
                code=test_case["code"],
            )

            # Assert: Verify that email service was still called
            # The task should pass parameters to email service as-is
            # and let the email service handle validation
            mock_email_service_instance.send_email.assert_called_once()

    def test_send_email_code_login_mail_task_edge_cases(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test email code login mail task with edge cases and boundary conditions.

        This test verifies that the task can properly:
        1. Handle very long email addresses
        2. Process very long verification codes
        3. Handle special characters in parameters
        4. Process extreme language codes
        """
        # Arrange: Setup test data
        fake = Faker()
        test_language = "en-US"

        # Edge case test data
        edge_cases = [
            {
                "email": "a" * 100 + "@example.com",  # Very long email
                "code": "1" * 20,  # Very long code
                "description": "very long email and code",
            },
            {
                "email": "test+tag@example.com",  # Email with special characters
                "code": "123-456",  # Code with special characters
                "description": "special characters",
            },
            {
                "email": "test@sub.domain.example.com",  # Complex domain
                "code": "000000",  # All zeros
                "description": "complex domain and all zeros code",
            },
            {
                "email": "test@example.co.uk",  # International domain
                "code": "999999",  # All nines
                "description": "international domain and all nines code",
            },
        ]

        for test_case in edge_cases:
            # Reset mocks for each test case
            mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
            mock_email_service_instance.reset_mock()

            # Act: Execute the task with edge case data
            send_email_code_login_mail_task(
                language=test_language,
                to=test_case["email"],
                code=test_case["code"],
            )

            # Assert: Verify that email service was called with edge case data
            mock_email_service_instance.send_email.assert_called_once_with(
                email_type=EmailType.EMAIL_CODE_LOGIN,
                language_code=test_language,
                to=test_case["email"],
                template_context={
                    "to": test_case["email"],
                    "code": test_case["code"],
                },
            )

    def test_send_email_code_login_mail_task_database_integration(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test email code login mail task with database integration.

        This test verifies that the task can properly:
        1. Work with real database connections
        2. Handle database session management
        3. Maintain proper database state
        4. Complete without database-related errors
        """
        # Arrange: Setup test data with database
        fake = Faker()
        account, tenant = self._create_test_tenant_and_account(db_session_with_containers, fake)

        test_email = account.email
        test_code = "123456"
        test_language = "en-US"

        # Act: Execute the task
        send_email_code_login_mail_task(
            language=test_language,
            to=test_email,
            code=test_code,
        )

        # Assert: Verify expected outcomes
        mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]

        # Verify email service was called with database account email
        mock_email_service_instance.send_email.assert_called_once_with(
            email_type=EmailType.EMAIL_CODE_LOGIN,
            language_code=test_language,
            to=test_email,
            template_context={
                "to": test_email,
                "code": test_code,
            },
        )

        # Verify database state is maintained
        db_session_with_containers.refresh(account)
        assert account.email == test_email
        assert account.status == "active"

    def test_send_email_code_login_mail_task_redis_integration(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test email code login mail task with Redis integration.

        This test verifies that the task can properly:
        1. Work with Redis cache connections
        2. Handle Redis operations without errors
        3. Maintain proper cache state
        4. Complete without Redis-related errors
        """
        # Arrange: Setup test data
        fake = Faker()
        test_email = fake.email()
        test_code = "123456"
        test_language = "en-US"

        # Setup Redis cache data
        from extensions.ext_redis import redis_client

        cache_key = f"email_code_login_test_{test_email}"
        redis_client.set(cache_key, "test_value", ex=300)

        # Act: Execute the task
        send_email_code_login_mail_task(
            language=test_language,
            to=test_email,
            code=test_code,
        )

        # Assert: Verify expected outcomes
        mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]

        # Verify email service was called
        mock_email_service_instance.send_email.assert_called_once()

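        # redis-py returns raw bytes unless the client is created with
        # decode_responses=True, hence the b"test_value" comparison below.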
        # Verify Redis cache is still accessible
        assert redis_client.exists(cache_key) == 1
        assert redis_client.get(cache_key) == b"test_value"

        # Clean up Redis cache
        redis_client.delete(cache_key)

    def test_send_email_code_login_mail_task_error_handling_comprehensive(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test comprehensive error handling for email code login mail task.

        This test verifies that the task can properly:
        1. Handle various types of exceptions
        2. Log appropriate error messages
        3. Continue execution despite errors
        4. Maintain proper error reporting
        """
        # Arrange: Setup test data
        fake = Faker()
        test_email = fake.email()
        test_code = "123456"
        test_language = "en-US"

        # Test different exception types
        exception_types = [
            ("ValueError", ValueError("Invalid email format")),
            ("RuntimeError", RuntimeError("Service unavailable")),
            ("ConnectionError", ConnectionError("Network error")),
            ("TimeoutError", TimeoutError("Request timeout")),
            ("Exception", Exception("Generic error")),
        ]

        for error_name, exception in exception_types:
            # Reset mocks for each test case
            mock_email_service_instance = mock_external_service_dependencies["email_service_instance"]
            mock_email_service_instance.reset_mock()
            mock_email_service_instance.send_email.side_effect = exception

            # Mock logging to capture error messages
            with patch("tasks.mail_email_code_login.logger") as mock_logger:
                # Act: Execute the task - it should handle the exception gracefully
                send_email_code_login_mail_task(
                    language=test_language,
                    to=test_email,
                    code=test_code,
                )

                # Assert: Verify error handling
                # Verify email service was called (and failed)
                mock_email_service_instance.send_email.assert_called_once()

                # Verify the failure was logged; the exact message format may vary,
                # so assert on the call count rather than the rendered message text
                assert mock_logger.exception.call_count >= 1, f"Error should be logged for {error_name}"

            # Reset side effect for next iteration
            mock_email_service_instance.send_email.side_effect = None
@@ -0,0 +1,261 @@
from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from tasks.mail_inner_task import send_inner_email_task


class TestMailInnerTask:
    """Integration tests for send_inner_email_task using testcontainers."""

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("tasks.mail_inner_task.mail") as mock_mail,
            patch("tasks.mail_inner_task.get_email_i18n_service") as mock_get_email_i18n_service,
            patch("tasks.mail_inner_task._render_template_with_strategy") as mock_render_template,
        ):
            # Setup mock mail service
            mock_mail.is_inited.return_value = True

            # Setup mock email i18n service
            mock_email_service = MagicMock()
            mock_get_email_i18n_service.return_value = mock_email_service

            # Setup mock template rendering
            mock_render_template.return_value = "<html>Test email content</html>"

            yield {
                "mail": mock_mail,
                "email_service": mock_email_service,
                "render_template": mock_render_template,
            }

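    # _render_template_with_strategy is patched out as a private helper of the task
    # module, so these tests cover the task's orchestration only, not real rendering.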
    def _create_test_email_data(self, fake: Faker) -> dict:
        """
        Helper method to create test email data for testing.

        Args:
            fake: Faker instance for generating test data

        Returns:
            dict: Test email data including recipients, subject, body, and substitutions
        """
        return {
            "to": [fake.email() for _ in range(3)],
            "subject": fake.sentence(nb_words=4),
            "body": "Hello {{name}}, this is a test email from {{company}}.",
            "substitutions": {
                "name": fake.name(),
                "company": fake.company(),
                "date": fake.date(),
            },
        }

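    # The {{name}}-style placeholders above assume a Jinja-like rendering strategy;
    # since _render_template_with_strategy is mocked, the syntax is illustrative only.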
    def test_send_inner_email_success(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Test successful email sending with valid data.

        This test verifies:
        - Proper email service initialization check
        - Template rendering with substitutions
        - Email service integration
        - Multiple recipient handling
        """
        # Arrange: Create test data
        fake = Faker()
        email_data = self._create_test_email_data(fake)

        # Act: Execute the task
        send_inner_email_task(
            to=email_data["to"],
            subject=email_data["subject"],
            body=email_data["body"],
            substitutions=email_data["substitutions"],
        )

        # Assert: Verify the expected outcomes
        # Verify mail service was checked for initialization
        mock_external_service_dependencies["mail"].is_inited.assert_called_once()

        # Verify template rendering was called with correct parameters
        mock_external_service_dependencies["render_template"].assert_called_once_with(
            email_data["body"], email_data["substitutions"]
        )

        # Verify email service was called once with the full recipient list
        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_raw_email.assert_called_once_with(
            to=email_data["to"],
            subject=email_data["subject"],
            html_content="<html>Test email content</html>",
        )

    def test_send_inner_email_single_recipient(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Test email sending with single recipient.

        This test verifies:
        - Single recipient handling
        - Template rendering
        - Email service integration
        """
        # Arrange: Create test data with single recipient
        fake = Faker()
        email_data = {
            "to": [fake.email()],
            "subject": fake.sentence(nb_words=3),
            "body": "Welcome {{user_name}}!",
            "substitutions": {
                "user_name": fake.name(),
            },
        }

        # Act: Execute the task
        send_inner_email_task(
            to=email_data["to"],
            subject=email_data["subject"],
            body=email_data["body"],
            substitutions=email_data["substitutions"],
        )

        # Assert: Verify the expected outcomes
        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_raw_email.assert_called_once_with(
            to=email_data["to"],
            subject=email_data["subject"],
            html_content="<html>Test email content</html>",
        )

    def test_send_inner_email_empty_substitutions(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test email sending with empty substitutions.

        This test verifies:
        - Template rendering with empty substitutions
        - Email service integration
        - Handling of minimal template context
        """
        # Arrange: Create test data with empty substitutions
        fake = Faker()
        email_data = {
            "to": [fake.email()],
            "subject": fake.sentence(nb_words=3),
            "body": "This is a simple email without variables.",
            "substitutions": {},
        }

        # Act: Execute the task
        send_inner_email_task(
            to=email_data["to"],
            subject=email_data["subject"],
            body=email_data["body"],
            substitutions=email_data["substitutions"],
        )

        # Assert: Verify the expected outcomes
        mock_external_service_dependencies["render_template"].assert_called_once_with(email_data["body"], {})

        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_raw_email.assert_called_once_with(
            to=email_data["to"],
            subject=email_data["subject"],
            html_content="<html>Test email content</html>",
        )

    def test_send_inner_email_mail_not_initialized(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test email sending when mail service is not initialized.

        This test verifies:
        - Early return when mail service is not initialized
        - No template rendering occurs
        - No email service calls
        - No exceptions raised
        """
        # Arrange: Setup mail service as not initialized
        mock_external_service_dependencies["mail"].is_inited.return_value = False

        fake = Faker()
        email_data = self._create_test_email_data(fake)

        # Act: Execute the task
        send_inner_email_task(
            to=email_data["to"],
            subject=email_data["subject"],
            body=email_data["body"],
            substitutions=email_data["substitutions"],
        )

        # Assert: Verify no processing occurred
        mock_external_service_dependencies["render_template"].assert_not_called()
        mock_external_service_dependencies["email_service"].send_raw_email.assert_not_called()

    def test_send_inner_email_template_rendering_error(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test email sending when template rendering fails.

        This test verifies:
        - Exception handling during template rendering
        - No email service calls when template fails
        """
        # Arrange: Setup template rendering to raise an exception
        mock_external_service_dependencies["render_template"].side_effect = Exception("Template rendering failed")

        fake = Faker()
        email_data = self._create_test_email_data(fake)

        # Act: Execute the task
        send_inner_email_task(
            to=email_data["to"],
            subject=email_data["subject"],
            body=email_data["body"],
            substitutions=email_data["substitutions"],
        )

        # Assert: Verify template rendering was attempted
        mock_external_service_dependencies["render_template"].assert_called_once()

        # Verify no email service calls due to exception
        mock_external_service_dependencies["email_service"].send_raw_email.assert_not_called()

    def test_send_inner_email_service_error(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Test email sending when email service fails.

        This test verifies:
        - Exception handling during email sending
        - Graceful error handling
        """
        # Arrange: Setup email service to raise an exception
        mock_external_service_dependencies["email_service"].send_raw_email.side_effect = Exception(
            "Email service failed"
        )

        fake = Faker()
        email_data = self._create_test_email_data(fake)

        # Act: Execute the task
        send_inner_email_task(
            to=email_data["to"],
            subject=email_data["subject"],
            body=email_data["body"],
            substitutions=email_data["substitutions"],
        )

        # Assert: Verify template rendering occurred
        mock_external_service_dependencies["render_template"].assert_called_once()

        # Verify email service was called (and failed)
        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_raw_email.assert_called_once_with(
            to=email_data["to"],
            subject=email_data["subject"],
            html_content="<html>Test email content</html>",
        )
@@ -0,0 +1,544 @@
"""
Integration tests for mail_invite_member_task using testcontainers.

This module provides integration tests for the invite member email task
using TestContainers infrastructure. The tests ensure that the task properly sends
invitation emails with internationalization support, handles error scenarios,
and integrates correctly with the database and Redis for token management.

All tests use the testcontainers infrastructure to ensure proper database isolation
and realistic testing scenarios with actual PostgreSQL and Redis instances.
"""

import json
import uuid
from datetime import UTC, datetime
from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from extensions.ext_redis import redis_client
from libs.email_i18n import EmailType
from models.account import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAccountRole
from tasks.mail_invite_member_task import send_invite_member_mail_task


class TestMailInviteMemberTask:
    """
    Integration tests for send_invite_member_mail_task using testcontainers.

    This test class covers the core functionality of the invite member email task:
    - Email sending with proper internationalization
    - Template context generation and URL construction
    - Error handling for failure scenarios
    - Integration with Redis for token validation
    - Mail service initialization checks
    - Real database integration with actual invitation flow

    All tests use the testcontainers infrastructure to ensure proper database isolation
    and realistic testing environment with actual database and Redis interactions.
    """

    @pytest.fixture(autouse=True)
    def cleanup_database(self, db_session_with_containers):
        """Clean up database before each test to ensure isolation."""
        # Clear all test data
        db_session_with_containers.query(TenantAccountJoin).delete()
        db_session_with_containers.query(Tenant).delete()
        db_session_with_containers.query(Account).delete()
        db_session_with_containers.commit()

        # Clear Redis cache
        redis_client.flushdb()

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("tasks.mail_invite_member_task.mail") as mock_mail,
            patch("tasks.mail_invite_member_task.get_email_i18n_service") as mock_email_service,
            patch("tasks.mail_invite_member_task.dify_config") as mock_config,
        ):
            # Setup mail service mock
            mock_mail.is_inited.return_value = True

            # Setup email service mock
            mock_email_service_instance = MagicMock()
            mock_email_service_instance.send_email.return_value = None
            mock_email_service.return_value = mock_email_service_instance

            # Setup config mock
            mock_config.CONSOLE_WEB_URL = "https://console.dify.ai"

            yield {
                "mail": mock_mail,
                "email_service": mock_email_service_instance,
                "config": mock_config,
            }

    def _create_test_account_and_tenant(self, db_session_with_containers):
        """
        Helper method to create a test account and tenant for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure

        Returns:
            tuple: (Account, Tenant) created instances
        """
        fake = Faker()

        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            password=fake.password(),
            interface_language="en-US",
            status=AccountStatus.ACTIVE,
        )
        account.created_at = datetime.now(UTC)
        account.updated_at = datetime.now(UTC)
        db_session_with_containers.add(account)
        db_session_with_containers.commit()
        db_session_with_containers.refresh(account)

        # Create tenant
        tenant = Tenant(
            name=fake.company(),
        )
        tenant.created_at = datetime.now(UTC)
        tenant.updated_at = datetime.now(UTC)
        db_session_with_containers.add(tenant)
        db_session_with_containers.commit()
        db_session_with_containers.refresh(tenant)

        # Create tenant member relationship
        tenant_join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
        )
        tenant_join.created_at = datetime.now(UTC)
        db_session_with_containers.add(tenant_join)
        db_session_with_containers.commit()

        return account, tenant

    def _create_invitation_token(self, tenant, account):
        """
        Helper method to create a valid invitation token in Redis.

        Args:
            tenant: Tenant instance
            account: Account instance

        Returns:
            str: Generated invitation token
        """
        token = str(uuid.uuid4())
        invitation_data = {
            "account_id": account.id,
            "email": account.email,
            "workspace_id": tenant.id,
        }
        cache_key = f"member_invite:token:{token}"
        redis_client.setex(cache_key, 24 * 60 * 60, json.dumps(invitation_data))  # 24 hours
        return token

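    # The member_invite:token:{token} key format and 24-hour TTL are assumed to match
    # the production invitation flow; the task under test only embeds the token in a URL.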
    def _create_pending_account_for_invitation(self, db_session_with_containers, email, tenant):
        """
        Helper method to create a pending account for invitation testing.

        Args:
            db_session_with_containers: Database session
            email: Email address for the account
            tenant: Tenant instance

        Returns:
            Account: Created pending account
        """
        account = Account(
            email=email,
            name=email.split("@")[0],
            password="",
            interface_language="en-US",
            status=AccountStatus.PENDING,
        )

        account.created_at = datetime.now(UTC)
        account.updated_at = datetime.now(UTC)
        db_session_with_containers.add(account)
        db_session_with_containers.commit()
        db_session_with_containers.refresh(account)

        # Create tenant member relationship
        tenant_join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.NORMAL,
        )
        tenant_join.created_at = datetime.now(UTC)
        db_session_with_containers.add(tenant_join)
        db_session_with_containers.commit()

        return account

    def test_send_invite_member_mail_success(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Test successful invitation email sending with all parameters.

        This test verifies:
        - Email service is called with correct parameters
        - Template context includes all required fields
        - URL is constructed correctly with token
        - Performance logging is recorded
        - No exceptions are raised
        """
        # Arrange: Create test data
        inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        invitee_email = "test@example.com"
        language = "en-US"
        token = self._create_invitation_token(tenant, inviter)
        inviter_name = inviter.name
        workspace_name = tenant.name

        # Act: Execute the task
        send_invite_member_mail_task(
            language=language,
            to=invitee_email,
            token=token,
            inviter_name=inviter_name,
            workspace_name=workspace_name,
        )

        # Assert: Verify email service was called correctly
        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_email.assert_called_once()

        # Verify call arguments
        call_args = mock_email_service.send_email.call_args
        assert call_args[1]["email_type"] == EmailType.INVITE_MEMBER
        assert call_args[1]["language_code"] == language
        assert call_args[1]["to"] == invitee_email

        # Verify template context
        template_context = call_args[1]["template_context"]
        assert template_context["to"] == invitee_email
        assert template_context["inviter_name"] == inviter_name
        assert template_context["workspace_name"] == workspace_name
        assert template_context["url"] == f"https://console.dify.ai/activate?token={token}"

    def test_send_invite_member_mail_different_languages(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test invitation email sending with different language codes.

        This test verifies:
        - Email service handles different language codes correctly
        - Template context is passed correctly for each language
        - No language-specific errors occur
        """
        # Arrange: Create test data
        inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
        token = self._create_invitation_token(tenant, inviter)

        test_languages = ["en-US", "zh-CN", "ja-JP", "fr-FR", "de-DE", "es-ES"]

        for language in test_languages:
            # Act: Execute the task with different language
            send_invite_member_mail_task(
                language=language,
                to="test@example.com",
                token=token,
                inviter_name=inviter.name,
                workspace_name=tenant.name,
            )

            # Assert: Verify language code was passed correctly
            mock_email_service = mock_external_service_dependencies["email_service"]
            call_args = mock_email_service.send_email.call_args
            assert call_args[1]["language_code"] == language

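    # mock.call_args always reflects the most recent call, which is why the loop above
    # asserts each language right after its own invocation rather than at the end.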
    def test_send_invite_member_mail_mail_not_initialized(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test behavior when mail service is not initialized.

        This test verifies:
        - Task returns early when mail is not initialized
        - Email service is not called
        - No exceptions are raised
        """
        # Arrange: Setup mail service as not initialized
        mock_mail = mock_external_service_dependencies["mail"]
        mock_mail.is_inited.return_value = False

        # Act: Execute the task
        result = send_invite_member_mail_task(
            language="en-US",
            to="test@example.com",
            token="test-token",
            inviter_name="Test User",
            workspace_name="Test Workspace",
        )

        # Assert: Verify early return
        assert result is None
        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_email.assert_not_called()

    def test_send_invite_member_mail_email_service_exception(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test error handling when email service raises an exception.

        This test verifies:
        - Exception is caught and logged
        - Task completes without raising exception
        - Error logging is performed
        """
        # Arrange: Setup email service to raise exception
        mock_email_service = mock_external_service_dependencies["email_service"]
        mock_email_service.send_email.side_effect = Exception("Email service failed")

        # Act & Assert: Execute task and verify exception is handled
        with patch("tasks.mail_invite_member_task.logger") as mock_logger:
            send_invite_member_mail_task(
                language="en-US",
                to="test@example.com",
                token="test-token",
                inviter_name="Test User",
                workspace_name="Test Workspace",
            )

            # Verify error was logged
            mock_logger.exception.assert_called_once()
            error_call = mock_logger.exception.call_args[0][0]
            assert "Send invite member mail to %s failed" in error_call

def test_send_invite_member_mail_template_context_validation(
|
||||
self, db_session_with_containers, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test template context contains all required fields for email rendering.
|
||||
|
||||
This test verifies:
|
||||
- All required template context fields are present
|
||||
- Field values match expected data
|
||||
- URL construction is correct
|
||||
- No missing or None values in context
|
||||
"""
|
||||
# Arrange: Create test data with specific values
|
||||
inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
|
||||
token = "test-token-123"
|
||||
invitee_email = "invitee@example.com"
|
||||
inviter_name = "John Doe"
|
||||
workspace_name = "Acme Corp"
|
||||
|
||||
# Act: Execute the task
|
||||
send_invite_member_mail_task(
|
||||
language="en-US",
|
||||
to=invitee_email,
|
||||
token=token,
|
||||
inviter_name=inviter_name,
|
||||
workspace_name=workspace_name,
|
||||
)
|
||||
|
||||
# Assert: Verify template context
|
||||
mock_email_service = mock_external_service_dependencies["email_service"]
|
||||
call_args = mock_email_service.send_email.call_args
|
||||
template_context = call_args[1]["template_context"]
|
||||
|
||||
# Verify all required fields are present
|
||||
required_fields = ["to", "inviter_name", "workspace_name", "url"]
|
||||
for field in required_fields:
|
||||
assert field in template_context
|
||||
assert template_context[field] is not None
|
||||
assert template_context[field] != ""
|
||||
|
||||
# Verify specific values
|
||||
assert template_context["to"] == invitee_email
|
||||
assert template_context["inviter_name"] == inviter_name
|
||||
assert template_context["workspace_name"] == workspace_name
|
||||
assert template_context["url"] == f"https://console.dify.ai/activate?token={token}"
|
||||
|
||||
def test_send_invite_member_mail_integration_with_redis_token(
|
||||
self, db_session_with_containers, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test integration with Redis token validation.
|
||||
|
||||
This test verifies:
|
||||
- Task works with real Redis token data
|
||||
- Token validation can be performed after email sending
|
||||
- Redis data integrity is maintained
|
||||
"""
|
||||
# Arrange: Create test data and store token in Redis
|
||||
inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
|
||||
token = self._create_invitation_token(tenant, inviter)
|
||||
|
||||
# Verify token exists in Redis before sending email
|
||||
cache_key = f"member_invite:token:{token}"
|
||||
assert redis_client.exists(cache_key) == 1
|
||||
|
||||
# Act: Execute the task
|
||||
send_invite_member_mail_task(
|
||||
language="en-US",
|
||||
to=inviter.email,
|
||||
token=token,
|
||||
inviter_name=inviter.name,
|
||||
workspace_name=tenant.name,
|
||||
)
|
||||
|
||||
# Assert: Verify token still exists after email sending
|
||||
assert redis_client.exists(cache_key) == 1
|
||||
|
||||
# Verify token data integrity
|
||||
token_data = redis_client.get(cache_key)
|
||||
assert token_data is not None
|
||||
invitation_data = json.loads(token_data)
|
||||
assert invitation_data["account_id"] == inviter.id
|
||||
assert invitation_data["email"] == inviter.email
|
||||
assert invitation_data["workspace_id"] == tenant.id
|
||||
|
||||
def test_send_invite_member_mail_with_special_characters(
|
||||
self, db_session_with_containers, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test email sending with special characters in names and workspace names.
|
||||
|
||||
This test verifies:
|
||||
- Special characters are handled correctly in template context
|
||||
- Email service receives properly formatted data
|
||||
- No encoding issues occur
|
||||
"""
|
||||
# Arrange: Create test data with special characters
|
||||
inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
|
||||
token = self._create_invitation_token(tenant, inviter)
|
||||
|
||||
special_cases = [
|
||||
("John O'Connor", "Acme & Co."),
|
||||
("José María", "Café & Restaurant"),
|
||||
("李小明", "北京科技有限公司"),
|
||||
("François & Marie", "L'École Internationale"),
|
||||
("Александр", "ООО Технологии"),
|
||||
("محمد أحمد", "شركة التقنية المتقدمة"),
|
||||
]
|
||||
|
||||
for inviter_name, workspace_name in special_cases:
|
||||
# Act: Execute the task
|
||||
send_invite_member_mail_task(
|
||||
language="en-US",
|
||||
to="test@example.com",
|
||||
token=token,
|
||||
inviter_name=inviter_name,
|
||||
workspace_name=workspace_name,
|
||||
)
|
||||
|
||||
# Assert: Verify special characters are preserved
|
||||
mock_email_service = mock_external_service_dependencies["email_service"]
|
||||
call_args = mock_email_service.send_email.call_args
|
||||
template_context = call_args[1]["template_context"]
|
||||
|
||||
assert template_context["inviter_name"] == inviter_name
|
||||
assert template_context["workspace_name"] == workspace_name
|
||||
|
||||
def test_send_invite_member_mail_real_database_integration(
|
||||
self, db_session_with_containers, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test real database integration with actual invitation flow.
|
||||
|
||||
This test verifies:
|
||||
- Task works with real database entities
|
||||
- Account and tenant relationships are properly maintained
|
||||
- Database state is consistent after email sending
|
||||
- Real invitation data flow is tested
|
||||
"""
|
||||
# Arrange: Create real database entities
|
||||
inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
|
||||
invitee_email = "newmember@example.com"
|
||||
|
||||
# Create a pending account for invitation (simulating real invitation flow)
|
||||
pending_account = self._create_pending_account_for_invitation(db_session_with_containers, invitee_email, tenant)
|
||||
|
||||
# Create invitation token with real account data
|
||||
token = self._create_invitation_token(tenant, pending_account)
|
||||
|
||||
# Act: Execute the task with real data
|
||||
send_invite_member_mail_task(
|
||||
language="en-US",
|
||||
to=invitee_email,
|
||||
token=token,
|
||||
inviter_name=inviter.name,
|
||||
workspace_name=tenant.name,
|
||||
)
|
||||
|
||||
# Assert: Verify email service was called with real data
|
||||
mock_email_service = mock_external_service_dependencies["email_service"]
|
||||
mock_email_service.send_email.assert_called_once()
|
||||
|
||||
# Verify database state is maintained
|
||||
db_session_with_containers.refresh(pending_account)
|
||||
db_session_with_containers.refresh(tenant)
|
||||
|
||||
assert pending_account.status == AccountStatus.PENDING
|
||||
assert pending_account.email == invitee_email
|
||||
assert tenant.name is not None
|
||||
|
||||
# Verify tenant relationship exists
|
||||
tenant_join = (
|
||||
db_session_with_containers.query(TenantAccountJoin)
|
||||
.filter_by(tenant_id=tenant.id, account_id=pending_account.id)
|
||||
.first()
|
||||
)
|
||||
assert tenant_join is not None
|
||||
assert tenant_join.role == TenantAccountRole.NORMAL
|
||||
|
||||
def test_send_invite_member_mail_token_lifecycle_management(
|
||||
self, db_session_with_containers, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test token lifecycle management and validation.
|
||||
|
||||
This test verifies:
|
||||
- Token is properly stored in Redis with correct TTL
|
||||
- Token data structure is correct
|
||||
- Token can be retrieved and validated after email sending
|
||||
- Token expiration is handled correctly
|
||||
"""
|
||||
# Arrange: Create test data
|
||||
inviter, tenant = self._create_test_account_and_tenant(db_session_with_containers)
|
||||
token = self._create_invitation_token(tenant, inviter)
|
||||
|
||||
# Act: Execute the task
|
||||
send_invite_member_mail_task(
|
||||
language="en-US",
|
||||
to=inviter.email,
|
||||
token=token,
|
||||
inviter_name=inviter.name,
|
||||
workspace_name=tenant.name,
|
||||
)
|
||||
|
||||
# Assert: Verify token lifecycle
|
||||
cache_key = f"member_invite:token:{token}"
|
||||
|
||||
# Token should still exist
|
||||
assert redis_client.exists(cache_key) == 1
|
||||
|
||||
# Token should have correct TTL (approximately 24 hours)
|
||||
ttl = redis_client.ttl(cache_key)
|
||||
assert 23 * 60 * 60 <= ttl <= 24 * 60 * 60 # Allow some tolerance
|
||||
|
||||
# Token data should be valid
|
||||
token_data = redis_client.get(cache_key)
|
||||
assert token_data is not None
|
||||
|
||||
invitation_data = json.loads(token_data)
|
||||
assert invitation_data["account_id"] == inviter.id
|
||||
assert invitation_data["email"] == inviter.email
|
||||
assert invitation_data["workspace_id"] == tenant.id
|
||||
@@ -0,0 +1,401 @@
"""
TestContainers-based integration tests for mail_owner_transfer_task.

This module provides comprehensive integration tests for the mail owner transfer tasks
using TestContainers to ensure real email service integration and proper functionality
testing with actual database and service dependencies.
"""

import logging
from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from libs.email_i18n import EmailType
from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole
from tasks.mail_owner_transfer_task import (
    send_new_owner_transfer_notify_email_task,
    send_old_owner_transfer_notify_email_task,
    send_owner_transfer_confirm_task,
)

logger = logging.getLogger(__name__)


class TestMailOwnerTransferTask:
    """Integration tests for mail owner transfer tasks using testcontainers."""

    @pytest.fixture
    def mock_mail_dependencies(self):
        """Mock setup for mail service dependencies."""
        with (
            patch("tasks.mail_owner_transfer_task.mail") as mock_mail,
            patch("tasks.mail_owner_transfer_task.get_email_i18n_service") as mock_get_email_service,
        ):
            # Setup mock mail service
            mock_mail.is_inited.return_value = True

            # Setup mock email service
            mock_email_service = MagicMock()
            mock_get_email_service.return_value = mock_email_service

            yield {
                "mail": mock_mail,
                "email_service": mock_email_service,
                "get_email_service": mock_get_email_service,
            }

    def _create_test_account_and_tenant(self, db_session_with_containers):
        """
        Helper method to create test account and tenant for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure

        Returns:
            tuple: (account, tenant) - Created account and tenant instances
        """
        fake = Faker()

        # Create account
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db_session_with_containers.add(account)
        db_session_with_containers.commit()

        # Create tenant
        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db_session_with_containers.add(tenant)
        db_session_with_containers.commit()

        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER.value,
            current=True,
        )
        db_session_with_containers.add(join)
        db_session_with_containers.commit()

        return account, tenant

    def test_send_owner_transfer_confirm_task_success(self, db_session_with_containers, mock_mail_dependencies):
        """
        Test successful owner transfer confirmation email sending.

        This test verifies:
        - Proper email service initialization check
        - Correct email service method calls with right parameters
        - Email template context is properly constructed
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)

        test_language = "en-US"
        test_email = account.email
        test_code = "123456"
        test_workspace = tenant.name

        # Act: Execute the task
        send_owner_transfer_confirm_task(
            language=test_language,
            to=test_email,
            code=test_code,
            workspace=test_workspace,
        )

        # Assert: Verify the expected outcomes
        mock_mail_dependencies["mail"].is_inited.assert_called_once()
        mock_mail_dependencies["get_email_service"].assert_called_once()

        # Verify email service was called with correct parameters
        mock_mail_dependencies["email_service"].send_email.assert_called_once()
        call_args = mock_mail_dependencies["email_service"].send_email.call_args

        assert call_args[1]["email_type"] == EmailType.OWNER_TRANSFER_CONFIRM
        assert call_args[1]["language_code"] == test_language
        assert call_args[1]["to"] == test_email
        assert call_args[1]["template_context"]["to"] == test_email
        assert call_args[1]["template_context"]["code"] == test_code
        assert call_args[1]["template_context"]["WorkspaceName"] == test_workspace

    def test_send_owner_transfer_confirm_task_mail_not_initialized(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test owner transfer confirmation email when mail service is not initialized.

        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls are made
        - No exceptions are raised
        """
        # Arrange: Set mail service as not initialized
        mock_mail_dependencies["mail"].is_inited.return_value = False

        test_language = "en-US"
        test_email = "test@example.com"
        test_code = "123456"
        test_workspace = "Test Workspace"

        # Act: Execute the task
        send_owner_transfer_confirm_task(
            language=test_language,
            to=test_email,
            code=test_code,
            workspace=test_workspace,
        )

        # Assert: Verify no email service calls were made
        mock_mail_dependencies["get_email_service"].assert_not_called()
        mock_mail_dependencies["email_service"].send_email.assert_not_called()

    def test_send_owner_transfer_confirm_task_exception_handling(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test exception handling in owner transfer confirmation email.

        This test verifies:
        - Exceptions are properly caught and logged
        - No exceptions are propagated to caller
        - Email service calls are attempted
        - Error logging works correctly
        """
        # Arrange: Setup email service to raise exception
        mock_mail_dependencies["email_service"].send_email.side_effect = Exception("Email service error")

        test_language = "en-US"
        test_email = "test@example.com"
        test_code = "123456"
        test_workspace = "Test Workspace"

        # Act & Assert: Verify no exception is raised
        try:
            send_owner_transfer_confirm_task(
                language=test_language,
                to=test_email,
                code=test_code,
                workspace=test_workspace,
            )
        except Exception as e:
            pytest.fail(f"Task should not raise exceptions, but raised: {e}")

        # Verify email service was called despite the exception
        mock_mail_dependencies["email_service"].send_email.assert_called_once()

    def test_send_old_owner_transfer_notify_email_task_success(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test successful old owner transfer notification email sending.

        This test verifies:
        - Proper email service initialization check
        - Correct email service method calls with right parameters
        - Email template context includes new owner email
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)

        test_language = "en-US"
        test_email = account.email
        test_workspace = tenant.name
        test_new_owner_email = "newowner@example.com"

        # Act: Execute the task
        send_old_owner_transfer_notify_email_task(
            language=test_language,
            to=test_email,
            workspace=test_workspace,
            new_owner_email=test_new_owner_email,
        )

        # Assert: Verify the expected outcomes
        mock_mail_dependencies["mail"].is_inited.assert_called_once()
        mock_mail_dependencies["get_email_service"].assert_called_once()

        # Verify email service was called with correct parameters
        mock_mail_dependencies["email_service"].send_email.assert_called_once()
        call_args = mock_mail_dependencies["email_service"].send_email.call_args

        assert call_args[1]["email_type"] == EmailType.OWNER_TRANSFER_OLD_NOTIFY
        assert call_args[1]["language_code"] == test_language
        assert call_args[1]["to"] == test_email
        assert call_args[1]["template_context"]["to"] == test_email
        assert call_args[1]["template_context"]["WorkspaceName"] == test_workspace
        assert call_args[1]["template_context"]["NewOwnerEmail"] == test_new_owner_email

    def test_send_old_owner_transfer_notify_email_task_mail_not_initialized(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test old owner transfer notification email when mail service is not initialized.

        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls are made
        - No exceptions are raised
        """
        # Arrange: Set mail service as not initialized
        mock_mail_dependencies["mail"].is_inited.return_value = False

        test_language = "en-US"
        test_email = "test@example.com"
        test_workspace = "Test Workspace"
        test_new_owner_email = "newowner@example.com"

        # Act: Execute the task
        send_old_owner_transfer_notify_email_task(
            language=test_language,
            to=test_email,
            workspace=test_workspace,
            new_owner_email=test_new_owner_email,
        )

        # Assert: Verify no email service calls were made
        mock_mail_dependencies["get_email_service"].assert_not_called()
        mock_mail_dependencies["email_service"].send_email.assert_not_called()

    def test_send_old_owner_transfer_notify_email_task_exception_handling(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test exception handling in old owner transfer notification email.

        This test verifies:
        - Exceptions are properly caught and logged
        - No exceptions are propagated to caller
        - Email service calls are attempted
        - Error logging works correctly
        """
        # Arrange: Setup email service to raise exception
        mock_mail_dependencies["email_service"].send_email.side_effect = Exception("Email service error")

        test_language = "en-US"
        test_email = "test@example.com"
        test_workspace = "Test Workspace"
        test_new_owner_email = "newowner@example.com"

        # Act & Assert: Verify no exception is raised
        try:
            send_old_owner_transfer_notify_email_task(
                language=test_language,
                to=test_email,
                workspace=test_workspace,
                new_owner_email=test_new_owner_email,
            )
        except Exception as e:
            pytest.fail(f"Task should not raise exceptions, but raised: {e}")

        # Verify email service was called despite the exception
        mock_mail_dependencies["email_service"].send_email.assert_called_once()

    def test_send_new_owner_transfer_notify_email_task_success(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test successful new owner transfer notification email sending.

        This test verifies:
        - Proper email service initialization check
        - Correct email service method calls with right parameters
        - Email template context is properly constructed
        """
        # Arrange: Create test data
        account, tenant = self._create_test_account_and_tenant(db_session_with_containers)

        test_language = "en-US"
        test_email = account.email
        test_workspace = tenant.name

        # Act: Execute the task
        send_new_owner_transfer_notify_email_task(
            language=test_language,
            to=test_email,
            workspace=test_workspace,
        )

        # Assert: Verify the expected outcomes
        mock_mail_dependencies["mail"].is_inited.assert_called_once()
        mock_mail_dependencies["get_email_service"].assert_called_once()

        # Verify email service was called with correct parameters
        mock_mail_dependencies["email_service"].send_email.assert_called_once()
        call_args = mock_mail_dependencies["email_service"].send_email.call_args

        assert call_args[1]["email_type"] == EmailType.OWNER_TRANSFER_NEW_NOTIFY
        assert call_args[1]["language_code"] == test_language
        assert call_args[1]["to"] == test_email
        assert call_args[1]["template_context"]["to"] == test_email
        assert call_args[1]["template_context"]["WorkspaceName"] == test_workspace

    def test_send_new_owner_transfer_notify_email_task_mail_not_initialized(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test new owner transfer notification email when mail service is not initialized.

        This test verifies:
        - Early return when mail service is not initialized
        - No email service calls are made
        - No exceptions are raised
        """
        # Arrange: Set mail service as not initialized
        mock_mail_dependencies["mail"].is_inited.return_value = False

        test_language = "en-US"
        test_email = "test@example.com"
        test_workspace = "Test Workspace"

        # Act: Execute the task
        send_new_owner_transfer_notify_email_task(
            language=test_language,
            to=test_email,
            workspace=test_workspace,
        )

        # Assert: Verify no email service calls were made
        mock_mail_dependencies["get_email_service"].assert_not_called()
        mock_mail_dependencies["email_service"].send_email.assert_not_called()

    def test_send_new_owner_transfer_notify_email_task_exception_handling(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """
        Test exception handling in new owner transfer notification email.

        This test verifies:
        - Exceptions are properly caught and logged
        - No exceptions are propagated to caller
        - Email service calls are attempted
        - Error logging works correctly
        """
        # Arrange: Setup email service to raise exception
        mock_mail_dependencies["email_service"].send_email.side_effect = Exception("Email service error")

        test_language = "en-US"
        test_email = "test@example.com"
        test_workspace = "Test Workspace"

        # Act & Assert: Verify no exception is raised
        try:
            send_new_owner_transfer_notify_email_task(
                language=test_language,
                to=test_email,
                workspace=test_workspace,
            )
        except Exception as e:
            pytest.fail(f"Task should not raise exceptions, but raised: {e}")

        # Verify email service was called despite the exception
        mock_mail_dependencies["email_service"].send_email.assert_called_once()
@@ -0,0 +1,134 @@
"""
TestContainers-based integration tests for mail_register_task.py

This module provides integration tests for email registration tasks
using TestContainers to ensure real database and service interactions.
"""

from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from libs.email_i18n import EmailType
from tasks.mail_register_task import send_email_register_mail_task, send_email_register_mail_task_when_account_exist


class TestMailRegisterTask:
    """Integration tests for mail_register_task using testcontainers."""

    @pytest.fixture
    def mock_mail_dependencies(self):
        """Mock setup for mail service dependencies."""
        with (
            patch("tasks.mail_register_task.mail") as mock_mail,
            patch("tasks.mail_register_task.get_email_i18n_service") as mock_get_email_service,
        ):
            # Setup mock mail service
            mock_mail.is_inited.return_value = True

            # Setup mock email i18n service
            mock_email_service = MagicMock()
            mock_get_email_service.return_value = mock_email_service

            yield {
                "mail": mock_mail,
                "email_service": mock_email_service,
                "get_email_service": mock_get_email_service,
            }

    def test_send_email_register_mail_task_success(self, db_session_with_containers, mock_mail_dependencies):
        """Test successful email registration mail sending."""
        fake = Faker()
        language = "en-US"
        to_email = fake.email()
        code = fake.numerify("######")

        send_email_register_mail_task(language=language, to=to_email, code=code)

        mock_mail_dependencies["mail"].is_inited.assert_called_once()
        mock_mail_dependencies["email_service"].send_email.assert_called_once_with(
            email_type=EmailType.EMAIL_REGISTER,
            language_code=language,
            to=to_email,
            template_context={
                "to": to_email,
                "code": code,
            },
        )

    def test_send_email_register_mail_task_mail_not_initialized(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """Test email registration task when mail service is not initialized."""
        mock_mail_dependencies["mail"].is_inited.return_value = False

        send_email_register_mail_task(language="en-US", to="test@example.com", code="123456")

        mock_mail_dependencies["get_email_service"].assert_not_called()
        mock_mail_dependencies["email_service"].send_email.assert_not_called()

    def test_send_email_register_mail_task_exception_handling(self, db_session_with_containers, mock_mail_dependencies):
        """Test email registration task exception handling."""
        mock_mail_dependencies["email_service"].send_email.side_effect = Exception("Email service error")

        fake = Faker()
        to_email = fake.email()
        code = fake.numerify("######")

        with patch("tasks.mail_register_task.logger") as mock_logger:
            send_email_register_mail_task(language="en-US", to=to_email, code=code)
            mock_logger.exception.assert_called_once_with("Send email register mail to %s failed", to_email)

    def test_send_email_register_mail_task_when_account_exist_success(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """Test successful email registration mail sending when the account already exists."""
        fake = Faker()
        language = "en-US"
        to_email = fake.email()
        account_name = fake.name()

        with patch("tasks.mail_register_task.dify_config") as mock_config:
            mock_config.CONSOLE_WEB_URL = "https://console.dify.ai"

            send_email_register_mail_task_when_account_exist(language=language, to=to_email, account_name=account_name)

            mock_mail_dependencies["email_service"].send_email.assert_called_once_with(
                email_type=EmailType.EMAIL_REGISTER_WHEN_ACCOUNT_EXIST,
                language_code=language,
                to=to_email,
                template_context={
                    "to": to_email,
                    "login_url": "https://console.dify.ai/signin",
                    "reset_password_url": "https://console.dify.ai/reset-password",
                    "account_name": account_name,
                },
            )

    def test_send_email_register_mail_task_when_account_exist_mail_not_initialized(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """Test account-exists email task when mail service is not initialized."""
        mock_mail_dependencies["mail"].is_inited.return_value = False

        send_email_register_mail_task_when_account_exist(
            language="en-US", to="test@example.com", account_name="Test User"
        )

        mock_mail_dependencies["get_email_service"].assert_not_called()
        mock_mail_dependencies["email_service"].send_email.assert_not_called()

    def test_send_email_register_mail_task_when_account_exist_exception_handling(
        self, db_session_with_containers, mock_mail_dependencies
    ):
        """Test account-exists email task exception handling."""
        mock_mail_dependencies["email_service"].send_email.side_effect = Exception("Email service error")

        fake = Faker()
        to_email = fake.email()
        account_name = fake.name()

        with patch("tasks.mail_register_task.logger") as mock_logger:
            send_email_register_mail_task_when_account_exist(language="en-US", to=to_email, account_name=account_name)
            mock_logger.exception.assert_called_once_with("Send email register mail to %s failed", to_email)
@@ -0,0 +1,936 @@
import json
import uuid
from unittest.mock import patch

import pytest
from faker import Faker

from core.app.entities.app_invoke_entities import InvokeFrom, RagPipelineGenerateEntity
from core.app.entities.rag_pipeline_invoke_entities import RagPipelineInvokeEntity
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
from extensions.ext_database import db
from models import Account, Tenant, TenantAccountJoin, TenantAccountRole
from models.dataset import Pipeline
from models.workflow import Workflow
from tasks.rag_pipeline.priority_rag_pipeline_run_task import (
    priority_rag_pipeline_run_task,
    run_single_rag_pipeline_task,
)
from tasks.rag_pipeline.rag_pipeline_run_task import rag_pipeline_run_task


class TestRagPipelineRunTasks:
    """Integration tests for RAG pipeline run tasks using testcontainers.

    This test class covers:
    - priority_rag_pipeline_run_task function
    - rag_pipeline_run_task function
    - run_single_rag_pipeline_task function
    - Real Redis-based TenantIsolatedTaskQueue operations
    - PipelineGenerator._generate method mocking and parameter validation
    - File operations and cleanup
    - Error handling and queue management
    """
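
    # Rough shape of the flow these tests exercise, inferred from the mocks and
    # assertions below (the tasks' internals are not shown in this diff, so this
    # is a sketch, not the implementation):
    #   1. task(file_id, tenant_id) loads a JSON list of invoke entities via
    #      FileService.get_file_content(file_id)
    #   2. PipelineGenerator._generate runs once per entity
    #   3. FileService.delete_file(file_id) cleans up the temp file
    #   4. one waiting file_id is pulled from the tenant's TenantIsolatedTaskQueue
    #      and re-dispatched via the task's .delay(...)
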
    @pytest.fixture
    def mock_pipeline_generator(self):
        """Mock PipelineGenerator._generate method."""
        with patch("core.app.apps.pipeline.pipeline_generator.PipelineGenerator._generate") as mock_generate:
            # Mock the _generate method to return a simple response
            mock_generate.return_value = {"answer": "Test response", "metadata": {"test": "data"}}
            yield mock_generate

    @pytest.fixture
    def mock_file_service(self):
        """Mock FileService for file operations."""
        with (
            patch("services.file_service.FileService.get_file_content") as mock_get_content,
            patch("services.file_service.FileService.delete_file") as mock_delete_file,
        ):
            yield {
                "get_content": mock_get_content,
                "delete_file": mock_delete_file,
            }

    def _create_test_pipeline_and_workflow(self, db_session_with_containers):
        """
        Helper method to create test pipeline and workflow for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure

        Returns:
            tuple: (account, tenant, pipeline, workflow) - Created entities
        """
        fake = Faker()

        # Create account and tenant
        account = Account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            status="active",
        )
        db.session.add(account)
        db.session.commit()

        tenant = Tenant(
            name=fake.company(),
            status="normal",
        )
        db.session.add(tenant)
        db.session.commit()

        # Create tenant-account join
        join = TenantAccountJoin(
            tenant_id=tenant.id,
            account_id=account.id,
            role=TenantAccountRole.OWNER,
            current=True,
        )
        db.session.add(join)
        db.session.commit()

        # Create workflow
        workflow = Workflow(
            id=str(uuid.uuid4()),
            tenant_id=tenant.id,
            app_id=str(uuid.uuid4()),
            type="workflow",
            version="draft",
            graph="{}",
            features="{}",
            marked_name=fake.company(),
            marked_comment=fake.text(max_nb_chars=100),
            created_by=account.id,
            environment_variables=[],
            conversation_variables=[],
            rag_pipeline_variables=[],
        )
        db.session.add(workflow)
        db.session.commit()

        # Create pipeline
        pipeline = Pipeline(
            tenant_id=tenant.id,
            workflow_id=workflow.id,
            name=fake.company(),
            description=fake.text(max_nb_chars=100),
            created_by=account.id,
        )
        pipeline.id = str(uuid.uuid4())
        db.session.add(pipeline)
        db.session.commit()

        # Refresh entities to ensure they're properly loaded
        db.session.refresh(account)
        db.session.refresh(tenant)
        db.session.refresh(workflow)
        db.session.refresh(pipeline)

        return account, tenant, pipeline, workflow

    def _create_rag_pipeline_invoke_entities(self, account, tenant, pipeline, workflow, count=2):
        """
        Helper method to create RAG pipeline invoke entities for testing.

        Args:
            account: Account instance
            tenant: Tenant instance
            pipeline: Pipeline instance
            workflow: Workflow instance
            count: Number of entities to create

        Returns:
            list: List of RagPipelineInvokeEntity instances
        """
        fake = Faker()
        entities = []

        for i in range(count):
            # Create application generate entity
            app_config = {
                "app_id": str(uuid.uuid4()),
                "app_name": fake.company(),
                "mode": "workflow",
                "workflow_id": workflow.id,
                "tenant_id": tenant.id,
                "app_mode": "workflow",
            }

            application_generate_entity = {
                "task_id": str(uuid.uuid4()),
                "app_config": app_config,
                "inputs": {"query": f"Test query {i}"},
                "files": [],
                "user_id": account.id,
                "stream": False,
                "invoke_from": "published",
                "workflow_execution_id": str(uuid.uuid4()),
                "pipeline_config": {
                    "app_id": str(uuid.uuid4()),
                    "app_name": fake.company(),
                    "mode": "workflow",
                    "workflow_id": workflow.id,
                    "tenant_id": tenant.id,
                    "app_mode": "workflow",
                },
                "datasource_type": "upload_file",
                "datasource_info": {},
                "dataset_id": str(uuid.uuid4()),
                "batch": "test_batch",
            }

            entity = RagPipelineInvokeEntity(
                pipeline_id=pipeline.id,
                application_generate_entity=application_generate_entity,
                user_id=account.id,
                tenant_id=tenant.id,
                workflow_id=workflow.id,
                streaming=False,
                workflow_execution_id=str(uuid.uuid4()),
                workflow_thread_pool_id=str(uuid.uuid4()),
            )
            entities.append(entity)

        return entities

    def _create_file_content_for_entities(self, entities):
        """
        Helper method to create file content for RAG pipeline invoke entities.

        Args:
            entities: List of RagPipelineInvokeEntity instances

        Returns:
            str: JSON string containing serialized entities
        """
        entities_data = [entity.model_dump() for entity in entities]
        return json.dumps(entities_data)
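
    # A minimal sketch of the round-trip this helper sets up for the mocked
    # FileService (only the serialization side appears in this file; the
    # deserialization shown here is an assumption about the consuming task):
    #   payload = self._create_file_content_for_entities(entities)
    #   restored = [RagPipelineInvokeEntity(**item) for item in json.loads(payload)]
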
    def test_priority_rag_pipeline_run_task_success(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test successful priority RAG pipeline run task execution.

        This test verifies:
        - Task execution with multiple RAG pipeline invoke entities
        - File content retrieval and parsing
        - PipelineGenerator._generate method calls with correct parameters
        - Thread pool execution
        - File cleanup after execution
        - Queue management with no waiting tasks
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=2)
        file_content = self._create_file_content_for_entities(entities)

        # Mock file service
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].return_value = file_content

        # Act: Execute the priority task
        priority_rag_pipeline_run_task(file_id, tenant.id)

        # Assert: Verify expected outcomes
        # Verify file operations
        mock_file_service["get_content"].assert_called_once_with(file_id)
        mock_file_service["delete_file"].assert_called_once_with(file_id)

        # Verify PipelineGenerator._generate was called for each entity
        assert mock_pipeline_generator.call_count == 2

        # Verify call parameters for each entity
        calls = mock_pipeline_generator.call_args_list
        for call in calls:
            call_kwargs = call[1]  # Get keyword arguments
            assert call_kwargs["pipeline"].id == pipeline.id
            assert call_kwargs["workflow_id"] == workflow.id
            assert call_kwargs["user"].id == account.id
            assert call_kwargs["invoke_from"] == InvokeFrom.PUBLISHED
            assert call_kwargs["streaming"] is False
            assert isinstance(call_kwargs["application_generate_entity"], RagPipelineGenerateEntity)

    def test_rag_pipeline_run_task_success(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test successful regular RAG pipeline run task execution.

        This test verifies:
        - Task execution with multiple RAG pipeline invoke entities
        - File content retrieval and parsing
        - PipelineGenerator._generate method calls with correct parameters
        - Thread pool execution
        - File cleanup after execution
        - Queue management with no waiting tasks
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=3)
        file_content = self._create_file_content_for_entities(entities)

        # Mock file service
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].return_value = file_content

        # Act: Execute the regular task
        rag_pipeline_run_task(file_id, tenant.id)

        # Assert: Verify expected outcomes
        # Verify file operations
        mock_file_service["get_content"].assert_called_once_with(file_id)
        mock_file_service["delete_file"].assert_called_once_with(file_id)

        # Verify PipelineGenerator._generate was called for each entity
        assert mock_pipeline_generator.call_count == 3

        # Verify call parameters for each entity
        calls = mock_pipeline_generator.call_args_list
        for call in calls:
            call_kwargs = call[1]  # Get keyword arguments
            assert call_kwargs["pipeline"].id == pipeline.id
            assert call_kwargs["workflow_id"] == workflow.id
            assert call_kwargs["user"].id == account.id
            assert call_kwargs["invoke_from"] == InvokeFrom.PUBLISHED
            assert call_kwargs["streaming"] is False
            assert isinstance(call_kwargs["application_generate_entity"], RagPipelineGenerateEntity)

    def test_priority_rag_pipeline_run_task_with_waiting_tasks(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test priority RAG pipeline run task with waiting tasks in queue using real Redis.

        This test verifies:
        - Core task execution
        - Real Redis-based tenant queue processing of waiting tasks
        - Task function calls for waiting tasks
        - Queue management with multiple tasks using actual Redis operations
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
        file_content = self._create_file_content_for_entities(entities)

        # Mock file service
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].return_value = file_content

        # Use real Redis for TenantIsolatedTaskQueue
        queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")

        # Add waiting tasks to the real Redis queue
        waiting_file_ids = [str(uuid.uuid4()) for _ in range(2)]
        queue.push_tasks(waiting_file_ids)

        # Mock the task function calls
        with patch(
            "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay"
        ) as mock_delay:
            # Act: Execute the priority task
            priority_rag_pipeline_run_task(file_id, tenant.id)

            # Assert: Verify core processing occurred
            mock_file_service["get_content"].assert_called_once_with(file_id)
            mock_file_service["delete_file"].assert_called_once_with(file_id)
            assert mock_pipeline_generator.call_count == 1

            # Verify waiting tasks were processed; the queue pulls one task at a time by default
            assert mock_delay.call_count == 1

            # Verify correct parameters for the call
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0]
            assert call_kwargs.get("tenant_id") == tenant.id

            # Verify queue still has remaining tasks (only 1 was pulled)
            remaining_tasks = queue.pull_tasks(count=10)
            assert len(remaining_tasks) == 1  # 2 original - 1 pulled = 1 remaining

    def test_rag_pipeline_run_task_legacy_compatibility(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test regular RAG pipeline run task with legacy Redis queue format for backward compatibility.

        This test simulates the scenario where:
        - Old code writes file IDs directly to Redis list using lpush
        - New worker processes these legacy queue entries
        - Ensures backward compatibility during deployment transition

        Legacy format: redis_client.lpush(tenant_self_pipeline_task_queue, upload_file.id)
        New format: TenantIsolatedTaskQueue.push_tasks([file_id])
        """
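        # Side-by-side sketch of the two queue formats exercised below; the key
        # layout comes from this test's own setup and should be treated as an
        # assumption about the production keys:
        #   legacy: redis_client.lpush(f"tenant_self_pipeline_task_queue:{tenant_id}", file_id)
        #   new:    TenantIsolatedTaskQueue(tenant_id, "pipeline").push_tasks([file_id])
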
# Arrange: Create test data
|
||||
account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
|
||||
entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
|
||||
file_content = self._create_file_content_for_entities(entities)
|
||||
|
||||
# Mock file service
|
||||
file_id = str(uuid.uuid4())
|
||||
mock_file_service["get_content"].return_value = file_content
|
||||
|
||||
# Simulate legacy Redis queue format - direct file IDs in Redis list
|
||||
from extensions.ext_redis import redis_client
|
||||
|
||||
# Legacy queue key format (old code)
|
||||
legacy_queue_key = f"tenant_self_pipeline_task_queue:{tenant.id}"
|
||||
legacy_task_key = f"tenant_pipeline_task:{tenant.id}"
|
||||
|
||||
# Add legacy format data to Redis (simulating old code behavior)
|
||||
legacy_file_ids = [str(uuid.uuid4()) for _ in range(3)]
|
||||
for file_id_legacy in legacy_file_ids:
|
||||
redis_client.lpush(legacy_queue_key, file_id_legacy)
|
||||
|
||||
# Set the task key to indicate there are waiting tasks (legacy behavior)
|
||||
redis_client.set(legacy_task_key, 1, ex=60 * 60)
|
||||
|
||||
# Mock the task function calls
|
||||
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
|
||||
# Act: Execute the priority task with new code but legacy queue data
|
||||
rag_pipeline_run_task(file_id, tenant.id)
|
||||
|
||||
# Assert: Verify core processing occurred
|
||||
mock_file_service["get_content"].assert_called_once_with(file_id)
|
||||
mock_file_service["delete_file"].assert_called_once_with(file_id)
|
||||
assert mock_pipeline_generator.call_count == 1
|
||||
|
||||
# Verify waiting tasks were processed, pull 1 task a time by default
|
||||
assert mock_delay.call_count == 1
|
||||
|
||||
# Verify correct parameters for the call
|
||||
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
|
||||
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0]
|
||||
assert call_kwargs.get("tenant_id") == tenant.id
|
||||
|
||||
# Verify that new code can process legacy queue entries
|
||||
# The new TenantIsolatedTaskQueue should be able to read from the legacy format
|
||||
queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
|
||||
|
||||
# Verify queue still has remaining tasks (only 1 was pulled)
|
||||
remaining_tasks = queue.pull_tasks(count=10)
|
||||
assert len(remaining_tasks) == 2 # 3 original - 1 pulled = 2 remaining
|
||||
|
||||
# Cleanup: Remove legacy test data
|
||||
redis_client.delete(legacy_queue_key)
|
||||
redis_client.delete(legacy_task_key)
|
||||
|
||||
def test_rag_pipeline_run_task_with_waiting_tasks(
|
||||
self, db_session_with_containers, mock_pipeline_generator, mock_file_service
|
||||
):
|
||||
"""
|
||||
Test regular RAG pipeline run task with waiting tasks in queue using real Redis.
|
||||
|
||||
This test verifies:
|
||||
- Core task execution
|
||||
- Real Redis-based tenant queue processing of waiting tasks
|
||||
- Task function calls for waiting tasks
|
||||
- Queue management with multiple tasks using actual Redis operations
|
||||
"""
|
||||
# Arrange: Create test data
|
||||
account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
|
||||
entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
|
||||
file_content = self._create_file_content_for_entities(entities)
|
||||
|
||||
# Mock file service
|
||||
file_id = str(uuid.uuid4())
|
||||
mock_file_service["get_content"].return_value = file_content
|
||||
|
||||
# Use real Redis for TenantIsolatedTaskQueue
|
||||
queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
|
||||
|
||||
# Add waiting tasks to the real Redis queue
|
||||
waiting_file_ids = [str(uuid.uuid4()) for _ in range(3)]
|
||||
queue.push_tasks(waiting_file_ids)
|
||||
|
||||
# Mock the task function calls
|
||||
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
|
||||
# Act: Execute the regular task
|
||||
rag_pipeline_run_task(file_id, tenant.id)
|
||||
|
||||
# Assert: Verify core processing occurred
|
||||
mock_file_service["get_content"].assert_called_once_with(file_id)
|
||||
mock_file_service["delete_file"].assert_called_once_with(file_id)
|
||||
assert mock_pipeline_generator.call_count == 1
|
||||
|
||||
# Verify waiting tasks were processed, pull 1 task a time by default
|
||||
assert mock_delay.call_count == 1
|
||||
|
||||
# Verify correct parameters for the call
|
||||
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
|
||||
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0]
|
||||
assert call_kwargs.get("tenant_id") == tenant.id
|
||||
|
||||
# Verify queue still has remaining tasks (only 1 was pulled)
|
||||
remaining_tasks = queue.pull_tasks(count=10)
|
||||
assert len(remaining_tasks) == 2 # 3 original - 1 pulled = 2 remaining
|
||||
|
||||
def test_priority_rag_pipeline_run_task_error_handling(
|
||||
self, db_session_with_containers, mock_pipeline_generator, mock_file_service
|
||||
):
|
||||
"""
|
||||
Test error handling in priority RAG pipeline run task using real Redis.
|
||||
|
||||
This test verifies:
|
||||
- Exception handling during core processing
|
||||
- Tenant queue cleanup even on errors using real Redis
|
||||
- Proper error logging
|
||||
- Function completes without raising exceptions
|
||||
- Queue management continues despite core processing errors
|
||||
"""
|
||||
# Arrange: Create test data
|
||||
account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
|
||||
entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
|
||||
file_content = self._create_file_content_for_entities(entities)
|
||||
|
||||
# Mock file service
|
||||
file_id = str(uuid.uuid4())
|
||||
mock_file_service["get_content"].return_value = file_content
|
||||
|
||||
# Mock PipelineGenerator to raise an exception
|
||||
mock_pipeline_generator.side_effect = Exception("Pipeline generation failed")
|
||||
|
||||
# Use real Redis for TenantIsolatedTaskQueue
|
||||
queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
|
||||
|
||||
# Add waiting task to the real Redis queue
|
||||
waiting_file_id = str(uuid.uuid4())
|
||||
queue.push_tasks([waiting_file_id])
|
||||
|
||||
# Mock the task function calls
|
||||
with patch(
|
||||
"tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay"
|
||||
) as mock_delay:
|
||||
# Act: Execute the priority task (should not raise exception)
|
||||
priority_rag_pipeline_run_task(file_id, tenant.id)
|
||||
|
||||
# Assert: Verify error was handled gracefully
|
||||
# The function should not raise exceptions
|
||||
mock_file_service["get_content"].assert_called_once_with(file_id)
|
||||
mock_file_service["delete_file"].assert_called_once_with(file_id)
|
||||
assert mock_pipeline_generator.call_count == 1
|
||||
|
||||
# Verify waiting task was still processed despite core processing error
|
||||
mock_delay.assert_called_once()
|
||||
|
||||
# Verify correct parameters for the call
|
||||
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
|
||||
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
|
||||
assert call_kwargs.get("tenant_id") == tenant.id
|
||||
|
||||
# Verify queue is empty after processing (task was pulled)
|
||||
remaining_tasks = queue.pull_tasks(count=10)
|
||||
assert len(remaining_tasks) == 0
|
||||
|
||||
def test_rag_pipeline_run_task_error_handling(
|
||||
self, db_session_with_containers, mock_pipeline_generator, mock_file_service
|
||||
):
|
||||
"""
|
||||
Test error handling in regular RAG pipeline run task using real Redis.
|
||||
|
||||
This test verifies:
|
||||
- Exception handling during core processing
|
||||
- Tenant queue cleanup even on errors using real Redis
|
||||
- Proper error logging
|
||||
- Function completes without raising exceptions
|
||||
- Queue management continues despite core processing errors
|
||||
"""
|
||||
# Arrange: Create test data
|
||||
account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
|
||||
entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
|
||||
file_content = self._create_file_content_for_entities(entities)
|
||||
|
||||
# Mock file service
|
||||
file_id = str(uuid.uuid4())
|
||||
mock_file_service["get_content"].return_value = file_content
|
||||
|
||||
# Mock PipelineGenerator to raise an exception
|
||||
mock_pipeline_generator.side_effect = Exception("Pipeline generation failed")
|
||||
|
||||
# Use real Redis for TenantIsolatedTaskQueue
|
||||
queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")
|
||||
|
||||
# Add waiting task to the real Redis queue
|
||||
waiting_file_id = str(uuid.uuid4())
|
||||
queue.push_tasks([waiting_file_id])
|
||||
|
||||
# Mock the task function calls
|
||||
with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
|
||||
# Act: Execute the regular task (should not raise exception)
|
||||
rag_pipeline_run_task(file_id, tenant.id)
|
||||
|
||||
# Assert: Verify error was handled gracefully
|
||||
# The function should not raise exceptions
|
||||
mock_file_service["get_content"].assert_called_once_with(file_id)
|
||||
mock_file_service["delete_file"].assert_called_once_with(file_id)
|
||||
assert mock_pipeline_generator.call_count == 1
|
||||
|
||||
# Verify waiting task was still processed despite core processing error
|
||||
mock_delay.assert_called_once()
|
||||
|
||||
# Verify correct parameters for the call
|
||||
call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
|
||||
assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
|
||||
assert call_kwargs.get("tenant_id") == tenant.id
|
||||
|
||||
# Verify queue is empty after processing (task was pulled)
|
||||
remaining_tasks = queue.pull_tasks(count=10)
|
||||
assert len(remaining_tasks) == 0
|
||||
|
    def test_priority_rag_pipeline_run_task_tenant_isolation(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test tenant isolation in priority RAG pipeline run task using real Redis.

        This test verifies:
        - Different tenants have isolated queues
        - Tasks from one tenant don't affect another tenant's queue
        - Queue operations are properly scoped to tenant
        """
        # Arrange: Create test data for two different tenants
        account1, tenant1, pipeline1, workflow1 = self._create_test_pipeline_and_workflow(db_session_with_containers)
        account2, tenant2, pipeline2, workflow2 = self._create_test_pipeline_and_workflow(db_session_with_containers)

        entities1 = self._create_rag_pipeline_invoke_entities(account1, tenant1, pipeline1, workflow1, count=1)
        entities2 = self._create_rag_pipeline_invoke_entities(account2, tenant2, pipeline2, workflow2, count=1)

        file_content1 = self._create_file_content_for_entities(entities1)
        file_content2 = self._create_file_content_for_entities(entities2)

        # Mock file service
        file_id1 = str(uuid.uuid4())
        file_id2 = str(uuid.uuid4())
        mock_file_service["get_content"].side_effect = [file_content1, file_content2]

        # Use real Redis for TenantIsolatedTaskQueue
        queue1 = TenantIsolatedTaskQueue(tenant1.id, "pipeline")
        queue2 = TenantIsolatedTaskQueue(tenant2.id, "pipeline")

        # Add waiting tasks to both queues
        waiting_file_id1 = str(uuid.uuid4())
        waiting_file_id2 = str(uuid.uuid4())

        queue1.push_tasks([waiting_file_id1])
        queue2.push_tasks([waiting_file_id2])

        # Mock the task function calls
        with patch(
            "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay"
        ) as mock_delay:
            # Act: Execute the priority task for tenant1 only
            priority_rag_pipeline_run_task(file_id1, tenant1.id)

            # Assert: Verify core processing occurred for tenant1
            assert mock_file_service["get_content"].call_count == 1
            assert mock_file_service["delete_file"].call_count == 1
            assert mock_pipeline_generator.call_count == 1

            # Verify only tenant1's waiting task was processed
            mock_delay.assert_called_once()
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1
            assert call_kwargs.get("tenant_id") == tenant1.id

            # Verify tenant1's queue is empty
            remaining_tasks1 = queue1.pull_tasks(count=10)
            assert len(remaining_tasks1) == 0

            # Verify tenant2's queue still has its task (isolation)
            remaining_tasks2 = queue2.pull_tasks(count=10)
            assert len(remaining_tasks2) == 1

            # Verify queue keys are different
            assert queue1._queue != queue2._queue
            assert queue1._task_key != queue2._task_key

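    # NOTE (assumption): TenantIsolatedTaskQueue presumably derives its Redis
    # keys from the tenant id and topic, e.g. something shaped like
    #     f"...:{tenant_id}:pipeline"
    # (exact key format unverified), which is what the _queue and _task_key
    # assertions above rely on.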
    def test_rag_pipeline_run_task_tenant_isolation(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test tenant isolation in regular RAG pipeline run task using real Redis.

        This test verifies:
        - Different tenants have isolated queues
        - Tasks from one tenant don't affect another tenant's queue
        - Queue operations are properly scoped to tenant
        """
        # Arrange: Create test data for two different tenants
        account1, tenant1, pipeline1, workflow1 = self._create_test_pipeline_and_workflow(db_session_with_containers)
        account2, tenant2, pipeline2, workflow2 = self._create_test_pipeline_and_workflow(db_session_with_containers)

        entities1 = self._create_rag_pipeline_invoke_entities(account1, tenant1, pipeline1, workflow1, count=1)
        entities2 = self._create_rag_pipeline_invoke_entities(account2, tenant2, pipeline2, workflow2, count=1)

        file_content1 = self._create_file_content_for_entities(entities1)
        file_content2 = self._create_file_content_for_entities(entities2)

        # Mock file service
        file_id1 = str(uuid.uuid4())
        file_id2 = str(uuid.uuid4())
        mock_file_service["get_content"].side_effect = [file_content1, file_content2]

        # Use real Redis for TenantIsolatedTaskQueue
        queue1 = TenantIsolatedTaskQueue(tenant1.id, "pipeline")
        queue2 = TenantIsolatedTaskQueue(tenant2.id, "pipeline")

        # Add waiting tasks to both queues
        waiting_file_id1 = str(uuid.uuid4())
        waiting_file_id2 = str(uuid.uuid4())

        queue1.push_tasks([waiting_file_id1])
        queue2.push_tasks([waiting_file_id2])

        # Mock the task function calls
        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
            # Act: Execute the regular task for tenant1 only
            rag_pipeline_run_task(file_id1, tenant1.id)

            # Assert: Verify core processing occurred for tenant1
            assert mock_file_service["get_content"].call_count == 1
            assert mock_file_service["delete_file"].call_count == 1
            assert mock_pipeline_generator.call_count == 1

            # Verify only tenant1's waiting task was processed
            mock_delay.assert_called_once()
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1
            assert call_kwargs.get("tenant_id") == tenant1.id

            # Verify tenant1's queue is empty
            remaining_tasks1 = queue1.pull_tasks(count=10)
            assert len(remaining_tasks1) == 0

            # Verify tenant2's queue still has its task (isolation)
            remaining_tasks2 = queue2.pull_tasks(count=10)
            assert len(remaining_tasks2) == 1

            # Verify queue keys are different
            assert queue1._queue != queue2._queue
            assert queue1._task_key != queue2._task_key

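    # NOTE (assumption): the regular and priority task variants appear to share
    # the same processing logic and to differ only in how Celery routes them
    # (e.g. a higher-priority queue), which is why the two tests above mirror
    # each other; this inference comes from the tests, not the task source.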
    def test_run_single_rag_pipeline_task_success(
        self, db_session_with_containers, mock_pipeline_generator, flask_app_with_containers
    ):
        """
        Test successful run_single_rag_pipeline_task execution.

        This test verifies:
        - Single RAG pipeline task execution within Flask app context
        - Entity validation and database queries
        - PipelineGenerator._generate method call with correct parameters
        - Proper Flask context handling
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)
        entities = self._create_rag_pipeline_invoke_entities(account, tenant, pipeline, workflow, count=1)
        entity_data = entities[0].model_dump()

        # Act: Execute the single task
        with flask_app_with_containers.app_context():
            run_single_rag_pipeline_task(entity_data, flask_app_with_containers)

        # Assert: Verify PipelineGenerator._generate was called exactly once
        assert mock_pipeline_generator.call_count == 1

        # Verify call parameters
        call = mock_pipeline_generator.call_args
        call_kwargs = call[1]  # Keyword arguments of the _generate call
        assert call_kwargs["pipeline"].id == pipeline.id
        assert call_kwargs["workflow_id"] == workflow.id
        assert call_kwargs["user"].id == account.id
        assert call_kwargs["invoke_from"] == InvokeFrom.PUBLISHED
        assert call_kwargs["streaming"] is False
        assert isinstance(call_kwargs["application_generate_entity"], RagPipelineGenerateEntity)

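    # NOTE (sketch, assumed pydantic v2 usage): run_single_rag_pipeline_task is
    # expected to rebuild the entity from the plain dict produced above, roughly:
    #
    #     entity = RagPipelineInvokeEntity.model_validate(entity_data)
    #
    # The entity class name is hypothetical, inferred from the helper name.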
    def test_run_single_rag_pipeline_task_entity_validation_error(
        self, db_session_with_containers, mock_pipeline_generator, flask_app_with_containers
    ):
        """
        Test run_single_rag_pipeline_task with invalid entity data.

        This test verifies:
        - Proper error handling for invalid entity data
        - Exception logging
        - Function raises ValueError for missing entities
        """
        # Arrange: Create entity data with valid UUIDs but non-existent entities
        invalid_entity_data = {
            "pipeline_id": str(uuid.uuid4()),
            "application_generate_entity": {
                "app_config": {
                    "app_id": str(uuid.uuid4()),
                    "app_name": "Test App",
                    "mode": "workflow",
                    "workflow_id": str(uuid.uuid4()),
                },
                "inputs": {"query": "Test query"},
                "query": "Test query",
                "response_mode": "blocking",
                "user": str(uuid.uuid4()),
                "files": [],
                "conversation_id": str(uuid.uuid4()),
            },
            "user_id": str(uuid.uuid4()),
            "tenant_id": str(uuid.uuid4()),
            "workflow_id": str(uuid.uuid4()),
            "streaming": False,
            "workflow_execution_id": str(uuid.uuid4()),
            "workflow_thread_pool_id": str(uuid.uuid4()),
        }

        # Act & Assert: Execute the single task with non-existent entities (should raise ValueError)
        with flask_app_with_containers.app_context():
            with pytest.raises(ValueError, match="Account .* not found"):
                run_single_rag_pipeline_task(invalid_entity_data, flask_app_with_containers)

        # Assert: Pipeline generator should not be called
        mock_pipeline_generator.assert_not_called()

    def test_run_single_rag_pipeline_task_database_entity_not_found(
        self, db_session_with_containers, mock_pipeline_generator, flask_app_with_containers
    ):
        """
        Test run_single_rag_pipeline_task with non-existent database entities.

        This test verifies:
        - Proper error handling for missing database entities
        - Exception logging
        - Function raises ValueError for missing entities
        """
        # Arrange: Create test data with non-existent IDs
        entity_data = {
            "pipeline_id": str(uuid.uuid4()),
            "application_generate_entity": {
                "app_config": {
                    "app_id": str(uuid.uuid4()),
                    "app_name": "Test App",
                    "mode": "workflow",
                    "workflow_id": str(uuid.uuid4()),
                },
                "inputs": {"query": "Test query"},
                "query": "Test query",
                "response_mode": "blocking",
                "user": str(uuid.uuid4()),
                "files": [],
                "conversation_id": str(uuid.uuid4()),
            },
            "user_id": str(uuid.uuid4()),
            "tenant_id": str(uuid.uuid4()),
            "workflow_id": str(uuid.uuid4()),
            "streaming": False,
            "workflow_execution_id": str(uuid.uuid4()),
            "workflow_thread_pool_id": str(uuid.uuid4()),
        }

        # Act & Assert: Execute the single task with non-existent entities (should raise ValueError)
        with flask_app_with_containers.app_context():
            with pytest.raises(ValueError, match="Account .* not found"):
                run_single_rag_pipeline_task(entity_data, flask_app_with_containers)

        # Assert: Pipeline generator should not be called
        mock_pipeline_generator.assert_not_called()

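    # NOTE (assumed implementation detail): the file-not-found tests below can
    # only pass if the task re-dispatches the next waiting task even when file
    # loading raises, e.g. via a try/finally around the processing step. That
    # structure is inferred from the assertions, not confirmed from the source.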
    def test_priority_rag_pipeline_run_task_file_not_found(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test priority RAG pipeline run task with non-existent file.

        This test verifies:
        - Proper error handling for missing files
        - Exception logging
        - Function raises Exception for file errors
        - Queue management continues despite file errors
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)

        # Mock file service to raise exception
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].side_effect = Exception("File not found")

        # Use real Redis for TenantIsolatedTaskQueue
        queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")

        # Add waiting task to the real Redis queue
        waiting_file_id = str(uuid.uuid4())
        queue.push_tasks([waiting_file_id])

        # Mock the task function calls
        with patch(
            "tasks.rag_pipeline.priority_rag_pipeline_run_task.priority_rag_pipeline_run_task.delay"
        ) as mock_delay:
            # Act & Assert: Execute the priority task (should raise Exception)
            with pytest.raises(Exception, match="File not found"):
                priority_rag_pipeline_run_task(file_id, tenant.id)

            # Assert: The file read was attempted, but generation never ran
            mock_file_service["get_content"].assert_called_once_with(file_id)
            mock_pipeline_generator.assert_not_called()

            # Verify waiting task was still processed despite file error
            mock_delay.assert_called_once()

            # Verify correct parameters for the call
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
            assert call_kwargs.get("tenant_id") == tenant.id

            # Verify queue is empty after processing (task was pulled)
            remaining_tasks = queue.pull_tasks(count=10)
            assert len(remaining_tasks) == 0

    def test_rag_pipeline_run_task_file_not_found(
        self, db_session_with_containers, mock_pipeline_generator, mock_file_service
    ):
        """
        Test regular RAG pipeline run task with non-existent file.

        This test verifies:
        - Proper error handling for missing files
        - Exception logging
        - Function raises Exception for file errors
        - Queue management continues despite file errors
        """
        # Arrange: Create test data
        account, tenant, pipeline, workflow = self._create_test_pipeline_and_workflow(db_session_with_containers)

        # Mock file service to raise exception
        file_id = str(uuid.uuid4())
        mock_file_service["get_content"].side_effect = Exception("File not found")

        # Use real Redis for TenantIsolatedTaskQueue
        queue = TenantIsolatedTaskQueue(tenant.id, "pipeline")

        # Add waiting task to the real Redis queue
        waiting_file_id = str(uuid.uuid4())
        queue.push_tasks([waiting_file_id])

        # Mock the task function calls
        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
            # Act & Assert: Execute the regular task (should raise Exception)
            with pytest.raises(Exception, match="File not found"):
                rag_pipeline_run_task(file_id, tenant.id)

            # Assert: The file read was attempted, but generation never ran
            mock_file_service["get_content"].assert_called_once_with(file_id)
            mock_pipeline_generator.assert_not_called()

            # Verify waiting task was still processed despite file error
            mock_delay.assert_called_once()

            # Verify correct parameters for the call
            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
            assert call_kwargs.get("tenant_id") == tenant.id

            # Verify queue is empty after processing (task was pulled)
            remaining_tasks = queue.pull_tasks(count=10)
            assert len(remaining_tasks) == 0