Initial commit: AI 知识库文档智能分块工具
This commit is contained in:
135
tests/test_image_parser.py
Normal file
135
tests/test_image_parser.py
Normal file
@@ -0,0 +1,135 @@
|
||||
"""ImageParser 单元测试"""
|
||||
|
||||
import base64
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from exceptions import ApiError, ParseError
|
||||
from parsers.image_parser import ImageParser, DEFAULT_VISION_PROMPT
|
||||
|
||||
|
||||
@pytest.fixture
def mock_api_client():
    """Provide a fresh ``MagicMock`` that stands in for the API client."""
    client = MagicMock()
    return client
|
||||
|
||||
|
||||
@pytest.fixture
def parser(mock_api_client):
    """Provide an ``ImageParser`` constructed with the mocked API client."""
    image_parser = ImageParser(mock_api_client)
    return image_parser
|
||||
|
||||
|
||||
class TestSupportedExtensions:
    """Checks on the extensions reported by ``parser.supported_extensions()``."""

    def test_supports_png(self, parser):
        """``.png`` is among the reported extensions."""
        extensions = parser.supported_extensions()
        assert ".png" in extensions

    def test_supports_jpg(self, parser):
        """``.jpg`` is among the reported extensions."""
        extensions = parser.supported_extensions()
        assert ".jpg" in extensions

    def test_supports_jpeg(self, parser):
        """``.jpeg`` is among the reported extensions."""
        extensions = parser.supported_extensions()
        assert ".jpeg" in extensions

    def test_supports_bmp(self, parser):
        """``.bmp`` is among the reported extensions."""
        extensions = parser.supported_extensions()
        assert ".bmp" in extensions

    def test_supports_gif(self, parser):
        """``.gif`` is among the reported extensions."""
        extensions = parser.supported_extensions()
        assert ".gif" in extensions

    def test_supports_webp(self, parser):
        """``.webp`` is among the reported extensions."""
        extensions = parser.supported_extensions()
        assert ".webp" in extensions

    def test_has_six_extensions(self, parser):
        """Exactly six extensions are reported."""
        extension_count = len(parser.supported_extensions())
        assert extension_count == 6
|
||||
|
||||
|
||||
class TestParse:
    """Tests for ``ImageParser.parse`` using a mocked API client."""

    @staticmethod
    def _vision_argument(api_client, keyword, position):
        """Return one argument of the recorded ``vision`` call.

        The argument is looked up by keyword first and, if it was not passed
        by keyword, taken from its positional slot. This keeps the tests
        independent of how the parser chooses to pass the argument.

        Args:
            api_client: The mocked client whose ``vision`` call was recorded.
            keyword: Keyword name of the wanted argument.
            position: Positional index used when the keyword is absent.
        """
        recorded = api_client.vision.call_args
        assert recorded is not None, "vision() was never called"
        if keyword in recorded.kwargs:
            return recorded.kwargs[keyword]
        return recorded.args[position]

    def test_successful_parse(self, mock_api_client, tmp_path):
        """Parsing an image file returns the text produced by the Vision API."""
        img = tmp_path / "photo.png"
        img.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 20)

        mock_api_client.vision.return_value = "图片中包含一段中文文字"
        parser = ImageParser(mock_api_client)

        result = parser.parse(str(img))

        assert result == "图片中包含一段中文文字"
        mock_api_client.vision.assert_called_once()

    def test_base64_encoding_correctness(self, mock_api_client, tmp_path):
        """The base64 payload sent to the API decodes to the file's bytes."""
        raw_bytes = b"\x89PNG\r\n\x1a\nSOME_IMAGE_DATA"
        img = tmp_path / "check.png"
        img.write_bytes(raw_bytes)

        mock_api_client.vision.return_value = "ok"
        parser = ImageParser(mock_api_client)
        parser.parse(str(img))

        sent_base64 = self._vision_argument(mock_api_client, "image_base64", 1)
        assert base64.b64decode(sent_base64) == raw_bytes

    def test_system_prompt_passed_to_api(self, mock_api_client, tmp_path):
        """The system prompt contains the default prompt and the file name."""
        # A distinctive stem is used on purpose: a generic word such as
        # "prompt" could already occur in the base prompt text, which would
        # let the file-name assertion pass without the name being included.
        img = tmp_path / "sample7f3a.png"
        img.write_bytes(b"\x00")

        mock_api_client.vision.return_value = "text"
        parser = ImageParser(mock_api_client)
        parser.parse(str(img))

        sent_prompt = self._vision_argument(mock_api_client, "system_prompt", 0)
        assert DEFAULT_VISION_PROMPT in sent_prompt
        assert img.stem in sent_prompt

    def test_file_not_found_raises_parse_error(self, parser, tmp_path):
        """A missing file raises ``ParseError``."""
        # Built under tmp_path so the path is guaranteed not to exist,
        # regardless of the machine the tests run on.
        missing = tmp_path / "missing.png"

        with pytest.raises(ParseError) as exc_info:
            parser.parse(str(missing))

        # Checked after the ``with`` block; statements following the raising
        # call inside the block would never execute.
        assert exc_info.value.file_name == "missing.png"
        assert "文件读取失败" in exc_info.value.reason

    def test_unreadable_file_raises_parse_error(self, mock_api_client, tmp_path):
        """An unreadable file raises ``ParseError`` (a directory simulates it)."""
        dir_path = tmp_path / "fakefile.jpg"
        dir_path.mkdir()

        parser = ImageParser(mock_api_client)

        with pytest.raises(ParseError) as exc_info:
            parser.parse(str(dir_path))

        assert exc_info.value.file_name == "fakefile.jpg"
        assert "文件读取失败" in exc_info.value.reason

    def test_api_error_raises_parse_error(self, mock_api_client, tmp_path):
        """A failing API call raises ``ParseError``."""
        img = tmp_path / "api_fail.png"
        img.write_bytes(b"\x89PNG")

        mock_api_client.vision.side_effect = ApiError("服务不可用", status_code=503)
        parser = ImageParser(mock_api_client)

        with pytest.raises(ParseError) as exc_info:
            parser.parse(str(img))

        assert exc_info.value.file_name == "api_fail.png"
        assert "Vision API 调用失败" in exc_info.value.reason

    def test_api_rate_limit_error_raises_parse_error(self, mock_api_client, tmp_path):
        """A rate-limit ``ApiError`` (retries exhausted) also becomes ``ParseError``."""
        img = tmp_path / "rate.png"
        img.write_bytes(b"\x89PNG")

        mock_api_client.vision.side_effect = ApiError("速率限制重试耗尽", status_code=429)
        parser = ImageParser(mock_api_client)

        with pytest.raises(ParseError) as exc_info:
            parser.parse(str(img))

        assert "Vision API 调用失败" in exc_info.value.reason

    def test_parse_error_contains_filename_for_missing_file(self, parser, tmp_path):
        """``ParseError`` carries the file's base name and a non-empty reason."""
        # The parent directory is never created, so the file cannot exist.
        missing = tmp_path / "does_not_exist" / "myimage.jpeg"

        with pytest.raises(ParseError) as exc_info:
            parser.parse(str(missing))

        assert exc_info.value.file_name == "myimage.jpeg"
        assert exc_info.value.reason != ""
|
||||
Reference in New Issue
Block a user