83 lines
2.7 KiB
Python
83 lines
2.7 KiB
Python
|
|
"""BaseParser 和 ParserRegistry 单元测试"""
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
from typing import List
|
||
|
|
|
||
|
|
from exceptions import UnsupportedFormatError
|
||
|
|
from parsers.base import BaseParser, ParserRegistry
|
||
|
|
|
||
|
|
|
||
|
|
class StubParser(BaseParser):
    """Concrete parser implementation used by the tests in this module.

    It reports whatever extension list it was constructed with, and its
    ``parse`` method returns a fixed-format string built from the path
    argument instead of reading anything.
    """

    def __init__(self, extensions: List[str]):
        # The list is stored as given (no copy); supported_extensions()
        # hands this same object back.
        self._extensions = extensions

    def supported_extensions(self) -> List[str]:
        """Return the extension list supplied at construction time."""
        return self._extensions

    def parse(self, file_path: str) -> str:
        """Return the string ``"parsed: "`` followed by ``file_path``."""
        parsed = f"parsed: {file_path}"
        return parsed
|
||
|
|
|
||
|
|
|
||
|
|
class TestBaseParser:
    """Tests for the BaseParser base class."""

    def test_cannot_instantiate_directly(self):
        """Calling BaseParser() with no subclass is expected to raise TypeError."""
        with pytest.raises(TypeError):
            BaseParser()

    def test_concrete_subclass_works(self):
        """A subclass that defines both methods can be created and called."""
        stub = StubParser([".txt"])

        extensions = stub.supported_extensions()
        assert extensions == [".txt"]

        output = stub.parse("test.txt")
        assert output == "parsed: test.txt"
|
||
|
|
|
||
|
|
|
||
|
|
class TestParserRegistry:
    """Tests for registering parsers and looking them up by file name."""

    def test_empty_registry_raises(self):
        """A registry with nothing registered rejects a lookup."""
        registry = ParserRegistry()
        with pytest.raises(UnsupportedFormatError):
            registry.get_parser("file.pdf")

    def test_register_and_get_parser(self):
        """A registered parser is returned for a file with its extension."""
        registry = ParserRegistry()
        pdf_parser = StubParser([".pdf"])
        registry.register(pdf_parser)
        assert registry.get_parser("document.pdf") is pdf_parser

    def test_multiple_parsers(self):
        """Each file name resolves to the parser that lists its extension."""
        registry = ParserRegistry()
        pdf_parser = StubParser([".pdf"])
        txt_parser = StubParser([".txt", ".md"])
        registry.register(pdf_parser)
        registry.register(txt_parser)

        assert registry.get_parser("doc.pdf") is pdf_parser
        assert registry.get_parser("readme.txt") is txt_parser
        assert registry.get_parser("notes.md") is txt_parser

    def test_unsupported_format_error_details(self):
        """The raised error exposes the offending extension and file name."""
        registry = ParserRegistry()
        registry.register(StubParser([".pdf"]))
        with pytest.raises(UnsupportedFormatError) as exc_info:
            registry.get_parser("file.xyz")
        # These checks must sit outside the ``with`` block: statements placed
        # after the raising call inside it would never execute.
        assert exc_info.value.extension == ".xyz"
        assert exc_info.value.file_name == "file.xyz"

    def test_case_insensitive_extension(self):
        """An upper-case file name resolves to the parser registered as ".pdf"."""
        registry = ParserRegistry()
        pdf_parser = StubParser([".pdf"])
        registry.register(pdf_parser)
        # Identity check, like the sibling tests: "is not None" would accept
        # any returned object and so could not catch a wrong match.
        assert registry.get_parser("DOC.PDF") is pdf_parser

    def test_file_path_with_directory(self):
        """Leading directory components do not affect the lookup."""
        registry = ParserRegistry()
        parser = StubParser([".csv"])
        registry.register(parser)
        assert registry.get_parser("/home/user/data/report.csv") is parser

    def test_first_matching_parser_wins(self):
        """When two parsers list the same extension, the earlier one is used."""
        registry = ParserRegistry()
        first = StubParser([".txt"])
        second = StubParser([".txt"])
        registry.register(first)
        registry.register(second)
        assert registry.get_parser("file.txt") is first
|