"""Unit tests for CsvParser."""
|
|
|
|
import pytest
|
|
|
|
from exceptions import ParseError
|
|
from parsers.csv_parser import CsvParser
|
|
|
|
|
|
@pytest.fixture
def parser():
    """Build a CsvParser instance for tests that request the ``parser`` fixture."""
    csv_parser = CsvParser()
    return csv_parser
|
|
|
|
|
|
class TestSupportedExtensions:
    """Checks on the extension collection returned by supported_extensions()."""

    def test_supports_csv(self, parser):
        """The ".csv" suffix is among the supported extensions."""
        extensions = parser.supported_extensions()
        assert ".csv" in extensions

    def test_only_one_extension(self, parser):
        """Exactly one extension is reported."""
        extension_count = len(parser.supported_extensions())
        assert extension_count == 1
|
|
|
|
|
|
class TestParse:
    """Tests for CsvParser.parse.

    Each test writes a small CSV file (under pytest's ``tmp_path`` where a
    file is needed), passes its path to ``parse`` as a string, and checks
    the returned text for the expected Markdown table fragments.
    """

    def test_basic_csv(self, parser, tmp_path):
        """A header and two data rows produce header, separator and data lines."""
        csv_path = tmp_path / "basic.csv"
        csv_text = "name,age,city\nAlice,30,Beijing\nBob,25,Shanghai\n"
        csv_path.write_text(csv_text, encoding="utf-8")

        markdown = parser.parse(str(csv_path))

        expected_rows = (
            "| name | age | city |",
            "| --- | --- | --- |",
            "| Alice | 30 | Beijing |",
            "| Bob | 25 | Shanghai |",
        )
        for row in expected_rows:
            assert row in markdown

    def test_empty_file(self, parser, tmp_path):
        """A zero-byte file parses to the empty string."""
        csv_path = tmp_path / "empty.csv"
        csv_path.write_bytes(b"")

        markdown = parser.parse(str(csv_path))

        assert markdown == ""

    def test_header_only(self, parser, tmp_path):
        """A file with only a header yields exactly two lines of output."""
        csv_path = tmp_path / "header.csv"
        csv_path.write_text("col1,col2,col3\n", encoding="utf-8")

        markdown = parser.parse(str(csv_path))

        assert "| col1 | col2 | col3 |" in markdown
        assert "| --- | --- | --- |" in markdown
        # Only the header row and the separator row should be present.
        line_count = len(markdown.strip().split("\n"))
        assert line_count == 2

    def test_pipe_char_escaped(self, parser, tmp_path):
        """A quoted cell containing a pipe character appears in the output."""
        csv_path = tmp_path / "pipe.csv"
        csv_path.write_text('header\n"a|b"\n', encoding="utf-8")

        markdown = parser.parse(str(csv_path))

        # NOTE(review): the first assertion is satisfied by any table
        # delimiter, and the second expects the pipe to appear unescaped.
        # Confirm the intended escaped form against CsvParser._escape_cell.
        assert "|" in markdown
        assert "a|b" in markdown

    def test_newline_in_cell(self, parser, tmp_path):
        """A line feed inside a quoted cell is rendered as a <br> tag."""
        csv_path = tmp_path / "newline.csv"
        csv_path.write_text('header\n"line1\nline2"\n', encoding="utf-8")

        markdown = parser.parse(str(csv_path))

        assert "<br>" in markdown
        assert "line1<br>line2" in markdown

    def test_gbk_encoded_csv(self, parser, tmp_path):
        """A file written as GBK bytes still yields its Chinese cell text."""
        csv_path = tmp_path / "gbk.csv"
        content = "姓名,年龄,城市\n张三,28,北京\n李四,32,上海\n"
        gbk_bytes = content.encode("gbk")
        csv_path.write_bytes(gbk_bytes)

        markdown = parser.parse(str(csv_path))

        assert "张三" in markdown
        assert "北京" in markdown

    def test_nonexistent_file_raises(self, parser):
        """Parsing a missing path raises ParseError carrying file name and reason."""
        missing_path = "/nonexistent/path/data.csv"

        with pytest.raises(ParseError) as exc_info:
            parser.parse(missing_path)

        # Inspect the captured exception only after the context has exited.
        error = exc_info.value
        assert "data.csv" in error.file_name
        assert error.reason != ""

    def test_short_row_padded(self, parser, tmp_path):
        """A data row with fewer fields than the header is padded with empty cells."""
        csv_path = tmp_path / "short.csv"
        csv_path.write_text("a,b,c\n1\n", encoding="utf-8")

        markdown = parser.parse(str(csv_path))

        # NOTE(review): empty cells are expected to render as a single space
        # between pipes; confirm against CsvParser's row formatting.
        assert "| 1 | | |" in markdown

    def test_result_ends_with_newline(self, parser, tmp_path):
        """Non-empty output is terminated by a line feed."""
        csv_path = tmp_path / "trail.csv"
        csv_path.write_text("h1,h2\nv1,v2\n", encoding="utf-8")

        markdown = parser.parse(str(csv_path))

        assert markdown.endswith("\n")
|
|
|
|
|
|
class TestEscapeCell:
    """Tests for CsvParser._escape_cell, invoked directly on the class."""

    def test_no_special_chars(self):
        """Plain text is returned unchanged."""
        escaped = CsvParser._escape_cell("hello")
        assert escaped == "hello"

    def test_pipe_escaped(self):
        """Check the output for a cell that contains a pipe character."""
        escaped = CsvParser._escape_cell("a|b")
        # NOTE(review): the expected value equals the input, i.e. the pipe is
        # left as-is despite the test name; confirm whether an escaped form
        # was intended.
        assert escaped == "a|b"

    def test_newline_escaped(self):
        """A line feed becomes a <br> tag."""
        escaped = CsvParser._escape_cell("a\nb")
        assert escaped == "a<br>b"

    def test_crlf_escaped(self):
        """A carriage return + line feed pair becomes a single <br> tag."""
        escaped = CsvParser._escape_cell("a\r\nb")
        assert escaped == "a<br>b"

    def test_cr_escaped(self):
        """A lone carriage return becomes a <br> tag."""
        escaped = CsvParser._escape_cell("a\rb")
        assert escaped == "a<br>b"

    def test_combined_escapes(self):
        """A cell with both a pipe and a line feed is handled in one call."""
        escaped = CsvParser._escape_cell("a|b\nc")
        assert escaped == "a|b<br>c"
|