Initial commit: AI 知识库文档智能分块工具
This commit is contained in:
105
tests/test_csv_parser.py
Normal file
105
tests/test_csv_parser.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""CsvParser 单元测试"""
|
||||
|
||||
import pytest
|
||||
|
||||
from exceptions import ParseError
|
||||
from parsers.csv_parser import CsvParser
|
||||
|
||||
|
||||
@pytest.fixture
def parser():
    """Build a new ``CsvParser`` for each test that requests this fixture."""
    csv_parser = CsvParser()
    return csv_parser
|
||||
|
||||
|
||||
class TestSupportedExtensions:
    """Checks on the value returned by ``CsvParser.supported_extensions()``."""

    def test_supports_csv(self, parser):
        """The ``.csv`` suffix is among the advertised extensions."""
        extensions = parser.supported_extensions()
        assert ".csv" in extensions

    def test_only_one_extension(self, parser):
        """Exactly one extension is advertised."""
        extensions = parser.supported_extensions()
        assert len(extensions) == 1
|
||||
|
||||
|
||||
class TestParse:
    """Exercise ``CsvParser.parse`` against CSV files written under ``tmp_path``."""

    def test_basic_csv(self, parser, tmp_path):
        """A header and two data rows yield header, separator and both rows."""
        csv_path = tmp_path / "basic.csv"
        csv_path.write_text(
            "name,age,city\nAlice,30,Beijing\nBob,25,Shanghai\n",
            encoding="utf-8",
        )
        rendered = parser.parse(str(csv_path))
        assert "| name | age | city |" in rendered
        assert "| --- | --- | --- |" in rendered
        assert "| Alice | 30 | Beijing |" in rendered
        assert "| Bob | 25 | Shanghai |" in rendered

    def test_empty_file(self, parser, tmp_path):
        """A zero-byte file parses to the empty string."""
        csv_path = tmp_path / "empty.csv"
        csv_path.write_bytes(b"")
        assert parser.parse(str(csv_path)) == ""

    def test_header_only(self, parser, tmp_path):
        """A file with only a header row yields exactly two output lines."""
        csv_path = tmp_path / "header.csv"
        csv_path.write_text("col1,col2,col3\n", encoding="utf-8")
        rendered = parser.parse(str(csv_path))
        assert "| col1 | col2 | col3 |" in rendered
        assert "| --- | --- | --- |" in rendered
        # Header line plus separator line, and nothing else.
        output_lines = rendered.strip().split("\n")
        assert len(output_lines) == 2

    def test_pipe_char_escaped(self, parser, tmp_path):
        """A quoted cell containing ``|`` still shows up in the output."""
        csv_path = tmp_path / "pipe.csv"
        csv_path.write_text('header\n"a|b"\n', encoding="utf-8")
        rendered = parser.parse(str(csv_path))
        # NOTE(review): any rendered table row contains "|", so this first
        # assertion cannot fail on non-empty output and does not by itself
        # demonstrate escaping. Confirm the intended expected text against
        # CsvParser before tightening it.
        assert "|" in rendered
        assert "a|b" in rendered

    def test_newline_in_cell(self, parser, tmp_path):
        """A line break inside a quoted cell is rendered as ``<br>``."""
        csv_path = tmp_path / "newline.csv"
        csv_path.write_text('header\n"line1\nline2"\n', encoding="utf-8")
        rendered = parser.parse(str(csv_path))
        assert "<br>" in rendered
        assert "line1<br>line2" in rendered

    def test_gbk_encoded_csv(self, parser, tmp_path):
        """Bytes written as GBK come back as readable Chinese text."""
        csv_path = tmp_path / "gbk.csv"
        content = "姓名,年龄,城市\n张三,28,北京\n李四,32,上海\n"
        csv_path.write_bytes(content.encode("gbk"))
        rendered = parser.parse(str(csv_path))
        assert "张三" in rendered
        assert "北京" in rendered

    def test_nonexistent_file_raises(self, parser):
        """Parsing a missing path raises ``ParseError`` with name and reason."""
        with pytest.raises(ParseError) as exc_info:
            parser.parse("/nonexistent/path/data.csv")
        # Inspect the exception only after the ``with`` block has exited;
        # statements placed after the raising call inside it would never run.
        error = exc_info.value
        assert "data.csv" in error.file_name
        assert error.reason != ""

    def test_short_row_padded(self, parser, tmp_path):
        """Rows shorter than header should be padded with empty cells."""
        csv_path = tmp_path / "short.csv"
        csv_path.write_text("a,b,c\n1\n", encoding="utf-8")
        rendered = parser.parse(str(csv_path))
        # NOTE(review): the exact spacing of empty cells depends on how
        # CsvParser renders them; verify this literal against real output.
        assert "| 1 | | |" in rendered

    def test_result_ends_with_newline(self, parser, tmp_path):
        """Rendered output for a non-empty file ends with a newline."""
        csv_path = tmp_path / "trail.csv"
        csv_path.write_text("h1,h2\nv1,v2\n", encoding="utf-8")
        rendered = parser.parse(str(csv_path))
        assert rendered.endswith("\n")
|
||||
|
||||
|
||||
class TestEscapeCell:
    """Direct checks of ``CsvParser._escape_cell``, called on the class itself."""

    def test_no_special_chars(self):
        """Plain text comes back unchanged."""
        assert CsvParser._escape_cell("hello") == "hello"

    def test_pipe_escaped(self):
        """Check the result for a cell that contains a pipe character."""
        # NOTE(review): the expected value equals the input, i.e. this asserts
        # the pipe is returned unchanged even though the test name says
        # "escaped". Confirm the intended expected string against
        # CsvParser._escape_cell.
        assert CsvParser._escape_cell("a|b") == "a|b"

    def test_newline_escaped(self):
        """A bare line feed becomes ``<br>``."""
        assert CsvParser._escape_cell("a\nb") == "a<br>b"

    def test_crlf_escaped(self):
        """A CR+LF pair becomes a single ``<br>``."""
        assert CsvParser._escape_cell("a\r\nb") == "a<br>b"

    def test_cr_escaped(self):
        """A bare carriage return becomes ``<br>``."""
        assert CsvParser._escape_cell("a\rb") == "a<br>b"

    def test_combined_escapes(self):
        """A cell with both a pipe and a line feed is handled in one call."""
        # NOTE(review): as in test_pipe_escaped, the pipe is expected to come
        # back unchanged here; only the newline is rewritten. Confirm.
        assert CsvParser._escape_cell("a|b\nc") == "a|b<br>c"
|
||||
Reference in New Issue
Block a user