Files
bigwo/tests/test_xls_parser.py
2026-03-02 17:38:28 +08:00

179 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Unit tests for XlsParser."""
import pytest
import xlwt
from exceptions import ParseError
from parsers.xls_parser import XlsParser
@pytest.fixture
def parser():
    """Return a newly constructed ``XlsParser`` for the requesting test."""
    return XlsParser()
def _create_xls(path, sheets=None):
    """Write an XLS workbook to ``path`` for use as test input.

    Args:
        path: Output file path; it is passed through ``str()`` before saving.
        sheets: Mapping of sheet name to a two-dimensional list of cell
            values (a list of rows, each row a list of column values).
            When ``None`` or empty, the workbook is saved with a single
            sheet named "Sheet1" that has no cells written.
    """
    wb = xlwt.Workbook()
    if sheets:
        for sheet_name, rows in sheets.items():
            ws = wb.add_sheet(sheet_name)
            for row_idx, row in enumerate(rows):
                for col_idx, value in enumerate(row):
                    ws.write(row_idx, col_idx, value)
    else:
        # xlwt needs at least one sheet in the workbook.
        wb.add_sheet("Sheet1")
    wb.save(str(path))
class TestSupportedExtensions:
    """Tests for the extension list reported by the parser."""

    def test_supports_xls(self, parser):
        """``.xls`` is among the supported extensions."""
        assert ".xls" in parser.supported_extensions()

    def test_only_one_extension(self, parser):
        """Exactly one extension is reported."""
        assert len(parser.supported_extensions()) == 1
class TestParse:
    """Tests for ``parser.parse`` on XLS files built with xlwt."""

    def test_simple_table(self, parser, tmp_path):
        """A basic table is converted to a Markdown table."""
        xls_path = tmp_path / "simple.xls"
        _create_xls(xls_path, {
            "Sheet1": [
                ["Name", "Age"],
                ["Alice", 30],
                ["Bob", 25],
            ]
        })
        result = parser.parse(str(xls_path))
        assert "## Sheet1" in result
        assert "| Name | Age |" in result
        assert "| --- | --- |" in result
        assert "Alice" in result
        assert "Bob" in result

    def test_multiple_sheets(self, parser, tmp_path):
        """Each worksheet produces its own heading and table."""
        xls_path = tmp_path / "multi.xls"
        _create_xls(xls_path, {
            "Users": [["Name"], ["Alice"]],
            "Orders": [["ID"], ["001"]],
        })
        result = parser.parse(str(xls_path))
        assert "## Users" in result
        assert "## Orders" in result
        assert "| Name |" in result
        assert "| ID |" in result

    def test_empty_sheet_skipped(self, parser, tmp_path):
        """An empty worksheet is skipped in the output."""
        xls_path = tmp_path / "empty_sheet.xls"
        wb = xlwt.Workbook()
        wb.add_sheet("Empty")  # no data written
        ws = wb.add_sheet("Data")
        ws.write(0, 0, "Col1")
        ws.write(1, 0, "Val1")
        wb.save(str(xls_path))
        result = parser.parse(str(xls_path))
        assert "## Empty" not in result
        assert "## Data" in result

    def test_pipe_escaped(self, parser, tmp_path):
        """A ``|`` inside a cell should be escaped in the output."""
        xls_path = tmp_path / "pipe.xls"
        _create_xls(xls_path, {
            "Sheet1": [["Header"], ["value|with|pipes"]],
        })
        result = parser.parse(str(xls_path))
        assert "|" in result
        # The raw, unescaped cell text must not appear in the output;
        # asserting its presence would contradict the purpose of this test.
        # NOTE(review): the exact escape sequence is not pinned here --
        # tighten this to the parser's real escape form once confirmed.
        assert "value|with|pipes" not in result

    def test_newline_escaped(self, parser, tmp_path):
        """A newline inside a cell should be escaped as ``<br>``."""
        xls_path = tmp_path / "newline.xls"
        _create_xls(xls_path, {
            "Sheet1": [["Header"], ["line1\nline2"]],
        })
        result = parser.parse(str(xls_path))
        assert "line1<br>line2" in result

    def test_backtick_escaped(self, parser, tmp_path):
        """A backtick inside a cell should be escaped as ``&#96;``."""
        xls_path = tmp_path / "backtick.xls"
        _create_xls(xls_path, {
            "Sheet1": [["Header"], ["code `snippet`"]],
        })
        result = parser.parse(str(xls_path))
        assert "&#96;" in result

    def test_empty_cell_becomes_empty(self, parser, tmp_path):
        """An unwritten cell should be rendered as an empty string."""
        xls_path = tmp_path / "empty_cell.xls"
        wb = xlwt.Workbook()
        ws = wb.add_sheet("Sheet1")
        ws.write(0, 0, "A")
        ws.write(0, 1, "B")
        ws.write(1, 0, "val")
        # cell (1,1) is not written -- will be empty
        wb.save(str(xls_path))
        result = parser.parse(str(xls_path))
        # NOTE(review): the expected spacing around the empty cell depends
        # on the parser's cell formatting -- confirm against XlsParser.
        assert "| val | |" in result

    def test_sheet_name_as_heading(self, parser, tmp_path):
        """The worksheet name should be emitted as a ``##`` heading."""
        xls_path = tmp_path / "named.xls"
        _create_xls(xls_path, {
            "Sales Report": [["Month", "Revenue"], ["Jan", "1000"]],
        })
        result = parser.parse(str(xls_path))
        assert "## Sales Report" in result

    def test_nonexistent_file_raises(self, parser):
        """Parsing a missing file raises ``ParseError`` with details."""
        with pytest.raises(ParseError) as exc_info:
            parser.parse("/nonexistent/path/file.xls")
        # Checked after the ``with`` block: statements following the
        # raising call inside the block would never execute.
        assert "file.xls" in exc_info.value.file_name
        assert exc_info.value.reason != ""

    def test_corrupted_file_raises(self, parser, tmp_path):
        """Parsing a file that is not valid XLS raises ``ParseError``."""
        xls_path = tmp_path / "corrupted.xls"
        xls_path.write_bytes(b"this is not an xls file")
        with pytest.raises(ParseError) as exc_info:
            parser.parse(str(xls_path))
        assert "corrupted.xls" in exc_info.value.file_name

    def test_parse_error_contains_filename(self, parser):
        """``ParseError.file_name`` holds only the base file name."""
        with pytest.raises(ParseError) as exc_info:
            parser.parse("/no/such/report.xls")
        assert exc_info.value.file_name == "report.xls"

    def test_numeric_values(self, parser, tmp_path):
        """Numeric cell values should be converted to strings correctly."""
        xls_path = tmp_path / "numeric.xls"
        _create_xls(xls_path, {
            "Sheet1": [["Int", "Float"], [42, 3.14]],
        })
        result = parser.parse(str(xls_path))
        assert "42" in result
        assert "3.14" in result

    def test_crlf_escaped(self, parser, tmp_path):
        """\\r\\n should be escaped as <br>."""
        xls_path = tmp_path / "crlf.xls"
        _create_xls(xls_path, {
            "Sheet1": [["Header"], ["line1\r\nline2"]],
        })
        result = parser.parse(str(xls_path))
        assert "line1<br>line2" in result