"""XlsParser 单元测试""" import pytest import xlwt from exceptions import ParseError from parsers.xls_parser import XlsParser @pytest.fixture def parser(): return XlsParser() def _create_xls(path, sheets=None): """ 创建测试用 XLS 文件。 Args: path: 输出文件路径 sheets: dict,key 为 sheet 名称,value 为二维列表(行×列的数据) 如果为 None,创建空工作簿 """ wb = xlwt.Workbook() if sheets: for sheet_name, rows in sheets.items(): ws = wb.add_sheet(sheet_name) for row_idx, row in enumerate(rows): for col_idx, value in enumerate(row): ws.write(row_idx, col_idx, value) else: # xlwt 需要至少一个 sheet wb.add_sheet("Sheet1") wb.save(str(path)) class TestSupportedExtensions: def test_supports_xls(self, parser): assert ".xls" in parser.supported_extensions() def test_only_one_extension(self, parser): assert len(parser.supported_extensions()) == 1 class TestParse: def test_simple_table(self, parser, tmp_path): """基本表格转换为 Markdown""" xls_path = tmp_path / "simple.xls" _create_xls(xls_path, { "Sheet1": [ ["Name", "Age"], ["Alice", 30], ["Bob", 25], ] }) result = parser.parse(str(xls_path)) assert "## Sheet1" in result assert "| Name | Age |" in result assert "| --- | --- |" in result assert "Alice" in result assert "Bob" in result def test_multiple_sheets(self, parser, tmp_path): """多个工作表各自生成标题和表格""" xls_path = tmp_path / "multi.xls" _create_xls(xls_path, { "Users": [["Name"], ["Alice"]], "Orders": [["ID"], ["001"]], }) result = parser.parse(str(xls_path)) assert "## Users" in result assert "## Orders" in result assert "| Name |" in result assert "| ID |" in result def test_empty_sheet_skipped(self, parser, tmp_path): """空工作表应被跳过""" xls_path = tmp_path / "empty_sheet.xls" wb = xlwt.Workbook() wb.add_sheet("Empty") # no data written ws = wb.add_sheet("Data") ws.write(0, 0, "Col1") ws.write(1, 0, "Val1") wb.save(str(xls_path)) result = parser.parse(str(xls_path)) assert "## Empty" not in result assert "## Data" in result def test_pipe_escaped(self, parser, tmp_path): """单元格中的 | 应被转义为 |""" xls_path = tmp_path / "pipe.xls" _create_xls(xls_path, { "Sheet1": [["Header"], ["value|with|pipes"]], }) result = parser.parse(str(xls_path)) assert "|" in result assert "value|with|pipes" in result def test_newline_escaped(self, parser, tmp_path): """单元格中的换行符应被转义为
""" xls_path = tmp_path / "newline.xls" _create_xls(xls_path, { "Sheet1": [["Header"], ["line1\nline2"]], }) result = parser.parse(str(xls_path)) assert "line1
line2" in result def test_backtick_escaped(self, parser, tmp_path): """单元格中的反引号应被转义为 `""" xls_path = tmp_path / "backtick.xls" _create_xls(xls_path, { "Sheet1": [["Header"], ["code `snippet`"]], }) result = parser.parse(str(xls_path)) assert "`" in result def test_empty_cell_becomes_empty(self, parser, tmp_path): """空单元格应显示为空字符串""" xls_path = tmp_path / "empty_cell.xls" wb = xlwt.Workbook() ws = wb.add_sheet("Sheet1") ws.write(0, 0, "A") ws.write(0, 1, "B") ws.write(1, 0, "val") # cell (1,1) is not written — will be empty wb.save(str(xls_path)) result = parser.parse(str(xls_path)) assert "| val | |" in result def test_sheet_name_as_heading(self, parser, tmp_path): """工作表名称应作为 ## 标题""" xls_path = tmp_path / "named.xls" _create_xls(xls_path, { "Sales Report": [["Month", "Revenue"], ["Jan", "1000"]], }) result = parser.parse(str(xls_path)) assert "## Sales Report" in result def test_nonexistent_file_raises(self, parser): with pytest.raises(ParseError) as exc_info: parser.parse("/nonexistent/path/file.xls") assert "file.xls" in exc_info.value.file_name assert exc_info.value.reason != "" def test_corrupted_file_raises(self, parser, tmp_path): xls_path = tmp_path / "corrupted.xls" xls_path.write_bytes(b"this is not an xls file") with pytest.raises(ParseError) as exc_info: parser.parse(str(xls_path)) assert "corrupted.xls" in exc_info.value.file_name def test_parse_error_contains_filename(self, parser): with pytest.raises(ParseError) as exc_info: parser.parse("/no/such/report.xls") assert exc_info.value.file_name == "report.xls" def test_numeric_values(self, parser, tmp_path): """数值类型应正确转换为字符串""" xls_path = tmp_path / "numeric.xls" _create_xls(xls_path, { "Sheet1": [["Int", "Float"], [42, 3.14]], }) result = parser.parse(str(xls_path)) assert "42" in result assert "3.14" in result def test_crlf_escaped(self, parser, tmp_path): """\\r\\n 应被转义为
""" xls_path = tmp_path / "crlf.xls" _create_xls(xls_path, { "Sheet1": [["Header"], ["line1\r\nline2"]], }) result = parser.parse(str(xls_path)) assert "line1
line2" in result