"""XlsxParser 单元测试""" import pytest from openpyxl import Workbook from exceptions import ParseError from parsers.xlsx_parser import XlsxParser @pytest.fixture def parser(): return XlsxParser() def _create_xlsx(path, sheets=None): """ 创建测试用 XLSX 文件。 Args: path: 输出文件路径 sheets: dict,key 为 sheet 名称,value 为二维列表(行×列的数据) 如果为 None,创建空工作簿 """ wb = Workbook() # 删除默认 sheet wb.remove(wb.active) if sheets: for sheet_name, rows in sheets.items(): ws = wb.create_sheet(title=sheet_name) for row in rows: ws.append(row) wb.save(str(path)) def _create_xlsx_with_merge(path, sheet_name, rows, merges): """ 创建带合并单元格的 XLSX 文件。 Args: path: 输出文件路径 sheet_name: 工作表名称 rows: 二维列表(行×列的数据) merges: 合并区域列表,如 ["A1:B1", "A2:A3"] """ wb = Workbook() wb.remove(wb.active) ws = wb.create_sheet(title=sheet_name) for row in rows: ws.append(row) for merge_range in merges: ws.merge_cells(merge_range) wb.save(str(path)) class TestSupportedExtensions: def test_supports_xlsx(self, parser): assert ".xlsx" in parser.supported_extensions() def test_only_one_extension(self, parser): assert len(parser.supported_extensions()) == 1 class TestParse: def test_simple_table(self, parser, tmp_path): """基本表格转换为 Markdown""" xlsx_path = tmp_path / "simple.xlsx" _create_xlsx(xlsx_path, { "Sheet1": [ ["Name", "Age"], ["Alice", 30], ["Bob", 25], ] }) result = parser.parse(str(xlsx_path)) assert "## Sheet1" in result assert "| Name | Age |" in result assert "| --- | --- |" in result assert "| Alice | 30 |" in result assert "| Bob | 25 |" in result def test_multiple_sheets(self, parser, tmp_path): """多个工作表各自生成标题和表格""" xlsx_path = tmp_path / "multi.xlsx" _create_xlsx(xlsx_path, { "Users": [["Name"], ["Alice"]], "Orders": [["ID"], ["001"]], }) result = parser.parse(str(xlsx_path)) assert "## Users" in result assert "## Orders" in result assert "| Name |" in result assert "| ID |" in result def test_empty_sheet_skipped(self, parser, tmp_path): """空工作表应被跳过""" xlsx_path = tmp_path / "empty_sheet.xlsx" _create_xlsx(xlsx_path, { "Empty": [], "Data": [["Col1"], ["Val1"]], }) result = parser.parse(str(xlsx_path)) assert "## Empty" not in result assert "## Data" in result def test_all_empty_sheets(self, parser, tmp_path): """所有工作表都为空时返回空字符串""" xlsx_path = tmp_path / "all_empty.xlsx" _create_xlsx(xlsx_path, {"Empty1": [], "Empty2": []}) result = parser.parse(str(xlsx_path)) assert result.strip() == "" def test_pipe_escaped(self, parser, tmp_path): """单元格中的 | 应被转义为 |""" xlsx_path = tmp_path / "pipe.xlsx" _create_xlsx(xlsx_path, { "Sheet1": [["Header"], ["value|with|pipes"]], }) result = parser.parse(str(xlsx_path)) assert "|" in result assert "value|with|pipes" in result def test_newline_escaped(self, parser, tmp_path): """单元格中的换行符应被转义为
""" xlsx_path = tmp_path / "newline.xlsx" _create_xlsx(xlsx_path, { "Sheet1": [["Header"], ["line1\nline2"]], }) result = parser.parse(str(xlsx_path)) assert "line1
line2" in result def test_backtick_escaped(self, parser, tmp_path): """单元格中的反引号应被转义为 `""" xlsx_path = tmp_path / "backtick.xlsx" _create_xlsx(xlsx_path, { "Sheet1": [["Header"], ["code `snippet`"]], }) result = parser.parse(str(xlsx_path)) assert "`" in result def test_none_cell_becomes_empty(self, parser, tmp_path): """None 值的单元格应显示为空""" xlsx_path = tmp_path / "none.xlsx" _create_xlsx(xlsx_path, { "Sheet1": [["A", "B"], ["val", None]], }) result = parser.parse(str(xlsx_path)) assert "| val | |" in result def test_merged_cells(self, parser, tmp_path): """合并单元格应填充左上角的值""" xlsx_path = tmp_path / "merged.xlsx" _create_xlsx_with_merge( xlsx_path, sheet_name="Data", rows=[ ["Category", "Value"], ["Fruit", 10], [None, 20], # A3 will be merged with A2 ], merges=["A2:A3"], ) result = parser.parse(str(xlsx_path)) assert "## Data" in result # The merged cell (A3) should have the value from A2 ("Fruit") lines = result.split("\n") data_lines = [l for l in lines if l.startswith("| ") and "---" not in l and "Category" not in l] assert len(data_lines) == 2 # Both data rows should contain "Fruit" assert all("Fruit" in line for line in data_lines) def test_sheet_name_as_heading(self, parser, tmp_path): """工作表名称应作为 ## 标题""" xlsx_path = tmp_path / "named.xlsx" _create_xlsx(xlsx_path, { "Sales Report": [["Month", "Revenue"], ["Jan", "1000"]], }) result = parser.parse(str(xlsx_path)) assert "## Sales Report" in result def test_nonexistent_file_raises(self, parser): with pytest.raises(ParseError) as exc_info: parser.parse("/nonexistent/path/file.xlsx") assert "file.xlsx" in exc_info.value.file_name assert exc_info.value.reason != "" def test_corrupted_file_raises(self, parser, tmp_path): xlsx_path = tmp_path / "corrupted.xlsx" xlsx_path.write_bytes(b"this is not an xlsx file") with pytest.raises(ParseError) as exc_info: parser.parse(str(xlsx_path)) assert "corrupted.xlsx" in exc_info.value.file_name def test_parse_error_contains_filename(self, parser): with pytest.raises(ParseError) as exc_info: parser.parse("/no/such/report.xlsx") assert exc_info.value.file_name == "report.xlsx" def test_numeric_values(self, parser, tmp_path): """数值类型应正确转换为字符串""" xlsx_path = tmp_path / "numeric.xlsx" _create_xlsx(xlsx_path, { "Sheet1": [["Int", "Float"], [42, 3.14]], }) result = parser.parse(str(xlsx_path)) assert "42" in result assert "3.14" in result def test_crlf_escaped(self, parser, tmp_path): """\\r\\n 应被转义为
""" xlsx_path = tmp_path / "crlf.xlsx" _create_xlsx(xlsx_path, { "Sheet1": [["Header"], ["line1\r\nline2"]], }) result = parser.parse(str(xlsx_path)) assert "line1
line2" in result