Files
bigwo/tests/test_xlsx_parser.py
2026-03-02 17:38:28 +08:00

221 lines
7.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""XlsxParser 单元测试"""
import pytest
from openpyxl import Workbook
from exceptions import ParseError
from parsers.xlsx_parser import XlsxParser
@pytest.fixture
def parser():
    """Provide an ``XlsxParser`` instance to the tests that request it."""
    xlsx_parser = XlsxParser()
    return xlsx_parser
def _create_xlsx(path, sheets=None):
    """
    Create an XLSX file for use in tests.

    Args:
        path: Output file path.
        sheets: Dict whose keys are sheet names and whose values are 2-D
            lists (rows x columns) of cell values. If None or empty, the
            workbook is saved with only its default, empty sheet.
    """
    wb = Workbook()
    if sheets:
        # Drop the auto-created default sheet so that only the requested
        # sheets exist. This is done only when replacements are supplied:
        # openpyxl refuses to save a workbook that has no sheets at all.
        wb.remove(wb.active)
        for sheet_name, rows in sheets.items():
            ws = wb.create_sheet(title=sheet_name)
            for row in rows:
                ws.append(row)
    wb.save(str(path))
def _create_xlsx_with_merge(path, sheet_name, rows, merges):
    """
    Create a single-sheet XLSX file that contains merged cells.

    Args:
        path: Output file path.
        sheet_name: Title of the worksheet to create.
        rows: 2-D list (rows x columns) of cell values.
        merges: List of ranges to merge, e.g. ["A1:B1", "A2:A3"].
    """
    workbook = Workbook()
    # Replace the auto-created default sheet with one carrying the
    # requested title.
    default_sheet = workbook.active
    workbook.remove(default_sheet)
    sheet = workbook.create_sheet(title=sheet_name)

    # Fill in all the data first, then apply the merges on top of it.
    for row_values in rows:
        sheet.append(row_values)
    for cell_range in merges:
        sheet.merge_cells(cell_range)

    workbook.save(str(path))
class TestSupportedExtensions:
    """Checks on the file extensions reported by the parser."""

    def test_supports_xlsx(self, parser):
        """``.xlsx`` is among the supported extensions."""
        extensions = parser.supported_extensions()
        assert ".xlsx" in extensions

    def test_only_one_extension(self, parser):
        """Exactly one extension is reported."""
        extension_count = len(parser.supported_extensions())
        assert extension_count == 1
class TestParse:
    """Tests for ``XlsxParser.parse``: Markdown output and error reporting."""

    def test_simple_table(self, parser, tmp_path):
        """A basic table is converted to Markdown."""
        xlsx_path = tmp_path / "simple.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [
                ["Name", "Age"],
                ["Alice", 30],
                ["Bob", 25],
            ]
        })
        result = parser.parse(str(xlsx_path))
        assert "## Sheet1" in result
        assert "| Name | Age |" in result
        assert "| --- | --- |" in result
        assert "| Alice | 30 |" in result
        assert "| Bob | 25 |" in result

    def test_multiple_sheets(self, parser, tmp_path):
        """Each worksheet produces its own heading and table."""
        xlsx_path = tmp_path / "multi.xlsx"
        _create_xlsx(xlsx_path, {
            "Users": [["Name"], ["Alice"]],
            "Orders": [["ID"], ["001"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "## Users" in result
        assert "## Orders" in result
        assert "| Name |" in result
        assert "| ID |" in result

    def test_empty_sheet_skipped(self, parser, tmp_path):
        """An empty worksheet is skipped."""
        xlsx_path = tmp_path / "empty_sheet.xlsx"
        _create_xlsx(xlsx_path, {
            "Empty": [],
            "Data": [["Col1"], ["Val1"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "## Empty" not in result
        assert "## Data" in result

    def test_all_empty_sheets(self, parser, tmp_path):
        """The output is empty (ignoring whitespace) when every sheet is empty."""
        xlsx_path = tmp_path / "all_empty.xlsx"
        _create_xlsx(xlsx_path, {"Empty1": [], "Empty2": []})
        result = parser.parse(str(xlsx_path))
        assert result.strip() == ""

    def test_pipe_escaped(self, parser, tmp_path):
        """A pipe inside a cell must be escaped so it cannot split the cell."""
        xlsx_path = tmp_path / "pipe.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["Header"], ["value|with|pipes"]],
        })
        result = parser.parse(str(xlsx_path))
        # Every Markdown table contains "|" as a column delimiter, so merely
        # checking for a pipe proves nothing. Instead require that the raw,
        # unescaped text is gone and that the escaped form is present.
        assert "value|with|pipes" not in result
        # NOTE(review): assumes the parser emits the GFM backslash escape
        # ("\|"); confirm against XlsxParser's escaping rules.
        assert "value\\|with\\|pipes" in result

    def test_newline_escaped(self, parser, tmp_path):
        """A newline inside a cell is converted to <br>."""
        xlsx_path = tmp_path / "newline.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["Header"], ["line1\nline2"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "line1<br>line2" in result

    def test_backtick_escaped(self, parser, tmp_path):
        """A backtick inside a cell is escaped as &#96;."""
        xlsx_path = tmp_path / "backtick.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["Header"], ["code `snippet`"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "&#96;" in result

    def test_none_cell_becomes_empty(self, parser, tmp_path):
        """A cell whose value is None is rendered as empty."""
        xlsx_path = tmp_path / "none.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["A", "B"], ["val", None]],
        })
        result = parser.parse(str(xlsx_path))
        assert "| val | |" in result

    def test_merged_cells(self, parser, tmp_path):
        """Cells in a merged range are filled with the top-left value."""
        xlsx_path = tmp_path / "merged.xlsx"
        _create_xlsx_with_merge(
            xlsx_path,
            sheet_name="Data",
            rows=[
                ["Category", "Value"],
                ["Fruit", 10],
                [None, 20],  # A3 will be merged with A2
            ],
            merges=["A2:A3"],
        )
        result = parser.parse(str(xlsx_path))
        assert "## Data" in result
        # The merged cell (A3) should carry the value from A2 ("Fruit").
        # Keep only table body rows: drop the separator and the header row.
        data_lines = [
            line
            for line in result.split("\n")
            if line.startswith("| ")
            and "---" not in line
            and "Category" not in line
        ]
        assert len(data_lines) == 2
        # Both data rows should contain "Fruit".
        assert all("Fruit" in line for line in data_lines)

    def test_sheet_name_as_heading(self, parser, tmp_path):
        """The worksheet name is emitted as a ## heading."""
        xlsx_path = tmp_path / "named.xlsx"
        _create_xlsx(xlsx_path, {
            "Sales Report": [["Month", "Revenue"], ["Jan", "1000"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "## Sales Report" in result

    def test_nonexistent_file_raises(self, parser):
        """A missing file raises ParseError with the file name and a reason."""
        with pytest.raises(ParseError) as exc_info:
            parser.parse("/nonexistent/path/file.xlsx")
        # Inspect the exception only after the ``with`` block has exited;
        # statements placed after the raising call inside it never run.
        assert "file.xlsx" in exc_info.value.file_name
        assert exc_info.value.reason != ""

    def test_corrupted_file_raises(self, parser, tmp_path):
        """A file that is not a valid XLSX raises ParseError."""
        xlsx_path = tmp_path / "corrupted.xlsx"
        xlsx_path.write_bytes(b"this is not an xlsx file")
        with pytest.raises(ParseError) as exc_info:
            parser.parse(str(xlsx_path))
        assert "corrupted.xlsx" in exc_info.value.file_name

    def test_parse_error_contains_filename(self, parser):
        """ParseError.file_name is the base name, not the full path."""
        with pytest.raises(ParseError) as exc_info:
            parser.parse("/no/such/report.xlsx")
        assert exc_info.value.file_name == "report.xlsx"

    def test_numeric_values(self, parser, tmp_path):
        """Numeric cell values appear in the output as text."""
        xlsx_path = tmp_path / "numeric.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["Int", "Float"], [42, 3.14]],
        })
        result = parser.parse(str(xlsx_path))
        assert "42" in result
        assert "3.14" in result

    def test_crlf_escaped(self, parser, tmp_path):
        """A CRLF line break inside a cell is converted to a single <br>."""
        xlsx_path = tmp_path / "crlf.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["Header"], ["line1\r\nline2"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "line1<br>line2" in result