221 lines
7.2 KiB
Python
221 lines
7.2 KiB
Python
|
|
"""XlsxParser 单元测试"""
|
|||
|
|
|
|||
|
|
import pytest
|
|||
|
|
from openpyxl import Workbook
|
|||
|
|
|
|||
|
|
from exceptions import ParseError
|
|||
|
|
from parsers.xlsx_parser import XlsxParser
|
|||
|
|
|
|||
|
|
|
|||
|
|
@pytest.fixture
def parser():
    """Provide a newly constructed XlsxParser to each test that requests it."""
    xlsx_parser = XlsxParser()
    return xlsx_parser
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _create_xlsx(path, sheets=None):
    """
    Create an XLSX file for use in tests.

    Args:
        path: Output file path.
        sheets: dict mapping a sheet name to a 2-D list (rows x columns of
            cell values). If None or empty, an empty workbook is written
            that keeps openpyxl's default sheet.
    """
    wb = Workbook()

    if sheets:
        # Drop the default sheet only when replacements are supplied: openpyxl
        # refuses to save a workbook that has no visible sheet at all.
        wb.remove(wb.active)
        for sheet_name, rows in sheets.items():
            ws = wb.create_sheet(title=sheet_name)
            for row in rows:
                ws.append(row)

    wb.save(str(path))
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _create_xlsx_with_merge(path, sheet_name, rows, merges):
    """
    Write a single-sheet XLSX file that contains merged cell ranges.

    Args:
        path: Output file path.
        sheet_name: Title of the worksheet to create.
        rows: 2-D list (rows x columns of cell values).
        merges: List of ranges to merge, e.g. ["A1:B1", "A2:A3"].
    """
    workbook = Workbook()
    # Replace the default sheet with one carrying the requested title.
    default_sheet = workbook.active
    workbook.remove(default_sheet)
    worksheet = workbook.create_sheet(title=sheet_name)

    # Fill in the data first, then apply the merges on top of it.
    for record in rows:
        worksheet.append(record)
    for cell_range in merges:
        worksheet.merge_cells(cell_range)

    target = str(path)
    workbook.save(target)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TestSupportedExtensions:
    """Checks on the list returned by XlsxParser.supported_extensions()."""

    def test_supports_xlsx(self, parser):
        """The .xlsx extension is among the supported extensions."""
        extensions = parser.supported_extensions()
        assert ".xlsx" in extensions

    def test_only_one_extension(self, parser):
        """Exactly one extension is reported."""
        extensions = parser.supported_extensions()
        assert len(extensions) == 1
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TestParse:
    """Checks of XlsxParser.parse() against workbooks written to tmp_path."""

    def test_simple_table(self, parser, tmp_path):
        """A basic sheet is converted into a Markdown table."""
        xlsx_path = tmp_path / "simple.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [
                ["Name", "Age"],
                ["Alice", 30],
                ["Bob", 25],
            ]
        })
        result = parser.parse(str(xlsx_path))
        assert "## Sheet1" in result
        assert "| Name | Age |" in result
        assert "| --- | --- |" in result
        assert "| Alice | 30 |" in result
        assert "| Bob | 25 |" in result

    def test_multiple_sheets(self, parser, tmp_path):
        """Each worksheet produces its own heading and table."""
        xlsx_path = tmp_path / "multi.xlsx"
        _create_xlsx(xlsx_path, {
            "Users": [["Name"], ["Alice"]],
            "Orders": [["ID"], ["001"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "## Users" in result
        assert "## Orders" in result
        assert "| Name |" in result
        assert "| ID |" in result

    def test_empty_sheet_skipped(self, parser, tmp_path):
        """An empty worksheet is skipped."""
        xlsx_path = tmp_path / "empty_sheet.xlsx"
        _create_xlsx(xlsx_path, {
            "Empty": [],
            "Data": [["Col1"], ["Val1"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "## Empty" not in result
        assert "## Data" in result

    def test_all_empty_sheets(self, parser, tmp_path):
        """When every worksheet is empty the result is an empty string."""
        xlsx_path = tmp_path / "all_empty.xlsx"
        _create_xlsx(xlsx_path, {"Empty1": [], "Empty2": []})
        result = parser.parse(str(xlsx_path))
        assert result.strip() == ""

    def test_pipe_escaped(self, parser, tmp_path):
        """A literal ``|`` inside a cell is escaped so it cannot split the row."""
        xlsx_path = tmp_path / "pipe.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["Header"], ["value|with|pipes"]],
        })
        result = parser.parse(str(xlsx_path))
        # Checking for a bare "|" proves nothing (every table contains one),
        # so assert that the raw cell text is gone and the escaped form is there.
        assert "value|with|pipes" not in result
        # NOTE(review): assumes the parser escapes "|" as the HTML entity
        # "&#124;" -- confirm against XlsxParser.
        assert "value&#124;with&#124;pipes" in result

    def test_newline_escaped(self, parser, tmp_path):
        """A newline inside a cell is rendered as <br>."""
        xlsx_path = tmp_path / "newline.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["Header"], ["line1\nline2"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "line1<br>line2" in result

    def test_backtick_escaped(self, parser, tmp_path):
        """A backtick inside a cell is escaped."""
        xlsx_path = tmp_path / "backtick.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["Header"], ["code `snippet`"]],
        })
        result = parser.parse(str(xlsx_path))
        # Asserting that a raw backtick is present would only pass when no
        # escaping happened, so check for the escaped form instead.
        assert "code `snippet`" not in result
        # NOTE(review): assumes the parser escapes "`" as the HTML entity
        # "&#96;" -- confirm against XlsxParser.
        assert "&#96;" in result

    def test_none_cell_becomes_empty(self, parser, tmp_path):
        """A cell whose value is None is rendered as an empty cell."""
        xlsx_path = tmp_path / "none.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["A", "B"], ["val", None]],
        })
        result = parser.parse(str(xlsx_path))
        row = next(
            (line for line in result.split("\n") if line.startswith("| val")),
            None,
        )
        assert row is not None
        # Compare cell contents rather than raw text so the check does not
        # depend on how much padding is emitted inside an empty cell.
        cells = [cell.strip() for cell in row.strip().strip("|").split("|")]
        assert cells == ["val", ""]

    def test_merged_cells(self, parser, tmp_path):
        """Every cell of a merged range is filled with the top-left value."""
        xlsx_path = tmp_path / "merged.xlsx"
        _create_xlsx_with_merge(
            xlsx_path,
            sheet_name="Data",
            rows=[
                ["Category", "Value"],
                ["Fruit", 10],
                [None, 20],  # A3 will be merged with A2
            ],
            merges=["A2:A3"],
        )
        result = parser.parse(str(xlsx_path))
        assert "## Data" in result
        # The merged cell (A3) should have the value from A2 ("Fruit").
        lines = result.split("\n")
        # Keep only table body rows: drop the separator and the header row.
        data_lines = [
            line
            for line in lines
            if line.startswith("| ") and "---" not in line and "Category" not in line
        ]
        assert len(data_lines) == 2
        # Both data rows should contain "Fruit".
        assert all("Fruit" in line for line in data_lines)

    def test_sheet_name_as_heading(self, parser, tmp_path):
        """The worksheet name is emitted as a ``##`` heading."""
        xlsx_path = tmp_path / "named.xlsx"
        _create_xlsx(xlsx_path, {
            "Sales Report": [["Month", "Revenue"], ["Jan", "1000"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "## Sales Report" in result

    def test_nonexistent_file_raises(self, parser):
        """Parsing a missing file raises ParseError with a file name and reason."""
        with pytest.raises(ParseError) as exc_info:
            parser.parse("/nonexistent/path/file.xlsx")
        assert "file.xlsx" in exc_info.value.file_name
        assert exc_info.value.reason != ""

    def test_corrupted_file_raises(self, parser, tmp_path):
        """Parsing a file that is not a valid XLSX raises ParseError."""
        xlsx_path = tmp_path / "corrupted.xlsx"
        xlsx_path.write_bytes(b"this is not an xlsx file")
        with pytest.raises(ParseError) as exc_info:
            parser.parse(str(xlsx_path))
        assert "corrupted.xlsx" in exc_info.value.file_name

    def test_parse_error_contains_filename(self, parser):
        """ParseError.file_name is the base name of the path, not the full path."""
        with pytest.raises(ParseError) as exc_info:
            parser.parse("/no/such/report.xlsx")
        assert exc_info.value.file_name == "report.xlsx"

    def test_numeric_values(self, parser, tmp_path):
        """Numeric cell values are converted to their string form."""
        xlsx_path = tmp_path / "numeric.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["Int", "Float"], [42, 3.14]],
        })
        result = parser.parse(str(xlsx_path))
        assert "42" in result
        assert "3.14" in result

    def test_crlf_escaped(self, parser, tmp_path):
        """A CRLF (\\r\\n) inside a cell is rendered as a single <br>."""
        xlsx_path = tmp_path / "crlf.xlsx"
        _create_xlsx(xlsx_path, {
            "Sheet1": [["Header"], ["line1\r\nline2"]],
        })
        result = parser.parse(str(xlsx_path))
        assert "line1<br>line2" in result
|