36 lines
947 B
Python
36 lines
947 B
Python
|
|
"""核心数据结构定义"""
|
|||
|
|
|
|||
|
|
from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Optional
|
|||
|
|
|
|||
|
|
|
|||
|
|
@dataclass
class Chunk:
    """A single semantic chunk of text.

    Attributes:
        title: AI-generated summary title for the chunk.
        content: Text content of the chunk.
        tag: Business category label (e.g. product description, Q&A,
            training, investment promotion). Empty string by default.
    """

    title: str
    content: str
    tag: str = ""
|
|||
|
|
|
|||
|
|
|
|||
|
|
@dataclass
class ProcessResult:
    """Result of a processing run.

    Attributes:
        source_file: Path of the source file.
        output_file: Path of the output file.
        chunks: List of chunks.
        process_time: Processing time, stored as a ``datetime``.
        total_chunks: Total number of chunks. NOTE(review): presumably
            equal to ``len(chunks)``; nothing in this class enforces it.
    """

    source_file: str
    output_file: str
    chunks: List[Chunk]
    process_time: datetime
    total_chunks: int
|
|||
|
|
|
|||
|
|
|
|||
|
|
@dataclass
class CLIArgs:
    """Command-line arguments.

    Attributes:
        input_file: Path of the input file (required).
        api_key: DeepSeek API key (required). Left out of the generated
            ``repr`` so the secret is not exposed when an instance is
            printed or logged; it still takes part in ``__init__`` and
            equality comparison.
        output_file: Path of the output file (optional). ``None`` by
            default; the original note says the default is the same name
            with a ``.md`` extension, but that fallback is not implemented
            in this class.
        delimiter: Chunk delimiter (optional, ``---`` by default).
    """

    input_file: str
    # repr=False keeps the secret out of repr()/log output; the field has no
    # default, so it remains a required positional argument.
    api_key: str = field(repr=False)
    output_file: Optional[str] = None
    delimiter: str = "---"
|