36 lines
947 B
Python
36 lines
947 B
Python
"""核心数据结构定义"""
|
||
|
||
from dataclasses import dataclass, field
from datetime import datetime
from typing import List, Optional
|
||
|
||
|
||
@dataclass
class Chunk:
    """A single semantic chunk of a document.

    Attributes:
        title: AI-generated summary title for the chunk.
        content: Text content of the chunk.
        tag: Business category label (e.g. product description, Q&A,
            training, franchise recruitment). Defaults to an empty string.
    """

    title: str  # AI-generated summary title
    content: str  # text content of the chunk
    tag: str = ""  # business category label; empty when not set
|
||
|
||
|
||
@dataclass
class ProcessResult:
    """Result of processing one source file.

    Attributes:
        source_file: Path of the source file.
        output_file: Path of the output file.
        chunks: The chunks produced for the file.
        process_time: Time of processing.
        total_chunks: Total number of chunks. Stored as given; this class
            does not check it against ``len(chunks)``.
    """

    source_file: str  # source file path
    output_file: str  # output file path
    chunks: List[Chunk]  # list of chunks
    process_time: datetime  # processing time
    total_chunks: int  # total number of chunks
|
||
|
||
|
||
@dataclass
class CLIArgs:
    """Command-line arguments.

    Attributes:
        input_file: Input file path (required).
        api_key: DeepSeek API key (required). Excluded from the generated
            ``repr`` so the secret does not end up in logs or tracebacks;
            it still participates in equality comparison.
        output_file: Output file path (optional). ``None`` means "use the
            default", which the original note describes as the input name
            with a ``.md`` extension; that fallback is not implemented in
            this class.
        delimiter: Chunk delimiter (optional, defaults to ``---``).
    """

    input_file: str  # input file path (required)
    # DeepSeek API key (required); repr=False keeps the secret out of repr()
    api_key: str = field(repr=False)
    output_file: Optional[str] = None  # output path; None selects the default
    delimiter: str = "---"  # chunk delimiter
|