Initial commit: AI 知识库文档智能分块工具
This commit is contained in:
35
models.py
Normal file
35
models.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""核心数据结构定义"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@dataclass
class Chunk:
    """A single semantic chunk of a document.

    Attributes:
        title: AI-generated summary title for the chunk.
        content: Text content of the chunk.
        tag: Business category label (e.g. product description, Q&A,
            training, investment promotion). Defaults to an empty string.
    """

    title: str  # AI-generated summary title
    content: str  # text content of the chunk
    tag: str = ""  # business category label; empty when not classified
|
||||
|
||||
|
||||
@dataclass
class ProcessResult:
    """Outcome of processing one source document.

    Attributes:
        source_file: Path of the source file.
        output_file: Path of the output file.
        chunks: The chunks produced for the document.
        process_time: When the processing took place.
        total_chunks: Total number of chunks. It is stored as given by the
            caller; this class does not derive it from ``chunks``.
    """

    source_file: str  # source file path
    output_file: str  # output file path
    chunks: List[Chunk]  # list of chunks
    process_time: datetime  # processing time
    total_chunks: int  # total number of chunks
|
||||
|
||||
|
||||
@dataclass
class CLIArgs:
    """Parsed command-line arguments.

    Attributes:
        input_file: Input file path (required).
        api_key: DeepSeek API key (required).
        output_file: Output file path (optional). ``None`` means no path was
            given; per the original note the default is the input's name
            with a ``.md`` extension, which is resolved outside this class.
        delimiter: Separator placed between chunks (optional, default
            ``"---"``).
    """

    input_file: str  # input file path (required)
    api_key: str  # DeepSeek API key (required)
    output_file: Optional[str] = None  # output path; None -> same-name .md default
    delimiter: str = "---"  # chunk delimiter
|
||||
Reference in New Issue
Block a user