Initial commit: AI 知识库文档智能分块工具

2026-03-02 17:38:28 +08:00
commit 92e7fc5bda
160 changed files with 9577 additions and 0 deletions
--- a/parsers/image_parser.py
+++ b/parsers/image_parser.py
@@ -0,0 +1,71 @@
+"""图片文件解析器，使用 Vision API 识别图片内容"""
+
+import base64
+import os
+from typing import List, Optional
+
+from api_client import ApiClient, EXTENSION_MIME_MAP
+from exceptions import ApiError, ParseError
+from parsers.base import BaseParser
+
+# Default instruction text used by ImageParser when the caller supplies no
+# custom vision prompt. The prompt (written in Chinese) asks the model to
+# extract all text and key information from the image and to structure the
+# answer as: product/topic name, visible text, key structured details, and a
+# brief visual description, omitting any item that is not present.
+DEFAULT_VISION_PROMPT = """\
+请识别并提取图片中的所有文字和关键信息。请按以下结构输出：
+
+1. **产品/主题名称**：图片展示的主要产品或主题
+2. **文字内容**：图片中所有可见的文字，保持原始排版
+3. **关键信息**：成分、功效、用法用量、规格、价格等结构化信息
+4. **图片描述**：简要描述图片的视觉内容（产品外观、包装等）
+
+如果某项信息不存在，可以省略该项。"""
+
+
+class ImageParser(BaseParser):
+    """图片解析器，通过 Vision API 将图片转换为文本描述"""
+
+    def __init__(self, api_client: ApiClient, vision_prompt: Optional[str] = None):
+        self._api_client = api_client
+        self._vision_prompt = vision_prompt or DEFAULT_VISION_PROMPT
+
+    def supported_extensions(self) -> List[str]:
+        return [".png", ".jpg", ".jpeg", ".bmp", ".gif", ".webp"]
+
+    def parse(self, file_path: str) -> str:
+        """
+        解析图片文件：读取二进制 → base64 编码 → 调用 Vision API → 返回文本描述。
+
+        会将文件名作为上下文提示传入 prompt，提高识别准确度。
+        """
+        file_name = os.path.basename(file_path)
+        product_name = os.path.splitext(file_name)[0]
+
+        # 1. 读取图片文件
+        try:
+            with open(file_path, "rb") as f:
+                image_bytes = f.read()
+        except Exception as e:
+            raise ParseError(file_name, f"文件读取失败: {e}")
+
+        # 2. Base64 编码
+        image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+
+        # 3. 根据扩展名确定 MIME 类型
+        ext = os.path.splitext(file_path)[1].lower()
+        mime_type = EXTENSION_MIME_MAP.get(ext, "image/png")
+
+        # 4. 构建带文件名上下文的 prompt
+        context_prompt = (
+            f"{self._vision_prompt}\n\n"
+            f"参考信息：该图片的文件名为「{product_name}」，可能与图片内容相关。"
+        )
+
+        # 5. 调用 Vision API
+        try:
+            result = self._api_client.vision(
+                system_prompt=context_prompt,
+                image_base64=image_base64,
+                mime_type=mime_type,
+            )
+        except ApiError as e:
+            raise ParseError(file_name, f"Vision API 调用失败: {e}")
+
+        return result