feat: 添加realtime_dialog和realtime_dialog_external_rag_test项目,更新test2项目
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# BigWo 智能语音对话系统 — 系统架构文档
|
||||
|
||||
> 版本:1.0 | 更新日期:2026-03-09
|
||||
> 版本:2.0 | 更新日期:2026-03-13
|
||||
|
||||
---
|
||||
|
||||
@@ -12,7 +12,7 @@ BigWo 是一个**企业级智能客服对话系统**,支持**语音通话**和
|
||||
|
||||
| 能力 | 说明 |
|
||||
|------|------|
|
||||
| 实时语音对话 | 火山引擎 RTC + S2S 端到端语音大模型,混合编排模式 |
|
||||
| 实时语音对话 | Native WebSocket + S2S 端到端语音大模型,混合编排模式 |
|
||||
| 知识库问答 | Function Calling → 方舟私域知识库 → 本地知识库 |
|
||||
| 文字对话 | Coze v3 Chat API,支持 SSE 流式输出 |
|
||||
| 语音↔文字切换 | 同一 sessionId 贯穿,MySQL 持久化完整历史 |
|
||||
@@ -22,7 +22,7 @@ BigWo 是一个**企业级智能客服对话系统**,支持**语音通话**和
|
||||
|
||||
- **前端**:React 18 + Vite 5 + TailwindCSS 4 + Lucide Icons
|
||||
- **后端**:Node.js + Express 4 (port 3001)
|
||||
- **语音**:@volcengine/rtc SDK + S2S 端到端 + 方舟 LLM (API v2024-12-01)
|
||||
- **语音**:Native WebSocket (`wss://openspeech.bytedance.com/api/v3/realtime/dialogue`) + S2S 端到端 + 方舟 LLM
|
||||
- **文字**:Coze v3 Chat API(流式 SSE)
|
||||
- **知识库**:方舟 Chat Completions API + knowledge_base metadata
|
||||
- **数据库**:MySQL 8 (mysql2/promise)
|
||||
@@ -33,47 +33,50 @@ BigWo 是一个**企业级智能客服对话系统**,支持**语音通话**和
|
||||
## 2. 系统架构图
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ 浏览器客户端 │
|
||||
│ ┌────────────┐ ┌───────────┐ ┌──────────────────┐ │
|
||||
│ │ VoicePanel │ │ ChatPanel │ │ SettingsPanel │ │
|
||||
│ └─────┬──────┘ └─────┬─────┘ └──────────────────┘ │
|
||||
│ │ │ │
|
||||
│ ┌─────▼──────┐ ┌─────▼─────┐ ┌────────────────┐ │
|
||||
│ │useVoiceChat│ │ chatApi │ │ voiceApi │ │
|
||||
│ │ (Hook) │ │ (HTTP/SSE)│ │ (HTTP 封装) │ │
|
||||
│ └─────┬──────┘ └─────┬─────┘ └────────────────┘ │
|
||||
│ │ │ │
|
||||
│ ┌─────▼──────┐ │ │
|
||||
│ │ rtcService │ │ │
|
||||
│ │(WebRTC SDK)│ │ │
|
||||
│ └─────┬──────┘ │ │
|
||||
└────────┼───────────────┼────────────────────────────────┘
|
||||
│ WebRTC │ HTTPS
|
||||
│ 音频流 │ REST/SSE
|
||||
▼ ▼
|
||||
┌────────────────┐ ┌─────────────────────────────────────┐
|
||||
│ 火山引擎 RTC │ │ Express 后端 (port 3012) │
|
||||
│ 云端服务 │ │ │
|
||||
│ ┌────────────┐ │ │ routes/voice.js — 语音全生命周期 │
|
||||
│ │S2S 端到端 │ │ │ routes/chat.js — 文字对话 │
|
||||
│ │语音大模型 │ │ │ routes/session.js — 会话管理 │
|
||||
│ ├────────────┤ │ │ │
|
||||
│ │方舟 LLM │ │ │ services/volcengine.js — OpenAPI │
|
||||
│ │(工具决策) │ │ │ services/toolExecutor.js — 工具 │
|
||||
│ └─────┬──────┘ │ │ services/cozeChatService.js — Coze │
|
||||
│ │FC 回调 │ │ services/arkChatService.js — 方舟 │
|
||||
│ │(HTTP) │ │ config/voiceChatConfig.js — 配置 │
|
||||
│ ▼ │ │ db/index.js — MySQL │
|
||||
│ fc_callback ──►│ │ │
|
||||
│ ◄── Update ────│ └─────────────────────────────────────┘
|
||||
└────────────────┘ │
|
||||
▼
|
||||
┌─────────────────┐ ┌──────────────┐
|
||||
│ MySQL 8 │ │ 方舟知识库 │
|
||||
│ sessions 表 │ │ (远程 API) │
|
||||
│ messages 表 │ └──────────────┘
|
||||
└─────────────────┘
|
||||
┌───────────────────────────────────────────────────────────┐
|
||||
│ 浏览器客户端 │
|
||||
│ ┌────────────┐ ┌───────────┐ ┌────────────────────┐ │
|
||||
│ │ VoicePanel │ │ ChatPanel │ │SessionHistoryPanel │ │
|
||||
│ └─────┬──────┘ └─────┬─────┘ └────────────────────┘ │
|
||||
│ │ │ │
|
||||
│ ┌─────▼──────────┐ ┌──▼───────┐ ┌────────────────┐ │
|
||||
│ │useNativeVoice │ │ chatApi │ │ voiceApi │ │
|
||||
│ │ Chat (Hook) │ │(HTTP/SSE)│ │ (HTTP 封装) │ │
|
||||
│ └─────┬──────────┘ └──┬───────┘ └────────────────┘ │
|
||||
│ │ │ │
|
||||
│ ┌─────▼──────────────┐ │ │
|
||||
│ │nativeVoiceService │ │ │
|
||||
│ │ (WebSocket+Audio) │ │ │
|
||||
│ └─────┬──────────────┘ │ │
|
||||
└────────┼────────────────┼─────────────────────────────────┘
|
||||
│ WebSocket │ HTTPS
|
||||
│ PCM 音频流 │ REST/SSE
|
||||
▼ ▼
|
||||
┌──────────────────────────────────────────────────┐
|
||||
│ Express 后端 (port 3001) │
|
||||
│ │
|
||||
│ /ws/realtime-dialog — Native 语音网关 (核心) │
|
||||
│ routes/voice.js — 语音配置、直连会话 │
|
||||
│ routes/chat.js — 文字对话 (Coze SSE) │
|
||||
│ routes/session.js — 会话列表、历史、删除 │
|
||||
│ │
|
||||
│ services/nativeVoiceGateway.js — WebSocket 网关 │
|
||||
│ services/realtimeDialogProtocol.js — 二进制协议 │
|
||||
│ services/realtimeDialogRouting.js — 意图路由 │
|
||||
│ services/toolExecutor.js — 工具执行器 │
|
||||
│ services/arkChatService.js — 方舟 LLM │
|
||||
│ services/cozeChatService.js — Coze Chat │
|
||||
│ db/index.js — MySQL CRUD │
|
||||
│ │
|
||||
└──────┬───────────────┬────────────────────────────┘
|
||||
│ │
|
||||
│ WebSocket │
|
||||
▼ ▼
|
||||
┌───────────────┐ ┌─────────────────┐ ┌──────────────┐
|
||||
│ 火山 Realtime │ │ MySQL 8 │ │ 方舟知识库 │
|
||||
│ Dialog 服务 │ │ sessions 表 │ │ (远程 API) │
|
||||
│ (S2S + LLM) │ │ messages 表 │ └──────────────┘
|
||||
└───────────────┘ └─────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
@@ -84,157 +87,113 @@ BigWo 是一个**企业级智能客服对话系统**,支持**语音通话**和
|
||||
test2/
|
||||
├── client/ # 前端(React + Vite)
|
||||
│ ├── src/
|
||||
│ │ ├── App.jsx # 主应用,语音/文字模式切换
|
||||
│ │ ├── App.jsx # 主应用,模式切换 + 会话管理
|
||||
│ │ ├── components/
|
||||
│ │ │ ├── VoicePanel.jsx # 语音通话界面
|
||||
│ │ │ ├── ChatPanel.jsx # 文字对话界面(SSE 流式)
|
||||
│ │ │ ├── SessionHistoryPanel.jsx # 会话历史侧边栏
|
||||
│ │ │ ├── SettingsPanel.jsx # 语音参数设置面板
|
||||
│ │ │ └── SubtitleDisplay.jsx# 实时字幕展示
|
||||
│ │ ├── hooks/
|
||||
│ │ │ └── useVoiceChat.js # 语音通话核心 Hook
|
||||
│ │ │ └── useNativeVoiceChat.js # Native WebSocket 语音 Hook
|
||||
│ │ └── services/
|
||||
│ │ ├── rtcService.js # RTC SDK 封装(WebRTC)
|
||||
│ │ ├── voiceApi.js # 语音 HTTP 请求
|
||||
│ │ ├── nativeVoiceService.js # WebSocket 语音服务(音频采集/播放)
|
||||
│ │ ├── voiceApi.js # 语音/会话 HTTP 请求
|
||||
│ │ └── chatApi.js # 文字 HTTP/SSE 请求
|
||||
│ └── vite.config.js
|
||||
├── server/ # 后端(Express)
|
||||
│ ├── app.js # 入口 + FC 回调 raw body 解析
|
||||
│ ├── app.js # 入口,启动 HTTP + WebSocket 服务
|
||||
│ ├── routes/
|
||||
│ │ ├── voice.js # 语音全生命周期 + FC 回调(核心)
|
||||
│ │ ├── chat.js # 文字对话(Coze)
|
||||
│ │ └── session.js # 会话历史 & 模式切换
|
||||
│ │ ├── voice.js # 语音配置、直连会话、知识库查询
|
||||
│ │ ├── chat.js # 文字对话(Coze SSE 流式)
|
||||
│ │ └── session.js # 会话列表、历史、删除、模式切换
|
||||
│ ├── services/
|
||||
│ │ ├── volcengine.js # 火山引擎 OpenAPI 签名调用
|
||||
│ │ ├── nativeVoiceGateway.js # WebSocket 语音网关(核心)
|
||||
│ │ ├── realtimeDialogProtocol.js # 二进制协议编解码
|
||||
│ │ ├── realtimeDialogRouting.js # 意图路由 + 工具调度
|
||||
│ │ ├── toolExecutor.js # 工具执行器
|
||||
│ │ ├── arkChatService.js # 方舟 LLM(文字场景备选)
|
||||
│ │ ├── arkChatService.js # 方舟 LLM
|
||||
│ │ └── cozeChatService.js # Coze Chat API(文字主服务)
|
||||
│ ├── config/
|
||||
│ │ ├── voiceChatConfig.js # StartVoiceChat 配置构建器
|
||||
│ │ └── tools.js # FC 工具定义(5 个工具)
|
||||
│ ├── db/index.js # MySQL CRUD
|
||||
│ ├── lib/token.js # RTC Token 生成
|
||||
│ └── .env # 环境变量
|
||||
└── ecosystem.config.js # PM2 部署配置
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. 语音通话模块
|
||||
## 4. 语音通话模块(Native WebSocket 方案)
|
||||
|
||||
### 4.1 混合编排模式(OutputMode=1)
|
||||
### 4.1 混合编排模式
|
||||
|
||||
S2S 端到端模型处理普通闲聊(低延迟 ~300-800ms),方舟 LLM 同时决策是否需要调用工具。两者并行运行。
|
||||
S2S 端到端模型处理普通闲聊(低延迟),方舟 LLM 同时决策是否需要调用工具。两者并行运行。
|
||||
|
||||
### 4.2 会话生命周期
|
||||
|
||||
```
|
||||
POST /prepare → 创建房间 + 生成 RTC Token + 分配 TaskId
|
||||
客户端 WebSocket 连接 → /ws/realtime-dialog
|
||||
↓
|
||||
客户端 joinRoom() → 用户进入 RTC 房间、开启麦克风
|
||||
nativeVoiceGateway 创建会话 → 连接上游 Realtime Dialog 服务
|
||||
↓
|
||||
POST /start → 构建配置 → StartVoiceChat API → AI Bot 进房
|
||||
客户端采集麦克风 PCM → 发送音频帧 → 上游 ASR + S2S
|
||||
↓
|
||||
实时语音对话(S2S 直接回复 + LLM 工具决策)
|
||||
上游返回字幕/音频 → realtimeDialogRouting 意图路由
|
||||
↓
|
||||
POST /stop → StopVoiceChat API → 返回字幕 → 可切换文字模式
|
||||
工具调用或闲聊回复 → ChatTTSText 注入语音流 → 客户端播放
|
||||
↓
|
||||
客户端断开 WebSocket → 会话结束 → 可切换文字模式
|
||||
```
|
||||
|
||||
### 4.3 语音 API 端点(voice.js)
|
||||
### 4.3 语音相关端点
|
||||
|
||||
| 端点 | 方法 | 说明 |
|
||||
| 端点 | 类型 | 说明 |
|
||||
|------|------|------|
|
||||
| `/ws/realtime-dialog` | WebSocket | **核心**,Native 语音网关 |
|
||||
| `/api/voice/config` | GET | 获取模型、音色列表 |
|
||||
| `/api/voice/prepare` | POST | 创建房间、生成 Token |
|
||||
| `/api/voice/start` | POST | 启动 AI 语音对话 |
|
||||
| `/api/voice/stop` | POST | 停止对话、返回字幕 |
|
||||
| `/api/voice/fc_callback` | POST | FC 回调(RTC 服务端→服务端) |
|
||||
| `/api/voice/subtitle` | POST | 客户端转发确认字幕 |
|
||||
| `/api/voice/room_message` | POST | 客户端转发 RTC 房间消息 |
|
||||
| `/api/voice/direct/session` | POST | 创建直连会话 |
|
||||
| `/api/voice/direct/message` | POST | 添加消息 |
|
||||
| `/api/voice/direct/query` | POST | 知识库直接查询 |
|
||||
| `/api/voice/stop` | POST | 停止会话 |
|
||||
|
||||
### 4.4 内存数据映射
|
||||
### 4.4 核心服务文件
|
||||
|
||||
voice.js 维护以下 Map 用于会话状态关联:
|
||||
|
||||
| Map 名称 | Key | Value | 用途 |
|
||||
|----------|-----|-------|------|
|
||||
| `activeSessions` | sessionId | 完整会话对象 | 会话生命周期管理 |
|
||||
| `roomToBotUserId` | roomId | botUserId | FC 回调→UpdateVoiceChat |
|
||||
| `roomToHumanUserId` | roomId | userId | 日志追踪 |
|
||||
| `roomToSessionId` | roomId | sessionId | DB 写入关联 |
|
||||
| `roomToTaskId` | roomId | taskId | **UpdateVoiceChat 必须用此 TaskId** |
|
||||
| `latestUserSpeech` | roomId | {text, timestamp} | FC 参数解析兜底 |
|
||||
| `toolCallBuffers` | TaskID | buffer 对象 | FC chunk 收集 |
|
||||
| 文件 | 职责 |
|
||||
|------|------|
|
||||
| `nativeVoiceGateway.js` | WebSocket 网关,管理客户端↔上游连接、音频流转发、消息持久化 |
|
||||
| `realtimeDialogProtocol.js` | 二进制协议编解码(消息类型、标志位、序列化/反序列化) |
|
||||
| `realtimeDialogRouting.js` | 意图路由(规则+LLM 双层决策)、工具调度、语音播报 |
|
||||
|
||||
---
|
||||
|
||||
## 5. Function Calling 回调处理(核心)
|
||||
## 5. 语音意图路由与工具调用
|
||||
|
||||
### 5.1 数据流
|
||||
|
||||
```
|
||||
RTC 服务 (LLM 触发 tool_call)
|
||||
│ HTTP POST(无 Content-Type,body 为 JSON)
|
||||
用户语音 ASR 识别文本
|
||||
│ 上游 Realtime Dialog 服务返回
|
||||
▼
|
||||
app.js 手动读取 raw body → JSON.parse → 分配 _seq 序列号
|
||||
nativeVoiceGateway 接收字幕 → 持久化用户语音到 DB
|
||||
▼
|
||||
realtimeDialogRouting.resolveReply()
|
||||
│
|
||||
├─ 规则路由:时间/天气/订单/计算 → 直接工具调用
|
||||
├─ search_knowledge → 方舟知识库 API
|
||||
└─ chat → 方舟 LLM 闲聊回复
|
||||
│
|
||||
▼
|
||||
voice.js fc_callback 路由
|
||||
│
|
||||
├─ FormatA: Type="tool_calls" → OpenAI 格式数组,流式 chunk
|
||||
├─ FormatB: Type="information" → RTC 原生格式
|
||||
└─ FormatC: 会话状态回调 → 记录日志
|
||||
│
|
||||
▼(FormatA 为主)
|
||||
chunk 缓冲收集(toolCallBuffers Map,1s 超时触发)
|
||||
│
|
||||
▼ 1s 后
|
||||
参数解析尝试链:
|
||||
① JSON.parse(拼接 chunks)
|
||||
② latestUserSpeech(ASR 用户语音兜底)
|
||||
③ extractReadableText(从 chunks 提取中文字符)
|
||||
│
|
||||
工具结果/LLM回复 → ChatTTSText 注入上游语音流
|
||||
▼
|
||||
发送 interrupt 打断 S2S 直接回复
|
||||
│ UpdateVoiceChat({ Command: "interrupt", TaskId: sessionTaskId })
|
||||
▼
|
||||
执行工具(toolExecutor.js)
|
||||
│ search_knowledge: 方舟 KB(30s) → 本地 KB
|
||||
▼
|
||||
回传结果
|
||||
│ UpdateVoiceChat({
|
||||
│ Command: "function",
|
||||
│ TaskId: sessionTaskId, ← 必须是 StartVoiceChat 的 TaskId
|
||||
│ Message: JSON.stringify({ ToolCallID, Content })
|
||||
│ })
|
||||
▼
|
||||
AI 用知识库内容语音回复
|
||||
AI 语音播报 + 持久化到 DB
|
||||
```
|
||||
|
||||
### 5.2 关键设计决策
|
||||
### 5.2 关键设计
|
||||
|
||||
| 问题 | 解决方案 |
|
||||
|------|----------|
|
||||
| FC 回调无 Content-Type | app.js 在 express.json() 之前手动读取 raw body |
|
||||
| Chunk 乱序且不完整 | 1s 定时器收集全部 chunks 后拼接 |
|
||||
| JSON.parse 失败 | 用 ASR 用户语音文本作为查询参数(方案B) |
|
||||
| S2S 直接回复覆盖 FC 结果 | 先发 interrupt 打断,再发 function 结果 |
|
||||
| TaskId 不匹配 | roomToTaskId 存储 StartVoiceChat 的 TaskId |
|
||||
| HTTP 响应超时 | 立即返回 200,异步执行工具 |
|
||||
|
||||
### 5.3 用户语音文本获取(方案B)
|
||||
|
||||
FC 回调的 arguments 经常乱序无法解析,因此需要从其他途径获取用户的原始问题:
|
||||
|
||||
```
|
||||
客户端 RTC SDK
|
||||
│ onSubtitleMessageReceived / onRoomBinaryMessageReceived
|
||||
▼
|
||||
useVoiceChat.js
|
||||
│ 转发 definite=true 的用户字幕
|
||||
▼
|
||||
POST /api/voice/subtitle → latestUserSpeech.set(roomId, text)
|
||||
POST /api/voice/room_message → 解析二进制消息中的字幕数据
|
||||
```
|
||||
| 意图识别 | 规则路由 + LLM 路由双层决策 |
|
||||
| S2S 与工具冲突 | ChatTTSText 注入工具结果覆盖 S2S 直连回复 |
|
||||
| 音频编解码 | realtimeDialogProtocol 自定义二进制协议 |
|
||||
| 语音文本分段 | 按标点切分,估算播报时长,分段 TTS |
|
||||
|
||||
---
|
||||
|
||||
@@ -282,12 +241,14 @@ SSE 流式返回 → 前端逐字展示
|
||||
| `chat_user` | 文字对话用户输入 |
|
||||
| `chat_bot` | 文字对话 AI 回复 |
|
||||
|
||||
### 7.3 模式切换 API(session.js)
|
||||
### 7.3 会话管理 API(session.js)
|
||||
|
||||
| 端点 | 方法 | 说明 |
|
||||
|------|------|------|
|
||||
| `/api/session/list` | GET | 获取会话列表(带最后消息预览) |
|
||||
| `/api/session/:id/history` | GET | 获取完整历史(支持 llm/full 格式) |
|
||||
| `/api/session/:id/switch` | POST | 切换模式,返回上下文历史 |
|
||||
| `/api/session/:id` | DELETE | 删除会话及其消息 |
|
||||
|
||||
### 7.4 数据库表
|
||||
|
||||
@@ -302,30 +263,32 @@ SSE 流式返回 → 前端逐字展示
|
||||
### 8.1 组件树
|
||||
|
||||
```
|
||||
App.jsx # 模式切换 + 全局设置
|
||||
App.jsx # 模式切换 + 全局设置 + 会话管理
|
||||
├── VoicePanel.jsx # 语音通话 UI(开始/结束/静音/时长)
|
||||
│ └── SubtitleDisplay # 实时字幕(definite/interim 区分)
|
||||
├── ChatPanel.jsx # 文字对话 UI(消息列表 + SSE 流式显示)
|
||||
├── SessionHistoryPanel.jsx # 会话历史侧边栏(新建/切换/删除会话)
|
||||
└── SettingsPanel.jsx # 设置面板(模型/音色/系统角色/VAD)
|
||||
```
|
||||
|
||||
### 8.2 useVoiceChat Hook
|
||||
### 8.2 useNativeVoiceChat Hook
|
||||
|
||||
管理语音通话完整生命周期:
|
||||
管理 Native WebSocket 语音通话完整生命周期:
|
||||
|
||||
- **start(options)**:prepare → joinRoom → startVoiceChat
|
||||
- **stop()**:leaveRoom → stopVoiceChat → 返回字幕
|
||||
- **start(options)**:连接 WebSocket → 采集麦克风 PCM → 发送音频帧
|
||||
- **stop()**:断开 WebSocket → 返回字幕和 sessionId
|
||||
- **toggleMute()**:静音/取消静音
|
||||
- **状态**:isActive, isMuted, isConnecting, subtitles, duration, error
|
||||
|
||||
### 8.3 rtcService.js
|
||||
### 8.3 nativeVoiceService.js
|
||||
|
||||
封装 @volcengine/rtc SDK:
|
||||
封装 Native WebSocket 语音连接:
|
||||
|
||||
- **init(appId)**:创建引擎、注册事件监听
|
||||
- **joinRoom()**:入房 + 开始音频采集 + 启用字幕
|
||||
- **事件监听**:字幕(onSubtitleMessageReceived)、房间消息(binary/text)、诊断(音量/流)
|
||||
- **方案B 消息转发**:所有房间消息 → onRoomMessage 回调 → useVoiceChat → 后端
|
||||
- **connect(options)**:建立 WebSocket 连接,初始化音频采集(getUserMedia + Web Audio API 降采样)
|
||||
- **disconnect()**:关闭连接、停止音频
|
||||
- **setMuted(muted)**:控制麦克风
|
||||
- **事件回调**:onSubtitle(字幕)、onConnectionStateChange(连接状态)、onError(错误)
|
||||
- **音频播放**:接收 PCM 音频帧,通过 Web Audio API 播放
|
||||
|
||||
---
|
||||
|
||||
@@ -335,10 +298,6 @@ App.jsx # 模式切换 + 全局设置
|
||||
|
||||
| 变量 | 说明 |
|
||||
|------|------|
|
||||
| `VOLC_RTC_APP_ID` | RTC 应用 ID |
|
||||
| `VOLC_RTC_APP_KEY` | RTC 应用密钥(生成 Token) |
|
||||
| `VOLC_ACCESS_KEY_ID` | 火山引擎 AK(API 签名) |
|
||||
| `VOLC_SECRET_ACCESS_KEY` | 火山引擎 SK |
|
||||
| `VOLC_S2S_APP_ID` | S2S 端到端语音 AppID |
|
||||
| `VOLC_S2S_TOKEN` | S2S Token |
|
||||
| `VOLC_ARK_ENDPOINT_ID` | 方舟 LLM 推理接入点 ID |
|
||||
@@ -352,8 +311,8 @@ App.jsx # 模式切换 + 全局设置
|
||||
| `VOLC_ARK_KNOWLEDGE_BASE_IDS` | 方舟私域知识库数据集 ID(逗号分隔) |
|
||||
| `VOLC_ARK_API_KEY` | 方舟 API Key |
|
||||
| `VOLC_WEBSEARCH_API_KEY` | 联网搜索 Key |
|
||||
| `FC_SERVER_URL` | FC 回调地址 |
|
||||
| `FC_SIGNATURE` | FC 回调签名 |
|
||||
| `VOLC_S2S_SPEAKER_ID` | 自定义音色 ID |
|
||||
| `ENABLE_NATIVE_VOICE_GATEWAY` | 语音网关开关(默认开启,设 false 关闭) |
|
||||
| `MYSQL_HOST/PORT/USER/PASSWORD/DATABASE` | MySQL 配置 |
|
||||
|
||||
---
|
||||
@@ -363,13 +322,13 @@ App.jsx # 模式切换 + 全局设置
|
||||
```
|
||||
互联网用户
|
||||
│
|
||||
▼ HTTPS (443)
|
||||
▼ HTTPS (443) + WSS
|
||||
┌──────────────┐
|
||||
│ Nginx │ ← 宝塔面板管理
|
||||
│ (反向代理) │
|
||||
│ SSL 终止 │
|
||||
└──────┬───────┘
|
||||
│ http://localhost:3012
|
||||
│ http://localhost:3001
|
||||
▼
|
||||
┌──────────────┐
|
||||
│ PM2 │ ← ecosystem.config.js
|
||||
@@ -378,7 +337,7 @@ App.jsx # 模式切换 + 全局设置
|
||||
└──────┬───────┘
|
||||
│
|
||||
├── MySQL 8 (localhost:3306)
|
||||
├── 火山引擎 RTC API (rtc.volcengineapi.com)
|
||||
├── 火山 Realtime Dialog (wss://openspeech.bytedance.com)
|
||||
├── 方舟 LLM API (ark.cn-beijing.volces.com)
|
||||
└── Coze API (api.coze.cn)
|
||||
```
|
||||
@@ -433,8 +392,8 @@ App.jsx # 模式切换 + 全局设置
|
||||
|
||||
## 13. 已知问题与优化方向
|
||||
|
||||
1. **FC 响应延迟**:用户提问到 AI 用知识库回答约需 12-15s(LLM 决策 ~8s + KB 查询 ~5s),期间有静默
|
||||
2. **Chunk 乱序**:RTC FC 回调的 tool_call arguments 被拆成单字符 chunk 且乱序,只能靠 ASR 文本兜底
|
||||
3. **S2S 与 LLM 并行冲突**:S2S 会先给出直接回复,需 interrupt 打断后再发 FC 结果
|
||||
4. **Mock 工具**:天气和订单工具目前为 Mock 数据,可接入真实 API
|
||||
1. **天气和订单工具为 Mock**:queryWeather 和 queryOrder 使用硬编码数据,可接入真实 API
|
||||
2. **本地知识库简陋**:searchLocalKnowledge 仅 5 条硬编码记录,需接入真实知识库
|
||||
3. **方舟 LLM Mock 模式**:未配置 VOLC_ARK_ENDPOINT_ID 时返回硬编码回复
|
||||
4. **文字对话依赖 Coze**:Coze 未配置时文字对话模式不可用,可考虑 fallback 到 arkChatService
|
||||
5. **知识库冷启动**:方舟 KB 首次查询较慢(~10s),后续查询 ~3-5s
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
import { useState, useEffect, useCallback, useRef } from 'react';
|
||||
import { Settings2, Zap, Mic, MessageSquare } from 'lucide-react';
|
||||
import { Settings2, Zap, Mic, MessageSquare, History, Plus } from 'lucide-react';
|
||||
import VoicePanel from './components/VoicePanel';
|
||||
import ChatPanel from './components/ChatPanel';
|
||||
import SettingsPanel from './components/SettingsPanel';
|
||||
import { getVoiceConfig } from './services/voiceApi';
|
||||
import SessionHistoryPanel from './components/SessionHistoryPanel';
|
||||
|
||||
export default function App() {
|
||||
const [showSettings, setShowSettings] = useState(false);
|
||||
const [showHistory, setShowHistory] = useState(false);
|
||||
const [voiceConfig, setVoiceConfig] = useState(null);
|
||||
// 'voice' | 'chat'
|
||||
const [mode, setMode] = useState('voice');
|
||||
@@ -20,6 +22,7 @@ export default function App() {
|
||||
botName: '小智',
|
||||
systemRole: '你是一个友善的智能助手,名叫小智。你擅长帮用户解答各类问题。',
|
||||
speakingStyle: '请使用温和、清晰的口吻。',
|
||||
greetingText: '你好,我是你的智能语音助手,有什么可以帮你的吗?',
|
||||
modelVersion: '1.2.1.0',
|
||||
speaker: 'zh_female_vv_jupiter_bigtts',
|
||||
enableWebSearch: false,
|
||||
@@ -51,16 +54,47 @@ export default function App() {
|
||||
setMode('voice');
|
||||
}, [currentSessionId]);
|
||||
|
||||
// 直接进入文字模式(新会话)
|
||||
// 切换到文字模式(复用已有 sessionId,没有时新建)
|
||||
const handleStartChat = useCallback(() => {
|
||||
const newSid = `chat_${Date.now().toString(36)}`;
|
||||
setCurrentSessionId(newSid);
|
||||
const sid = currentSessionId || `chat_${Date.now().toString(36)}`;
|
||||
setCurrentSessionId(sid);
|
||||
setHandoff({
|
||||
sessionId: newSid,
|
||||
sessionId: sid,
|
||||
subtitles: [],
|
||||
});
|
||||
setMode('chat');
|
||||
console.log(`[App] New chat session: ${newSid}`);
|
||||
console.log(`[App] Switch to chat, sessionId=${sid}`);
|
||||
}, [currentSessionId]);
|
||||
|
||||
// Mirror the session id reported by a newly started voice session into App state.
// Empty ids and ids equal to the current one are ignored.
const handleSessionCreated = useCallback((sessionId) => {
  const isDifferentSession = Boolean(sessionId) && sessionId !== currentSessionId;
  if (!isDifferentSession) {
    return;
  }
  setCurrentSessionId(sessionId);
  console.log(`[App] Voice session synced: ${sessionId}`);
}, [currentSessionId]);
|
||||
|
||||
// Start over: clear the session id, the handoff payload and the chat messages,
// then return to voice mode. The setters are independent of each other.
const handleNewSession = useCallback(() => {
  setMode('voice');
  setChatMessages([]);
  setHandoff(null);
  setCurrentSessionId(null);
  console.log('[App] New session created');
}, []);
|
||||
|
||||
// Open a session picked from the history list.
// The session is always opened in chat mode with an empty subtitle handoff;
// session.mode is only written to the log.
const handleSelectSession = useCallback((session) => {
  const { id: selectedId, mode: lastMode } = session;
  const historyHandoff = { sessionId: selectedId, subtitles: [] };
  setCurrentSessionId(selectedId);
  setChatMessages([]);
  setHandoff(historyHandoff);
  setMode('chat');
  console.log(`[App] Selected session: ${selectedId}, mode: ${lastMode}`);
}, []);
|
||||
|
||||
return (
|
||||
@@ -78,7 +112,7 @@ export default function App() {
|
||||
</h1>
|
||||
<p className="text-[11px] text-slate-400 leading-tight">
|
||||
{mode === 'voice'
|
||||
? '混合编排模式 · OutputMode=1'
|
||||
? '直连 S2S 语音 · ChatTTSText'
|
||||
: handoff?.subtitles?.length > 0
|
||||
? '语音转接 · 上下文已延续'
|
||||
: '方舟 LLM · Function Calling'}
|
||||
@@ -86,6 +120,22 @@ export default function App() {
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-1.5">
|
||||
{/* History button */}
|
||||
<button
|
||||
onClick={() => setShowHistory(true)}
|
||||
className="p-2 rounded-lg hover:bg-slate-700/50 text-slate-400 hover:text-white transition-colors mr-1"
|
||||
title="会话历史"
|
||||
>
|
||||
<History className="w-4 h-4" />
|
||||
</button>
|
||||
{/* New session button */}
|
||||
<button
|
||||
onClick={handleNewSession}
|
||||
className="p-2 rounded-lg hover:bg-slate-700/50 text-slate-400 hover:text-white transition-colors"
|
||||
title="新建会话"
|
||||
>
|
||||
<Plus className="w-4 h-4" />
|
||||
</button>
|
||||
{/* Mode toggle buttons */}
|
||||
<div className="flex items-center bg-slate-800/60 rounded-lg border border-slate-700/40 p-0.5 mr-2">
|
||||
<button
|
||||
@@ -136,23 +186,23 @@ export default function App() {
|
||||
{mode === 'voice' ? (
|
||||
<>
|
||||
{/* Voice Panel */}
|
||||
<VoicePanel settings={settings} onVoiceEnd={handleVoiceEnd} chatHistory={chatMessages} sessionId={currentSessionId} />
|
||||
<VoicePanel settings={settings} onVoiceEnd={handleVoiceEnd} chatHistory={chatMessages} sessionId={currentSessionId} onSessionCreated={handleSessionCreated} />
|
||||
|
||||
{/* Architecture Info */}
|
||||
<div className="mt-6 p-4 rounded-xl bg-slate-800/40 border border-slate-700/40">
|
||||
<h3 className="text-xs font-semibold text-slate-400 uppercase tracking-wider mb-3">方案B 混合编排架构</h3>
|
||||
<h3 className="text-xs font-semibold text-slate-400 uppercase tracking-wider mb-3">RTC 直路由语音架构</h3>
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-3 text-xs">
|
||||
<div className="p-3 rounded-lg bg-slate-700/30 border border-slate-600/30">
|
||||
<div className="text-emerald-400 font-medium mb-1">闲聊场景</div>
|
||||
<div className="text-slate-400">端到端模型直接回复 · ~300-800ms</div>
|
||||
<div className="text-emerald-400 font-medium mb-1">上行链路</div>
|
||||
<div className="text-slate-400">浏览器 RTC 麦克风 → 房间字幕/消息 → 后端前置路由</div>
|
||||
</div>
|
||||
<div className="p-3 rounded-lg bg-slate-700/30 border border-slate-600/30">
|
||||
<div className="text-amber-400 font-medium mb-1">工具调用场景</div>
|
||||
<div className="text-slate-400">LLM 决策 + Function Calling · ~1-2s</div>
|
||||
<div className="text-amber-400 font-medium mb-1">应答链路</div>
|
||||
<div className="text-slate-400">知识库/工具结果 → ExternalTextToSpeech → 语音播报</div>
|
||||
</div>
|
||||
<div className="p-3 rounded-lg bg-slate-700/30 border border-slate-600/30">
|
||||
<div className="text-violet-400 font-medium mb-1">自动切换</div>
|
||||
<div className="text-slate-400">系统自动判断走 S2S 或 LLM 分支</div>
|
||||
<div className="text-violet-400 font-medium mb-1">当前目标</div>
|
||||
<div className="text-slate-400">彻底绕开原生链纯 S2S 抢答,保证知识库结果能播报</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -161,6 +211,7 @@ export default function App() {
|
||||
/* Chat Panel */
|
||||
handoff && (
|
||||
<ChatPanel
|
||||
key={handoff.sessionId}
|
||||
sessionId={handoff.sessionId}
|
||||
voiceSubtitles={handoff.subtitles}
|
||||
settings={settings}
|
||||
@@ -170,6 +221,16 @@ export default function App() {
|
||||
)
|
||||
)}
|
||||
</main>
|
||||
|
||||
{/* Session History Sidebar */}
|
||||
{showHistory && (
|
||||
<SessionHistoryPanel
|
||||
currentSessionId={currentSessionId}
|
||||
onSelectSession={handleSelectSession}
|
||||
onNewSession={handleNewSession}
|
||||
onClose={() => setShowHistory(false)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
170
test2/client/src/components/SessionHistoryPanel.jsx
Normal file
170
test2/client/src/components/SessionHistoryPanel.jsx
Normal file
@@ -0,0 +1,170 @@
|
||||
import { useState, useEffect, useCallback } from 'react';
|
||||
import { X, Plus, Trash2, MessageSquare, Mic, Clock, Loader2 } from 'lucide-react';
|
||||
import { getSessionList, deleteSessionById } from '../services/voiceApi';
|
||||
|
||||
export default function SessionHistoryPanel({ currentSessionId, onSelectSession, onNewSession, onClose }) {
|
||||
const [sessions, setSessions] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [deletingId, setDeletingId] = useState(null);
|
||||
|
||||
// Fetch the session list and store it in state, toggling the loading flag
// around the request. A failed request is logged and shown as an empty list.
// NOTE(review): the (null, 50) arguments are presumably (filter, limit) — confirm in voiceApi.
const loadSessions = useCallback(async () => {
  setLoading(true);
  try {
    const fetched = await getSessionList(null, 50);
    // A falsy response is treated the same as "no sessions".
    const nextSessions = fetched ? fetched : [];
    setSessions(nextSessions);
  } catch (loadError) {
    console.warn('[SessionHistory] Load failed:', loadError.message);
    setSessions([]);
  } finally {
    setLoading(false);
  }
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
loadSessions();
|
||||
}, [loadSessions]);
|
||||
|
||||
// Delete one session from the list.
// Propagation is stopped so the click on the delete button does not also
// trigger the surrounding row's select handler. While one delete is in
// flight (deletingId is set) further delete clicks are ignored.
const handleDelete = async (e, sessionId) => {
  e.stopPropagation();
  const deleteInFlight = Boolean(deletingId);
  if (!deleteInFlight) {
    setDeletingId(sessionId);
    try {
      await deleteSessionById(sessionId);
      // Drop the row locally only after the request has succeeded.
      const isOtherSession = (entry) => entry.id !== sessionId;
      setSessions((current) => current.filter(isOtherSession));
    } catch (deleteError) {
      console.warn('[SessionHistory] Delete failed:', deleteError.message);
    } finally {
      setDeletingId(null);
    }
  }
};
|
||||
|
||||
/**
 * Format a session timestamp as a short relative label for the history list.
 *
 * @param {number|string|Date|null|undefined} timestamp - Epoch milliseconds
 *   (as a number or an all-digit string), a Date, or a date string that the
 *   Date constructor can parse (e.g. ISO 8601).
 * @returns {string} '' when the value is missing or cannot be parsed;
 *   otherwise '刚刚', 'N分钟前', 'N小时前', 'N天前', or 'M/D' for values
 *   seven or more days old.
 */
const formatTime = (timestamp) => {
  if (!timestamp) return '';

  // Only all-digit strings are epoch milliseconds. Running parseInt() on
  // every string truncated ISO dates such as "2026-03-13T08:00:00Z" to
  // 2026 ms, which rendered as a date in January 1970.
  const isEpochString = typeof timestamp === 'string' && /^\d+$/.test(timestamp.trim());
  const date = new Date(isEpochString ? Number(timestamp) : timestamp);
  const time = date.getTime();

  // Unparseable input yields an empty label rather than "NaN/NaN".
  if (Number.isNaN(time)) return '';

  const diffMin = Math.floor((Date.now() - time) / 60000);
  // Timestamps slightly in the future (negative diff) also count as "just now".
  if (diffMin < 1) return '刚刚';
  if (diffMin < 60) return `${diffMin}分钟前`;

  const diffHour = Math.floor(diffMin / 60);
  if (diffHour < 24) return `${diffHour}小时前`;

  const diffDay = Math.floor(diffHour / 24);
  if (diffDay < 7) return `${diffDay}天前`;

  return `${date.getMonth() + 1}/${date.getDate()}`;
};
|
||||
|
||||
return (
|
||||
<div className="fixed inset-0 z-50 flex">
|
||||
{/* Backdrop */}
|
||||
<div className="absolute inset-0 bg-black/50 backdrop-blur-sm" onClick={onClose} />
|
||||
|
||||
{/* Sidebar */}
|
||||
<div className="relative w-80 max-w-[85vw] bg-slate-900 border-r border-slate-700/50 flex flex-col h-full shadow-2xl animate-slide-in">
|
||||
{/* Header */}
|
||||
<div className="px-4 py-3 border-b border-slate-700/40 flex items-center justify-between flex-shrink-0">
|
||||
<div className="flex items-center gap-2">
|
||||
<Clock className="w-4 h-4 text-slate-400" />
|
||||
<h2 className="text-sm font-semibold text-white">会话历史</h2>
|
||||
</div>
|
||||
<button
|
||||
onClick={onClose}
|
||||
className="p-1.5 rounded-lg hover:bg-slate-700/50 text-slate-400 hover:text-white transition-colors"
|
||||
>
|
||||
<X className="w-4 h-4" />
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* New Session Button */}
|
||||
<div className="px-3 py-2 border-b border-slate-700/30 flex-shrink-0">
|
||||
<button
|
||||
onClick={() => {
|
||||
onNewSession();
|
||||
onClose();
|
||||
}}
|
||||
className="w-full flex items-center gap-2 px-3 py-2.5 rounded-xl bg-gradient-to-r from-violet-500/20 to-indigo-500/20 border border-violet-500/30 text-violet-300 hover:from-violet-500/30 hover:to-indigo-500/30 transition-all text-sm font-medium"
|
||||
>
|
||||
<Plus className="w-4 h-4" />
|
||||
新建会话
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Session List */}
|
||||
<div className="flex-1 overflow-y-auto px-3 py-2 space-y-1">
|
||||
{loading ? (
|
||||
<div className="flex items-center justify-center py-12 text-slate-500">
|
||||
<Loader2 className="w-5 h-5 animate-spin mr-2" />
|
||||
<span className="text-sm">加载中...</span>
|
||||
</div>
|
||||
) : sessions.length === 0 ? (
|
||||
<div className="text-center py-12 text-slate-500 text-sm">
|
||||
暂无会话记录
|
||||
</div>
|
||||
) : (
|
||||
sessions.map((session) => {
|
||||
const isActive = session.id === currentSessionId;
|
||||
return (
|
||||
<div
|
||||
key={session.id}
|
||||
onClick={() => {
|
||||
onSelectSession(session);
|
||||
onClose();
|
||||
}}
|
||||
className={`group relative flex items-start gap-2.5 px-3 py-2.5 rounded-xl cursor-pointer transition-all ${
|
||||
isActive
|
||||
? 'bg-violet-500/15 border border-violet-500/30'
|
||||
: 'hover:bg-slate-800/60 border border-transparent'
|
||||
}`}
|
||||
>
|
||||
{/* Mode icon */}
|
||||
<div className={`w-7 h-7 rounded-full flex items-center justify-center flex-shrink-0 mt-0.5 ${
|
||||
session.mode === 'voice' ? 'bg-emerald-500/15' : 'bg-indigo-500/15'
|
||||
}`}>
|
||||
{session.mode === 'voice' ? (
|
||||
<Mic className={`w-3.5 h-3.5 ${isActive ? 'text-emerald-400' : 'text-emerald-500/70'}`} />
|
||||
) : (
|
||||
<MessageSquare className={`w-3.5 h-3.5 ${isActive ? 'text-indigo-400' : 'text-indigo-500/70'}`} />
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Content */}
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="flex items-center justify-between gap-1">
|
||||
<span className={`text-xs font-medium truncate ${isActive ? 'text-violet-300' : 'text-slate-300'}`}>
|
||||
{session.lastMessage || (session.mode === 'voice' ? '语音会话' : '文字会话')}
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 mt-0.5">
|
||||
<span className="text-[10px] text-slate-500">
|
||||
{formatTime(session.updatedAt)}
|
||||
</span>
|
||||
{session.messageCount > 0 && (
|
||||
<span className="text-[10px] text-slate-600">
|
||||
{session.messageCount} 条消息
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Delete button */}
|
||||
{!isActive && (
|
||||
<button
|
||||
onClick={(e) => handleDelete(e, session.id)}
|
||||
className="opacity-0 group-hover:opacity-100 p-1 rounded-md hover:bg-red-500/20 text-slate-600 hover:text-red-400 transition-all flex-shrink-0"
|
||||
title="删除会话"
|
||||
>
|
||||
{deletingId === session.id ? (
|
||||
<Loader2 className="w-3.5 h-3.5 animate-spin" />
|
||||
) : (
|
||||
<Trash2 className="w-3.5 h-3.5" />
|
||||
)}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -80,6 +80,16 @@ export default function SettingsPanel({ settings, onChange, voiceConfig, onClose
|
||||
className="w-full px-3 py-1.5 rounded-lg bg-slate-700/50 border border-slate-600/40 text-sm text-white placeholder-slate-500 focus:outline-none focus:border-violet-500/50"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="block text-xs text-slate-500 mb-1">欢迎语</label>
|
||||
<textarea
|
||||
value={settings.greetingText}
|
||||
onChange={(e) => update('greetingText', e.target.value)}
|
||||
rows={2}
|
||||
className="w-full px-3 py-1.5 rounded-lg bg-slate-700/50 border border-slate-600/40 text-sm text-white placeholder-slate-500 focus:outline-none focus:border-violet-500/50 resize-none"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 音色 & 模型 */}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { Mic, MicOff, Phone, PhoneOff, Loader2, MessageSquare } from 'lucide-react';
|
||||
import { useVoiceChat } from '../hooks/useVoiceChat';
|
||||
import { useNativeVoiceChat } from '../hooks/useNativeVoiceChat';
|
||||
import SubtitleDisplay from './SubtitleDisplay';
|
||||
|
||||
export default function VoicePanel({ settings, onVoiceEnd, chatHistory = [], sessionId: parentSessionId }) {
|
||||
export default function VoicePanel({ settings, onVoiceEnd, chatHistory = [], sessionId: parentSessionId, onSessionCreated }) {
|
||||
const {
|
||||
isActive,
|
||||
isMuted,
|
||||
@@ -15,7 +15,7 @@ export default function VoicePanel({ settings, onVoiceEnd, chatHistory = [], ses
|
||||
stop,
|
||||
toggleMute,
|
||||
clearError,
|
||||
} = useVoiceChat();
|
||||
} = useNativeVoiceChat();
|
||||
|
||||
const formatTime = (s) => {
|
||||
const m = Math.floor(s / 60);
|
||||
@@ -23,17 +23,21 @@ export default function VoicePanel({ settings, onVoiceEnd, chatHistory = [], ses
|
||||
return `${m.toString().padStart(2, '0')}:${sec.toString().padStart(2, '0')}`;
|
||||
};
|
||||
|
||||
const handleStart = () => {
|
||||
start({
|
||||
const handleStart = async () => {
|
||||
const result = await start({
|
||||
botName: settings.botName,
|
||||
systemRole: settings.systemRole,
|
||||
speakingStyle: settings.speakingStyle,
|
||||
greetingText: settings.greetingText,
|
||||
modelVersion: settings.modelVersion,
|
||||
speaker: settings.speaker,
|
||||
enableWebSearch: settings.enableWebSearch,
|
||||
chatHistory: chatHistory.length > 0 ? chatHistory.slice(-10) : undefined,
|
||||
parentSessionId,
|
||||
});
|
||||
if (result?.sessionId && onSessionCreated) {
|
||||
onSessionCreated(result.sessionId);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
|
||||
148
test2/client/src/hooks/useNativeVoiceChat.js
Normal file
148
test2/client/src/hooks/useNativeVoiceChat.js
Normal file
@@ -0,0 +1,148 @@
|
||||
import { useState, useCallback, useRef, useEffect } from 'react';
|
||||
import nativeVoiceService from '../services/nativeVoiceService';
|
||||
|
||||
/**
 * React hook that drives a voice chat session through `nativeVoiceService`.
 *
 * It mirrors the service's subtitle / connection-state / error callbacks into
 * React state, keeps a one-second call-duration counter, and optionally speaks
 * a greeting with the browser's speech synthesis once the session is connected.
 *
 * @returns {{
 *   isActive: boolean,
 *   isMuted: boolean,
 *   isConnecting: boolean,
 *   subtitles: Array<object>,
 *   connectionState: string,
 *   error: (string|null),
 *   duration: number,
 *   start: function(object=): Promise<({sessionId: string}|null)>,
 *   stop: function(): Promise<{sessionId: (string|null), subtitles: Array<object>}>,
 *   toggleMute: function(): Promise<void>,
 *   clearError: function(): void,
 * }}
 */
export function useNativeVoiceChat() {
  const [isActive, setIsActive] = useState(false);
  const [isMuted, setIsMuted] = useState(false);
  const [isConnecting, setIsConnecting] = useState(false);
  const [subtitles, setSubtitles] = useState([]);
  const [connectionState, setConnectionState] = useState('disconnected');
  const [error, setError] = useState(null);
  const [duration, setDuration] = useState(0);
  const sessionRef = useRef(null);
  const timerRef = useRef(null);
  const greetingUtteranceRef = useRef(null);

  // Stops the duration counter; safe to call when no counter is running.
  const clearTimer = useCallback(() => {
    if (timerRef.current) {
      clearInterval(timerRef.current);
      timerRef.current = null;
    }
  }, []);

  // Cancels any greeting that is queued or currently being spoken.
  const stopGreeting = useCallback(() => {
    if (typeof window !== 'undefined' && 'speechSynthesis' in window) {
      window.speechSynthesis.cancel();
    }
    greetingUtteranceRef.current = null;
  }, []);

  // Speaks `text` via the Web Speech API; does nothing when the text is blank
  // or the browser has no speech synthesis support.
  const playGreeting = useCallback((text) => {
    const greetingText = String(text || '').trim();
    const canSpeak = typeof window !== 'undefined'
      && 'speechSynthesis' in window
      && typeof window.SpeechSynthesisUtterance !== 'undefined';
    if (!greetingText || !canSpeak) {
      return;
    }
    stopGreeting();
    const utterance = new window.SpeechSynthesisUtterance(greetingText);
    utterance.lang = 'zh-CN';
    utterance.rate = 1;
    utterance.pitch = 1;
    // Keep a reference for as long as the utterance is the current greeting.
    greetingUtteranceRef.current = utterance;
    window.speechSynthesis.speak(utterance);
  }, [stopGreeting]);

  useEffect(() => {
    nativeVoiceService.on('onSubtitle', (subtitle) => {
      setSubtitles((prev) => {
        if (subtitle.isFinal) {
          // Drop exact repeats of an already confirmed line.
          const isDup = prev.some((s) => s.isFinal && s.role === subtitle.role && s.text === subtitle.text);
          if (isDup) return prev;
          // A final line replaces the interim line of the same speaker.
          return [...prev.filter((s) => s.isFinal || s.role !== subtitle.role), subtitle];
        }
        // Only one interim line is shown at a time, after all confirmed lines.
        const finals = prev.filter((s) => s.isFinal);
        return [...finals, subtitle];
      });
    });
    nativeVoiceService.on('onConnectionStateChange', setConnectionState);
    nativeVoiceService.on('onError', (err) => setError(err?.message || 'Native voice error'));

    return () => {
      // The service is a module-level singleton that outlives this component.
      // Detach the callbacks first so the teardown below (which emits
      // 'disconnected') cannot call state setters after unmount.
      nativeVoiceService.off('onSubtitle');
      nativeVoiceService.off('onConnectionStateChange');
      nativeVoiceService.off('onError');
      stopGreeting();
      nativeVoiceService.disconnect().catch((err) => {
        console.warn('[useNativeVoiceChat] Disconnect on unmount failed:', err);
      });
      clearTimer();
    };
  }, [stopGreeting, clearTimer]);

  /**
   * Opens a voice session.
   *
   * @param {object} [options] - botName, systemRole, speakingStyle,
   *   modelVersion and speaker are forwarded to the service; greetingText is
   *   spoken locally; parentSessionId, when given, is reused as the session id.
   * @returns {Promise<({sessionId: string}|null)>} The session id on success,
   *   or null when connecting failed (the failure is exposed through `error`).
   */
  const start = useCallback(async (options = {}) => {
    setError(null);
    setIsConnecting(true);

    try {
      const userId = `user_${Date.now().toString(36)}`;
      const sessionId = options.parentSessionId || `native_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 6)}`;
      sessionRef.current = { sessionId, userId };

      // Reset per-call state BEFORE connecting: subtitles can already arrive
      // while connect() is still waiting on the microphone, and resetting
      // afterwards would wipe them. Also stop a counter left from a prior call.
      clearTimer();
      setSubtitles([]);
      setDuration(0);

      await nativeVoiceService.connect({
        sessionId,
        userId,
        botName: options.botName,
        systemRole: options.systemRole,
        speakingStyle: options.speakingStyle,
        modelVersion: options.modelVersion,
        speaker: options.speaker,
      });

      setIsActive(true);
      playGreeting(options.greetingText);
      timerRef.current = setInterval(() => {
        setDuration((d) => d + 1);
      }, 1000);
      return { sessionId };
    } catch (err) {
      console.error('[useNativeVoiceChat] Start failed:', err);
      setError(err?.message || 'Failed to start native voice chat');
      await nativeVoiceService.disconnect();
      sessionRef.current = null;
      return null;
    } finally {
      setIsConnecting(false);
    }
  }, [clearTimer, playGreeting]);

  /**
   * Ends the current session.
   *
   * @returns {Promise<{sessionId: (string|null), subtitles: Array<object>}>}
   *   The id of the session that was ended together with its confirmed
   *   subtitles, or `{ sessionId: null, subtitles: [] }` when none was active.
   */
  const stop = useCallback(async () => {
    let result = { sessionId: null, subtitles: [] };
    try {
      if (sessionRef.current) {
        result = {
          sessionId: sessionRef.current.sessionId,
          subtitles: subtitles.filter((s) => s.isFinal),
        };
        sessionRef.current = null;
      }
      await nativeVoiceService.disconnect();
    } catch (err) {
      console.error('[useNativeVoiceChat] Stop failed:', err);
    } finally {
      // Always return the UI to the idle state, even if teardown threw.
      stopGreeting();
      clearTimer();
      setIsActive(false);
      setIsMuted(false);
      setConnectionState('disconnected');
    }
    return result;
  }, [clearTimer, stopGreeting, subtitles]);

  // Flips the microphone mute flag in the service, then mirrors it in state.
  const toggleMute = useCallback(async () => {
    const next = !isMuted;
    await nativeVoiceService.setMuted(next);
    setIsMuted(next);
  }, [isMuted]);

  const clearError = useCallback(() => setError(null), []);

  return {
    isActive,
    isMuted,
    isConnecting,
    subtitles,
    connectionState,
    error,
    duration,
    start,
    stop,
    toggleMute,
    clearError,
  };
}
|
||||
@@ -1,198 +0,0 @@
|
||||
import { useState, useCallback, useRef, useEffect } from 'react';
|
||||
import rtcService from '../services/rtcService';
|
||||
import { prepareVoiceChat, startVoiceChat, stopVoiceChat, executeToolCall, executeFcCallback, sendSubtitle, forwardRoomMessage } from '../services/voiceApi';
|
||||
|
||||
/**
 * React hook that drives an RTC-based voice chat session.
 *
 * It wires `rtcService` callbacks (subtitles, tool calls, room messages,
 * connection state, errors) into React state and into the backend helpers
 * from `voiceApi`, and keeps a one-second call-duration counter.
 *
 * @returns {object} isActive, isMuted, isConnecting, subtitles,
 *   connectionState, error, duration, plus the start / stop / toggleMute /
 *   clearError actions.
 */
export function useVoiceChat() {
  const [isActive, setIsActive] = useState(false);
  const [isMuted, setIsMuted] = useState(false);
  const [isConnecting, setIsConnecting] = useState(false);
  const [subtitles, setSubtitles] = useState([]);
  const [connectionState, setConnectionState] = useState('disconnected');
  const [error, setError] = useState(null);
  const [duration, setDuration] = useState(0);
  const sessionRef = useRef(null);
  const timerRef = useRef(null);

  useEffect(() => {
    rtcService.on('onSubtitle', (subtitle) => {
      setSubtitles((prev) => {
        if (subtitle.isFinal) {
          // A final line replaces every earlier entry with the same sequence.
          return [...prev.filter((s) => s.sequence !== subtitle.sequence), subtitle];
        }
        // An interim line updates its own pending entry in place, if any.
        const idx = prev.findIndex((s) => s.sequence === subtitle.sequence && !s.isFinal);
        if (idx >= 0) {
          const updated = [...prev];
          updated[idx] = subtitle;
          return updated;
        }
        return [...prev, subtitle];
      });

      // Plan B: forward the user's final subtitles to the backend so the FC
      // callback can use them as the knowledge-base query.
      if (subtitle.isFinal && subtitle.role === 'user' && subtitle.text) {
        const session = sessionRef.current;
        if (session) {
          sendSubtitle({
            sessionId: session.sessionId,
            roomId: session.roomId,
            text: subtitle.text,
            role: 'user',
            definite: true,
            sequence: subtitle.sequence,
          }).catch((err) => console.warn('[useVoiceChat] Send subtitle failed:', err.message));
        }
      }
    });

    rtcService.on('onToolCall', async (toolCall) => {
      const session = sessionRef.current;
      if (!session) {
        console.warn('[useVoiceChat] Tool call received but no active session');
        return;
      }
      console.log(`[useVoiceChat] Tool call: ${toolCall.function_name}, session: ${session.sessionId}`);
      try {
        // Build the message in the FC callback format.
        const message = JSON.stringify([{
          id: toolCall.tool_call_id,
          function: {
            name: toolCall.function_name,
            arguments: toolCall.arguments,
          },
          seq: 1,
        }]);

        // Call the fc_callback endpoint with the parameters it requires.
        const result = await executeFcCallback({
          roomId: session.roomId,
          taskId: session.taskId || session.sessionId,
          type: 'tool_calls',
          message,
        });
        console.log('[useVoiceChat] FC callback result:', result);
      } catch (err) {
        console.error('[useVoiceChat] FC callback failed:', err);
      }
    });

    // Plan B: forward every RTC room message to the backend (it may carry
    // ASR / session-state data).
    rtcService.on('onRoomMessage', (msg) => {
      const session = sessionRef.current;
      if (session && msg.text) {
        forwardRoomMessage({
          roomId: session.roomId,
          uid: msg.uid,
          text: msg.text,
        }).catch(() => {}); // Fail silently so the main flow is not affected.
      }
    });

    rtcService.on('onConnectionStateChange', setConnectionState);
    rtcService.on('onError', (err) => setError(err?.message || 'RTC error'));

    return () => {
      rtcService.destroy();
      if (timerRef.current) clearInterval(timerRef.current);
    };
  }, []);

  /**
   * Starts a voice session: prepares a room, joins it, then starts the AI side.
   *
   * @param {object} [options] - `parentSessionId` is kept on the local session
   *   record; every other property is forwarded to `startVoiceChat`.
   * @returns {Promise<void>} Failures are reported through the `error` state.
   */
  const start = useCallback(async (options = {}) => {
    setError(null);
    setIsConnecting(true);

    try {
      const userId = `user_${Date.now().toString(36)}`;
      const { parentSessionId, ...startOptions } = options;

      // Step 1: prepare the room and obtain the token.
      const prepareRes = await prepareVoiceChat({ userId });
      if (!prepareRes.success) throw new Error(prepareRes.error);

      const { sessionId, roomId, taskId, rtcToken, rtcAppId } = prepareRes.data;
      sessionRef.current = { sessionId, roomId, taskId, parentSessionId };

      // Step 2: the user joins the room first.
      await rtcService.init(rtcAppId);
      await rtcService.joinRoom(roomId, userId, rtcToken);
      console.log('[useVoiceChat] User joined room, now starting AI...');

      // Step 3: with the user in the room, start the AI voice conversation.
      const startRes = await startVoiceChat({ sessionId, ...startOptions });
      if (!startRes.success) throw new Error(startRes.error);

      setIsActive(true);
      setSubtitles([]);
      setDuration(0);
      // Never leave a counter from a previous call running alongside the new one.
      if (timerRef.current) clearInterval(timerRef.current);
      timerRef.current = setInterval(() => {
        setDuration((d) => d + 1);
      }, 1000);
    } catch (err) {
      console.error('[useVoiceChat] Start failed:', err);
      setError(err.message || 'Failed to start voice chat');
      rtcService.destroy();
      if (sessionRef.current) {
        stopVoiceChat(sessionRef.current.sessionId).catch(() => {});
        sessionRef.current = null;
      }
    } finally {
      setIsConnecting(false);
    }
  }, []);

  /**
   * Leaves the room and stops the backend session.
   *
   * @returns {Promise<{sessionId: (string|null), subtitles: Array<object>}>}
   *   The ended session id and its transcript, or
   *   `{ sessionId: null, subtitles: [] }` when no session was active.
   */
  const stop = useCallback(async () => {
    let result = { sessionId: null, subtitles: [] };
    try {
      // Before leaving the room, pull the confirmed subtitles out of local state.
      const localFinalSubtitles = subtitles.filter((s) => s.isFinal);

      await rtcService.leaveRoom();

      const session = sessionRef.current;
      if (session) {
        sessionRef.current = null;
        const sid = session.sessionId;
        // Record the local transcript first so that a failing backend call
        // below cannot discard it.
        result = { sessionId: sid, subtitles: localFinalSubtitles };
        const response = await stopVoiceChat(sid);
        // Local subtitles come first (received directly over RTC, so more
        // complete); backend subtitles are only the fallback.
        if (localFinalSubtitles.length === 0) {
          result = { sessionId: sid, subtitles: response?.data?.subtitles || [] };
        }
      }
    } catch (err) {
      console.error('[useVoiceChat] Stop failed:', err);
    } finally {
      // Always return the UI to the idle state, even if the backend call failed.
      if (timerRef.current) {
        clearInterval(timerRef.current);
        timerRef.current = null;
      }
      setIsActive(false);
      setIsMuted(false);
      setConnectionState('disconnected');
    }
    return result;
  }, [subtitles]);

  // Flips the mute flag in the RTC service, then mirrors it in state.
  const toggleMute = useCallback(async () => {
    const next = !isMuted;
    await rtcService.setMuted(next);
    setIsMuted(next);
  }, [isMuted]);

  const clearError = useCallback(() => setError(null), []);

  return {
    isActive,
    isMuted,
    isConnecting,
    subtitles,
    connectionState,
    error,
    duration,
    start,
    stop,
    toggleMute,
    clearError,
  };
}
|
||||
@@ -17,3 +17,11 @@ body {
|
||||
background: #334155;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
@keyframes slide-in {
|
||||
from { transform: translateX(-100%); }
|
||||
to { transform: translateX(0); }
|
||||
}
|
||||
.animate-slide-in {
|
||||
animation: slide-in 0.2s ease-out;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,26 @@
|
||||
import axios from 'axios';
|
||||
|
||||
/**
 * Chooses the base URL for API requests.
 *
 * @param {string|undefined} configured - Explicitly configured base URL; used
 *   as-is whenever it is truthy.
 * @param {string} path - API path (for example '/api/chat') used as the
 *   same-origin fallback.
 * @returns {string} `configured` when set; otherwise an absolute URL on port
 *   3012 when the page is opened from file: or from localhost / 127.0.0.1 on
 *   any other port; otherwise `path` unchanged.
 */
function resolveApiBaseURL(configured, path) {
  if (configured) return configured;
  // No window object available: keep the relative path.
  if (typeof window === 'undefined') return path;

  const { hostname, port, protocol: pageProtocol } = window.location;
  const servedLocally = pageProtocol === 'file:'
    || hostname === 'localhost'
    || hostname === '127.0.0.1';
  if (!servedLocally || port === '3012') return path;

  // Anything that is not https (including file:) is addressed over http.
  const scheme = pageProtocol === 'https:' ? 'https:' : 'http:';
  // file: pages have an empty hostname.
  const host = hostname || '127.0.0.1';
  return `${scheme}//${host}:3012${path}`;
}
|
||||
|
||||
const chatApiBaseURL = resolveApiBaseURL(import.meta.env.VITE_CHAT_API_BASE_URL, '/api/chat');
|
||||
|
||||
const api = axios.create({
|
||||
baseURL: '/api/chat',
|
||||
baseURL: chatApiBaseURL,
|
||||
timeout: 30000,
|
||||
});
|
||||
|
||||
@@ -27,7 +46,7 @@ export function sendMessageStream(sessionId, message, { onChunk, onToolCall, onD
|
||||
|
||||
(async () => {
|
||||
try {
|
||||
const response = await fetch('/api/chat/send-stream', {
|
||||
const response = await fetch(`${chatApiBaseURL}/send-stream`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ sessionId, message }),
|
||||
|
||||
332
test2/client/src/services/nativeVoiceService.js
Normal file
332
test2/client/src/services/nativeVoiceService.js
Normal file
@@ -0,0 +1,332 @@
|
||||
/**
 * Browser client for the realtime voice dialog WebSocket endpoint.
 *
 * Outgoing: microphone audio is downsampled to 16 kHz, converted to 16-bit PCM
 * and sent as binary frames of 320 samples.
 * Incoming: binary frames are played back as mono 16-bit PCM at 24 kHz; text
 * frames are JSON control messages dispatched to the registered callbacks.
 */
class NativeVoiceService {
  constructor() {
    this.ws = null;
    this.mediaStream = null;
    this.captureContext = null;
    this.captureSource = null;
    this.captureProcessor = null;
    this.captureSilenceGain = null;
    this.playbackContext = null;
    // AudioContext time at which the next received chunk should start playing.
    this.playbackTime = 0;
    this.activeSources = new Set();
    // Downsampled microphone samples not yet packed into a 320-sample frame.
    this.pendingSamples = [];
    // Settle functions of the promise connect() awaits until "ready" arrives.
    this.readyResolver = null;
    this.readyRejector = null;
    this.callbacks = {
      onSubtitle: null,
      onConnectionStateChange: null,
      onError: null,
      onAssistantPending: null,
      onDiagnostic: null,
    };
  }

  /**
   * Builds the WebSocket URL for a session.
   *
   * @param {string} sessionId - Session identifier sent as a query parameter.
   * @param {string} [userId] - User identifier; sent as '' when missing.
   * @returns {string} URL of the `/ws/realtime-dialog` endpoint.
   */
  resolveWebSocketUrl(sessionId, userId) {
    const query = new URLSearchParams({
      sessionId,
      userId: userId || '',
    });
    const configuredBase = import.meta.env.VITE_VOICE_WS_BASE_URL || import.meta.env.VITE_VOICE_API_BASE_URL || '';
    // A configured base is only honoured when it is not a relative path.
    if (configuredBase && !configuredBase.startsWith('/')) {
      let base = configuredBase.replace(/\/$/, '');
      if (base.startsWith('https://')) {
        base = `wss://${base.slice('https://'.length)}`;
      } else if (base.startsWith('http://')) {
        base = `ws://${base.slice('http://'.length)}`;
      }
      // Strip a trailing REST prefix so the socket path hangs off the root.
      if (base.endsWith('/api/voice')) {
        base = base.slice(0, -'/api/voice'.length);
      } else if (base.endsWith('/api')) {
        base = base.slice(0, -'/api'.length);
      }
      return `${base}/ws/realtime-dialog?${query.toString()}`;
    }
    const hostname = window.location.hostname;
    const port = window.location.port;
    const isLocalHost = hostname === 'localhost' || hostname === '127.0.0.1';
    // Page opened from file: or from a local server on another port: connect
    // to port 3012 directly.
    if ((window.location.protocol === 'file:' || isLocalHost) && port !== '3012') {
      return `ws://${hostname || '127.0.0.1'}:3012/ws/realtime-dialog?${query.toString()}`;
    }
    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
    return `${protocol}//${window.location.host}/ws/realtime-dialog?${query.toString()}`;
  }

  /** Reports a connection state string to the onConnectionStateChange callback. */
  emitConnectionState(state) {
    this.callbacks.onConnectionStateChange?.(state);
  }

  /** Reports a timestamped diagnostic event to the onDiagnostic callback. */
  emitDiagnostic(type, payload) {
    this.callbacks.onDiagnostic?.({ type, payload, timestamp: Date.now() });
  }

  /**
   * Settles the promise that connect() is waiting on, at most once.
   *
   * @param {Error} [error] - When given the promise is rejected with it,
   *   otherwise it is resolved. Does nothing if no connect() is pending.
   */
  settleReady(error) {
    const resolve = this.readyResolver;
    const reject = this.readyRejector;
    this.readyResolver = null;
    this.readyRejector = null;
    if (error) {
      reject?.(error);
    } else {
      resolve?.();
    }
  }

  /** Stops every scheduled playback chunk and restarts the playback clock. */
  resetPlaybackQueue() {
    this.activeSources.forEach((source) => {
      // Best effort: a source may already have ended or been disconnected.
      try {
        source.stop();
      } catch (_) {}
      try {
        source.disconnect();
      } catch (_) {}
    });
    this.activeSources.clear();
    this.playbackTime = this.playbackContext ? this.playbackContext.currentTime + 0.02 : 0;
  }

  /**
   * Opens a session: tears down any previous one, connects the socket, sends
   * the "start" message, waits for the server's "ready" message and then
   * starts microphone capture.
   *
   * @param {object} params - sessionId, userId, botName, systemRole,
   *   speakingStyle, modelVersion and speaker, all forwarded in "start".
   * @returns {Promise<void>} Rejects when the socket errors or closes before
   *   "ready", when disconnect() is called meanwhile, or when capture fails.
   *   The caller is expected to call disconnect() after a rejection.
   */
  async connect({ sessionId, userId, botName, systemRole, speakingStyle, modelVersion, speaker }) {
    await this.disconnect();
    const wsUrl = this.resolveWebSocketUrl(sessionId, userId);
    this.emitConnectionState('connecting');
    const playbackContext = new (window.AudioContext || window.webkitAudioContext)();
    this.playbackContext = playbackContext;
    if (playbackContext.state === 'suspended') {
      await playbackContext.resume().catch(() => {});
    }
    this.playbackTime = playbackContext.currentTime;

    await new Promise((resolve, reject) => {
      this.readyResolver = resolve;
      this.readyRejector = reject;
      const ws = new WebSocket(wsUrl);
      ws.binaryType = 'arraybuffer';
      this.ws = ws;

      // Every handler ignores events from a socket that is no longer the
      // current one, so a late event of an old socket cannot affect a newer
      // connection.
      ws.onopen = () => {
        if (this.ws !== ws) return;
        this.emitConnectionState('connected');
        ws.send(JSON.stringify({
          type: 'start',
          sessionId,
          userId,
          botName,
          systemRole,
          speakingStyle,
          modelVersion,
          speaker,
        }));
      };

      ws.onerror = () => {
        if (this.ws !== ws) return;
        const error = new Error('WebSocket connection failed');
        this.callbacks.onError?.(error);
        this.settleReady(error);
      };

      ws.onclose = () => {
        if (this.ws !== ws) return;
        this.emitConnectionState('disconnected');
        this.settleReady(new Error('WebSocket closed before ready'));
      };

      ws.onmessage = (event) => {
        if (this.ws !== ws) return;
        if (typeof event.data === 'string') {
          this.handleJsonMessage(event.data);
          return;
        }
        this.handleAudioMessage(event.data);
      };
    });

    await this.startCapture();
  }

  /**
   * Dispatches one text frame from the server.
   *
   * Recognised `type` values: ready, subtitle, tts_reset, assistant_pending,
   * error. Any other parsed message is reported as a 'ws_message' diagnostic;
   * text that is not usable JSON is reported as 'ws_raw_text'.
   *
   * @param {string} raw - The text frame.
   */
  handleJsonMessage(raw) {
    let msg;
    // Only the parse is guarded: an exception thrown by a callback below must
    // surface instead of being reported as unparseable text.
    try {
      msg = JSON.parse(raw);
    } catch (_) {
      this.emitDiagnostic('ws_raw_text', raw);
      return;
    }
    if (msg === null) {
      this.emitDiagnostic('ws_raw_text', raw);
      return;
    }

    switch (msg.type) {
      case 'ready':
        this.settleReady();
        return;
      case 'subtitle':
        this.callbacks.onSubtitle?.({
          text: msg.text,
          role: msg.role,
          isFinal: !!msg.isFinal,
          sequence: msg.sequence,
        });
        return;
      case 'tts_reset':
        this.resetPlaybackQueue();
        this.emitDiagnostic('tts_reset', msg);
        return;
      case 'assistant_pending':
        this.callbacks.onAssistantPending?.(!!msg.active);
        return;
      case 'error':
        this.callbacks.onError?.(new Error(msg.error || 'native voice error'));
        return;
      default:
        this.emitDiagnostic('ws_message', msg);
    }
  }

  /**
   * Schedules one binary frame for gapless playback.
   *
   * @param {ArrayBuffer} arrayBuffer - 16-bit PCM samples; a trailing odd byte
   *   is ignored.
   */
  handleAudioMessage(arrayBuffer) {
    if (!this.playbackContext) {
      return;
    }
    // Int16Array throws on a byte length that is not a multiple of two, so
    // only whole samples are viewed.
    const sampleCount = Math.floor(arrayBuffer.byteLength / 2);
    if (sampleCount === 0) {
      return;
    }
    const pcm16 = new Int16Array(arrayBuffer, 0, sampleCount);
    const audioBuffer = this.playbackContext.createBuffer(1, pcm16.length, 24000);
    const channel = audioBuffer.getChannelData(0);
    for (let i = 0; i < pcm16.length; i += 1) {
      channel[i] = pcm16[i] / 32768;
    }
    const source = this.playbackContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(this.playbackContext.destination);
    this.activeSources.add(source);
    source.onended = () => {
      this.activeSources.delete(source);
      try {
        source.disconnect();
      } catch (_) {}
    };
    const now = this.playbackContext.currentTime;
    // If the queue ran dry, restart slightly in the future rather than in the past.
    if (this.playbackTime < now) {
      this.playbackTime = now + 0.02;
    }
    source.start(this.playbackTime);
    this.playbackTime += audioBuffer.duration;
    this.emitDiagnostic('audio_chunk', { samples: pcm16.length, duration: audioBuffer.duration });
  }

  /**
   * Requests the microphone and starts streaming it to the socket.
   *
   * @returns {Promise<void>} Rejects when microphone access is not granted.
   */
  async startCapture() {
    this.mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: 1,
        noiseSuppression: true,
        echoCancellation: true,
        autoGainControl: true,
      },
      video: false,
    });
    const captureContext = new (window.AudioContext || window.webkitAudioContext)();
    this.captureContext = captureContext;
    this.captureSource = captureContext.createMediaStreamSource(this.mediaStream);
    this.captureProcessor = captureContext.createScriptProcessor(4096, 1, 1);
    this.captureSilenceGain = captureContext.createGain();
    // The processor is routed to the destination through a zero gain so the
    // microphone signal is never audible.
    this.captureSilenceGain.gain.value = 0;
    this.captureProcessor.onaudioprocess = (event) => {
      const input = event.inputBuffer.getChannelData(0);
      const downsampled = this.downsampleBuffer(input, captureContext.sampleRate, 16000);
      for (let i = 0; i < downsampled.length; i += 1) {
        this.pendingSamples.push(downsampled[i]);
      }
      // Emit fixed frames of 320 samples; the remainder waits for the next callback.
      while (this.pendingSamples.length >= 320) {
        const chunk = this.pendingSamples.splice(0, 320);
        const pcm = new Int16Array(chunk.length);
        for (let i = 0; i < chunk.length; i += 1) {
          const sample = Math.max(-1, Math.min(1, chunk[i]));
          pcm[i] = sample < 0 ? sample * 32768 : sample * 32767;
        }
        if (this.ws && this.ws.readyState === WebSocket.OPEN) {
          this.ws.send(pcm.buffer);
        }
      }
    };
    this.captureSource.connect(this.captureProcessor);
    this.captureProcessor.connect(this.captureSilenceGain);
    this.captureSilenceGain.connect(captureContext.destination);
  }

  /**
   * Reduces the sample rate by averaging each run of input samples that maps
   * onto one output sample.
   *
   * @param {ArrayLike<number>} buffer - Input samples.
   * @param {number} inputRate - Sample rate of `buffer`.
   * @param {number} outputRate - Wanted sample rate.
   * @returns {number[]} Resampled samples, or an unchanged copy of the input
   *   when `outputRate` is not lower than `inputRate`.
   */
  downsampleBuffer(buffer, inputRate, outputRate) {
    if (outputRate >= inputRate) {
      return Array.from(buffer);
    }
    const sampleRateRatio = inputRate / outputRate;
    const newLength = Math.round(buffer.length / sampleRateRatio);
    const result = new Array(newLength);
    let offsetResult = 0;
    let offsetBuffer = 0;
    while (offsetResult < result.length) {
      const nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
      let accum = 0;
      let count = 0;
      for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i += 1) {
        accum += buffer[i];
        count += 1;
      }
      result[offsetResult] = count > 0 ? accum / count : 0;
      offsetResult += 1;
      offsetBuffer = nextOffsetBuffer;
    }
    return result;
  }

  /**
   * Enables or disables the microphone tracks.
   *
   * @param {boolean} muted - True to disable the tracks.
   */
  async setMuted(muted) {
    this.mediaStream?.getAudioTracks().forEach((track) => {
      track.enabled = !muted;
    });
  }

  /**
   * Tears the session down: stops capture and playback, closes the socket and
   * reports 'disconnected'. Safe to call when nothing is connected.
   *
   * Fields are cleared before any await so that an overlapping second call
   * finds nothing left to tear down.
   */
  async disconnect() {
    // A connect() still waiting for "ready" must fail now: its socket's own
    // events are detached below and could never settle it.
    this.settleReady(new Error('WebSocket closed before ready'));

    if (this.captureProcessor) {
      this.captureProcessor.disconnect();
      this.captureProcessor.onaudioprocess = null;
      this.captureProcessor = null;
    }
    if (this.captureSource) {
      this.captureSource.disconnect();
      this.captureSource = null;
    }
    if (this.captureSilenceGain) {
      this.captureSilenceGain.disconnect();
      this.captureSilenceGain = null;
    }
    const captureContext = this.captureContext;
    if (captureContext) {
      this.captureContext = null;
      await captureContext.close().catch(() => {});
    }
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }

    const ws = this.ws;
    if (ws) {
      this.ws = null;
      ws.onopen = null;
      ws.onmessage = null;
      ws.onerror = null;
      ws.onclose = null;
      // Best effort: the socket may already be closing or broken.
      try {
        if (ws.readyState === WebSocket.OPEN) {
          ws.send(JSON.stringify({ type: 'stop' }));
          ws.close();
        } else if (ws.readyState === WebSocket.CONNECTING) {
          // Abort the pending handshake; otherwise the socket would open
          // later and stay connected with nobody owning it.
          ws.close();
        }
      } catch (_) {}
    }

    const playbackContext = this.playbackContext;
    if (playbackContext) {
      this.resetPlaybackQueue();
      this.playbackContext = null;
      await playbackContext.close().catch(() => {});
    }
    this.playbackTime = 0;
    this.pendingSamples = [];
    this.emitConnectionState('disconnected');
  }

  /**
   * Registers the single callback for an event; unknown event names are ignored.
   *
   * @param {string} event - One of the keys of `this.callbacks`.
   * @param {Function} callback - Replaces any previously registered callback.
   */
  on(event, callback) {
    if (event in this.callbacks) {
      this.callbacks[event] = callback;
    }
  }

  /**
   * Removes the callback of an event; unknown event names are ignored.
   *
   * @param {string} event - One of the keys of `this.callbacks`.
   */
  off(event) {
    if (event in this.callbacks) {
      this.callbacks[event] = null;
    }
  }
}
|
||||
|
||||
const nativeVoiceService = new NativeVoiceService();
|
||||
export default nativeVoiceService;
|
||||
@@ -1,323 +0,0 @@
|
||||
/**
|
||||
* 火山引擎 RTC SDK 封装
|
||||
* 负责 WebRTC 音频流的建立和管理
|
||||
*/
|
||||
class RTCService {
|
||||
constructor() {
|
||||
this.engine = null;
|
||||
this.joined = false;
|
||||
this.callbacks = {
|
||||
onSubtitle: null,
|
||||
onAudioStatus: null,
|
||||
onConnectionStateChange: null,
|
||||
onError: null,
|
||||
onUserJoined: null,
|
||||
onUserLeft: null,
|
||||
onToolCall: null,
|
||||
onRoomMessage: null,
|
||||
};
|
||||
}
|
||||
|
||||
async init(appId) {
|
||||
if (this.engine) {
|
||||
this.destroy();
|
||||
}
|
||||
|
||||
try {
|
||||
const VERTC = await import('@volcengine/rtc');
|
||||
const createEngine = VERTC.default?.createEngine || VERTC.createEngine;
|
||||
const events = VERTC.default?.events || VERTC.events;
|
||||
|
||||
if (!createEngine) {
|
||||
throw new Error('Failed to load RTC SDK: createEngine not found');
|
||||
}
|
||||
|
||||
this.engine = createEngine(appId);
|
||||
this.events = events;
|
||||
|
||||
this.engine.on(events.onConnectionStateChanged, (state) => {
|
||||
console.log('[RTC] Connection state:', state);
|
||||
this.callbacks.onConnectionStateChange?.(state);
|
||||
});
|
||||
|
||||
if (events.onSubtitleStateChanged) {
|
||||
this.engine.on(events.onSubtitleStateChanged, (state) => {
|
||||
console.log('[RTC] Subtitle state changed:', state);
|
||||
});
|
||||
}
|
||||
|
||||
if (events.onSubtitleMessageReceived) {
|
||||
this.engine.on(events.onSubtitleMessageReceived, (subtitles) => {
|
||||
console.log('[RTC] Subtitle received:', subtitles.length, 'items');
|
||||
subtitles.forEach((sub) => {
|
||||
// bot 的 userId 以 'bot_' 开头,无 userId 或 bot_ 开头都是 assistant
|
||||
const isBot = !sub.userId || sub.userId.startsWith('bot_');
|
||||
this.callbacks.onSubtitle?.({
|
||||
text: sub.text,
|
||||
role: isBot ? 'assistant' : 'user',
|
||||
isFinal: sub.definite,
|
||||
sequence: sub.sequence,
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
this.engine.on(events.onUserJoined, (info) => {
|
||||
console.log('[RTC] User joined:', info.userInfo?.userId);
|
||||
this.callbacks.onUserJoined?.(info);
|
||||
});
|
||||
|
||||
this.engine.on(events.onUserLeave, (info) => {
|
||||
console.log('[RTC] User left:', info.userInfo?.userId);
|
||||
this.callbacks.onUserLeft?.(info);
|
||||
});
|
||||
|
||||
this.engine.on(events.onError, (error) => {
|
||||
console.error('[RTC] Error:', error);
|
||||
this.callbacks.onError?.(error);
|
||||
});
|
||||
|
||||
// === Function Calling: 监听房间消息(SDK 回调参数是单个 event 对象) ===
|
||||
if (events.onRoomBinaryMessageReceived) {
|
||||
this.engine.on(events.onRoomBinaryMessageReceived, (event) => {
|
||||
try {
|
||||
const uid = event.uid || event.userId || 'unknown';
|
||||
const raw = event.message;
|
||||
const text = typeof raw === 'string' ? raw : new TextDecoder().decode(raw);
|
||||
console.log('[RTC][FC] Room binary from', uid, ':', text.substring(0, 500));
|
||||
this.callbacks.onRoomMessage?.({ uid, text });
|
||||
const parsed = JSON.parse(text);
|
||||
this._handleRoomMessage(uid, parsed);
|
||||
} catch (e) {
|
||||
console.log('[RTC][FC] Room binary (non-JSON):', e.message);
|
||||
}
|
||||
});
|
||||
}
|
||||
if (events.onRoomMessageReceived) {
|
||||
this.engine.on(events.onRoomMessageReceived, (event) => {
|
||||
const uid = event.uid || event.userId || 'unknown';
|
||||
const msg = event.message || '';
|
||||
console.log('[RTC][FC] Room text from', uid, ':', String(msg).substring(0, 500));
|
||||
this.callbacks.onRoomMessage?.({ uid, text: String(msg) });
|
||||
try {
|
||||
const parsed = JSON.parse(msg);
|
||||
this._handleRoomMessage(uid, parsed);
|
||||
} catch (e) {
|
||||
console.log('[RTC][FC] Room text (non-JSON):', e.message);
|
||||
}
|
||||
});
|
||||
}
|
||||
if (events.onUserBinaryMessageReceived) {
|
||||
this.engine.on(events.onUserBinaryMessageReceived, (event) => {
|
||||
try {
|
||||
const uid = event.uid || event.userId || 'unknown';
|
||||
const raw = event.message;
|
||||
const text = typeof raw === 'string' ? raw : new TextDecoder().decode(raw);
|
||||
console.log('[RTC][FC] User binary from', uid, ':', text.substring(0, 500));
|
||||
this.callbacks.onRoomMessage?.({ uid, text });
|
||||
const parsed = JSON.parse(text);
|
||||
this._handleRoomMessage(uid, parsed);
|
||||
} catch (e) {
|
||||
console.log('[RTC][FC] User binary (non-JSON):', e.message);
|
||||
}
|
||||
});
|
||||
}
|
||||
if (events.onUserMessageReceived) {
|
||||
this.engine.on(events.onUserMessageReceived, (event) => {
|
||||
const uid = event.uid || event.userId || 'unknown';
|
||||
const msg = event.message || '';
|
||||
console.log('[RTC][FC] User text from', uid, ':', String(msg).substring(0, 500));
|
||||
this.callbacks.onRoomMessage?.({ uid, text: String(msg) });
|
||||
try {
|
||||
const parsed = JSON.parse(msg);
|
||||
this._handleRoomMessage(uid, parsed);
|
||||
} catch (e) {
|
||||
console.log('[RTC][FC] User text (non-JSON):', e.message);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// === 诊断事件 ===
|
||||
if (events.onUserPublishStream) {
|
||||
this.engine.on(events.onUserPublishStream, (info) => {
|
||||
console.log('[RTC][DIAG] Remote user published stream:', info.userId, 'mediaType:', info.mediaType);
|
||||
});
|
||||
}
|
||||
if (events.onUserUnpublishStream) {
|
||||
this.engine.on(events.onUserUnpublishStream, (info) => {
|
||||
console.log('[RTC][DIAG] Remote user unpublished stream:', info.userId, 'mediaType:', info.mediaType);
|
||||
});
|
||||
}
|
||||
if (events.onAutoplayFailed) {
|
||||
this.engine.on(events.onAutoplayFailed, (info) => {
|
||||
console.error('[RTC][DIAG] ❌ Autoplay FAILED! Audio blocked by browser:', info);
|
||||
});
|
||||
}
|
||||
if (events.onPlayerEvent) {
|
||||
this.engine.on(events.onPlayerEvent, (info) => {
|
||||
console.log('[RTC][DIAG] Player event:', info);
|
||||
});
|
||||
}
|
||||
if (events.onRemoteStreamStats) {
|
||||
this.engine.on(events.onRemoteStreamStats, (stats) => {
|
||||
if (stats.audioRecvBytes > 0) {
|
||||
console.log('[RTC][DIAG] Receiving audio from:', stats.uid, 'bytes:', stats.audioRecvBytes);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// 启用音频属性报告,检测是否有远端音频
|
||||
try {
|
||||
this.engine.enableAudioPropertiesReport?.({ interval: 3000 });
|
||||
if (events.onRemoteAudioPropertiesReport) {
|
||||
this.engine.on(events.onRemoteAudioPropertiesReport, (infos) => {
|
||||
infos?.forEach((info) => {
|
||||
if (info.audioPropertiesInfo?.linearVolume > 0) {
|
||||
console.log('[RTC][DIAG] 🔊 Remote audio detected! user:', info.streamKey?.userId, 'volume:', info.audioPropertiesInfo.linearVolume);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
if (events.onLocalAudioPropertiesReport) {
|
||||
this.engine.on(events.onLocalAudioPropertiesReport, (infos) => {
|
||||
infos?.forEach((info) => {
|
||||
if (info.audioPropertiesInfo?.linearVolume > 0) {
|
||||
console.log('[RTC][DIAG] 🎤 Local mic active, volume:', info.audioPropertiesInfo.linearVolume);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn('[RTC][DIAG] enableAudioPropertiesReport not available:', e.message);
|
||||
}
|
||||
|
||||
console.log('[RTC] Engine initialized with diagnostic listeners');
|
||||
console.log('[RTC] Available events:', Object.keys(events).filter(k => k.startsWith('on')).join(', '));
|
||||
return true;
|
||||
} catch (error) {
|
||||
console.error('[RTC] Init failed:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async joinRoom(roomId, userId, token) {
|
||||
if (!this.engine) throw new Error('Engine not initialized');
|
||||
|
||||
await this.engine.joinRoom(
|
||||
token,
|
||||
roomId,
|
||||
{ userId },
|
||||
{
|
||||
isAutoPublish: true,
|
||||
isAutoSubscribeAudio: true,
|
||||
isAutoSubscribeVideo: false,
|
||||
}
|
||||
);
|
||||
|
||||
await this.engine.startAudioCapture();
|
||||
|
||||
// 激活字幕接收(必须在 joinRoom 之后调用)
|
||||
try {
|
||||
await this.engine.startSubtitle({});
|
||||
console.log('[RTC] Subtitle enabled');
|
||||
} catch (e) {
|
||||
console.warn('[RTC] startSubtitle failed:', e.message || e);
|
||||
}
|
||||
|
||||
this.joined = true;
|
||||
console.log(`[RTC] Joined room ${roomId} as ${userId}`);
|
||||
}
|
||||
|
||||
async leaveRoom() {
|
||||
if (!this.engine || !this.joined) return;
|
||||
try {
|
||||
await this.engine.stopAudioCapture();
|
||||
await this.engine.leaveRoom();
|
||||
this.joined = false;
|
||||
console.log('[RTC] Left room');
|
||||
} catch (e) {
|
||||
console.warn('[RTC] Leave room error:', e);
|
||||
}
|
||||
}
|
||||
|
||||
async setMuted(muted) {
|
||||
if (!this.engine) return;
|
||||
if (muted) {
|
||||
await this.engine.stopAudioCapture();
|
||||
} else {
|
||||
await this.engine.startAudioCapture();
|
||||
}
|
||||
}
|
||||
|
||||
_handleRoomMessage(uid, parsed) {
|
||||
console.log('[RTC][FC] Parsed message type:', parsed.type || parsed.event || 'unknown', 'from:', uid);
|
||||
|
||||
// 尝试多种可能的 tool call 消息格式
|
||||
let toolCalls = null;
|
||||
|
||||
// 格式1: { type: "function_call", data: { tool_calls: [...] } }
|
||||
if (parsed.type === 'function_call' && parsed.data?.tool_calls) {
|
||||
toolCalls = parsed.data.tool_calls;
|
||||
}
|
||||
// 格式2: { event: "function_call", tool_calls: [...] }
|
||||
else if (parsed.event === 'function_call' && parsed.tool_calls) {
|
||||
toolCalls = parsed.tool_calls;
|
||||
}
|
||||
// 格式3: { type: "conversation", data: { event: "function_call", ... } }
|
||||
else if (parsed.type === 'conversation' && parsed.data?.event === 'function_call') {
|
||||
toolCalls = parsed.data.tool_calls || [parsed.data];
|
||||
}
|
||||
// 格式4: 直接是 tool_calls 数组
|
||||
else if (parsed.tool_calls) {
|
||||
toolCalls = parsed.tool_calls;
|
||||
}
|
||||
// 格式5: 单个 function_call 对象
|
||||
else if (parsed.function?.name || parsed.function_name) {
|
||||
toolCalls = [parsed];
|
||||
}
|
||||
|
||||
if (toolCalls && toolCalls.length > 0) {
|
||||
console.log('[RTC][FC] ✅ Tool calls detected:', toolCalls.length);
|
||||
toolCalls.forEach((tc) => {
|
||||
const callId = tc.id || tc.tool_call_id || `tc_${Date.now()}`;
|
||||
const funcName = tc.function?.name || tc.function_name || 'unknown';
|
||||
const args = tc.function?.arguments || tc.arguments || '{}';
|
||||
console.log(`[RTC][FC] Tool call: ${funcName}(${args}), id=${callId}`);
|
||||
this.callbacks.onToolCall?.({ tool_call_id: callId, function_name: funcName, arguments: args });
|
||||
});
|
||||
} else {
|
||||
console.log('[RTC][FC] Message is not a tool call, full payload:', JSON.stringify(parsed).substring(0, 300));
|
||||
}
|
||||
}
|
||||
|
||||
on(event, callback) {
|
||||
if (event in this.callbacks) {
|
||||
this.callbacks[event] = callback;
|
||||
}
|
||||
}
|
||||
|
||||
off(event) {
|
||||
if (event in this.callbacks) {
|
||||
this.callbacks[event] = null;
|
||||
}
|
||||
}
|
||||
|
||||
destroy() {
|
||||
if (this.engine) {
|
||||
try {
|
||||
if (this.joined) {
|
||||
this.engine.stopAudioCapture().catch(() => {});
|
||||
this.engine.leaveRoom().catch(() => {});
|
||||
}
|
||||
this.engine.destroyEngine?.();
|
||||
} catch (e) {
|
||||
console.warn('[RTC] Destroy error:', e);
|
||||
}
|
||||
this.engine = null;
|
||||
this.joined = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const rtcService = new RTCService();
|
||||
export default rtcService;
|
||||
@@ -1,7 +1,27 @@
|
||||
import axios from 'axios';
|
||||
|
||||
/**
 * Work out the base URL for an API client.
 *
 * An explicitly configured value always wins. Outside a browser the relative
 * path is returned. In a browser, a page opened from `file:` or served from
 * localhost / 127.0.0.1 on any port other than 3012 is pointed directly at
 * port 3012 on the same host; every other page keeps the relative path.
 *
 * @param {string|undefined} configured - Base URL from configuration, if any.
 * @param {string} path - Relative API path such as `/api/voice`.
 * @returns {string} Base URL to hand to the HTTP client.
 */
function resolveApiBaseURL(configured, path) {
  if (configured) return configured;
  if (typeof window === 'undefined') return path;

  const { hostname, port, protocol: pageProtocol } = window.location;
  const servedFromLoopback = ['localhost', '127.0.0.1'].includes(hostname);
  const needsDirectBackend = (pageProtocol === 'file:' || servedFromLoopback) && port !== '3012';
  if (!needsDirectBackend) return path;

  // file: pages have an empty hostname and no usable scheme, so fall back to
  // http and 127.0.0.1.
  const scheme = pageProtocol === 'https:' ? 'https:' : 'http:';
  return `${scheme}//${hostname || '127.0.0.1'}:3012${path}`;
}
|
||||
|
||||
const voiceApiBaseURL = resolveApiBaseURL(import.meta.env.VITE_VOICE_API_BASE_URL, '/api/voice');
|
||||
const sessionApiBaseURL = resolveApiBaseURL(import.meta.env.VITE_SESSION_API_BASE_URL, '/api/session');
|
||||
|
||||
// Axios client for the voice endpoints. The base URL comes from
// resolveApiBaseURL; the stale hard-coded '/api/voice' entry was a duplicate
// key that the later `baseURL` silently overrode, so it is removed.
const api = axios.create({
  baseURL: voiceApiBaseURL,
  timeout: 10000,
});
|
||||
|
||||
@@ -10,64 +30,9 @@ export async function getVoiceConfig() {
|
||||
return data.data;
|
||||
}
|
||||
|
||||
/**
 * POST the given parameters to `/prepare` on the voice API.
 *
 * @param {object} params - Request body, sent as-is.
 * @returns {Promise<*>} The response body.
 */
export async function prepareVoiceChat(params) {
  const response = await api.post('/prepare', params);
  return response.data;
}
|
||||
|
||||
/**
 * POST the given parameters to `/start` on the voice API.
 *
 * @param {object} params - Request body, sent as-is.
 * @returns {Promise<*>} The response body.
 */
export async function startVoiceChat(params) {
  const response = await api.post('/start', params);
  return response.data;
}
|
||||
|
||||
/**
 * POST `{ sessionId }` to `/stop` on the voice API.
 *
 * @param {string} sessionId - Session to stop.
 * @returns {Promise<*>} The response body.
 */
export async function stopVoiceChat(sessionId) {
  const payload = { sessionId };
  const response = await api.post('/stop', payload);
  return response.data;
}
|
||||
|
||||
/**
 * POST the given parameters to `/subtitle` on the voice API.
 *
 * @param {object} params - Request body, sent as-is.
 * @returns {Promise<*>} The response body.
 */
export async function sendSubtitle(params) {
  const response = await api.post('/subtitle', params);
  return response.data;
}
|
||||
|
||||
/**
 * GET `/subtitles/:sessionId` from the voice API.
 *
 * @param {string} sessionId - Session whose subtitles are requested.
 * @returns {Promise<*>} The `data` field of the response body.
 */
export async function getSubtitles(sessionId) {
  // Encode the id so characters such as "/" or "?" cannot change which route
  // is hit; ids made of unreserved characters are left untouched.
  const { data } = await api.get(`/subtitles/${encodeURIComponent(sessionId)}`);
  return data.data;
}
|
||||
|
||||
/**
 * GET `/sessions` from the voice API.
 *
 * @returns {Promise<*>} The `data` field of the response body.
 */
export async function getActiveSessions() {
  const response = await api.get('/sessions');
  return response.data.data;
}
|
||||
|
||||
/**
 * POST a room message to `/room_message` on the voice API.
 *
 * @param {{roomId: *, uid: *, text: *}} message - Only these three fields are sent.
 * @returns {Promise<*>} The response body.
 */
export async function forwardRoomMessage({ roomId, uid, text }) {
  const payload = { roomId, uid, text };
  const response = await api.post('/room_message', payload);
  return response.data;
}
|
||||
|
||||
/**
 * POST a tool-call result request to `/tool-callback` on the voice API,
 * mapping camelCase inputs onto the snake_case keys of the request body.
 *
 * @param {{sessionId: *, toolCallId: *, functionName: *, arguments: *}} call
 * @returns {Promise<*>} The response body.
 */
export async function executeToolCall({ sessionId, toolCallId, functionName, arguments: args }) {
  const payload = {
    sessionId,
    tool_call_id: toolCallId,
    function_name: functionName,
    arguments: args,
  };
  const response = await api.post('/tool-callback', payload);
  return response.data;
}
|
||||
|
||||
/**
 * POST a function-calling callback to `/fc_callback` on the voice API,
 * mapping camelCase inputs onto the PascalCase keys of the request body.
 *
 * @param {{roomId: *, taskId: *, type: *, message: *}} callback
 * @returns {Promise<*>} The response body.
 */
export async function executeFcCallback({ roomId, taskId, type, message }) {
  const payload = {
    RoomID: roomId,
    TaskID: taskId,
    Type: type,
    Message: message,
  };
  const response = await api.post('/fc_callback', payload);
  return response.data;
}
|
||||
|
||||
// ========== Session history API ==========
// Axios client for the session endpoints. The base URL comes from
// resolveApiBaseURL; the stale hard-coded '/api/session' entry was a duplicate
// key that the later `baseURL` silently overrode, so it is removed.
const sessionApi = axios.create({
  baseURL: sessionApiBaseURL,
  timeout: 10000,
});
|
||||
|
||||
@@ -80,3 +45,15 @@ export async function switchSessionMode(sessionId, targetMode) {
|
||||
const { data } = await sessionApi.post(`/${sessionId}/switch`, { targetMode });
|
||||
return data.data;
|
||||
}
|
||||
|
||||
/**
 * GET `/list` from the session API.
 *
 * @param {string} [userId] - When truthy, sent as the `userId` query parameter.
 * @param {number} [limit=50] - Sent as the `limit` query parameter.
 * @returns {Promise<*>} The `data` field of the response body.
 */
export async function getSessionList(userId, limit = 50) {
  const params = userId ? { limit, userId } : { limit };
  const response = await sessionApi.get('/list', { params });
  return response.data.data;
}
|
||||
|
||||
/**
 * DELETE `/:sessionId` on the session API.
 *
 * @param {string} sessionId - Session to delete.
 * @returns {Promise<*>} The response body.
 */
export async function deleteSessionById(sessionId) {
  // Encode the id so characters such as "/" or "?" cannot redirect the
  // DELETE to a different route; ordinary ids are left untouched.
  const { data } = await sessionApi.delete(`/${encodeURIComponent(sessionId)}`);
  return data;
}
|
||||
|
||||
@@ -2,6 +2,8 @@ import { defineConfig } from 'vite';
|
||||
import react from '@vitejs/plugin-react';
|
||||
import tailwindcss from '@tailwindcss/vite';
|
||||
|
||||
const backendTarget = 'http://localhost:3012';
|
||||
|
||||
export default defineConfig({
|
||||
plugins: [react(), tailwindcss()],
|
||||
build: {
|
||||
@@ -12,9 +14,14 @@ export default defineConfig({
|
||||
port: 5174,
|
||||
proxy: {
|
||||
'/api': {
|
||||
target: 'http://localhost:3012',
|
||||
target: backendTarget,
|
||||
changeOrigin: true,
|
||||
},
|
||||
'/ws': {
|
||||
target: backendTarget,
|
||||
changeOrigin: true,
|
||||
ws: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,2 @@
|
||||
|
||||
d380d7e2af5a2cf37ae5fb72fa007f7001f06957 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/aws-ssl-profiles","integrity":"sha512-Dp3ybUiE5jFOgaUXSH6uSt5UBNRu53rJzGSc5V0hYu+SlArRvtlRcIuxQwEUZzP5eZuRdj2n85J173YdglI2FA==","time":1773315323018,"size":13795,"metadata":{"time":1773315323014,"url":"https://mirrors.huaweicloud.com/repository/npm/aws-ssl-profiles","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:22 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
523af94e047b9947a5d9046504c4d65982aa50cb {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/denque","integrity":"sha512-YeSRmZLQw/XL64foX/5qPuvRrPz57yqZyjeswq75g7ZWOk4ZcQANU8jCsNz7CI4THNvPZz4i9L6m1M6HvrYsHQ==","time":1773315323182,"size":63388,"metadata":{"time":1773315323149,"url":"https://mirrors.huaweicloud.com/repository/npm/denque","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:22 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
c15b27837b2670e24538cb58617684fe9d5bbd22 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/iconv-lite","integrity":"sha512-jDTN0+qwg4vqqJgB5O+hnRyWt23NuulJhNAKpR99axqdgEA2zZMjJlHpOcgRmNzwPAWE+Xe5oM3XeP6mS719hQ==","time":1773315323255,"size":121657,"metadata":{"time":1773315323179,"url":"https://mirrors.huaweicloud.com/repository/npm/iconv-lite","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:22 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
0b9dce51067b606f9877acebc96cb272050dc55e {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/denque/-/denque-2.1.0.tgz","integrity":"sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw==","time":1773315323515,"size":9230,"metadata":{"time":1773315323511,"url":"https://mirrors.huaweicloud.com/repository/npm/denque/-/denque-2.1.0.tgz","reqHeaders":{},"resHeaders":{"content-type":"application/octet-stream;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","etag":"e93e1a6569fb5e66f16a3c2a2964617d349d6ab1","last-modified":"Wed, 28 Feb 2024 01:18:45 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
9df2b0ba2174f6b92afc09549ac0d8987c9d910a {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/lru.min/-/lru.min-1.1.4.tgz","integrity":"sha512-DqC6n3QQ77zdFpCMASA1a3Jlb64Hv2N2DciFGkO/4L9+q/IpIAuRlKOvCXabtRW6cQf8usbmM6BE/TOPysCdIA==","time":1773315323444,"size":8626,"metadata":{"time":1773315323428,"url":"https://mirrors.huaweicloud.com/repository/npm/lru.min/-/lru.min-1.1.4.tgz","reqHeaders":{},"resHeaders":{"content-type":"application/octet-stream;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","etag":"6ea1737a8c1ba2300cc87ad46910a4bdffa0117b","last-modified":"Mon, 02 Feb 2026 15:18:20 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
9cc8aceba6a475afd3466dca3f0b3e1ba0338d84 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/iconv-lite/-/iconv-lite-0.7.2.tgz","integrity":"sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==","time":1773315323495,"size":189646,"metadata":{"time":1773315323415,"url":"https://mirrors.huaweicloud.com/repository/npm/iconv-lite/-/iconv-lite-0.7.2.tgz","reqHeaders":{},"resHeaders":{"content-type":"application/octet-stream;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","etag":"d0bdeac3f12b4835b7359c2ad89c422a4d1cc72e","last-modified":"Thu, 08 Jan 2026 17:02:50 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
6c98f85e3f86d5d22864e82a02bcdc1483e069f6 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/lru.min","integrity":"sha512-Xss5D+wiUWQr8ir6cZWg/PG3Y4EgY2CUNYzsbgytaxPOaEg+ci6A/R1dBFSpKkng+sBNqFNahdiQgpe74iCXBA==","time":1773315323202,"size":51877,"metadata":{"time":1773315323167,"url":"https://mirrors.huaweicloud.com/repository/npm/lru.min","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:22 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
946e347b3448945cf9d4a6becc013a204d434233 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/mysql2","integrity":"sha512-fSScsZIUl0lj2PyxZOM/iPrOXCjlf5zDGq3Ct0JQ+X5JSEhnyxsDR6I9drGS1dKl2URzmM2OA6ILP9EPwcQpWA==","time":1773315322644,"size":758151,"metadata":{"time":1773315322523,"url":"https://mirrors.huaweicloud.com/repository/npm/mysql2","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:22 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
529da59f3bfb7b44783459679363d30170fd0b5f {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/bufferutil","integrity":"sha512-EHNCh06G/GWJotPm+RaTGPlE+YaZLf9f4qsKPqhpW4VMiMUDyjipSAz1clv7M1oCDBAY9FyAPZx7IHo/iaoLVA==","time":1773314267671,"size":53283,"metadata":{"time":1773314267642,"url":"https://mirrors.huaweicloud.com/repository/npm/bufferutil","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:17:47 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
91557f40d1cc51be00cc8c0eed8b873898f2ffe8 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/is-property","integrity":"sha512-H4Y+4gzfCYLg0qInL9OqjBlUH/U3aEfYi6tVbV0blnSu9YYdbQNt76cKnrYhcb4KiAJBEHX6byCA7YGrGXIIBA==","time":1773315323327,"size":6825,"metadata":{"time":1773315323323,"url":"https://mirrors.huaweicloud.com/repository/npm/is-property","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
13f57b49cde67082488d9c19ce63f0d975031a2a {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/named-placeholders","integrity":"sha512-xYdG5fFp1H5E0h982HJCwrsdeRHmDEl99gzB4DSWINIs+Tcu+3i1jLEoY9G5D8XTQPO3B7fqtbC2wkdsGR2OBw==","time":1773315323209,"size":20578,"metadata":{"time":1773315323206,"url":"https://mirrors.huaweicloud.com/repository/npm/named-placeholders","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
72e43ad78cf05a16c0863d15d771c3fc91c6c038 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/generate-function","integrity":"sha512-elVGfZkmdRyDOMf/H0E9igeS8N5ogO6uKI1StYwtcKzh92IIQztJEJeu/uYUrvlRLZlrKpOlsP7zZlJcG0vq8w==","time":1773315323168,"size":22510,"metadata":{"time":1773315323164,"url":"https://mirrors.huaweicloud.com/repository/npm/generate-function","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:22 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
3710d4186e7ac1e7bea33b7ff78d22986d105289 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/sql-escaper","integrity":"sha512-s5ok74jC+GBPuFzPx8qE9Jz72t8TlBl++ZNr6DstRv8DH7ztl/0mksx42A9EJE0m/3qosUTZvGNRVfyeTxx/Zw==","time":1773315323256,"size":47552,"metadata":{"time":1773315323214,"url":"https://mirrors.huaweicloud.com/repository/npm/sql-escaper","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
145e11a4089bbc5c7cdb4b968d3d5000c701c08c {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/aws-ssl-profiles/-/aws-ssl-profiles-1.1.2.tgz","integrity":"sha512-NZKeq9AfyQvEeNlN0zSYAaWrmBffJh3IELMZfRpJVWgrpEbtEpnjvzqBPf+mxoI287JohRDoa+/nsfqqiZmF6g==","time":1773315323612,"size":89249,"metadata":{"time":1773315323537,"url":"https://mirrors.huaweicloud.com/repository/npm/aws-ssl-profiles/-/aws-ssl-profiles-1.1.2.tgz","reqHeaders":{},"resHeaders":{"content-type":"application/octet-stream;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","etag":"157dd77e9f19b1d123678e93f120e6f193022641","last-modified":"Fri, 30 Aug 2024 02:13:10 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
8c0afa1d534b76565b3ce13fc19320529753c898 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/ws/-/ws-8.19.0.tgz","integrity":"sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==","time":1773314272433,"size":34331,"metadata":{"time":1773314272421,"url":"https://mirrors.huaweicloud.com/repository/npm/ws/-/ws-8.19.0.tgz","reqHeaders":{},"resHeaders":{"content-type":"application/octet-stream;charset=UTF-8","date":"Thu, 12 Mar 2026 11:17:52 GMT","etag":"ddc2bdfa5b9ad860204f5a72a4863a8895fd8c8b","last-modified":"Mon, 05 Jan 2026 21:02:15 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
0719793954554778f00efe94abdae4e2992fd16a {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/mysql2/-/mysql2-3.19.1.tgz","integrity":"sha512-yn4zh+Uxu5J3Zvi6Ao96lJ7BSBRkspHflWQAmOPND+htbpIKDQw99TTvPzgihKO/QyMickZopO4OsnixnpcUwA==","time":1773315323682,"size":130520,"metadata":{"time":1773315323528,"url":"https://mirrors.huaweicloud.com/repository/npm/mysql2/-/mysql2-3.19.1.tgz","reqHeaders":{},"resHeaders":{"content-type":"application/octet-stream;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","etag":"1e9c88646cb2f0cb3d8df6ce56b8f4d6cbf6b013","last-modified":"Mon, 09 Mar 2026 17:42:22 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
095e8cbec77796220635fc36481959461af2e52d {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/sql-escaper/-/sql-escaper-1.3.3.tgz","integrity":"sha512-BsTCV265VpTp8tm1wyIm1xqQCS+Q9NHx2Sr+WcnUrgLrQ6yiDIvHYJV5gHxsj1lMBy2zm5twLaZao8Jd+S8JJw==","time":1773315323445,"size":12353,"metadata":{"time":1773315323423,"url":"https://mirrors.huaweicloud.com/repository/npm/sql-escaper/-/sql-escaper-1.3.3.tgz","reqHeaders":{},"resHeaders":{"content-type":"application/octet-stream;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","etag":"65faf89f048d26bb9a75566b82b5990ddf8a5b7f","last-modified":"Mon, 16 Feb 2026 18:49:06 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
4c24c6fd8e8692d004ab3493a5ca8348d53df454 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/@types%2fnode","integrity":"sha512-9yYGkoWM0dli7tnk8CE6olhmbmPW6hQYYKbFKA1geKzf/G/t1gdE6tjIHBgr/Dm0OoKsA1iPtRl2Nl+XV8IdRw==","time":1773315322889,"size":10982588,"metadata":{"time":1773315322693,"url":"https://mirrors.huaweicloud.com/repository/npm/@types%2fnode","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:22 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
24e11487eb9e52d783cd272764cbc2b31500744f {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/is-property/-/is-property-1.0.2.tgz","integrity":"sha512-Ks/IoX00TtClbGQr4TWXemAnktAQvYB7HzcCxDGqEZU6oCmb2INHuOoKxbtR+HFkmYWBKv/dOZtGRiAjDhj92g==","time":1773315323442,"size":4498,"metadata":{"time":1773315323420,"url":"https://mirrors.huaweicloud.com/repository/npm/is-property/-/is-property-1.0.2.tgz","reqHeaders":{},"resHeaders":{"content-type":"application/octet-stream;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","etag":"57fe1c4e48474edd65b09911f26b1cd4095dda84","last-modified":"Thu, 14 Mar 2024 11:18:08 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
f5d722a3e669f6421fcb2d510b346f7d7552c578 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/named-placeholders/-/named-placeholders-1.1.6.tgz","integrity":"sha512-Tz09sEL2EEuv5fFowm419c1+a/jSMiBjI9gHxVLrVdbUkkNUUfjsVYs9pVZu5oCon/kmRh9TfLEObFtkVxmY0w==","time":1773315323449,"size":3023,"metadata":{"time":1773315323431,"url":"https://mirrors.huaweicloud.com/repository/npm/named-placeholders/-/named-placeholders-1.1.6.tgz","reqHeaders":{},"resHeaders":{"content-type":"application/octet-stream;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","etag":"c50c6920b43f258f59c16add1e56654f5cc02bb5","last-modified":"Mon, 15 Dec 2025 22:47:01 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
815364e769fce124ed3801d5cc933679ccc2b91d {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/utf-8-validate","integrity":"sha512-HflRcSvCWW6RgwBBiKhfSBVPc1zsljnyaO02qH08yGTd36QZgm4Uc1Wm19WsvzSjU9AiF0PJJfCAAq4SPEgWMQ==","time":1773314271540,"size":77828,"metadata":{"time":1773314271508,"url":"https://mirrors.huaweicloud.com/repository/npm/utf-8-validate","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:17:51 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
6ea075ce738ef1c3d4da26bcd7ef589b28bb7c68 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/ws","integrity":"sha512-zIEV8YW1QKLailWq2/C407++FibOaxFyk8xex06aEXAMnOAGcY1Ld8UJZC/P4Aw0mZ2lADu9u+k/NWdOM1oyBw==","time":1773314262933,"size":416767,"metadata":{"time":1773314262844,"url":"https://mirrors.huaweicloud.com/repository/npm/ws","reqHeaders":{"accept":"application/json"},"resHeaders":{"content-type":"application/json;charset=UTF-8","date":"Thu, 12 Mar 2026 11:17:42 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -0,0 +1,2 @@
|
||||
|
||||
97ac0a839d7c62d69ee4d9669585b301cefa0222 {"key":"make-fetch-happen:request-cache:https://mirrors.huaweicloud.com/repository/npm/generate-function/-/generate-function-2.3.1.tgz","integrity":"sha512-eeB5GfMNeevm/GRYq20ShmsaGcmI81kIX2K9XQx5miC8KdHaC6Jm0qQ8ZNeGOi7wYB8OsdxKs+Y2oVuTFuVwKQ==","time":1773315323449,"size":3632,"metadata":{"time":1773315323433,"url":"https://mirrors.huaweicloud.com/repository/npm/generate-function/-/generate-function-2.3.1.tgz","reqHeaders":{},"resHeaders":{"content-type":"application/octet-stream;charset=UTF-8","date":"Thu, 12 Mar 2026 11:35:23 GMT","etag":"f069617690c10c868e73b8465746764f97c3479f","last-modified":"Thu, 14 Mar 2024 11:18:08 GMT","vary":"Origin, Access-Control-Request-Method, Access-Control-Request-Headers"},"options":{"compress":true}}}
|
||||
@@ -1,4 +1,5 @@
|
||||
require('dotenv').config();
|
||||
const http = require('http');
|
||||
const express = require('express');
|
||||
const cors = require('cors');
|
||||
const path = require('path');
|
||||
@@ -6,14 +7,11 @@ const db = require('./db');
|
||||
const voiceRoutes = require('./routes/voice');
|
||||
const chatRoutes = require('./routes/chat');
|
||||
const sessionRoutes = require('./routes/session');
|
||||
const { setupNativeVoiceGateway } = require('./services/nativeVoiceGateway');
|
||||
|
||||
// ========== 环境变量校验 ==========
|
||||
function validateEnv() {
|
||||
const required = [
|
||||
{ key: 'VOLC_RTC_APP_ID', desc: 'RTC 应用 ID' },
|
||||
{ key: 'VOLC_RTC_APP_KEY', desc: 'RTC 应用密钥' },
|
||||
{ key: 'VOLC_ACCESS_KEY_ID', desc: '火山引擎 AccessKey ID' },
|
||||
{ key: 'VOLC_SECRET_ACCESS_KEY', desc: '火山引擎 Secret Access Key' },
|
||||
{ key: 'VOLC_S2S_APP_ID', desc: 'S2S 端到端语音 AppID' },
|
||||
{ key: 'VOLC_S2S_TOKEN', desc: 'S2S 端到端语音 Token' },
|
||||
{ key: 'VOLC_ARK_ENDPOINT_ID', desc: '方舟 LLM 推理接入点 ID' },
|
||||
@@ -68,42 +66,11 @@ function validateEnv() {
|
||||
|
||||
// ========== Express 应用 ==========
|
||||
const app = express();
|
||||
const server = http.createServer(app);
|
||||
const PORT = process.env.PORT || 3001;
|
||||
|
||||
app.use(cors());
|
||||
|
||||
// RTC Function Calling callbacks arrive without a Content-Type header, so the
// body must be read manually before the standard body parsers run.
// Global sequence number: assigned synchronously before the body is read so
// it reflects the order in which requests actually arrived.
let fcCallbackSeq = 0;
app.post('/api/voice/fc_callback', (req, res, next) => {
  const seq = ++fcCallbackSeq; // assigned synchronously, before any async work

  if (req.headers['content-type']) {
    // NOTE(review): a body parser registered later may replace req.body and
    // drop _seq for requests that do carry a Content-Type — confirm.
    req.body = req.body || {};
    req.body._seq = seq;
    next();
    return;
  }

  const chunks = [];
  req.on('data', (chunk) => chunks.push(chunk));
  req.on('end', () => {
    const rawBuf = Buffer.concat(chunks);
    const raw = rawBuf.toString('utf-8');
    console.log(`[RawBody] seq=${seq} Read ${rawBuf.length} bytes`);

    // Append the raw content of every callback to a log file. This is
    // best-effort, but a failure is reported instead of silently dropped.
    try {
      const fs = require('fs');
      fs.appendFileSync('fc_all_callbacks.log', `\n=== SEQ=${seq} TIME=${new Date().toISOString()} BYTES=${rawBuf.length} ===\n${raw}\n`);
    } catch (e) {
      console.warn('[RawBody] Failed to append callback log:', e.message);
    }

    let body;
    try {
      body = JSON.parse(raw);
    } catch (e) {
      console.error('[RawBody] JSON parse failed:', e.message);
      body = { _raw: raw };
    }
    // Valid JSON can still be null or a primitive ("null", "42", "\"x\"").
    // Assigning _seq on null would throw inside this event handler and
    // surface as an uncaught exception, so wrap such payloads like a parse
    // failure.
    if (body === null || typeof body !== 'object') {
      body = { _raw: raw };
    }
    body._seq = seq;
    req.body = body;
    next();
  });
  req.on('error', (err) => {
    console.error('[RawBody] Error:', err.message);
    next();
  });
});
|
||||
|
||||
app.use(express.json({ limit: '1mb' }));
|
||||
app.use(express.urlencoded({ extended: true, limit: '1mb' }));
|
||||
|
||||
@@ -123,16 +90,8 @@ app.use('/api/voice', voiceRoutes);
|
||||
app.use('/api/chat', chatRoutes);
|
||||
app.use('/api/session', sessionRoutes);
|
||||
|
||||
// 静态文件服务
|
||||
app.use(express.static('../client/dist'));
|
||||
|
||||
// 处理单页应用路由
|
||||
app.get('*', (req, res) => {
|
||||
res.sendFile(path.join(__dirname, '../client/dist/index.html'));
|
||||
});
|
||||
|
||||
app.get('/api/health', (req, res) => {
|
||||
const envReady = !process.env.VOLC_RTC_APP_ID?.startsWith('your_');
|
||||
const envReady = !process.env.VOLC_S2S_APP_ID?.startsWith('your_');
|
||||
res.json({
|
||||
status: 'ok',
|
||||
mode: 's2s-hybrid',
|
||||
@@ -149,6 +108,14 @@ app.get('/api/health', (req, res) => {
|
||||
});
|
||||
});
|
||||
|
||||
// 静态文件服务
|
||||
app.use(express.static('../client/dist'));
|
||||
|
||||
// 处理单页应用路由
|
||||
app.get('*', (req, res) => {
|
||||
res.sendFile(path.join(__dirname, '../client/dist/index.html'));
|
||||
});
|
||||
|
||||
// 统一错误处理中间件
|
||||
app.use((err, req, res, _next) => {
|
||||
console.error(`[Error] ${req.method} ${req.path}:`, err.message);
|
||||
@@ -174,7 +141,14 @@ async function start() {
|
||||
console.warn('[DB] Continuing without database — context switching will use in-memory fallback');
|
||||
}
|
||||
|
||||
app.listen(PORT, () => {
|
||||
if (process.env.ENABLE_NATIVE_VOICE_GATEWAY !== 'false') {
|
||||
setupNativeVoiceGateway(server);
|
||||
console.log('[NativeVoice] Gateway enabled at /ws/realtime-dialog');
|
||||
} else {
|
||||
console.log('[NativeVoice] Gateway disabled (ENABLE_NATIVE_VOICE_GATEWAY=false)');
|
||||
}
|
||||
|
||||
server.listen(PORT, () => {
|
||||
console.log('\n========================================');
|
||||
console.log(` 🚀 Voice Chat Backend`);
|
||||
console.log(` 📡 http://localhost:${PORT}`);
|
||||
|
||||
@@ -1,172 +0,0 @@
|
||||
const { v4: uuidv4 } = require('uuid');
|
||||
|
||||
class VoiceChatConfigBuilder {
  /**
   * Build the complete StartVoiceChat configuration (S2S end-to-end speech
   * model + LLM hybrid orchestration).
   * OutputMode=1 is the hybrid mode: S2S handles ordinary dialogue, the LLM
   * handles tool calls.
   *
   * @param {object} [options] - Builder options; every field is optional.
   * @returns {{config: object, botUserId: string}} The request payload and
   *   the generated bot user id.
   */
  static build(options = {}) {
    const {
      roomId,
      taskId,
      userId,
      botName = '小智',
      systemRole = '你是一个友善的智能助手。',
      speakingStyle = '请使用温和、清晰的口吻。',
      modelVersion = '1.2.1.0',
      speaker = 'zh_female_vv_jupiter_bigtts',
      tools = [],
      llmSystemPrompt = '',
      enableWebSearch = false,
      vadEndMs = 1200,
      chatHistory = [],
    } = options;

    // A caller passing `tools: null` must not crash the builder.
    const toolList = Array.isArray(tools) ? tools : [];

    const botUserId = `bot_${uuidv4().slice(0, 8)}`;

    const providerParams = {
      app: {
        appid: process.env.VOLC_S2S_APP_ID,
        token: process.env.VOLC_S2S_TOKEN,
      },
      dialog: this._buildDialogConfig(modelVersion, botName, systemRole, speakingStyle, enableWebSearch, chatHistory),
      tts: {
        speaker: speaker,
      },
      asr: {
        extra: {
          enable_custom_vad: true,
          end_smooth_window_ms: vadEndMs,
        },
      },
    };

    // Debug switch: pure S2S (OutputMode=0) takes the LLM out of the loop.
    // The ARK endpoint is configured, so hybrid orchestration stays enabled.
    const DEBUG_PURE_S2S = false;

    const llmConfig = {
      Mode: 'ArkV3',
      EndPointId: process.env.VOLC_ARK_ENDPOINT_ID,
      MaxTokens: 1024,
      Temperature: 0.1,
      TopP: 0.3,
      SystemMessages: [llmSystemPrompt || this._buildDefaultLLMPrompt(toolList)],
      HistoryLength: 10,
      ThinkingType: 'disabled',
    };
    if (toolList.length > 0) {
      llmConfig.Tools = toolList;
    }

    // Hybrid mode: pass the recent chat history as UserPrompts context.
    if (chatHistory && chatHistory.length > 0 && !DEBUG_PURE_S2S) {
      const userPrompts = chatHistory.slice(-10).map((m) => ({
        Role: m.role === 'user' ? 'user' : 'assistant',
        Content: m.content,
      }));
      llmConfig.UserPrompts = userPrompts;
      console.log(`[VoiceChatConfig] Injected ${userPrompts.length} UserPrompts into LLMConfig`);
    }

    const config = {
      AppId: process.env.VOLC_RTC_APP_ID,
      RoomId: roomId,
      TaskId: taskId,
      AgentConfig: {
        TargetUserId: [userId],
        WelcomeMessage: `你好,我是${botName},有什么需要帮忙的吗?`,
        UserId: botUserId,
        EnableConversationStateCallback: true,
      },
      Config: {
        S2SConfig: {
          Provider: 'volcano',
          OutputMode: DEBUG_PURE_S2S ? 0 : 1,
          ProviderParams: providerParams,
        },
        // No standalone TTSConfig is set in S2S end-to-end mode.
        SubtitleConfig: {
          SubtitleMode: 1,
        },
        InterruptMode: 0,
      },
    };

    // LLMConfig is only attached in hybrid mode.
    if (!DEBUG_PURE_S2S) {
      config.Config.LLMConfig = llmConfig;

      // Function-calling callback: tool call requests are sent to this URL.
      if (toolList.length > 0) {
        const serverUrl = process.env.FC_SERVER_URL || 'https://demo.tensorgrove.com.cn/api/voice/fc_callback';
        if (!process.env.FC_SIGNATURE) {
          // NOTE(review): falling back to a well-known constant means the
          // callback signature offers no protection — set FC_SIGNATURE.
          console.warn('[VoiceChatConfig] FC_SIGNATURE is not set, using the default signature');
        }
        config.Config.FunctionCallingConfig = {
          ServerMessageUrl: serverUrl,
          ServerMessageSignature: process.env.FC_SIGNATURE || 'default_signature',
        };
        console.log(`[VoiceChatConfig] FunctionCallingConfig enabled, URL: ${serverUrl}`);
      }
    }

    console.log('[VoiceChatConfig] DEBUG_PURE_S2S:', DEBUG_PURE_S2S);
    console.log('[VoiceChatConfig] OutputMode:', config.Config.S2SConfig.OutputMode);
    console.log('[VoiceChatConfig] ProviderParams type:', typeof config.Config.S2SConfig.ProviderParams);
    console.log('[VoiceChatConfig] S2S AppId:', providerParams.app.appid);
    console.log('[VoiceChatConfig] S2S Token:', providerParams.app.token ? '***set***' : '***MISSING***');

    return { config, botUserId };
  }

  /**
   * Build the `dialog` section of the S2S provider params.
   *
   * Versions equal to 'O' or starting with '1.' get separate bot_name /
   * system_role / speaking_style fields; any other version gets a single
   * character_manifest string. Up to the last 10 history messages are
   * appended to the system role text.
   *
   * @returns {object} The dialog configuration.
   */
  static _buildDialogConfig(modelVersion, botName, systemRole, speakingStyle, enableWebSearch, chatHistory = []) {
    // Coerce so a numeric version does not crash on startsWith.
    const versionText = String(modelVersion);
    const isOSeries = versionText === 'O' || versionText.startsWith('1.');
    const dialog = {
      extra: { model: modelVersion },
    };

    let fullSystemRole = systemRole;
    if (chatHistory && chatHistory.length > 0) {
      const recent = chatHistory.slice(-10);
      const historyText = recent
        .map((m) => `${m.role === 'user' ? '用户' : '助手'}:${m.content}`)
        .join('\n');
      fullSystemRole += `\n\n## 之前的对话记录(请延续此上下文)\n${historyText}`;
      // Report what was actually injected, not the size of the full history.
      console.log(`[VoiceChatConfig] Injected ${recent.length} chat history messages into system_role`);
    }

    if (isOSeries) {
      dialog.bot_name = botName;
      dialog.system_role = fullSystemRole;
      dialog.speaking_style = speakingStyle;
    } else {
      dialog.character_manifest = `${fullSystemRole}\n你的名字是${botName}。${speakingStyle}`;
    }

    // Web search is only enabled when requested AND an API key is configured.
    if (enableWebSearch && process.env.VOLC_WEBSEARCH_API_KEY) {
      dialog.extra.enable_volc_websearch = true;
      dialog.extra.volc_websearch_api_key = process.env.VOLC_WEBSEARCH_API_KEY;
      dialog.extra.volc_websearch_type = 'web_summary';
      dialog.extra.volc_websearch_no_result_message = '抱歉,我没有查到相关信息。';
    }

    return dialog;
  }

  /**
   * Build the default LLM system prompt. Without named tools a generic
   * prompt is returned; otherwise the prompt lists the tool names and the
   * knowledge-base-first rules.
   *
   * @param {Array<object>} tools - Tool definitions; names are read from `function.name`.
   * @returns {string} The system prompt text.
   */
  static _buildDefaultLLMPrompt(tools) {
    const toolNames = tools.map((t) => t.function?.name).filter(Boolean);
    if (toolNames.length === 0) {
      return '你是一个智能助手。对于所有问题直接回答即可。';
    }
    return `你是一个企业智能客服助手。你可以使用以下工具:${toolNames.join('、')}。

## 最高优先级规则
1. 每次用户提问,你**必须**先调用 search_knowledge 工具查询知识库
2. 收到工具返回的知识库内容后,你**必须完整、详细地朗读**知识库返回的内容给用户
3. 不要省略、总结或缩写知识库的内容,要逐字朗读
4. 如果知识库没有相关内容,再用你自己的知识简洁回答
5. 如果知识库返回"未找到相关信息",直接告诉用户并提供建议`;
  }
}
|
||||
|
||||
module.exports = VoiceChatConfigBuilder;
|
||||
@@ -2,6 +2,23 @@ const mysql = require('mysql2/promise');
|
||||
|
||||
let pool = null;
|
||||
|
||||
/**
 * Add a column to a table when it is not there yet.
 *
 * The existence check uses an exact `Field = ?` comparison. A `LIKE`
 * pattern would treat the `_` in names such as `tool_name` as a
 * single-character wildcard and could be satisfied by a different column.
 *
 * @param {string} tableName - Table to inspect and alter.
 * @param {string} columnName - Column that must exist.
 * @param {string} definitionSql - Column definition placed after `ADD COLUMN`.
 * @returns {Promise<void>}
 */
async function ensureColumnExists(tableName, columnName, definitionSql) {
  // Double any backtick so the identifier cannot break out of its quoting.
  const quotedTable = `\`${String(tableName).replace(/`/g, '``')}\``;
  const [rows] = await pool.query(`SHOW COLUMNS FROM ${quotedTable} WHERE Field = ?`, [columnName]);
  if (rows.length === 0) {
    await pool.execute(`ALTER TABLE ${quotedTable} ADD COLUMN ${definitionSql}`);
  }
}
|
||||
|
||||
/**
 * Bring existing `sessions` / `messages` tables up to the current layout:
 * first re-declare the ENUM columns, then add any missing columns.
 * Statements run sequentially in the listed order.
 *
 * @returns {Promise<void>}
 */
async function migrateSchema() {
  const enumStatements = [
    "ALTER TABLE `sessions` MODIFY COLUMN `mode` ENUM('voice', 'chat') DEFAULT 'chat'",
    "ALTER TABLE `messages` MODIFY COLUMN `role` ENUM('user', 'assistant', 'tool', 'system') NOT NULL",
    "ALTER TABLE `messages` MODIFY COLUMN `source` ENUM('voice_asr', 'voice_bot', 'voice_tool', 'chat_user', 'chat_bot') NOT NULL",
  ];
  for (const statement of enumStatements) {
    await pool.execute(statement);
  }

  // [table, column, definition] triples handed to ensureColumnExists.
  const requiredColumns = [
    ['messages', 'tool_name', '`tool_name` VARCHAR(64) NULL AFTER `source`'],
    ['messages', 'meta_json', '`meta_json` JSON NULL AFTER `tool_name`'],
    ['messages', 'created_at', '`created_at` BIGINT NULL AFTER `tool_name`'],
    ['sessions', 'updated_at', '`updated_at` BIGINT NULL AFTER `created_at`'],
  ];
  for (const [table, column, definition] of requiredColumns) {
    await ensureColumnExists(table, column, definition);
  }
}
|
||||
|
||||
/**
|
||||
* 初始化 MySQL 连接池 + 自动建表
|
||||
*/
|
||||
@@ -50,12 +67,15 @@ async function initialize() {
|
||||
content TEXT NOT NULL,
|
||||
source ENUM('voice_asr', 'voice_bot', 'voice_tool', 'chat_user', 'chat_bot') NOT NULL,
|
||||
tool_name VARCHAR(64),
|
||||
meta_json JSON,
|
||||
created_at BIGINT,
|
||||
INDEX idx_session (session_id),
|
||||
INDEX idx_session_time (session_id, created_at)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
|
||||
`);
|
||||
|
||||
await migrateSchema();
|
||||
|
||||
console.log(`[DB] MySQL connected: ${dbName}, tables ready`);
|
||||
return pool;
|
||||
}
|
||||
@@ -93,22 +113,23 @@ async function getSession(sessionId) {
|
||||
|
||||
// ==================== Messages ====================
|
||||
|
||||
/**
 * Insert one message row and bump the owning session's `updated_at`.
 *
 * Empty or whitespace-only content is ignored and yields `null`.
 *
 * @param {string} sessionId - Session the message belongs to.
 * @param {string} role - Message role.
 * @param {string} content - Message text.
 * @param {string} source - Origin tag of the message.
 * @param {?string} [toolName=null] - Tool associated with the message, if any.
 * @param {*} [meta=null] - Extra metadata; serialized with JSON.stringify into `meta_json`.
 * @returns {Promise<?object>} The stored row (with `meta_json` as the serialized string), or null.
 */
async function addMessage(sessionId, role, content, source, toolName = null, meta = null) {
  if (!content || content.trim() === '') return null;
  const now = Date.now();
  const metaJson = meta == null ? null : JSON.stringify(meta);
  const [result] = await pool.execute(
    'INSERT INTO messages (session_id, role, content, source, tool_name, meta_json, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
    [sessionId, role, content, source, toolName, metaJson, now]
  );
  // Keep the session's last-activity timestamp in step with its newest message.
  await pool.execute('UPDATE sessions SET updated_at=? WHERE id=?', [now, sessionId]);
  return { id: result.insertId, session_id: sessionId, role, content, source, tool_name: toolName, meta_json: metaJson, created_at: now };
}
|
||||
|
||||
async function getMessages(sessionId, limit = 20) {
|
||||
const safeLimit = Math.max(1, Math.min(parseInt(limit) || 20, 100));
|
||||
const [rows] = await pool.query(
|
||||
'SELECT role, content, source, tool_name, created_at FROM messages WHERE session_id=? ORDER BY created_at ASC LIMIT ?',
|
||||
'SELECT role, content, source, tool_name, meta_json, created_at FROM messages WHERE session_id=? ORDER BY created_at ASC LIMIT ?',
|
||||
[sessionId, safeLimit]
|
||||
);
|
||||
return rows;
|
||||
@@ -118,7 +139,7 @@ async function getRecentMessages(sessionId, limit = 20) {
|
||||
// 获取最近 N 条,按时间正序返回
|
||||
const safeLimit = Math.max(1, Math.min(parseInt(limit) || 20, 100));
|
||||
const [rows] = await pool.query(
|
||||
`SELECT role, content, source, tool_name, created_at FROM messages
|
||||
`SELECT role, content, source, tool_name, meta_json, created_at FROM messages
|
||||
WHERE session_id=? ORDER BY created_at DESC LIMIT ?`,
|
||||
[sessionId, safeLimit]
|
||||
);
|
||||
@@ -136,6 +157,38 @@ async function getHistoryForLLM(sessionId, limit = 20) {
|
||||
.map(m => ({ role: m.role, content: m.content }));
|
||||
}
|
||||
|
||||
/**
 * List sessions ordered by `updated_at` descending, each with a preview of
 * its latest message and its message count.
 *
 * @param {?string} userId - When truthy, only this user's sessions are returned.
 * @param {number} [limit=50] - Maximum rows; clamped to the range 1..200.
 * @returns {Promise<Array<object>>} Session rows.
 */
async function getSessionList(userId, limit = 50) {
  const cappedLimit = Math.max(1, Math.min(parseInt(limit) || 50, 200));
  const filterByUser = Boolean(userId);

  // Both variants share everything except the optional WHERE clause.
  const sql = [
    'SELECT s.id, s.user_id, s.mode, s.created_at, s.updated_at,',
    '(SELECT content FROM messages WHERE session_id = s.id ORDER BY created_at DESC LIMIT 1) AS last_message,',
    '(SELECT COUNT(*) FROM messages WHERE session_id = s.id) AS message_count',
    `FROM sessions s${filterByUser ? ' WHERE s.user_id = ?' : ''} ORDER BY s.updated_at DESC LIMIT ?`,
  ].join('\n');
  const bindings = filterByUser ? [userId, cappedLimit] : [cappedLimit];

  const [sessions] = await pool.query(sql, bindings);
  return sessions;
}
|
||||
|
||||
/**
 * Delete a session together with all of its messages.
 *
 * Both deletes run inside one transaction on a single pooled connection,
 * so a failure between them cannot leave a session row without messages.
 *
 * @param {string} sessionId - Session to remove.
 * @returns {Promise<void>}
 * @throws Rethrows the database error after rolling back.
 */
async function deleteSession(sessionId) {
  const connection = await pool.getConnection();
  try {
    await connection.beginTransaction();
    await connection.execute('DELETE FROM messages WHERE session_id = ?', [sessionId]);
    await connection.execute('DELETE FROM sessions WHERE id = ?', [sessionId]);
    await connection.commit();
  } catch (error) {
    try {
      await connection.rollback();
    } catch (rollbackError) {
      // Report the rollback failure but surface the original error.
      console.warn('[DB] rollback failed:', rollbackError.message);
    }
    throw error;
  } finally {
    connection.release();
  }
}
|
||||
|
||||
module.exports = {
|
||||
initialize,
|
||||
getPool,
|
||||
@@ -146,4 +199,6 @@ module.exports = {
|
||||
getMessages,
|
||||
getRecentMessages,
|
||||
getHistoryForLLM,
|
||||
getSessionList,
|
||||
deleteSession,
|
||||
};
|
||||
|
||||
@@ -1,171 +0,0 @@
|
||||
/**
|
||||
* Copyright 2025 Beijing Volcano Engine Technology Co., Ltd. All Rights Reserved.
|
||||
* SPDX-license-identifier: BSD-3-Clause
|
||||
*
|
||||
* 火山引擎 RTC AccessToken 生成器
|
||||
* 来源:https://github.com/volcengine/rtc-aigc-demo/blob/main/Server/token.js
|
||||
*/
|
||||
|
||||
var crypto = require('crypto');
|
||||
|
||||
var randomInt = Math.floor(Math.random() * 0xFFFFFFFF);
|
||||
|
||||
const VERSION = "001";
|
||||
const VERSION_LENGTH = 3;
|
||||
|
||||
const APP_ID_LENGTH = 24;
|
||||
|
||||
// Privilege identifiers used as keys of a token's privilege map.
// Declared explicitly: assigning to an undeclared name creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
const privileges = {
  PrivPublishStream: 0,

  // not exported, do not use directly
  privPublishAudioStream: 1,
  privPublishVideoStream: 2,
  privPublishDataStream: 3,

  PrivSubscribeStream: 4,
};
|
||||
|
||||
|
||||
module.exports.privileges = privileges;
|
||||
|
||||
/**
 * Token object created from the required parameters.
 *
 * @param {string} appID - Application id, placed verbatim into the serialized token.
 * @param {string} appKey - Key used to sign the packed message.
 * @param {string} roomID - Room the token is issued for.
 * @param {string} userID - User the token is issued for.
 */
var AccessToken = function (appID, appKey, roomID, userID) {
  let self = this;

  self.appID = appID;
  self.appKey = appKey;
  self.roomID = roomID;
  self.userID = userID;
  self.issuedAt = Math.floor(new Date() / 1000);
  self.nonce = randomInt;
  self.expireAt = 0;
  self.privileges = {};

  // Grants a privilege until expireTimestamp. Granting PrivPublishStream
  // also grants the video, audio and data publish privileges.
  self.addPrivilege = function (privilege, expireTimestamp) {
    if (self.privileges === undefined) {
      self.privileges = {};
    }
    self.privileges[privilege] = expireTimestamp;

    if (privilege !== privileges.PrivPublishStream) {
      return;
    }
    self.privileges[privileges.privPublishVideoStream] = expireTimestamp;
    self.privileges[privileges.privPublishAudioStream] = expireTimestamp;
    self.privileges[privileges.privPublishDataStream] = expireTimestamp;
  };

  // Sets the expiry of the whole token; 0 (the initial value) means none.
  self.expireTime = function (expireTimestamp) {
    self.expireAt = expireTimestamp;
  };

  // Packs nonce, issuedAt, expireAt, roomID, userID and the privilege map.
  self.packMsg = function () {
    var packer = new ByteBuf();
    packer.putUint32(self.nonce);
    packer.putUint32(self.issuedAt);
    packer.putUint32(self.expireAt);
    packer.putString(self.roomID);
    packer.putString(self.userID);
    packer.putTreeMapUInt32(self.privileges);
    return packer.pack();
  };

  // Produces the token string: version + appID + base64(message, signature).
  self.serialize = function () {
    var message = this.packMsg();
    var signature = encodeHMac(self.appKey, message);
    var payload = new ByteBuf().putBytes(message).putBytes(signature).pack();
    return (VERSION + self.appID + payload.toString('base64'));
  };

  // Returns false for an expired token; otherwise compares a fresh HMAC of
  // the packed message with `signature`.
  // NOTE(review): `signature` is never assigned in this constructor —
  // presumably set by a parsing step elsewhere; confirm.
  self.verify = function (key) {
    var expired = self.expireAt > 0 && Math.floor(new Date() / 1000) > self.expireAt;
    if (expired) {
      return false;
    }

    self.appKey = key;
    return encodeHMac(self.appKey, this.packMsg()).toString() === self.signature;
  };

};
|
||||
|
||||
module.exports.version = VERSION;
|
||||
module.exports.AccessToken = AccessToken;
|
||||
|
||||
/**
 * Compute the HMAC-SHA256 of a message.
 *
 * @param {string|Buffer} key - HMAC key.
 * @param {string|Buffer} message - Data to authenticate.
 * @returns {Buffer} Raw digest bytes.
 */
var encodeHMac = function (key, message) {
  var hmac = crypto.createHmac('sha256', key);
  hmac.update(message);
  return hmac.digest();
};
|
||||
|
||||
/**
 * Little-endian byte writer used to pack token messages.
 *
 * The backing buffer starts at 1024 bytes and doubles whenever a write
 * would not fit. With a fixed-size buffer, `putBytes` truncates silently
 * (Buffer#copy only copies what fits) and fixed-width writes throw.
 *
 * @returns {object} Writer exposing `buffer`, `position` and chainable put* methods.
 */
var ByteBuf = function () {
  var that = {
    buffer: Buffer.alloc(1024),
    position: 0,
  };

  // Grow the backing buffer so `extra` more bytes fit at the current position.
  function ensureCapacity(extra) {
    var needed = that.position + extra;
    if (needed <= that.buffer.length) {
      return;
    }
    var size = that.buffer.length;
    while (size < needed) {
      size *= 2;
    }
    var grown = Buffer.alloc(size);
    that.buffer.copy(grown, 0, 0, that.position);
    that.buffer = grown;
  }

  // Returns a copy of the bytes written so far.
  that.pack = function () {
    var out = Buffer.alloc(that.position);
    that.buffer.copy(out, 0, 0, out.length);
    return out;
  };

  that.putUint16 = function (v) {
    ensureCapacity(2);
    that.buffer.writeUInt16LE(v, that.position);
    that.position += 2;
    return that;
  };

  that.putUint32 = function (v) {
    ensureCapacity(4);
    that.buffer.writeUInt32LE(v, that.position);
    that.position += 4;
    return that;
  };

  // Writes a uint16 length prefix followed by the raw bytes.
  that.putBytes = function (bytes) {
    that.putUint16(bytes.length);
    ensureCapacity(bytes.length);
    bytes.copy(that.buffer, that.position);
    that.position += bytes.length;
    return that;
  };

  that.putString = function (str) {
    return that.putBytes(Buffer.from(str));
  };

  // Writes a uint16 entry count, then (uint16 key, string value) pairs.
  that.putTreeMap = function (map) {
    if (!map) {
      that.putUint16(0);
      return that;
    }

    that.putUint16(Object.keys(map).length);
    for (var key in map) {
      that.putUint16(Number(key));
      that.putString(map[key]);
    }

    return that;
  };

  // Writes a uint16 entry count, then (uint16 key, uint32 value) pairs.
  that.putTreeMapUInt32 = function (map) {
    if (!map) {
      that.putUint16(0);
      return that;
    }

    that.putUint16(Object.keys(map).length);
    for (var key in map) {
      that.putUint16(Number(key));
      that.putUint32(map[key]);
    }

    return that;
  };

  return that;
};
|
||||
139
test2/server/package-lock.json
generated
139
test2/server/package-lock.json
generated
@@ -14,8 +14,10 @@
|
||||
"crypto-js": "^4.2.0",
|
||||
"dotenv": "^16.6.1",
|
||||
"express": "^4.18.2",
|
||||
"mysql2": "^3.19.1",
|
||||
"node-fetch": "^2.7.0",
|
||||
"uuid": "^9.0.0"
|
||||
"uuid": "^9.0.0",
|
||||
"ws": "^8.19.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/aspromise": {
|
||||
@@ -87,6 +89,7 @@
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/@types/node/-/node-25.3.3.tgz",
|
||||
"integrity": "sha512-DpzbrH7wIcBaJibpKo9nnSQL0MTRdnWttGyE5haGwK86xgMOkFLp7vEyfQPGLOJh5wNYiJ3V9PmUMDhV9u8kkQ==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~7.18.0"
|
||||
}
|
||||
@@ -194,6 +197,15 @@
|
||||
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/aws-ssl-profiles": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/aws-ssl-profiles/-/aws-ssl-profiles-1.1.2.tgz",
|
||||
"integrity": "sha512-NZKeq9AfyQvEeNlN0zSYAaWrmBffJh3IELMZfRpJVWgrpEbtEpnjvzqBPf+mxoI287JohRDoa+/nsfqqiZmF6g==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/axios": {
|
||||
"version": "1.13.5",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/axios/-/axios-1.13.5.tgz",
|
||||
@@ -379,6 +391,15 @@
|
||||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/denque": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/denque/-/denque-2.1.0.tgz",
|
||||
"integrity": "sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/depd": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/depd/-/depd-2.0.0.tgz",
|
||||
@@ -626,6 +647,15 @@
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/generate-function": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/generate-function/-/generate-function-2.3.1.tgz",
|
||||
"integrity": "sha512-eeB5GfMNeevm/GRYq20ShmsaGcmI81kIX2K9XQx5miC8KdHaC6Jm0qQ8ZNeGOi7wYB8OsdxKs+Y2oVuTFuVwKQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"is-property": "^1.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/get-intrinsic": {
|
||||
"version": "1.3.0",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
|
||||
@@ -761,6 +791,12 @@
|
||||
"node": ">= 0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/is-property": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/is-property/-/is-property-1.0.2.tgz",
|
||||
"integrity": "sha512-Ks/IoX00TtClbGQr4TWXemAnktAQvYB7HzcCxDGqEZU6oCmb2INHuOoKxbtR+HFkmYWBKv/dOZtGRiAjDhj92g==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/lodash.get": {
|
||||
"version": "4.4.2",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/lodash.get/-/lodash.get-4.4.2.tgz",
|
||||
@@ -774,6 +810,21 @@
|
||||
"integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/lru.min": {
|
||||
"version": "1.1.4",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/lru.min/-/lru.min-1.1.4.tgz",
|
||||
"integrity": "sha512-DqC6n3QQ77zdFpCMASA1a3Jlb64Hv2N2DciFGkO/4L9+q/IpIAuRlKOvCXabtRW6cQf8usbmM6BE/TOPysCdIA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"bun": ">=1.0.0",
|
||||
"deno": ">=1.30.0",
|
||||
"node": ">=8.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/wellwelwel"
|
||||
}
|
||||
},
|
||||
"node_modules/math-intrinsics": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
|
||||
@@ -849,6 +900,56 @@
|
||||
"integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/mysql2": {
|
||||
"version": "3.19.1",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/mysql2/-/mysql2-3.19.1.tgz",
|
||||
"integrity": "sha512-yn4zh+Uxu5J3Zvi6Ao96lJ7BSBRkspHflWQAmOPND+htbpIKDQw99TTvPzgihKO/QyMickZopO4OsnixnpcUwA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"aws-ssl-profiles": "^1.1.2",
|
||||
"denque": "^2.1.0",
|
||||
"generate-function": "^2.3.1",
|
||||
"iconv-lite": "^0.7.2",
|
||||
"long": "^5.3.2",
|
||||
"lru.min": "^1.1.4",
|
||||
"named-placeholders": "^1.1.6",
|
||||
"sql-escaper": "^1.3.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 8.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/node": ">= 8"
|
||||
}
|
||||
},
|
||||
"node_modules/mysql2/node_modules/iconv-lite": {
|
||||
"version": "0.7.2",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/iconv-lite/-/iconv-lite-0.7.2.tgz",
|
||||
"integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"safer-buffer": ">= 2.1.2 < 3.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/express"
|
||||
}
|
||||
},
|
||||
"node_modules/named-placeholders": {
|
||||
"version": "1.1.6",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/named-placeholders/-/named-placeholders-1.1.6.tgz",
|
||||
"integrity": "sha512-Tz09sEL2EEuv5fFowm419c1+a/jSMiBjI9gHxVLrVdbUkkNUUfjsVYs9pVZu5oCon/kmRh9TfLEObFtkVxmY0w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"lru.min": "^1.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/negotiator": {
|
||||
"version": "0.6.3",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/negotiator/-/negotiator-0.6.3.tgz",
|
||||
@@ -1172,6 +1273,21 @@
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/sql-escaper": {
|
||||
"version": "1.3.3",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/sql-escaper/-/sql-escaper-1.3.3.tgz",
|
||||
"integrity": "sha512-BsTCV265VpTp8tm1wyIm1xqQCS+Q9NHx2Sr+WcnUrgLrQ6yiDIvHYJV5gHxsj1lMBy2zm5twLaZao8Jd+S8JJw==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"bun": ">=1.0.0",
|
||||
"deno": ">=2.0.0",
|
||||
"node": ">=12.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/mysqljs/sql-escaper?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/statuses": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/statuses/-/statuses-2.0.2.tgz",
|
||||
@@ -1271,6 +1387,27 @@
|
||||
"webidl-conversions": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.19.0",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/ws/-/ws-8.19.0.tgz",
|
||||
"integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": ">=5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/yocto-queue": {
|
||||
"version": "0.1.0",
|
||||
"resolved": "https://mirrors.huaweicloud.com/repository/npm/yocto-queue/-/yocto-queue-0.1.0.tgz",
|
||||
|
||||
@@ -14,7 +14,9 @@
|
||||
"crypto-js": "^4.2.0",
|
||||
"dotenv": "^16.6.1",
|
||||
"express": "^4.18.2",
|
||||
"mysql2": "^3.19.1",
|
||||
"node-fetch": "^2.7.0",
|
||||
"uuid": "^9.0.0"
|
||||
"uuid": "^9.0.0",
|
||||
"ws": "^8.19.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,133 @@
|
||||
const express = require('express');
|
||||
const router = express.Router();
|
||||
const cozeChatService = require('../services/cozeChatService');
|
||||
const arkChatService = require('../services/arkChatService');
|
||||
const ToolExecutor = require('../services/toolExecutor');
|
||||
const { shouldForceKnowledgeRoute } = require('../services/realtimeDialogRouting');
|
||||
const db = require('../db');
|
||||
|
||||
// 存储文字对话的会话状态(sessionId -> session)
|
||||
const chatSessions = new Map();
|
||||
|
||||
/**
 * Flatten assistant output into a single line of text.
 *
 * Blank-line paragraph breaks become a `。`, single line breaks become a
 * space, repeated or adjacent punctuation is collapsed, and whitespace
 * runs are squeezed to one space.
 *
 * @param {*} text - Raw text; falsy values are treated as an empty string.
 * @returns {string} Normalized, trimmed text.
 */
function normalizeAssistantText(text) {
  return String(text || '')
    // Unify CRLF / CR first; otherwise "\r\n\r\n" turns into " \n \n" and is
    // no longer recognized as a paragraph break by the next rule.
    .replace(/\r\n?/g, '\n')
    .replace(/\n{2,}/g, '。')
    .replace(/\n/g, ' ')
    .replace(/。{2,}/g, '。')
    .replace(/([!?;,])\1+/g, '$1')
    // Of two adjacent punctuation marks, keep the second one.
    .replace(/([。!?;,])\s*([。!?;,])/g, '$2')
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
/**
 * Collect the messages carried over into a chat session.
 *
 * History stored in the database wins. When the lookup fails or returns
 * nothing, the last 10 voice subtitles are converted into messages.
 *
 * @param {string} sessionId - Session whose history is loaded.
 * @param {Array<{role: string, text: string}>} [voiceSubtitles=[]] - Fallback subtitles.
 * @returns {Promise<Array<{role: string, content: string}>>} Handoff messages, possibly empty.
 */
async function loadHandoffMessages(sessionId, voiceSubtitles = []) {
  try {
    const persisted = await db.getHistoryForLLM(sessionId, 20);
    if (persisted.length > 0) {
      console.log(`[Chat] Loaded ${persisted.length} messages from DB for session ${sessionId}`);
      return persisted;
    }
  } catch (e) {
    // A failed lookup is not fatal: fall through to the subtitle fallback.
    console.warn('[DB] getHistoryForLLM failed:', e.message);
  }

  // Any role other than 'user' is mapped to 'assistant'.
  return voiceSubtitles.slice(-10).map((subtitle) => ({
    role: subtitle.role === 'user' ? 'user' : 'assistant',
    content: subtitle.text,
  }));
}
|
||||
|
||||
/**
 * Create the in-memory state object for a chat session.
 *
 * Loads the handoff messages and asks the Ark service for a summary of
 * them. A failed summary is logged and replaced by an empty string.
 *
 * @param {string} sessionId - Session identifier; its first 12 characters form the user id.
 * @param {Array<object>} [voiceSubtitles=[]] - Subtitles from a preceding voice call.
 * @returns {Promise<object>} Fresh session state.
 */
async function buildChatSessionState(sessionId, voiceSubtitles = []) {
  const carriedMessages = await loadHandoffMessages(sessionId, voiceSubtitles);

  let summaryText = '';
  try {
    summaryText = await arkChatService.summarizeContextForHandoff(carriedMessages, 3);
  } catch (error) {
    console.warn('[Chat] summarizeContextForHandoff failed:', error.message);
  }

  const ownerId = `user_${sessionId.slice(0, 12)}`;
  // The session counts as coming from voice when either source had content.
  const cameFromVoice = voiceSubtitles.length > 0 || carriedMessages.length > 0;

  return {
    userId: ownerId,
    conversationId: null,
    voiceMessages: carriedMessages,
    handoffSummary: summaryText,
    handoffSummaryUsed: false,
    createdAt: Date.now(),
    lastActiveAt: Date.now(),
    fromVoice: cameFromVoice,
  };
}
|
||||
|
||||
/**
 * Assemble the extra context messages for the first turn of a chat.
 *
 * The handoff summary is included as an assistant message when it is
 * non-empty and not yet used, followed by the last 6 voice messages.
 *
 * @param {?object} session - Chat session state; may be null or undefined.
 * @returns {Array<{role: string, content: string}>} Context messages, possibly empty.
 */
function buildInitialContextMessages(session) {
  const summaryText = String(session?.handoffSummary || '').trim();
  const carried = session?.voiceMessages;

  const summaryPart = summaryText && !session?.handoffSummaryUsed
    ? [{ role: 'assistant', content: `会话交接摘要:${summaryText}` }]
    : [];
  const historyPart = Array.isArray(carried) && carried.length > 0
    ? carried.slice(-6)
    : [];

  return [...summaryPart, ...historyPart];
}
|
||||
|
||||
/**
 * Build the context passed to knowledge routing and lookup.
 *
 * Uses up to 20 stored history messages (an empty list when the lookup
 * rejects). An unused, non-empty handoff summary is prepended as an
 * assistant message.
 *
 * @param {string} sessionId - Session whose history is read.
 * @param {?object} session - Chat session state.
 * @returns {Promise<Array<{role: string, content: string}>>} Context messages.
 */
async function buildKnowledgeContextMessages(sessionId, session) {
  const storedHistory = await db.getHistoryForLLM(sessionId, 20).catch(() => []);
  const summaryText = String(session?.handoffSummary || '').trim();

  const summaryPending = Boolean(summaryText) && !session?.handoffSummaryUsed;
  if (summaryPending) {
    const summaryMessage = { role: 'assistant', content: `会话交接摘要:${summaryText}` };
    return [summaryMessage, ...storedHistory];
  }
  return storedHistory;
}
|
||||
|
||||
/**
 * Turn a knowledge tool result into reply text.
 *
 * A `results` array is joined line by line (each item's `content`, or its
 * JSON form when `content` is falsy); otherwise a truthy `error` is
 * returned; otherwise a string result is returned as is, and anything else
 * becomes an empty string.
 *
 * @param {*} result - Tool result.
 * @returns {*} Reply text (or the `error` value as provided).
 */
function extractKnowledgeReply(result) {
  if (Array.isArray(result?.results)) {
    const lines = result.results.map((entry) => entry.content || JSON.stringify(entry));
    return lines.join('\n');
  }
  if (result?.error) {
    return result.error;
  }
  if (typeof result === 'string') {
    return result;
  }
  return '';
}
|
||||
|
||||
/**
 * Try to answer a chat message through the `search_knowledge` tool.
 *
 * Yields null when the message is blank, when routing does not force the
 * knowledge path, or when the tool produces no usable text.
 *
 * @param {string} sessionId - Session used to build the lookup context.
 * @param {?object} session - Chat session state.
 * @param {*} message - User message.
 * @returns {Promise<?{content: string, meta: object}>} Reply text plus lookup metadata, or null.
 */
async function tryKnowledgeReply(sessionId, session, message) {
  const query = String(message || '').trim();
  if (!query) return null;

  const contextMessages = await buildKnowledgeContextMessages(sessionId, session);
  if (!shouldForceKnowledgeRoute(query, contextMessages)) return null;

  const toolArgs = { query };
  const toolResult = await ToolExecutor.execute('search_knowledge', toolArgs, contextMessages);
  const replyText = normalizeAssistantText(extractKnowledgeReply(toolResult));
  if (!replyText) return null;

  // Metadata describing how the reply was produced; missing fields become null.
  const meta = {
    route: 'search_knowledge',
    original_text: query,
    tool_name: 'search_knowledge',
    tool_args: { query },
    source: toolResult?.source || null,
    original_query: toolResult?.original_query || query,
    rewritten_query: toolResult?.rewritten_query || null,
    hit: typeof toolResult?.hit === 'boolean' ? toolResult.hit : null,
    reason: toolResult?.reason || null,
    error_type: toolResult?.errorType || null,
    latency_ms: toolResult?.latency_ms || null,
  };
  return { content: replyText, meta };
}
|
||||
|
||||
/**
|
||||
* POST /api/chat/start
|
||||
* 创建文字对话会话,可选传入语音通话的历史字幕
|
||||
@@ -21,46 +143,21 @@ router.post('/start', async (req, res) => {
|
||||
return res.status(500).json({ success: false, error: 'Coze 智能体未配置,请设置 COZE_API_TOKEN 和 COZE_BOT_ID' });
|
||||
}
|
||||
|
||||
// 优先从数据库加载完整历史(包含语音通话中的工具结果等)
|
||||
let voiceMessages = [];
|
||||
try {
|
||||
const dbHistory = await db.getHistoryForLLM(sessionId, 20);
|
||||
if (dbHistory.length > 0) {
|
||||
voiceMessages = dbHistory;
|
||||
console.log(`[Chat] Loaded ${dbHistory.length} messages from DB for session ${sessionId}`);
|
||||
}
|
||||
} catch (e) { console.warn('[DB] getHistoryForLLM failed:', e.message); }
|
||||
|
||||
// 如果数据库没有历史,回退到 voiceSubtitles
|
||||
if (voiceMessages.length === 0 && voiceSubtitles.length > 0) {
|
||||
const recentSubtitles = voiceSubtitles.slice(-10);
|
||||
for (const sub of recentSubtitles) {
|
||||
voiceMessages.push({
|
||||
role: sub.role === 'user' ? 'user' : 'assistant',
|
||||
content: sub.text,
|
||||
});
|
||||
}
|
||||
}
|
||||
const sessionState = await buildChatSessionState(sessionId, voiceSubtitles);
|
||||
|
||||
// 更新数据库会话模式为 chat
|
||||
try { await db.createSession(sessionId, `user_${sessionId.slice(0, 12)}`, 'chat'); } catch (e) {}
|
||||
|
||||
chatSessions.set(sessionId, {
|
||||
userId: `user_${sessionId.slice(0, 12)}`,
|
||||
conversationId: null,
|
||||
voiceMessages,
|
||||
createdAt: Date.now(),
|
||||
fromVoice: voiceSubtitles.length > 0 || voiceMessages.length > 0,
|
||||
});
|
||||
chatSessions.set(sessionId, sessionState);
|
||||
|
||||
console.log(`[Chat] Session started: ${sessionId}, fromVoice: ${voiceSubtitles.length > 0}, voiceMessages: ${voiceMessages.length}`);
|
||||
console.log(`[Chat] Session started: ${sessionId}, fromVoice: ${sessionState.fromVoice}, voiceMessages: ${sessionState.voiceMessages.length}, summary: ${sessionState.handoffSummary ? 'yes' : 'no'}`);
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
sessionId,
|
||||
messageCount: voiceMessages.length,
|
||||
fromVoice: voiceSubtitles.length > 0 || voiceMessages.length > 0,
|
||||
messageCount: sessionState.voiceMessages.length,
|
||||
fromVoice: sessionState.fromVoice,
|
||||
},
|
||||
});
|
||||
});
|
||||
@@ -81,23 +178,30 @@ router.post('/send', async (req, res) => {
|
||||
|
||||
// 自动创建会话(如果不存在)
|
||||
if (!session) {
|
||||
session = {
|
||||
userId: `user_${sessionId.slice(0, 12)}`,
|
||||
conversationId: null,
|
||||
voiceMessages: [],
|
||||
createdAt: Date.now(),
|
||||
fromVoice: false,
|
||||
};
|
||||
session = await buildChatSessionState(sessionId, []);
|
||||
chatSessions.set(sessionId, session);
|
||||
}
|
||||
session.lastActiveAt = Date.now();
|
||||
|
||||
console.log(`[Chat] User(${sessionId}): ${message}`);
|
||||
|
||||
// 写入数据库:用户消息
|
||||
db.addMessage(sessionId, 'user', message, 'chat_user').catch(e => console.warn('[DB] addMessage failed:', e.message));
|
||||
|
||||
const knowledgeReply = await tryKnowledgeReply(sessionId, session, message);
|
||||
if (knowledgeReply) {
|
||||
session.handoffSummaryUsed = true;
|
||||
db.addMessage(sessionId, 'assistant', knowledgeReply.content, 'chat_bot', 'search_knowledge', knowledgeReply.meta).catch(e => console.warn('[DB] addMessage failed:', e.message));
|
||||
return res.json({
|
||||
success: true,
|
||||
data: {
|
||||
content: knowledgeReply.content,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// 首次对话时注入语音历史作为上下文,之后 Coze 自动管理会话历史
|
||||
const extraMessages = !session.conversationId ? session.voiceMessages : [];
|
||||
const extraMessages = !session.conversationId ? buildInitialContextMessages(session) : [];
|
||||
|
||||
const result = await cozeChatService.chat(
|
||||
session.userId,
|
||||
@@ -105,21 +209,23 @@ router.post('/send', async (req, res) => {
|
||||
session.conversationId,
|
||||
extraMessages
|
||||
);
|
||||
const normalizedContent = normalizeAssistantText(result.content);
|
||||
|
||||
// 保存 Coze 返回的 conversationId
|
||||
session.conversationId = result.conversationId;
|
||||
session.handoffSummaryUsed = true;
|
||||
|
||||
console.log(`[Chat] Assistant(${sessionId}): ${result.content?.substring(0, 100)}`);
|
||||
console.log(`[Chat] Assistant(${sessionId}): ${normalizedContent?.substring(0, 100)}`);
|
||||
|
||||
// 写入数据库:AI 回复
|
||||
if (result.content) {
|
||||
db.addMessage(sessionId, 'assistant', result.content, 'chat_bot').catch(e => console.warn('[DB] addMessage failed:', e.message));
|
||||
if (normalizedContent) {
|
||||
db.addMessage(sessionId, 'assistant', normalizedContent, 'chat_bot').catch(e => console.warn('[DB] addMessage failed:', e.message));
|
||||
}
|
||||
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
content: result.content,
|
||||
content: normalizedContent,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
@@ -160,15 +266,10 @@ router.post('/send-stream', async (req, res) => {
|
||||
|
||||
let session = chatSessions.get(sessionId);
|
||||
if (!session) {
|
||||
session = {
|
||||
userId: `user_${sessionId.slice(0, 12)}`,
|
||||
conversationId: null,
|
||||
voiceMessages: [],
|
||||
createdAt: Date.now(),
|
||||
fromVoice: false,
|
||||
};
|
||||
session = await buildChatSessionState(sessionId, []);
|
||||
chatSessions.set(sessionId, session);
|
||||
}
|
||||
session.lastActiveAt = Date.now();
|
||||
|
||||
console.log(`[Chat][SSE] User(${sessionId}): ${message}`);
|
||||
|
||||
@@ -182,9 +283,17 @@ router.post('/send-stream', async (req, res) => {
|
||||
res.setHeader('X-Accel-Buffering', 'no');
|
||||
res.flushHeaders();
|
||||
|
||||
const knowledgeReply = await tryKnowledgeReply(sessionId, session, message);
|
||||
if (knowledgeReply) {
|
||||
session.handoffSummaryUsed = true;
|
||||
db.addMessage(sessionId, 'assistant', knowledgeReply.content, 'chat_bot', 'search_knowledge', knowledgeReply.meta).catch(e => console.warn('[DB] addMessage failed:', e.message));
|
||||
res.write(`data: ${JSON.stringify({ type: 'done', content: knowledgeReply.content })}\n\n`);
|
||||
return res.end();
|
||||
}
|
||||
|
||||
try {
|
||||
// 首次对话时注入语音历史作为上下文
|
||||
const extraMessages = !session.conversationId ? session.voiceMessages : [];
|
||||
const extraMessages = !session.conversationId ? buildInitialContextMessages(session) : [];
|
||||
|
||||
const result = await cozeChatService.chatStream(
|
||||
session.userId,
|
||||
@@ -198,17 +307,19 @@ router.post('/send-stream', async (req, res) => {
|
||||
onDone: () => {},
|
||||
}
|
||||
);
|
||||
const normalizedContent = normalizeAssistantText(result.content);
|
||||
|
||||
// 保存 Coze 返回的 conversationId
|
||||
session.conversationId = result.conversationId;
|
||||
console.log(`[Chat][SSE] Assistant(${sessionId}): ${result.content?.substring(0, 100)}`);
|
||||
session.handoffSummaryUsed = true;
|
||||
console.log(`[Chat][SSE] Assistant(${sessionId}): ${normalizedContent?.substring(0, 100)}`);
|
||||
|
||||
// 写入数据库:AI 回复
|
||||
if (result.content) {
|
||||
db.addMessage(sessionId, 'assistant', result.content, 'chat_bot').catch(e => console.warn('[DB] addMessage failed:', e.message));
|
||||
if (normalizedContent) {
|
||||
db.addMessage(sessionId, 'assistant', normalizedContent, 'chat_bot').catch(e => console.warn('[DB] addMessage failed:', e.message));
|
||||
}
|
||||
|
||||
res.write(`data: ${JSON.stringify({ type: 'done', content: result.content })}\n\n`);
|
||||
res.write(`data: ${JSON.stringify({ type: 'done', content: normalizedContent })}\n\n`);
|
||||
res.end();
|
||||
} catch (error) {
|
||||
console.error('[Chat][SSE] Stream failed:', error.message);
|
||||
@@ -231,7 +342,7 @@ setInterval(() => {
|
||||
const now = Date.now();
|
||||
const TTL = 30 * 60 * 1000;
|
||||
for (const [id, session] of chatSessions) {
|
||||
if (now - session.createdAt > TTL) {
|
||||
if (now - (session.lastActiveAt || session.createdAt) > TTL) {
|
||||
chatSessions.delete(id);
|
||||
console.log(`[Chat] Session expired and cleaned: ${id}`);
|
||||
}
|
||||
|
||||
@@ -2,6 +2,48 @@ const express = require('express');
|
||||
const router = express.Router();
|
||||
const db = require('../db');
|
||||
|
||||
/**
 * GET /api/session/list
 *
 * Returns the session list for an optional `userId`, each entry carrying a
 * short preview of its last message. Ordering and the last-message lookup are
 * delegated to `db.getSessionList` (not visible from this handler).
 *
 * Query parameters:
 *   - userId: optional filter; `null` is passed to the DB layer when absent.
 *   - limit:  optional positive integer; anything missing, non-numeric, zero
 *             or negative falls back to the default of 50.
 *
 * Responds with `{ success: true, data: [...] }`, or HTTP 500 and
 * `{ success: false, error }` when the lookup fails.
 */
router.get('/list', async (req, res) => {
  const DEFAULT_LIMIT = 50;
  const PREVIEW_LENGTH = 60;

  try {
    const userId = req.query.userId || null;

    // Always parse as base 10, and never forward a non-positive value to the
    // DB layer (a negative LIMIT is rejected by the database).
    const requestedLimit = Number.parseInt(req.query.limit, 10);
    const limit = Number.isInteger(requestedLimit) && requestedLimit > 0
      ? requestedLimit
      : DEFAULT_LIMIT;

    const sessions = await db.getSessionList(userId, limit);

    res.json({
      success: true,
      data: sessions.map((s) => {
        let lastMessage = null;
        if (s.last_message) {
          lastMessage = s.last_message.length > PREVIEW_LENGTH
            ? s.last_message.slice(0, PREVIEW_LENGTH) + '...'
            : s.last_message;
        }
        return {
          id: s.id,
          userId: s.user_id,
          mode: s.mode,
          createdAt: s.created_at,
          updatedAt: s.updated_at,
          lastMessage,
          messageCount: Number.parseInt(s.message_count, 10) || 0,
        };
      }),
    });
  } catch (err) {
    console.error('[Session] List failed:', err.message);
    res.status(500).json({ success: false, error: err.message });
  }
});
|
||||
|
||||
/**
 * DELETE /api/session/:id
 *
 * Removes the session identified by the `:id` path parameter through
 * `db.deleteSession` (which, per the route's original description, also
 * removes the session's messages).
 *
 * Responds with `{ success: true }`, or HTTP 500 and
 * `{ success: false, error }` when the DB call rejects.
 */
router.delete('/:id', async (req, res) => {
  try {
    const sessionId = req.params.id;
    await db.deleteSession(sessionId);
    res.json({ success: true });
  } catch (err) {
    const reason = err.message;
    console.error('[Session] Delete failed:', reason);
    res.status(500).json({ success: false, error: reason });
  }
});
|
||||
|
||||
/**
|
||||
* GET /api/session/:id/history
|
||||
* 获取会话完整历史(用于文字↔语音切换时加载上下文)
|
||||
|
||||
@@ -1,20 +1,11 @@
|
||||
const express = require('express');
|
||||
const router = express.Router();
|
||||
const { v4: uuidv4 } = require('uuid');
|
||||
const volcengine = require('../services/volcengine');
|
||||
const VoiceChatConfigBuilder = require('../config/voiceChatConfig');
|
||||
const ToolExecutor = require('../services/toolExecutor');
|
||||
const DEFAULT_TOOLS = require('../config/tools');
|
||||
const db = require('../db');
|
||||
|
||||
const activeSessions = new Map();
|
||||
const completedSessions = new Map();
|
||||
const roomToBotUserId = new Map();
|
||||
const roomToHumanUserId = new Map();
|
||||
const roomToSessionId = new Map();
|
||||
const roomToTaskId = new Map();
|
||||
const latestUserSpeech = new Map();
|
||||
const toolCallBuffers = new Map();
|
||||
const directSessions = new Map();
|
||||
|
||||
router.get('/config', (req, res) => {
|
||||
res.json({
|
||||
@@ -44,516 +35,133 @@ router.get('/config', (req, res) => {
|
||||
});
|
||||
});
|
||||
|
||||
router.post('/prepare', async (req, res) => {
|
||||
router.post('/direct/session', async (req, res) => {
|
||||
try {
|
||||
const { userId } = req.body;
|
||||
if (!userId) {
|
||||
return res.status(400).json({ success: false, error: 'userId is required' });
|
||||
}
|
||||
const sessionId = uuidv4();
|
||||
const roomId = `room_${sessionId.slice(0, 8)}`;
|
||||
const taskId = `task_${sessionId.slice(0, 8)}_${Date.now()}`;
|
||||
const rtcToken = volcengine.generateRTCToken(roomId, userId);
|
||||
activeSessions.set(sessionId, {
|
||||
roomId,
|
||||
taskId,
|
||||
userId,
|
||||
const { userId, sessionId } = req.body || {};
|
||||
const sid = sessionId || uuidv4();
|
||||
const directSession = {
|
||||
sessionId: sid,
|
||||
userId: userId || null,
|
||||
startTime: Date.now(),
|
||||
subtitles: [],
|
||||
started: false,
|
||||
direct: true,
|
||||
};
|
||||
directSessions.set(sid, directSession);
|
||||
await db.createSession(sid, userId || null, 'voice');
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
sessionId: sid,
|
||||
userId: userId || null,
|
||||
},
|
||||
});
|
||||
roomToTaskId.set(roomId, taskId);
|
||||
roomToSessionId.set(roomId, sessionId);
|
||||
console.log(`[Voice] Session prepared: ${sessionId}, room: ${roomId}, user: ${userId}`);
|
||||
try { await db.createSession(sessionId, userId, 'voice'); } catch (e) { console.warn('[DB] createSession failed:', e.message); }
|
||||
} catch (error) {
|
||||
console.error('[DirectVoice] Create session failed:', error.message);
|
||||
res.status(500).json({ success: false, error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * POST /direct/message
 *
 * Persists one message of a direct voice session.
 * Body: `{ sessionId, role, text, source, toolName? }`.
 *   - `sessionId`, `text` and `source` are mandatory (HTTP 400 otherwise).
 *   - any `role` other than 'user' is stored as 'assistant'.
 *   - `toolName` defaults to `null`.
 */
router.post('/direct/message', async (req, res) => {
  try {
    const payload = req.body || {};
    const hasRequiredFields = Boolean(payload.sessionId && payload.text && payload.source);

    if (!hasRequiredFields) {
      return res.status(400).json({ success: false, error: 'sessionId, text and source are required' });
    }

    const storedRole = payload.role === 'user' ? 'user' : 'assistant';
    const storedToolName = payload.toolName || null;
    await db.addMessage(payload.sessionId, storedRole, payload.text, payload.source, storedToolName);

    res.json({ success: true });
  } catch (error) {
    console.error('[DirectVoice] Add message failed:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * POST /diag
 *
 * Writes a client-side diagnostic event to the server log and acknowledges it.
 * Body: `{ sessionId?, roomId?, type?, payload? }`; missing fields are logged
 * as 'unknown' (type), '-' (session / room) or `{}` (payload).
 */
router.post('/diag', (req, res) => {
  try {
    const report = req.body || {};
    const eventType = report.type || 'unknown';
    const sessionLabel = report.sessionId || '-';
    const roomLabel = report.roomId || '-';
    const payloadJson = JSON.stringify(report.payload || {});

    console.log(`[Diag] type=${eventType} session=${sessionLabel} room=${roomLabel} payload=${payloadJson}`);
    res.json({ success: true });
  } catch (error) {
    console.error('[Diag] Error:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
router.post('/direct/query', async (req, res) => {
|
||||
try {
|
||||
const { sessionId, query, appendUserMessage } = req.body || {};
|
||||
if (!sessionId) {
|
||||
return res.status(400).json({ success: false, error: 'sessionId is required' });
|
||||
}
|
||||
const context = await db.getHistoryForLLM(sessionId, 20).catch(() => []);
|
||||
const cleanQuery = (query || '').trim();
|
||||
if (appendUserMessage && cleanQuery) {
|
||||
await db.addMessage(sessionId, 'user', cleanQuery, 'voice_asr').catch(() => null);
|
||||
}
|
||||
const result = await ToolExecutor.execute('search_knowledge', { query: cleanQuery }, context);
|
||||
let contentText = JSON.stringify(result);
|
||||
if (result && result.results && Array.isArray(result.results)) {
|
||||
contentText = result.results.map((item) => item.content || JSON.stringify(item)).join('\n');
|
||||
} else if (result && result.error) {
|
||||
contentText = result.error;
|
||||
} else if (typeof result === 'string') {
|
||||
contentText = result;
|
||||
}
|
||||
const ragItems = result && result.results && Array.isArray(result.results) && result.results.length > 0
|
||||
? result.results.map((item) => ({
|
||||
title: item.title || '知识库结果',
|
||||
content: item.content || JSON.stringify(item),
|
||||
}))
|
||||
: [{
|
||||
title: '知识库结果',
|
||||
content: contentText,
|
||||
}];
|
||||
await db.addMessage(sessionId, 'assistant', contentText, 'voice_tool', 'search_knowledge', {
|
||||
route: 'search_knowledge',
|
||||
original_text: cleanQuery,
|
||||
tool_name: 'search_knowledge',
|
||||
tool_args: { query: cleanQuery },
|
||||
source: result?.source || null,
|
||||
original_query: result?.original_query || cleanQuery,
|
||||
rewritten_query: result?.rewritten_query || null,
|
||||
hit: typeof result?.hit === 'boolean' ? result.hit : null,
|
||||
reason: result?.reason || null,
|
||||
error_type: result?.errorType || null,
|
||||
latency_ms: result?.latency_ms || null,
|
||||
}).catch(() => null);
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
sessionId,
|
||||
roomId,
|
||||
taskId,
|
||||
rtcToken,
|
||||
rtcAppId: process.env.VOLC_RTC_APP_ID,
|
||||
query: cleanQuery,
|
||||
contentText,
|
||||
ragItems,
|
||||
ragJson: JSON.stringify(ragItems),
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('[Voice] Prepare failed:', error.message);
|
||||
console.error('[DirectVoice] Query failed:', error.message);
|
||||
res.status(500).json({ success: false, error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
router.post('/start', async (req, res) => {
|
||||
let session = null;
|
||||
router.post('/direct/stop', async (req, res) => {
|
||||
try {
|
||||
const {
|
||||
sessionId,
|
||||
botName,
|
||||
systemRole,
|
||||
speakingStyle,
|
||||
modelVersion,
|
||||
speaker,
|
||||
enableWebSearch,
|
||||
chatHistory,
|
||||
} = req.body;
|
||||
const { sessionId } = req.body || {};
|
||||
if (!sessionId) {
|
||||
return res.status(400).json({ success: false, error: 'sessionId is required' });
|
||||
}
|
||||
session = activeSessions.get(sessionId);
|
||||
if (!session) {
|
||||
return res.status(404).json({ success: false, error: 'Session not found' });
|
||||
}
|
||||
if (session.started) {
|
||||
return res.json({ success: true, data: { message: 'Already started' } });
|
||||
}
|
||||
let effectiveChatHistory = chatHistory;
|
||||
if ((!chatHistory || chatHistory.length === 0) && sessionId) {
|
||||
try {
|
||||
const dbHistory = await db.getHistoryForLLM(sessionId, 20);
|
||||
if (dbHistory.length > 0) {
|
||||
effectiveChatHistory = dbHistory;
|
||||
console.log(`[Voice] Loaded ${dbHistory.length} messages from DB for session ${sessionId}`);
|
||||
}
|
||||
} catch (e) { console.warn('[DB] getHistoryForLLM failed:', e.message); }
|
||||
}
|
||||
console.log(`[Voice] chatHistory: ${effectiveChatHistory ? effectiveChatHistory.length : 'undefined'} messages`);
|
||||
const { config, botUserId } = VoiceChatConfigBuilder.build({
|
||||
roomId: session.roomId,
|
||||
taskId: session.taskId,
|
||||
userId: session.userId,
|
||||
botName,
|
||||
systemRole,
|
||||
speakingStyle,
|
||||
modelVersion,
|
||||
speaker,
|
||||
tools: DEFAULT_TOOLS,
|
||||
enableWebSearch,
|
||||
chatHistory: effectiveChatHistory,
|
||||
});
|
||||
session.botUserId = botUserId;
|
||||
roomToBotUserId.set(session.roomId, botUserId);
|
||||
roomToHumanUserId.set(session.roomId, session.userId);
|
||||
console.log(`[Voice] room=${session.roomId} botUserId=${botUserId} humanUserId=${session.userId}`);
|
||||
const result = await volcengine.startVoiceChat(config);
|
||||
session.started = true;
|
||||
// 捕获服务端可能分配的不同 TaskId
|
||||
const serverTaskId = result?.Result?.TaskId || result?.Result?.task_id;
|
||||
if (serverTaskId && serverTaskId !== session.taskId) {
|
||||
console.log(`[Voice] Server assigned different TaskId: ${serverTaskId} (ours: ${session.taskId})`);
|
||||
roomToTaskId.set(session.roomId, serverTaskId);
|
||||
session.taskId = serverTaskId;
|
||||
}
|
||||
console.log(`[Voice] Session started: ${sessionId}, TaskId=${session.taskId}`);
|
||||
directSessions.delete(sessionId);
|
||||
const messages = await db.getMessages(sessionId).catch(() => []);
|
||||
res.json({
|
||||
success: true,
|
||||
data: { startResult: result },
|
||||
data: {
|
||||
sessionId,
|
||||
messageCount: messages.length,
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
const detail = error.response?.data || error.message;
|
||||
console.error('[Voice] Start failed:', JSON.stringify(detail, null, 2));
|
||||
if (session) {
|
||||
try {
|
||||
await volcengine.stopVoiceChat({
|
||||
AppId: process.env.VOLC_RTC_APP_ID,
|
||||
RoomId: session.roomId,
|
||||
TaskId: session.taskId,
|
||||
});
|
||||
console.log(`[Voice] Stopped failed session`);
|
||||
} catch (stopErr) {
|
||||
console.warn('[Voice] Stop failed during error handling:', stopErr.message);
|
||||
}
|
||||
}
|
||||
res.status(500).json({ success: false, error: typeof detail === 'string' ? detail : JSON.stringify(detail) });
|
||||
}
|
||||
});
|
||||
|
||||
router.post('/stop', async (req, res) => {
|
||||
try {
|
||||
const { sessionId } = req.body;
|
||||
const session = activeSessions.get(sessionId);
|
||||
if (session) {
|
||||
await volcengine.stopVoiceChat({
|
||||
AppId: process.env.VOLC_RTC_APP_ID,
|
||||
RoomId: session.roomId,
|
||||
TaskId: session.taskId,
|
||||
});
|
||||
const duration = Math.floor((Date.now() - session.startTime) / 1000);
|
||||
console.log(`[Voice] Session stopped: ${sessionId}, duration: ${duration}s, subtitles: ${session.subtitles.length}`);
|
||||
if (session.subtitles.length > 0) {
|
||||
completedSessions.set(sessionId, {
|
||||
subtitles: session.subtitles,
|
||||
duration,
|
||||
endTime: Date.now(),
|
||||
});
|
||||
setTimeout(() => completedSessions.delete(sessionId), 30 * 60 * 1000);
|
||||
}
|
||||
activeSessions.delete(sessionId);
|
||||
roomToTaskId.delete(session.roomId);
|
||||
roomToSessionId.delete(session.roomId);
|
||||
roomToBotUserId.delete(session.roomId);
|
||||
roomToHumanUserId.delete(session.roomId);
|
||||
res.json({
|
||||
success: true,
|
||||
data: {
|
||||
duration,
|
||||
subtitleCount: session.subtitles.length,
|
||||
subtitles: session.subtitles,
|
||||
},
|
||||
});
|
||||
} else {
|
||||
res.json({ success: true, data: { message: 'Session not found or already stopped' } });
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[Voice] Stop failed:', error.message);
|
||||
console.error('[DirectVoice] Stop session failed:', error.message);
|
||||
res.status(500).json({ success: false, error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * POST /subtitle
 *
 * Receives one subtitle line from the client. Only final lines
 * (`definite` truthy and non-empty `text`) are processed:
 *   - appended to the in-memory subtitles of the active session, if any;
 *   - persisted through `db.addMessage` ('voice_asr' for the user,
 *     'voice_bot' for the assistant) when a session id can be resolved;
 *   - for user lines, remembered in `latestUserSpeech` keyed by room.
 *
 * Body: `{ sessionId?, roomId?, text, role, definite, sequence }`.
 * Any `role` other than 'user' is treated as 'assistant'.
 */
router.post('/subtitle', (req, res) => {
  try {
    const { sessionId, roomId, text, role, definite, sequence } = req.body;

    // Resolve the session id from the room when the client did not send one.
    // The previous fallback read `session.roomId`, but `session` can only be
    // found when `sessionId` is present, so that branch could never run.
    const sid = sessionId || roomToSessionId.get(roomId);
    const session = activeSessions.get(sid);

    if (definite && text) {
      const subtitleRole = role === 'user' ? 'user' : 'assistant';

      if (session) {
        session.subtitles.push({ text, role: subtitleRole, timestamp: Date.now(), sequence });
      }

      if (sid) {
        const source = subtitleRole === 'user' ? 'voice_asr' : 'voice_bot';
        // Fire-and-forget: a failed DB write must not fail the subtitle ack.
        db.addMessage(sid, subtitleRole, text, source).catch(e => console.warn('[DB] addMessage failed:', e.message));
      }

      if (subtitleRole === 'user') {
        const rid = roomId || (session && session.roomId) || '';
        if (rid) {
          latestUserSpeech.set(rid, { text, timestamp: Date.now() });
          console.log(`[Subtitle][user][${rid}] "${text}"`);
        }
      } else {
        console.log(`[Subtitle][assistant] ${text}`);
      }
    }

    res.json({ success: true });
  } catch (error) {
    console.error('[Subtitle] Error:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * GET /subtitles/:sessionId
 *
 * Returns the subtitles collected so far for an active session, or an empty
 * array when the session id is not present in `activeSessions`.
 */
router.get('/subtitles/:sessionId', (req, res) => {
  const activeSession = activeSessions.get(req.params.sessionId);
  const subtitles = activeSession ? activeSession.subtitles : [];
  res.json({ success: true, data: subtitles });
});
|
||||
|
||||
/**
 * Recovers human-readable query text from (possibly broken) tool-call
 * argument fragments.
 *
 * The fragments are concatenated, `\uXXXX` escapes (single- or
 * double-backslashed) are decoded, and then every run of CJK characters and
 * every ASCII word of two or more letters is collected. JSON/tool-schema
 * keywords such as "query" or "arguments" are dropped from the ASCII words.
 *
 * @param {string[]|string|null|undefined} chunks - Argument fragments in
 *   order. A single string is treated as one fragment; null/undefined yields
 *   an empty result instead of throwing.
 * @returns {string} All CJK runs first, then the remaining ASCII words,
 *   joined by single spaces ('' when nothing readable was found).
 */
function extractReadableText(chunks) {
  let fragments;
  if (Array.isArray(chunks)) {
    fragments = chunks;
  } else if (chunks == null) {
    fragments = [];
  } else {
    fragments = [chunks];
  }

  const decodeEscape = (_, hex) => String.fromCharCode(Number.parseInt(hex, 16));

  // Two passes on purpose: double-escaped sequences (\\uXXXX) are decoded
  // first, then ordinary \uXXXX sequences, including any the first pass exposed.
  const decoded = fragments
    .join('')
    .replace(/\\\\u([0-9a-fA-F]{4})/g, decodeEscape)
    .replace(/\\u([0-9a-fA-F]{4})/g, decodeEscape);

  const chineseChars = decoded.match(/[\u4e00-\u9fff\u3400-\u4dbf]+/g) || [];

  // Words that belong to the tool-call envelope rather than the user's query.
  const skipWords = new Set(['id', 'type', 'function', 'name', 'arguments', 'query', 'object', 'string']);
  const englishWords = (decoded.match(/[a-zA-Z]{2,}/g) || [])
    .filter((word) => !skipWords.has(word.toLowerCase()));

  const result = [...chineseChars, ...englishWords].join(' ').trim();
  console.log(`[FC] extractReadableText: chinese=[${chineseChars.join(',')}] english=[${englishWords.join(',')}] → "${result}"`);
  return result;
}
|
||||
|
||||
// Fallback sequence number for callbacks that do not carry their own `_seq`.
let fcCallbackSeq = 0;

// How long streamed tool_call chunks are collected before the tool is executed.
const FC_CHUNK_COLLECT_MS = 500;

/**
 * Flattens a tool result into the plain text that is stored and handed back
 * to the LLM.
 *
 * @param {*} result - Value returned by `ToolExecutor.execute`.
 * @param {string} fallbackText - Text used when the result has no recognised
 *   shape or an entry cannot be read (the raw JSON of the result).
 * @returns {string} Joined `results[].content`, the `error` text, the string
 *   result itself, or `fallbackText`.
 */
function formatToolResultText(result, fallbackText) {
  try {
    if (result && Array.isArray(result.results)) {
      return result.results.map((item) => item.content || JSON.stringify(item)).join('\n');
    }
    if (result && result.error) {
      return result.error;
    }
    if (typeof result === 'string') {
      return result;
    }
  } catch (e) {
    // A malformed entry (e.g. a null item) must not abort the callback;
    // fall through to the raw JSON text.
  }
  return fallbackText;
}

/**
 * Builds the tool arguments from the collected chunks.
 *
 * Tries to parse the concatenated chunks as JSON. When that fails it uses the
 * room's most recent user speech (if younger than 30 s), and finally whatever
 * readable text can be extracted from the raw chunks.
 */
function resolveToolArgs(rawArgs, orderedChunks, roomId) {
  try {
    const parsed = JSON.parse(rawArgs);
    console.log(`[FC] [FormatA] JSON.parse succeeded: ${JSON.stringify(parsed)}`);
    return parsed;
  } catch (e) {
    const userSpeech = latestUserSpeech.get(roomId);
    if (userSpeech && (Date.now() - userSpeech.timestamp < 30000)) {
      console.log(`[FC] [FormatA] Using ASR user speech: "${userSpeech.text}"`);
      return { query: userSpeech.text };
    }
    const extractedText = extractReadableText(orderedChunks.map((chunk) => chunk.args));
    console.log(`[FC] [FormatA] No ASR text, extracted from chunks: "${extractedText}"`);
    return { query: extractedText || '' };
  }
}

/**
 * Sends the tool result back with `Command: 'function'`, retrying once with
 * the task id from the callback when the primary task id is rejected.
 * Failures are logged, never thrown.
 */
async function sendFunctionResult({ appId, roomId, primaryTaskId, fallbackTaskId, message }) {
  const send = (taskId) => volcengine.updateVoiceChat({
    AppId: appId,
    RoomId: roomId,
    TaskId: taskId,
    Command: 'function',
    Message: message,
  });

  try {
    await send(primaryTaskId);
    console.log('[FC] ✅ Command:function sent OK → LLM will generate S2S response with KB content');
  } catch (funcErr) {
    console.error('[FC] ✖ Command:function failed:', funcErr.message);
    // If the resolved task id failed, try the task id the callback carried.
    if (primaryTaskId === fallbackTaskId) return;
    try {
      console.log(`[FC] Retrying Command:function with callback TaskID=${fallbackTaskId}`);
      await send(fallbackTaskId);
      console.log('[FC] ✅ Command:function retry OK');
    } catch (retryErr) {
      console.error('[FC] ✖ Command:function retry also failed:', retryErr.message);
    }
  }
}

/**
 * Runs the buffered tool call for `taskKey` once chunk collection has ended:
 * assembles the arguments, executes the tool, stores the result as a
 * 'voice_tool' message and returns it to the LLM.
 *
 * Every failure is caught and logged. This function runs from a timer, so a
 * rejection escaping it would surface as an unhandled promise rejection.
 */
async function executeBufferedToolCall(taskKey, appId) {
  const buffer = toolCallBuffers.get(taskKey);
  if (!buffer || buffer.triggered) return;
  buffer.triggered = true;

  const toolName = buffer.name || 'search_knowledge';
  // Sort a copy: chunks may arrive out of order, and the buffer stays untouched.
  const orderedChunks = [...buffer.chunks].sort((left, right) => left.seq - right.seq);
  const allArgs = orderedChunks.map((chunk) => chunk.args).join('');
  console.log(`[FC] [FormatA] 500ms timeout, ${buffer.chunks.length} chunks collected, name="${toolName}"`);

  const mappedTaskId = roomToTaskId.get(buffer.RoomID);
  const s2sTaskId = mappedTaskId || buffer.S2STaskID || taskKey;
  console.log(`[FC] TaskId resolution: roomToTaskId=${mappedTaskId} callback=${buffer.S2STaskID} → using=${s2sTaskId}`);
  // No separate interrupt command is sent before the query.

  try {
    const parsedArgs = resolveToolArgs(allArgs, orderedChunks, buffer.RoomID);

    console.log('[FC] ⚡ Starting KB query (no pre-query interrupt)');
    // Executed inside the try so a rejecting tool is logged instead of
    // becoming an unhandled rejection.
    const result = await ToolExecutor.execute(toolName, parsedArgs);
    const resultStr = JSON.stringify(result);
    console.log(`[FC] Tool result (${toolName}): ${resultStr.substring(0, 500)}`);

    const contentText = formatToolResultText(result, resultStr);

    const dbSessionId = roomToSessionId.get(buffer.RoomID);
    if (dbSessionId) {
      db.addMessage(dbSessionId, 'assistant', contentText, 'voice_tool', toolName)
        .catch(e => console.warn('[DB] addMessage(tool) failed:', e.message));
    }
    console.log(`[FC] Knowledge base content (${contentText.length} chars): ${contentText.substring(0, 200)}${contentText.length > 200 ? '...' : ''}`);
    buffer.resultSentAt = Date.now();

    // Strategy: return the result to the LLM with Command:function only.
    // Rationale recorded by the original author:
    //   1. ExternalTextToSpeech produces no audible audio in S2S end-to-end
    //      mode (the API answers ok but nothing is played).
    //   2. ExternalTextToSpeech with InterruptMode=1 cuts off the S2S reply
    //      that is currently playing.
    //   3. Command:function is the return path for custom function calling.
    // Flow: Command:function → LLM receives the tool result → LLM generates
    // the reply → S2S speaks it.
    const toolCallId = buffer.id || 'unknown_call_id';
    const functionContent = contentText.length > 1500
      ? contentText.substring(0, 1500) + '……(内容较长,以上为主要部分)'
      : contentText;
    const funcMsg = JSON.stringify({
      ToolCallID: toolCallId,
      Content: functionContent,
    });

    console.log(`[FC] ★ Sending Command:function (ToolCallID=${toolCallId}, content=${functionContent.length} chars)`);
    await sendFunctionResult({
      appId,
      roomId: buffer.RoomID,
      primaryTaskId: s2sTaskId,
      fallbackTaskId: buffer.S2STaskID,
      message: funcMsg,
    });
    console.log(`[FC] Final result: Command:function sent (${functionContent.length} chars)`);
  } catch (err) {
    console.error(`[FC] Tool execution failed:`, err.message);
    console.error(`[FC] Error details:`, err);
  }
}

/**
 * POST /fc_callback
 *
 * Callback endpoint for function-calling and conversation-state events.
 * Three message shapes are handled:
 *   - Format A (`Type === 'tool_calls'`, Message is a non-empty array):
 *     streamed tool-call chunks. Chunks are buffered per task id; after
 *     FC_CHUNK_COLLECT_MS without a new chunk the tool is executed and its
 *     result is returned with Command:function. Retries that arrive while a
 *     previous call is in its cooldown window are acknowledged and ignored.
 *   - Format B (`event_type === 'function_calling'`): notification only.
 *   - Anything else carrying text from a user role: remembered as the room's
 *     latest user speech.
 *
 * The HTTP response is always sent before the tool runs.
 * NOTE(review): entries in `toolCallBuffers` are only removed when a later
 * callback for the same task id arrives — confirm whether eviction is needed.
 */
router.post('/fc_callback', async (req, res) => {
  try {
    const body = req.body;
    if (!body || typeof body !== 'object' || Object.keys(body).length === 0) {
      console.error('[FC] Empty body');
      return res.status(400).json({ success: false, error: 'Empty body' });
    }

    const { Message, Type, RoomID, TaskID, TaskType, AppID, AppId, room_id, task_id, roomId, taskId } = body;
    // The callback may spell these fields in several ways; normalise once.
    const effectiveRoomId = RoomID || room_id || roomId;
    const effectiveTaskId = TaskID || task_id || taskId;
    const effectiveAppId = AppID || AppId || process.env.VOLC_RTC_APP_ID;
    const seq = body._seq || ++fcCallbackSeq;
    console.log(`[FC] >>> Callback received: seq=${seq} Type="${Type}" Room=${effectiveRoomId} Task=${effectiveTaskId} TaskType=${TaskType}`);

    let msgObj = null;
    try {
      msgObj = typeof Message === 'string' ? JSON.parse(Message) : Message;
    } catch (e) {
      console.error('[FC] Failed to parse Message:', e.message);
      return res.json({ success: true });
    }

    if (Type === 'tool_calls' && Array.isArray(msgObj) && msgObj.length > 0) {
      const tc = msgObj[0];
      const chunkId = tc.id || '';
      const chunkName = tc.function?.name || '';
      const chunkArgs = tc.function?.arguments || '';
      const existing = toolCallBuffers.get(effectiveTaskId);

      if (existing && existing.triggered) {
        const userSpeech = latestUserSpeech.get(effectiveRoomId);
        const hasNewInput = userSpeech && (Date.now() - userSpeech.timestamp < 10000);
        if (hasNewInput) {
          console.log(`[FC] [FormatA] New user input detected, clearing cooldown for room=${effectiveRoomId}`);
          toolCallBuffers.delete(effectiveTaskId);
        } else {
          // Cooldown is 30 s once a result has been sent (15 s otherwise) so
          // the LLM cannot retry endlessly while the KB query is in flight.
          const cooldownMs = existing.resultSentAt ? 30000 : 15000;
          const elapsed = existing.resultSentAt
            ? (Date.now() - existing.resultSentAt)
            : (Date.now() - existing.createdAt);
          if (elapsed < cooldownMs) {
            console.log(`[FC] [FormatA] Cooldown active (${elapsed}ms < ${cooldownMs}ms), ignoring retry for TaskID=${effectiveTaskId}`);
            res.json({ success: true });
            return;
          }
          console.log(`[FC] [FormatA] Cooldown expired (${elapsed}ms >= ${cooldownMs}ms), allowing new call for TaskID=${effectiveTaskId}`);
          toolCallBuffers.delete(effectiveTaskId);
        }
      }

      if (!toolCallBuffers.has(effectiveTaskId)) {
        toolCallBuffers.set(effectiveTaskId, {
          id: '', name: '', chunks: [], triggered: false,
          RoomID: effectiveRoomId, AppID: effectiveAppId, S2STaskID: effectiveTaskId, createdAt: Date.now(), timer: null,
        });
        console.log(`[FC] [FormatA] New buffer created for TaskID=${effectiveTaskId}, room=${effectiveRoomId}`);
      }

      const buf = toolCallBuffers.get(effectiveTaskId);
      if (chunkId && !buf.id) buf.id = chunkId;
      if (chunkName && !buf.name) buf.name = chunkName;
      if (chunkArgs) {
        buf.chunks.push({ seq: tc.seq || 0, args: chunkArgs });
      }

      // Acknowledge immediately; the tool runs after the collection window.
      res.json({ success: true });

      // Each new chunk restarts the collection window.
      if (buf.timer) clearTimeout(buf.timer);
      buf.timer = setTimeout(() => {
        executeBufferedToolCall(effectiveTaskId, effectiveAppId).catch((err) => {
          console.error(`[FC] Tool execution failed:`, err.message);
        });
      }, FC_CHUNK_COLLECT_MS);
      return;
    }

    if (msgObj && typeof msgObj === 'object' && !Array.isArray(msgObj)) {
      const eventType = msgObj.event_type;
      console.log(`[FC] [FormatB] event_type="${eventType}"`);
      if (eventType === 'function_calling') {
        const funcName = msgObj.function || '';
        const toolCallId = msgObj.tool_call_id || '';
        const responseId = msgObj.response_id || '';
        console.log(`[FC] [Information] FC notification: func=${funcName} toolCallId=${toolCallId} responseId=${responseId}`);
        res.json({ success: true });
        // No filler speech is sent here; the tool itself is executed by the
        // Format A branch once the tool_calls chunks arrive.
        console.log(`[FC] [Information] FC notification received, waiting for tool_calls`);
        return;
      }
    }

    if (msgObj && typeof msgObj === 'object') {
      const asrText = msgObj.text || msgObj.asr_text || msgObj.content ||
        msgObj.user_text || msgObj.transcript ||
        (msgObj.data && (msgObj.data.text || msgObj.data.asr_text || msgObj.data.content));
      const role = msgObj.role || msgObj.speaker || msgObj.data?.role || '';
      const isUser = !role || role === 'user' || role === 'human';
      // Key by the normalised room id so callbacks that use `room_id` or
      // `roomId` are stored under the same key the Format A branch reads.
      if (asrText && isUser && effectiveRoomId) {
        latestUserSpeech.set(effectiveRoomId, { text: asrText, timestamp: Date.now() });
        console.log(`[FC] [ConvState] Stored user speech for ${effectiveRoomId}: "${asrText}"`);
      }
    }

    res.json({ success: true });
  } catch (error) {
    console.error('[FC] Error:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
||||
|
||||
/**
 * Remembers the room's latest user utterance and logs it.
 * `source` tags which parsing path produced the text; `label` is the suffix
 * used in the log line for that path.
 */
function storeRoomUserSpeech(roomId, text, source, label) {
  latestUserSpeech.set(roomId, { text, timestamp: Date.now(), source });
  console.log(`[RoomMsg] ✅ Stored user speech${label} for ${roomId}: "${text}"`);
}

/**
 * Extracts user speech from a room message and stores it.
 *
 * The JSON payload starts at the first `[` or `{` in `text`. Three shapes
 * are understood:
 *   - unparseable JSON: `"text"` / `"userId"` are pulled out with regexes;
 *   - `{ data: [...] }`: every final (`definite === true`) subtitle whose
 *     user id is present and does not start with 'bot_';
 *   - any other object: the first of text / asr_text / content / user_text /
 *     transcript / string `data`, unless `uid` starts with 'bot_'.
 */
function captureRoomSpeech(roomId, uid, text) {
  const jsonStart = text.search(/[\[{]/);
  if (jsonStart < 0) {
    return;
  }
  const jsonStr = text.substring(jsonStart);

  let parsed = null;
  try {
    parsed = JSON.parse(jsonStr);
  } catch (e) {
    // Truncated or otherwise invalid JSON: salvage the fields by pattern.
    const textMatch = jsonStr.match(/"text"\s*:\s*"([^"]+)"/);
    if (!textMatch || !textMatch[1]) {
      return;
    }
    const userIdMatch = jsonStr.match(/"userId"\s*:\s*"([^"]+)"/);
    const subtitleUserId = userIdMatch ? userIdMatch[1] : '';
    if (subtitleUserId && !subtitleUserId.startsWith('bot_')) {
      storeRoomUserSpeech(roomId, textMatch[1], 'room_regex', ' (regex)');
    }
    return;
  }

  if (parsed && parsed.data && Array.isArray(parsed.data)) {
    for (const sub of parsed.data) {
      const subText = sub.text || '';
      const subUserId = sub.userId || sub.user_id || '';
      const fromUser = subUserId && !subUserId.startsWith('bot_');
      if (subText && fromUser && sub.definite === true) {
        storeRoomUserSpeech(roomId, subText, 'room_subtitle', '');
      }
    }
    return;
  }

  if (parsed && typeof parsed === 'object') {
    const stringData = parsed.data && typeof parsed.data === 'string' ? parsed.data : null;
    const asrText = parsed.text || parsed.asr_text || parsed.content ||
      parsed.user_text || parsed.transcript || stringData;
    const isBot = uid && uid.startsWith('bot_');
    if (asrText && !isBot) {
      storeRoomUserSpeech(roomId, asrText, 'room_object', ' (obj)');
    }
  }
}

/**
 * POST /room_message
 *
 * Accepts a raw room message `{ roomId, uid, text }` and records any user
 * speech it carries. The endpoint always answers `{ success: true }`;
 * processing errors are only logged.
 */
router.post('/room_message', (req, res) => {
  try {
    const { roomId, uid, text } = req.body;
    if (roomId && text) {
      captureRoomSpeech(roomId, uid, text);
    }
  } catch (error) {
    console.error('[RoomMsg] Error:', error.message);
  }
  res.json({ success: true });
});
|
||||
|
||||
// Legacy endpoint: logs the incoming body and answers with a deprecation
// notice pointing at fc_callback.
router.post('/tool-callback', async (req, res) => {
  const serializedBody = JSON.stringify(req.body);
  console.log('[ToolCallback] Legacy callback received:', serializedBody);

  const deprecationNotice = { success: true, message: 'deprecated, use fc_callback instead' };
  res.json(deprecationNotice);
});
|
||||
|
||||
// GET /sessions — lists every entry of `activeSessions` with its elapsed
// duration (whole seconds) and the number of subtitles collected so far.
router.get('/sessions', (req, res) => {
  const describeSession = ([id, session]) => {
    const elapsedMs = Date.now() - session.startTime;
    return {
      sessionId: id,
      roomId: session.roomId,
      userId: session.userId,
      duration: Math.floor(elapsedMs / 1000),
      subtitleCount: session.subtitles.length,
    };
  };

  const data = Array.from(activeSessions, describeSession);
  res.json({ success: true, data });
});
|
||||
|
||||
// The module's public value is the configured router itself.
module.exports = router;
|
||||
|
||||
@@ -35,6 +35,51 @@ class ArkChatService {
|
||||
};
|
||||
}
|
||||
|
||||
async summarizeContextForHandoff(messages, maxRounds = 3) {
|
||||
const normalizedMessages = (Array.isArray(messages) ? messages : [])
|
||||
.filter((item) => item && (item.role === 'user' || item.role === 'assistant') && String(item.content || '').trim());
|
||||
|
||||
let startIndex = 0;
|
||||
let userRounds = 0;
|
||||
for (let index = normalizedMessages.length - 1; index >= 0; index -= 1) {
|
||||
if (normalizedMessages[index].role === 'user') {
|
||||
userRounds += 1;
|
||||
startIndex = index;
|
||||
if (userRounds >= Math.max(1, maxRounds)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const recentMessages = normalizedMessages.slice(startIndex);
|
||||
|
||||
if (!recentMessages.length) {
|
||||
return '';
|
||||
}
|
||||
|
||||
const transcript = recentMessages
|
||||
.map((item, index) => `${index + 1}. ${item.role === 'user' ? '用户' : '助手'}:${String(item.content || '').trim()}`)
|
||||
.join('\n');
|
||||
|
||||
if (this._isMockMode()) {
|
||||
const lastUserMessage = [...recentMessages].reverse().find((item) => item.role === 'user');
|
||||
return lastUserMessage ? `用户当前主要在追问:${lastUserMessage.content}` : '';
|
||||
}
|
||||
|
||||
const result = await this.chat([
|
||||
{
|
||||
role: 'system',
|
||||
content: '你是对话交接摘要助手。请基于最近几轮对话生成一段简洁中文摘要,供另一个模型无缝接管会话。摘要必须同时包含:用户当前主要问题、已经确认的信息、仍待解决的问题。不要使用标题、项目符号或编号,不要虚构事实,控制在120字以内。',
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: `请总结以下最近${Math.ceil(recentMessages.length / 2)}轮对话:\n${transcript}`,
|
||||
},
|
||||
], []);
|
||||
|
||||
return String(result.content || '').trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* 非流式调用方舟 LLM
|
||||
*/
|
||||
|
||||
659
test2/server/services/nativeVoiceGateway.js
Normal file
659
test2/server/services/nativeVoiceGateway.js
Normal file
@@ -0,0 +1,659 @@
|
||||
const { WebSocket, WebSocketServer } = require('ws');
|
||||
const url = require('url');
|
||||
const db = require('../db');
|
||||
const arkChatService = require('./arkChatService');
|
||||
const {
|
||||
MsgType,
|
||||
unmarshal,
|
||||
createStartConnectionMessage,
|
||||
createStartSessionMessage,
|
||||
createAudioMessage,
|
||||
createChatTTSTextMessage,
|
||||
createChatRAGTextMessage,
|
||||
} = require('./realtimeDialogProtocol');
|
||||
const {
|
||||
getRuleBasedDirectRouteDecision,
|
||||
normalizeTextForSpeech,
|
||||
splitTextForSpeech,
|
||||
estimateSpeechDurationMs,
|
||||
resolveReply,
|
||||
} = require('./realtimeDialogRouting');
|
||||
|
||||
// Live voice sessions keyed by sessionId: createSession() adds an entry and
// the client socket's 'close' handler removes it.
const sessions = new Map();
|
||||
|
||||
/**
 * Serialises `payload` as JSON and sends it over `ws`.
 * Does nothing when the socket is missing or not in the OPEN state.
 *
 * @param {object|null|undefined} ws - Target socket.
 * @param {*} payload - JSON-serialisable value.
 */
function sendJson(ws, payload) {
  const isWritable = Boolean(ws) && ws.readyState === WebSocket.OPEN;
  if (!isWritable) {
    return;
  }
  ws.send(JSON.stringify(payload));
}
|
||||
|
||||
/**
 * Builds the StartSession payload (asr / tts / dialog sections) from session
 * options, falling back to built-in defaults for any missing option.
 *
 * @param {{speaker?: string, botName?: string, systemRole?: string, speakingStyle?: string}} options
 * @returns {object} Payload object ready to be serialised for the upstream service.
 */
function buildStartSessionPayload(options) {
  const asr = { extra: {} };

  const tts = {
    speaker: options.speaker || 'zh_female_vv_jupiter_bigtts',
    audio_config: {
      channel: 1,
      format: 'pcm_s16le',
      sample_rate: 24000,
    },
  };

  const botName = options.botName || '豆包';
  // Both prompt texts go through the same normaliser before being sent.
  const systemRole = normalizeTextForSpeech(options.systemRole || '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。');
  const speakingStyle = normalizeTextForSpeech(options.speakingStyle || '请使用清晰、自然、简洁的口吻。');

  const dialog = {
    dialog_id: '',
    bot_name: botName,
    system_role: systemRole,
    speaking_style: speakingStyle,
    extra: {
      input_mod: 'audio',
      model: 'O',
      strict_audit: false,
      audit_response: '抱歉,这个问题我暂时无法回答。',
    },
  };

  return { asr, tts, dialog };
}
|
||||
|
||||
/**
 * Decodes `message.payload` as UTF-8 JSON.
 *
 * @param {{payload: Buffer}} message - Message whose payload should be parsed.
 * @returns {*} The parsed value, or null when decoding or parsing fails.
 */
function parseJsonPayload(message) {
  let decoded = null;
  try {
    const rawText = message.payload.toString('utf8');
    decoded = JSON.parse(rawText);
  } catch (error) {
    decoded = null;
  }
  return decoded;
}
|
||||
|
||||
/**
 * Pulls the first non-empty text out of a payload, checking in order:
 * `text`, `content`, `results[0].text`, `results[0].alternatives[0].text`.
 *
 * @param {object|null|undefined} jsonPayload - Parsed upstream payload.
 * @returns {string} The trimmed text, or '' when none of the fields is set.
 */
function extractUserText(jsonPayload) {
  // Readers are evaluated lazily, in priority order.
  const candidateReaders = [
    () => jsonPayload?.text,
    () => jsonPayload?.content,
    () => jsonPayload?.results?.[0]?.text,
    () => jsonPayload?.results?.[0]?.alternatives?.[0]?.text,
  ];

  for (const readCandidate of candidateReaders) {
    const candidate = readCandidate();
    if (candidate) {
      return String(candidate).trim();
    }
  }
  return '';
}
|
||||
|
||||
/**
 * Tells whether a recognition payload is final: either `is_final` is exactly
 * true, or `results` is an array with at least one entry whose `is_interim`
 * is exactly false.
 *
 * @param {object|null|undefined} jsonPayload - Parsed upstream payload.
 * @returns {boolean}
 */
function isFinalUserPayload(jsonPayload) {
  if (jsonPayload?.is_final === true) {
    return true;
  }
  const results = jsonPayload?.results;
  return Array.isArray(results) && results.some((entry) => entry && entry.is_interim === false);
}
|
||||
|
||||
/**
 * Records a final user utterance: stores it in the database (fire-and-forget)
 * and pushes a final `subtitle` frame to the client.
 *
 * The same text arriving again within 5 seconds is treated as a duplicate
 * and ignored.
 *
 * @param {object} session - Session state; reads `sessionId`/`client`, updates the `lastPersistedUser*` and `latestUserText` fields.
 * @param {*} text - Utterance text. Non-string values are treated as empty.
 * @returns {boolean} true when the utterance was recorded, false when it was empty or a duplicate.
 */
function persistUserSpeech(session, text) {
  // Text can originate from client-supplied JSON; a truthy non-string (e.g. a
  // number) used to throw on `.trim()` inside the async message handler.
  const cleanText = typeof text === 'string' ? text.trim() : '';
  if (!cleanText) return false;

  const now = Date.now();
  const isRecentDuplicate = session.lastPersistedUserText === cleanText
    && now - (session.lastPersistedUserAt || 0) < 5000;
  if (isRecentDuplicate) {
    return false;
  }

  session.lastPersistedUserText = cleanText;
  session.lastPersistedUserAt = now;
  session.latestUserText = cleanText;

  // Persistence is best-effort: a DB failure is logged, never surfaced to the caller.
  db.addMessage(session.sessionId, 'user', cleanText, 'voice_asr')
    .catch((e) => console.warn('[NativeVoice][DB] add user failed:', e.message));

  sendJson(session.client, {
    type: 'subtitle',
    role: 'user',
    text: cleanText,
    isFinal: true,
    sequence: `native_user_${now}`,
  });
  return true;
}
|
||||
|
||||
/**
 * Records an assistant utterance: optionally stores it in the database
 * (fire-and-forget) and pushes a final `subtitle` frame to the client.
 *
 * The same text arriving again within 5 seconds is treated as a duplicate
 * and ignored.
 *
 * @param {object} session - Session state; reads `sessionId`/`client`, updates the `lastPersistedAssistant*` fields.
 * @param {*} text - Utterance text. Non-string values are treated as empty.
 * @param {object} [options]
 * @param {string} [options.source='voice_bot'] - Origin label stored with the message and echoed in the subtitle.
 * @param {string|null} [options.toolName=null] - Tool name stored with the message and echoed in the subtitle.
 * @param {boolean} [options.persistToDb=true] - When false, only the subtitle is sent.
 * @param {*} [options.meta=null] - Extra metadata passed through to `db.addMessage`.
 * @returns {boolean} true when the utterance was recorded, false when it was empty or a duplicate.
 */
function persistAssistantSpeech(session, text, { source = 'voice_bot', toolName = null, persistToDb = true, meta = null } = {}) {
  // Guard against truthy non-strings, which used to throw on `.trim()`.
  const cleanText = typeof text === 'string' ? text.trim() : '';
  if (!cleanText) return false;

  const now = Date.now();
  const isRecentDuplicate = session.lastPersistedAssistantText === cleanText
    && now - (session.lastPersistedAssistantAt || 0) < 5000;
  if (isRecentDuplicate) {
    return false;
  }

  session.lastPersistedAssistantText = cleanText;
  session.lastPersistedAssistantAt = now;

  if (persistToDb) {
    // Best-effort persistence: failures are logged only.
    db.addMessage(session.sessionId, 'assistant', cleanText, source, toolName, meta)
      .catch((e) => console.warn('[NativeVoice][DB] add assistant failed:', e.message));
  }

  sendJson(session.client, {
    type: 'subtitle',
    role: 'assistant',
    text: cleanText,
    isFinal: true,
    source,
    toolName,
    sequence: `native_assistant_${now}`,
  });
  return true;
}
|
||||
|
||||
/**
 * Appends one streamed assistant text chunk to the session's stream buffer.
 * When the chunk carries a `reply_id` different from the one being buffered,
 * the buffer is restarted first.
 *
 * @param {object} session - Session state; updates `assistantStreamBuffer` and `assistantStreamReplyId`.
 * @param {object|null} payload - Parsed chunk payload.
 * @returns {string} The accumulated buffer, or '' when the chunk has no text.
 */
function appendAssistantStream(session, payload) {
  const piece = extractUserText(payload);
  if (!piece) {
    return '';
  }

  const incomingReplyId = payload?.reply_id || '';
  const activeReplyId = session.assistantStreamReplyId;
  const belongsToNewReply = Boolean(incomingReplyId)
    && Boolean(activeReplyId)
    && activeReplyId !== incomingReplyId;
  if (belongsToNewReply) {
    session.assistantStreamBuffer = '';
  }

  session.assistantStreamReplyId = incomingReplyId || activeReplyId || '';
  const bufferedSoFar = session.assistantStreamBuffer || '';
  session.assistantStreamBuffer = `${bufferedSoFar}${piece}`;
  return session.assistantStreamBuffer;
}
|
||||
|
||||
/**
 * Empties the assistant stream buffer and records its trimmed content as one
 * assistant utterance.
 *
 * @param {object} session - Session state; `assistantStreamBuffer` and `assistantStreamReplyId` are always reset.
 * @param {object} [options]
 * @param {string} [options.source='voice_bot']
 * @param {string|null} [options.toolName=null]
 * @param {*} [options.meta=null]
 * @returns {boolean} Result of persistAssistantSpeech, or false when the buffer was blank.
 */
function flushAssistantStream(session, { source = 'voice_bot', toolName = null, meta = null } = {}) {
  const bufferedText = (session.assistantStreamBuffer || '').trim();

  // Reset before persisting so the buffer is cleared even for blank content.
  session.assistantStreamBuffer = '';
  session.assistantStreamReplyId = '';

  return bufferedText
    ? persistAssistantSpeech(session, bufferedText, { source, toolName, meta })
    : false;
}
|
||||
|
||||
/**
 * Prepares `session.handoffSummary` from the session's stored history
 * (last 20 messages, summarised over 3 rounds). An empty history or any
 * failure leaves the summary as ''. `handoffSummaryUsed` is always reset to
 * false. Never throws.
 *
 * @param {object} session - Session state; reads `sessionId`.
 * @returns {Promise<void>}
 */
async function loadHandoffSummaryForVoice(session) {
  let summary = '';
  try {
    const history = await db.getHistoryForLLM(session.sessionId, 20);
    if (history.length) {
      summary = await arkChatService.summarizeContextForHandoff(history, 3);
      console.log(`[NativeVoice] Handoff summary prepared for ${session.sessionId}: ${summary ? 'yes' : 'no'}`);
    }
  } catch (error) {
    summary = '';
    console.warn('[NativeVoice] loadHandoffSummaryForVoice failed:', error.message);
  }
  session.handoffSummary = summary;
  session.handoffSummaryUsed = false;
}
|
||||
|
||||
/**
 * Streams `speechText` to the upstream service as ChatTTSText frames: one
 * frame per chunk (the first flagged `start`), followed by an empty `end`
 * frame. While the speech is expected to be playing the session is marked as
 * "sending chat TTS text"; a timer clears that mark once the estimated
 * duration (+800 ms) has elapsed.
 *
 * Does nothing when there are no chunks or the upstream socket is not open.
 *
 * @param {object} session - Session state; reads `upstream`, `client`, `sessionId`.
 * @param {string} speechText - Text to be spoken.
 * @returns {Promise<void>}
 */
async function sendSpeechText(session, speechText) {
  const chunks = splitTextForSpeech(speechText);
  const upstreamOpen = session.upstream && session.upstream.readyState === WebSocket.OPEN;
  if (!chunks.length || !upstreamOpen) {
    return;
  }

  console.log(`[NativeVoice] sendSpeechText start session=${session.sessionId} chunks=${chunks.length} textLen=${speechText.length}`);

  session.isSendingChatTTSText = true;
  session.currentTtsType = 'chat_tts_text';
  session.chatTTSUntil = Date.now() + estimateSpeechDurationMs(speechText) + 800;

  // (Re)arm the timer that lifts the "sending" mark once playback should be over.
  const releaseSendingFlag = () => {
    session.chatTTSTimer = null;
    if ((session.chatTTSUntil || 0) <= Date.now()) {
      session.isSendingChatTTSText = false;
    }
  };
  clearTimeout(session.chatTTSTimer);
  const releaseDelay = Math.max(200, session.chatTTSUntil - Date.now() + 50);
  session.chatTTSTimer = setTimeout(releaseSendingFlag, releaseDelay);

  sendJson(session.client, { type: 'tts_reset', ttsType: 'chat_tts_text' });

  chunks.forEach((chunk, index) => {
    console.log(`[NativeVoice] sendSpeechText chunk session=${session.sessionId} index=${index + 1}/${chunks.length} len=${chunk.length} start=${index === 0} end=false text=${JSON.stringify(chunk.slice(0, 80))}`);
    const frame = createChatTTSTextMessage(session.sessionId, {
      start: index === 0,
      end: false,
      content: chunk,
    });
    session.upstream.send(frame);
  });

  console.log(`[NativeVoice] sendSpeechText end session=${session.sessionId}`);
  const closingFrame = createChatTTSTextMessage(session.sessionId, {
    start: false,
    end: true,
    content: '',
  });
  session.upstream.send(closingFrame);
}
|
||||
|
||||
/**
 * Plays the session greeting once.
 *
 * If there is no greeting text or it was already sent, only the `ready`
 * frame is emitted (once). Otherwise the greeting is shown as a subtitle
 * (not stored in the DB), `ready` is scheduled for after the estimated
 * greeting duration (+300 ms), and the greeting speech starts after 800 ms.
 * If no acknowledgement clears `pendingGreetingAck` within 2 s, the greeting
 * is replayed one time.
 *
 * @param {object} session - Session state; uses the greeting/ready timer and flag fields.
 */
function sendGreeting(session) {
  const greetingText = normalizeTextForSpeech(session.greetingText || '你好,我是你的智能语音助手,有什么可以帮你的吗?');

  const announceReadyOnce = () => {
    if (session.readySent) {
      return;
    }
    session.readySent = true;
    sendJson(session.client, { type: 'ready' });
  };

  if (!greetingText || session.hasSentGreeting) {
    announceReadyOnce();
    return;
  }

  session.hasSentGreeting = true;
  persistAssistantSpeech(session, greetingText, { source: 'voice_bot', persistToDb: false });

  clearTimeout(session.readyTimer);
  session.readyTimer = setTimeout(() => {
    session.readyTimer = null;
    announceReadyOnce();
  }, estimateSpeechDurationMs(greetingText) + 300);

  function speakGreeting() {
    session.pendingGreetingAck = true;

    // Retry once when the acknowledgement does not arrive in time.
    clearTimeout(session.greetingAckTimer);
    session.greetingAckTimer = setTimeout(() => {
      session.greetingAckTimer = null;
      const shouldRetry = session.pendingGreetingAck && session.greetingRetryCount < 1;
      if (!shouldRetry) {
        return;
      }
      session.greetingRetryCount += 1;
      console.warn(`[NativeVoice] greeting ack timeout, retry session=${session.sessionId}`);
      speakGreeting();
    }, 2000);

    sendSpeechText(session, greetingText).catch((error) => {
      // Roll back so a later call may try the greeting again.
      session.pendingGreetingAck = false;
      clearTimeout(session.greetingAckTimer);
      session.greetingAckTimer = null;
      session.hasSentGreeting = false;
      console.warn('[NativeVoice] greeting failed:', error.message);
    });
  }

  clearTimeout(session.greetingTimer);
  session.greetingTimer = setTimeout(() => {
    session.greetingTimer = null;
    speakGreeting();
  }, 800);
}
|
||||
|
||||
/**
 * Sends external RAG items to the upstream service as one ChatRAGText frame
 * whose body is the JSON-serialised item list.
 *
 * Items without `content` are dropped; nothing is sent when the upstream
 * socket is not open or no usable item remains.
 *
 * @param {object} session - Session state; reads `upstream` and `sessionId`.
 * @param {Array<object>} items - Candidate RAG items.
 * @returns {Promise<void>}
 */
async function sendExternalRag(session, items) {
  const { upstream } = session;
  const upstreamOpen = upstream && upstream.readyState === WebSocket.OPEN;
  if (!upstreamOpen) {
    return;
  }

  const candidates = Array.isArray(items) ? items : [];
  const usableItems = candidates.filter((entry) => entry && entry.content);
  if (usableItems.length === 0) {
    return;
  }

  const frame = createChatRAGTextMessage(session.sessionId, JSON.stringify(usableItems));
  upstream.send(frame);
}
|
||||
|
||||
/**
 * Arms `session.queuedReplyTimer` so the queued user text is processed once
 * the current direct speech should be over (at least 200 ms from now).
 *
 * @param {object} session - Session state.
 * @param {string} fallbackText - Text to process if `session.queuedUserText` is empty when the timer fires ('' = nothing).
 */
function scheduleQueuedReply(session, fallbackText) {
  const waitMs = Math.max(200, (session.directSpeakUntil || 0) - Date.now() + 200);
  clearTimeout(session.queuedReplyTimer);
  session.queuedReplyTimer = setTimeout(() => {
    session.queuedReplyTimer = null;
    const queuedText = session.queuedUserText || fallbackText;
    session.queuedUserText = '';
    // processReply ignores empty text, so an already-consumed queue is a no-op.
    processReply(session, queuedText).catch((err) => {
      console.error('[NativeVoice] delayed queued processReply failed:', err.message);
    });
  }, waitMs);
}

/**
 * Resolves and delivers the reply for one user utterance.
 *
 * Only one utterance is processed at a time. An utterance arriving while
 * another is being processed, or while a direct (local TTS) reply is still
 * being spoken, is kept in `session.queuedUserText` (latest wins) and picked
 * up later.
 *
 * Depending on `resolveReply`'s `delivery`:
 *   - 'upstream_chat': the upstream model answers; only bookkeeping is done here.
 *   - 'external_rag' : RAG items are pushed upstream, which then answers.
 *   - otherwise      : `speechText` is shown as a subtitle and spoken via sendSpeechText.
 *
 * Failures are reported to the client as an `error` frame.
 *
 * @param {object} session - Session state.
 * @param {*} text - User utterance. Non-string values are treated as empty.
 * @returns {Promise<void>}
 */
async function processReply(session, text) {
  const cleanText = typeof text === 'string' ? text.trim() : '';
  if (!cleanText) return;

  if (session.processingReply) {
    // The running invocation picks this up in its `finally` block.
    session.queuedUserText = cleanText;
    console.log(`[NativeVoice] processReply queued(busy) session=${session.sessionId} text=${JSON.stringify(cleanText.slice(0, 80))}`);
    return;
  }

  const now = Date.now();
  if (session.directSpeakUntil && now < session.directSpeakUntil) {
    session.queuedUserText = cleanText;
    console.log(`[NativeVoice] processReply queued(speaking) session=${session.sessionId} waitMs=${session.directSpeakUntil - now} text=${JSON.stringify(cleanText.slice(0, 80))}`);
    // Fix: nothing else is running that would drain the queue, so without a
    // timer this text stayed queued until some later utterance happened to
    // finish. No fallback text: if the queue was drained meanwhile, do nothing.
    scheduleQueuedReply(session, '');
    return;
  }

  session.processingReply = true;
  sendJson(session.client, { type: 'assistant_pending', active: true });
  console.log(`[NativeVoice] processReply start session=${session.sessionId} text=${JSON.stringify(cleanText.slice(0, 120))}`);

  try {
    const { delivery, speechText, ragItems, source, toolName, routeDecision, responseMeta } = await resolveReply(session.sessionId, session, cleanText);

    if (delivery === 'upstream_chat') {
      session.awaitingUpstreamReply = true;
      session.pendingAssistantSource = 'voice_bot';
      session.pendingAssistantToolName = null;
      session.pendingAssistantMeta = responseMeta;
      console.log(`[NativeVoice] processReply handoff session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=upstream_chat`);
      return;
    }

    if (delivery === 'external_rag') {
      session.awaitingUpstreamReply = true;
      session.pendingAssistantSource = source;
      session.pendingAssistantToolName = toolName;
      session.pendingAssistantMeta = responseMeta;
      console.log(`[NativeVoice] processReply handoff session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=external_rag items=${Array.isArray(ragItems) ? ragItems.length : 0}`);
      await sendExternalRag(session, ragItems);
      return;
    }

    if (!speechText) {
      console.log(`[NativeVoice] processReply empty session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=${delivery || 'unknown'}`);
      session.isSendingChatTTSText = false;
      session.chatTTSUntil = 0;
      return;
    }

    // Local TTS: mark the session as speaking before any frame is sent.
    session.isSendingChatTTSText = true;
    session.chatTTSUntil = Date.now() + 30000;
    console.log(`[NativeVoice] processReply resolved session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=local_tts source=${source} tool=${toolName || 'chat'} speechLen=${speechText.length}`);
    persistAssistantSpeech(session, speechText, { source, toolName, meta: responseMeta });
    session.directSpeakUntil = Date.now() + estimateSpeechDurationMs(speechText);
    await sendSpeechText(session, speechText);
  } catch (error) {
    console.error('[NativeVoice] processReply failed:', error.message);
    sendJson(session.client, { type: 'error', error: error.message });
  } finally {
    session.processingReply = false;
    if (!session.awaitingUpstreamReply) {
      sendJson(session.client, { type: 'assistant_pending', active: false });
    }

    // Drain whatever was queued while this utterance was being processed.
    const pending = session.queuedUserText;
    session.queuedUserText = '';
    if (pending && pending !== cleanText) {
      const stillSpeaking = Boolean(session.directSpeakUntil) && Date.now() < session.directSpeakUntil;
      if (!stillSpeaking) {
        setTimeout(() => processReply(session, pending).catch((err) => {
          console.error('[NativeVoice] queued processReply failed:', err.message);
        }), 200);
      } else {
        scheduleQueuedReply(session, pending);
      }
    }
  }
}
|
||||
|
||||
/**
 * Handles one binary frame received from the upstream service.
 *
 * Audio frames are relayed to the client as binary (unless a local
 * chat-TTS reply is being sent while the upstream reports the 'default'
 * TTS type). Error frames are forwarded as `error`. Full server frames are
 * dispatched on their numeric event code; unknown events are forwarded to
 * the client as generic `event` frames.
 *
 * @param {object} session - Session state.
 * @param {Buffer} data - Raw upstream frame.
 */
function handleUpstreamMessage(session, data) {
  let message;
  try {
    message = unmarshal(data);
  } catch (error) {
    console.warn('[NativeVoice] unmarshal failed:', error.message);
    return;
  }

  if (message.type === MsgType.AUDIO_ONLY_SERVER) {
    const suppressUpstreamVoice = session.isSendingChatTTSText && session.currentTtsType === 'default';
    if (suppressUpstreamVoice) {
      return;
    }
    if (session.client && session.client.readyState === WebSocket.OPEN) {
      session.client.send(message.payload, { binary: true });
    }
    return;
  }

  const payload = parseJsonPayload(message);

  if (message.type === MsgType.ERROR) {
    console.error(`[NativeVoice] upstream error session=${session.sessionId} code=${message.event} payload=${message.payload.toString('utf8').slice(0, 200)}`);
    sendJson(session.client, { type: 'error', error: message.payload.toString('utf8') });
    return;
  }

  if (message.type !== MsgType.FULL_SERVER) {
    return;
  }

  // --- small helpers shared by several event branches ---
  const isLocalChatTTSTextActive = () => !!session.isSendingChatTTSText && (session.chatTTSUntil || 0) > Date.now();
  const discardStreamBuffer = () => {
    session.assistantStreamBuffer = '';
    session.assistantStreamReplyId = '';
  };
  const stopPendingIndicator = () => {
    session.awaitingUpstreamReply = false;
    sendJson(session.client, { type: 'assistant_pending', active: false });
  };
  const clearPendingAssistant = () => {
    session.pendingAssistantSource = null;
    session.pendingAssistantToolName = null;
    session.pendingAssistantMeta = null;
  };

  const handlePartialUserText = () => {
    const text = extractUserText(payload);
    if (!text) {
      return;
    }
    console.log(`[NativeVoice] upstream partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 120))}`);
    session.latestUserText = text;
    sendJson(session.client, {
      type: 'subtitle',
      role: 'user',
      text,
      isFinal: false,
      sequence: `native_partial_${Date.now()}`,
    });
  };

  const handleFinalUserText = () => {
    const finalText = extractUserText(payload) || session.latestUserText || '';
    console.log(`[NativeVoice] upstream final session=${session.sessionId} text=${JSON.stringify(finalText.slice(0, 120))}`);
    if (session.directSpeakUntil && Date.now() < session.directSpeakUntil) {
      // The user spoke over a direct reply: drop the "speaking" state.
      console.log(`[NativeVoice] user interrupt during speaking session=${session.sessionId}`);
      session.directSpeakUntil = 0;
      session.isSendingChatTTSText = false;
      session.chatTTSUntil = 0;
      clearTimeout(session.chatTTSTimer);
    }
    if (persistUserSpeech(session, finalText)) {
      processReply(session, finalText).catch((error) => {
        console.error('[NativeVoice] processReply error:', error.message);
      });
    }
  };

  switch (message.event) {
    case 150: {
      session.upstreamReady = true;
      console.log(`[NativeVoice] upstream ready session=${session.sessionId}`);
      if (!session.readySent) {
        session.readySent = true;
        sendJson(session.client, { type: 'ready' });
      }
      return;
    }

    case 350: {
      session.currentTtsType = payload?.tts_type || '';
      if (payload?.tts_type === 'chat_tts_text' && session.pendingGreetingAck) {
        session.pendingGreetingAck = false;
        clearTimeout(session.greetingAckTimer);
        session.greetingAckTimer = null;
      }
      console.log(`[NativeVoice] upstream tts_event session=${session.sessionId} ttsType=${payload?.tts_type || ''}`);
      sendJson(session.client, { type: 'tts_event', payload });
      return;
    }

    case 351: {
      if (isLocalChatTTSTextActive()) {
        discardStreamBuffer();
        return;
      }
      // Capture the attribution before it is cleared below.
      const pendingAssistantSource = session.pendingAssistantSource || 'voice_bot';
      const pendingAssistantToolName = session.pendingAssistantToolName || null;
      const pendingAssistantMeta = session.pendingAssistantMeta || null;
      const attribution = {
        source: pendingAssistantSource,
        toolName: pendingAssistantToolName,
        meta: pendingAssistantMeta,
      };
      stopPendingIndicator();
      flushAssistantStream(session, attribution);
      clearPendingAssistant();
      const assistantText = extractUserText(payload);
      if (assistantText) {
        console.log(`[NativeVoice] upstream assistant session=${session.sessionId} text=${JSON.stringify(assistantText.slice(0, 120))}`);
        persistAssistantSpeech(session, assistantText, {
          source: pendingAssistantSource,
          toolName: pendingAssistantToolName,
          meta: pendingAssistantMeta,
        });
        clearPendingAssistant();
      }
      return;
    }

    case 550: {
      if (isLocalChatTTSTextActive()) {
        return;
      }
      if (session.awaitingUpstreamReply) {
        stopPendingIndicator();
      }
      const fullText = appendAssistantStream(session, payload);
      if (fullText) {
        console.log(`[NativeVoice] upstream assistant chunk session=${session.sessionId} len=${fullText.length} text=${JSON.stringify(fullText.slice(0, 120))}`);
      }
      return;
    }

    case 559: {
      if (isLocalChatTTSTextActive()) {
        discardStreamBuffer();
        return;
      }
      stopPendingIndicator();
      flushAssistantStream(session, {
        source: session.pendingAssistantSource || 'voice_bot',
        toolName: session.pendingAssistantToolName || null,
        meta: session.pendingAssistantMeta || null,
      });
      clearPendingAssistant();
      return;
    }

    case 450: {
      handlePartialUserText();
      return;
    }

    case 451: {
      // 451 carries either an interim or a final recognition result.
      if (isFinalUserPayload(payload)) {
        handleFinalUserText();
      } else {
        handlePartialUserText();
      }
      return;
    }

    case 459: {
      handleFinalUserText();
      return;
    }

    default: {
      sendJson(session.client, {
        type: 'event',
        event: message.event,
        payload,
      });
    }
  }
}
|
||||
|
||||
/**
 * Wires the browser-facing socket of a session.
 *
 * 'message':
 *   - binary frames are audio and are relayed upstream once it is ready;
 *   - JSON `{type:'start', ...}` stores the session options, opens the
 *     upstream connection and starts preparing the handoff summary;
 *   - JSON `{type:'stop'}` closes the client socket;
 *   - JSON `{type:'text', text}` is treated as a typed user utterance.
 * 'close': cancels the session timers, closes the upstream socket and
 *   removes the session from the registry.
 *
 * @param {object} session - Session state; `session.client` must expose `on()` and `close()`.
 */
function attachClientHandlers(session) {
  session.client.on('message', async (raw, isBinary) => {
    if (isBinary) {
      const upstreamWritable = session.upstream
        && session.upstream.readyState === WebSocket.OPEN
        && session.upstreamReady;
      if (upstreamWritable) {
        session.upstream.send(createAudioMessage(session.sessionId, raw));
      }
      return;
    }

    let parsed;
    try {
      parsed = JSON.parse(raw.toString('utf8'));
    } catch (error) {
      sendJson(session.client, { type: 'error', error: 'invalid client json' });
      return;
    }
    // Fix: valid JSON that is not an object (e.g. `null`) used to throw on
    // `parsed.type` inside this async handler, i.e. an unhandled rejection.
    if (!parsed || typeof parsed !== 'object') {
      sendJson(session.client, { type: 'error', error: 'invalid client json' });
      return;
    }

    if (parsed.type === 'start') {
      session.botName = parsed.botName || '豆包';
      session.systemRole = parsed.systemRole || '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。';
      session.speakingStyle = parsed.speakingStyle || '请使用清晰、自然、简洁的口吻。';
      session.speaker = parsed.speaker || 'zh_female_vv_jupiter_bigtts';
      session.greetingText = parsed.greetingText || session.greetingText || '你好,我是你的智能语音助手,有什么可以帮你的吗?';
      session.userId = parsed.userId || session.userId || null;
      // NOTE(review): a second 'start' replaces session.upstream without
      // closing the previous connection — confirm whether restarts are expected.
      session.upstream = createUpstreamConnection(session);
      loadHandoffSummaryForVoice(session).catch((error) => {
        console.warn('[NativeVoice] async loadHandoffSummaryForVoice failed:', error.message);
      });
      return;
    }

    if (parsed.type === 'stop') {
      session.client.close();
      return;
    }

    // Fix: only a non-empty *string* is a valid utterance; other truthy values
    // (numbers, objects) used to throw further down on `.trim()`.
    if (parsed.type === 'text' && typeof parsed.text === 'string' && parsed.text) {
      persistUserSpeech(session, parsed.text);
      processReply(session, parsed.text).catch((error) => {
        console.error('[NativeVoice] text processReply failed:', error.message);
      });
    }
  });

  session.client.on('close', () => {
    clearTimeout(session.chatTTSTimer);
    clearTimeout(session.greetingTimer);
    clearTimeout(session.greetingAckTimer);
    clearTimeout(session.readyTimer);
    // Fix: a queued reply must not run against a session whose client is gone.
    clearTimeout(session.queuedReplyTimer);
    session.queuedReplyTimer = null;

    // Fix: also abort an upstream socket that is still connecting, otherwise
    // it would finish the handshake and stay open with no client attached.
    const { upstream } = session;
    const upstreamAlive = upstream
      && (upstream.readyState === WebSocket.OPEN || upstream.readyState === WebSocket.CONNECTING);
    if (upstreamAlive) {
      upstream.close();
    }

    // Fix: only unregister this session; a newer connection may have reused
    // the same sessionId and replaced the registry entry.
    if (sessions.get(session.sessionId) === session) {
      sessions.delete(session.sessionId);
    }
  });
}
|
||||
|
||||
/**
 * Opens the upstream realtime-dialogue WebSocket for a session and wires its
 * events: on open the StartConnection and StartSession frames are sent;
 * incoming frames go to handleUpstreamMessage; errors and closure are
 * reported to the client.
 *
 * Credentials come from VOLC_S2S_TOKEN, VOLC_S2S_APP_ID and
 * VOLC_DIALOG_APP_KEY (the latter has a built-in fallback).
 *
 * @param {object} session - Session state; `sessionId` is used as the connect id.
 * @returns {WebSocket} The upstream socket (not yet open).
 */
function createUpstreamConnection(session) {
  const endpoint = 'wss://openspeech.bytedance.com/api/v3/realtime/dialogue';
  const headers = {
    'X-Api-Resource-Id': 'volc.speech.dialog',
    'X-Api-Access-Key': process.env.VOLC_S2S_TOKEN,
    'X-Api-App-Key': process.env.VOLC_DIALOG_APP_KEY || 'PlgvMymc7f3tQnJ6',
    'X-Api-App-ID': process.env.VOLC_S2S_APP_ID,
    'X-Api-Connect-Id': session.sessionId,
  };
  const upstream = new WebSocket(endpoint, { headers });

  const onOpen = () => {
    upstream.send(createStartConnectionMessage());
    const startPayload = buildStartSessionPayload(session);
    upstream.send(createStartSessionMessage(session.sessionId, startPayload));
  };

  const onMessage = (data, isBinary) => {
    const isTextFrame = !isBinary && typeof data === 'string';
    if (isTextFrame) {
      sendJson(session.client, { type: 'server_text', text: data });
      return;
    }
    const frame = Buffer.isBuffer(data) ? data : Buffer.from(data);
    handleUpstreamMessage(session, frame);
  };

  const onError = (error) => {
    console.error('[NativeVoice] upstream error:', error.message);
    sendJson(session.client, { type: 'error', error: error.message });
  };

  const onClose = () => {
    sendJson(session.client, { type: 'closed' });
  };

  upstream.on('open', onOpen);
  upstream.on('message', onMessage);
  upstream.on('error', onError);
  upstream.on('close', onClose);

  return upstream;
}
|
||||
|
||||
/**
 * Creates the state object for one client connection, registers it in
 * `sessions` under `sessionId` and attaches the client socket handlers.
 *
 * Every field that other functions in this module read or write is
 * initialised here, including the chat-TTS timer/deadline and the
 * user-utterance de-duplication fields that were previously created lazily.
 *
 * @param {object} client - Browser-facing WebSocket.
 * @param {string} sessionId - Session identifier.
 * @returns {object} The new session state.
 */
function createSession(client, sessionId) {
  const session = {
    sessionId,
    client,

    // Upstream connection
    upstream: null,
    upstreamReady: false,

    // Local chat-TTS playback state
    isSendingChatTTSText: false,
    chatTTSTimer: null,
    chatTTSUntil: 0,
    currentTtsType: '',
    directSpeakUntil: 0,

    // User utterances and reply queueing
    latestUserText: '',
    queuedUserText: '',
    processingReply: false,
    queuedReplyTimer: null,
    lastPersistedUserText: '',
    lastPersistedUserAt: 0,

    // Assistant utterances
    lastPersistedAssistantText: '',
    lastPersistedAssistantAt: 0,
    assistantStreamBuffer: '',
    assistantStreamReplyId: '',

    // Persona (may be overridden by the client's 'start' message)
    botName: '豆包',
    systemRole: '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。',
    speakingStyle: '请使用清晰、自然、简洁的口吻。',
    speaker: 'zh_female_vv_jupiter_bigtts',
    greetingText: '你好,我是你的智能语音助手,有什么可以帮你的吗?',

    // Greeting / ready handshake
    hasSentGreeting: false,
    greetingTimer: null,
    greetingAckTimer: null,
    pendingGreetingAck: false,
    greetingRetryCount: 0,
    readyTimer: null,
    readySent: false,

    // Text -> voice handoff
    handoffSummary: '',
    handoffSummaryUsed: false,

    // Attribution of the reply the upstream model is about to produce
    awaitingUpstreamReply: false,
    pendingAssistantSource: null,
    pendingAssistantToolName: null,
    pendingAssistantMeta: null,
  };

  sessions.set(sessionId, session);
  attachClientHandlers(session);
  return session;
}
|
||||
|
||||
/**
 * Mounts the realtime-dialog WebSocket endpoint (`/ws/realtime-dialog`) on an
 * existing HTTP server.
 *
 * Each connection must carry a `sessionId` query parameter (otherwise it is
 * closed immediately); `userId` is optional. A session is created, a DB
 * session row is requested (failures are only logged) and the client is told
 * it is `connected`.
 *
 * @param {import('http').Server} server - HTTP server to attach to.
 * @returns {WebSocketServer} The created WebSocket server.
 */
function setupNativeVoiceGateway(server) {
  const wss = new WebSocketServer({ server, path: '/ws/realtime-dialog' });

  wss.on('connection', async (client, req) => {
    // The WHATWG URL API replaces the legacy `url.parse()`. Besides being the
    // supported API, `searchParams.get()` always yields a single string, so a
    // repeated `sessionId` parameter can no longer produce an array key.
    let query;
    try {
      // req.url is path-only; the base is just a placeholder to make it absolute.
      query = new URL(req.url, 'http://localhost').searchParams;
    } catch (error) {
      client.close();
      return;
    }

    const sessionId = query.get('sessionId');
    if (!sessionId) {
      client.close();
      return;
    }
    const userId = query.get('userId') || null;

    const session = createSession(client, sessionId);
    session.userId = userId;

    try {
      await db.createSession(sessionId, userId, 'voice');
    } catch (error) {
      // Persistence is best-effort; the voice session continues regardless.
      console.warn('[NativeVoice][DB] createSession failed:', error.message);
    }

    sendJson(client, { type: 'connected', sessionId });
  });

  return wss;
}
|
||||
|
||||
// Only the gateway bootstrapper is part of this module's public surface.
module.exports = { setupNativeVoiceGateway };
|
||||
205
test2/server/services/realtimeDialogProtocol.js
Normal file
205
test2/server/services/realtimeDialogProtocol.js
Normal file
@@ -0,0 +1,205 @@
|
||||
// Building blocks of the 4-byte frame header assembled by buildHeader():
//   byte 0 = VERSION_1 | HEADER_SIZE_4
//   byte 1 = (message type << 4) | type flags
//   byte 2 = serialization | compression
const HEADER_SIZE_4 = 0x01;
const VERSION_1 = 0x10;
const SERIALIZATION_JSON = 0x10;
const SERIALIZATION_RAW = 0x00;
const COMPRESSION_NONE = 0x00;
// Type-flag bit telling that an event number follows the header.
const MSG_TYPE_FLAG_WITH_EVENT = 0x04;
|
||||
|
||||
// Message type codes; each fits the 4-bit type field of header byte 1.
const MsgType = {
  INVALID: 0b0000,
  FULL_CLIENT: 0b0001,
  AUDIO_ONLY_CLIENT: 0b0010,
  FULL_SERVER: 0b1001,
  AUDIO_ONLY_SERVER: 0b1011,
  FRONT_END_RESULT_SERVER: 0b1100,
  ERROR: 0b1111,
};
|
||||
|
||||
/**
 * Maps a numeric message type to its MsgType key.
 *
 * @param {number} value - Message type code.
 * @returns {string} The matching key, or 'INVALID' when the code is unknown.
 */
function getMessageTypeName(value) {
  for (const [name, code] of Object.entries(MsgType)) {
    if (code === value) {
      return name;
    }
  }
  return 'INVALID';
}
|
||||
|
||||
/**
 * Tells whether the "with event" bit is set in a frame's type flags.
 *
 * @param {number} typeFlag - Low nibble of header byte 1.
 * @returns {boolean}
 */
function containsEvent(typeFlag) {
  const eventBits = typeFlag & MSG_TYPE_FLAG_WITH_EVENT;
  return eventBits === MSG_TYPE_FLAG_WITH_EVENT;
}
|
||||
|
||||
/**
 * Tells whether frames of the given event carry a session id field.
 * Events 1, 2, 50, 51 and 52 do not; every other value does.
 *
 * @param {number} event - Event number.
 * @returns {boolean}
 */
function shouldHandleSessionId(event) {
  const eventsWithoutSessionId = [1, 2, 50, 51, 52];
  return !eventsWithoutSessionId.includes(event);
}
|
||||
|
||||
/**
 * Writes a signed 32-bit big-endian integer.
 *
 * @param {Buffer} buffer - Destination buffer.
 * @param {number} value - Integer to write.
 * @param {number} offset - Write position.
 * @returns {number} The position right after the written integer.
 */
function writeInt(buffer, value, offset) {
  const INT32_BYTES = 4;
  buffer.writeInt32BE(value, offset);
  return offset + INT32_BYTES;
}
|
||||
|
||||
/**
 * Writes a UTF-8 string prefixed with its byte length (32-bit big-endian).
 * A missing/empty value is written as length 0.
 *
 * @param {Buffer} buffer - Destination buffer.
 * @param {string} value - String to write.
 * @param {number} offset - Write position.
 * @returns {number} The position right after the written string.
 */
function writeStringWithLength(buffer, value, offset) {
  const encoded = Buffer.from(value || '', 'utf8');
  const bodyStart = writeInt(buffer, encoded.length, offset);
  encoded.copy(buffer, bodyStart);
  return bodyStart + encoded.length;
}
|
||||
|
||||
/**
 * Writes a payload prefixed with its byte length (32-bit big-endian).
 * Non-Buffer payloads are converted with Buffer.from; a missing payload is
 * written as length 0.
 *
 * @param {Buffer} buffer - Destination buffer.
 * @param {Buffer|string} payload - Payload to write.
 * @param {number} offset - Write position.
 * @returns {number} The position right after the written payload.
 */
function writePayload(buffer, payload, offset) {
  let body = payload;
  if (!Buffer.isBuffer(body)) {
    body = Buffer.from(payload || '');
  }
  const bodyStart = writeInt(buffer, body.length, offset);
  body.copy(buffer, bodyStart);
  return bodyStart + body.length;
}
|
||||
|
||||
/**
 * Builds the 4-byte frame header.
 *
 * @param {number} type - Message type (only the low 4 bits are used).
 * @param {number} typeFlag - Type flags (only the low 4 bits are used).
 * @param {number} serialization - Serialization bits for byte 2.
 * @returns {Buffer} A new 4-byte buffer; the last byte is always 0.
 */
function buildHeader(type, typeFlag, serialization) {
  const versionAndSize = VERSION_1 | HEADER_SIZE_4;
  const typeNibble = (type & 0x0f) << 4;
  const flagNibble = typeFlag & 0x0f;
  const typeAndFlags = typeNibble | flagNibble;
  const encoding = serialization | COMPRESSION_NONE;
  const reserved = 0;
  return Buffer.from([versionAndSize, typeAndFlags, encoding, reserved]);
}
|
||||
|
||||
/**
 * Serialises a message into a binary frame:
 *
 *   header (4 bytes)
 *   [event (int32)]                     when the type flags include "with event"
 *   [session id (int32 length + bytes)] when an event is present and that event carries a session id
 *   payload (int32 length + bytes)
 *
 * @param {object} message
 * @param {number} message.type - Message type code.
 * @param {number} [message.typeFlag] - Type flags; defaults to "with event" only when omitted (null/undefined).
 * @param {number} [message.event] - Event number (written as 0 when missing).
 * @param {string} [message.sessionId] - Session id (written as empty when missing).
 * @param {Buffer|string} [message.payload] - Payload; strings are converted with Buffer.from.
 * @param {object} [options]
 * @param {boolean} [options.rawPayload=false] - Mark the payload as raw instead of JSON.
 * @returns {Buffer} The encoded frame.
 */
function marshal(message, { rawPayload = false } = {}) {
  const { type, event, sessionId } = message;
  // Fix: `||` replaced an explicit `typeFlag: 0` (a frame without an event)
  // with the "with event" default; `??` only fills in a missing value.
  const typeFlag = message.typeFlag ?? MSG_TYPE_FLAG_WITH_EVENT;
  const payload = Buffer.isBuffer(message.payload) ? message.payload : Buffer.from(message.payload || '');
  const serialization = rawPayload ? SERIALIZATION_RAW : SERIALIZATION_JSON;

  // Decide once which optional sections are present.
  const hasEvent = containsEvent(typeFlag);
  const hasSessionId = hasEvent && shouldHandleSessionId(event);

  let size = 4; // header
  if (hasEvent) {
    size += 4;
  }
  if (hasSessionId) {
    size += 4 + Buffer.byteLength(sessionId || '', 'utf8');
  }
  size += 4 + payload.length;

  // allocUnsafe is fine here: every byte of `size` is overwritten below.
  const buffer = Buffer.allocUnsafe(size);
  buildHeader(type, typeFlag, serialization).copy(buffer, 0);

  let offset = 4;
  if (hasEvent) {
    offset = writeInt(buffer, event || 0, offset);
  }
  if (hasSessionId) {
    offset = writeStringWithLength(buffer, sessionId || '', offset);
  }
  writePayload(buffer, payload, offset);
  return buffer;
}
|
||||
|
||||
/**
 * Reads a length-prefixed UTF-8 string and advances the shared cursor.
 *
 * @param {Buffer} buffer - Frame being decoded.
 * @param {{offset: number}} offsetObj - Mutable cursor; moved past the prefix and the string.
 * @returns {string} The decoded string, or '' when the declared length is zero or negative.
 */
function readStringWithLength(buffer, offsetObj) {
  const byteLength = buffer.readInt32BE(offsetObj.offset);
  const start = offsetObj.offset + 4;
  offsetObj.offset = start;
  if (byteLength <= 0) {
    return '';
  }
  const end = start + byteLength;
  offsetObj.offset = end;
  return buffer.subarray(start, end).toString('utf8');
}
|
||||
|
||||
/**
 * Reads a length-prefixed payload and advances the shared cursor.
 *
 * @param {Buffer} buffer - Frame being decoded.
 * @param {{offset: number}} offsetObj - Mutable cursor; moved past the prefix and the payload.
 * @returns {Buffer} A view over the payload bytes, or an empty Buffer when the
 *   declared length is zero or negative.
 */
function readPayload(buffer, offsetObj) {
  const byteLength = buffer.readInt32BE(offsetObj.offset);
  const start = offsetObj.offset + 4;
  offsetObj.offset = start;
  if (byteLength <= 0) {
    return Buffer.alloc(0);
  }
  const end = start + byteLength;
  offsetObj.offset = end;
  return buffer.subarray(start, end);
}
|
||||
|
||||
/**
 * Decodes a binary frame into a message object.
 *
 * @param {Buffer|ArrayBuffer|Uint8Array} data - Raw frame; non-Buffer input goes through Buffer.from.
 * @returns {{type: number, typeName: string, typeFlag: number, event: number,
 *   sessionId: string, payload: Buffer}} The decoded message. `event` is 0 and
 *   `sessionId` is '' when the frame does not carry them.
 * @throws {Error} When the frame is shorter than the 4-byte header.
 */
function unmarshal(data) {
  const frame = Buffer.isBuffer(data) ? data : Buffer.from(data);
  if (frame.length < 4) {
    throw new Error('protocol message too short');
  }

  // Byte 1 packs the message type (high nibble) and its flags (low nibble).
  const packed = frame[1];
  const type = (packed >> 4) & 0x0f;
  const typeFlag = packed & 0x0f;
  const typeName = getMessageTypeName(type);
  const cursor = { offset: 4 };

  let event = 0;
  let sessionId = '';
  if (containsEvent(typeFlag)) {
    event = frame.readInt32BE(cursor.offset);
    cursor.offset += 4;
    if (shouldHandleSessionId(event)) {
      sessionId = readStringWithLength(frame, cursor);
    }
  }
  const payload = readPayload(frame, cursor);

  return { type, typeName, typeFlag, event, sessionId, payload };
}
|
||||
|
||||
/**
 * Builds the connection-start frame: event 1 with an empty JSON object payload.
 *
 * @returns {Buffer} The encoded frame.
 */
function createStartConnectionMessage() {
  const emptyJson = Buffer.from('{}', 'utf8');
  const message = {
    type: MsgType.FULL_CLIENT,
    typeFlag: MSG_TYPE_FLAG_WITH_EVENT,
    event: 1,
    payload: emptyJson,
  };
  return marshal(message);
}
|
||||
|
||||
/**
 * Builds the session-start frame: event 100 with a JSON-encoded payload.
 *
 * @param {string} sessionId - Session the frame belongs to.
 * @param {*} payload - Value serialized with JSON.stringify as the frame body.
 * @returns {Buffer} The encoded frame.
 */
function createStartSessionMessage(sessionId, payload) {
  const json = JSON.stringify(payload);
  const message = {
    type: MsgType.FULL_CLIENT,
    typeFlag: MSG_TYPE_FLAG_WITH_EVENT,
    event: 100,
    sessionId,
    payload: Buffer.from(json, 'utf8'),
  };
  return marshal(message);
}
|
||||
|
||||
/**
 * Builds an audio frame: event 200 with the audio bytes as a raw payload.
 *
 * @param {string} sessionId - Session the audio belongs to.
 * @param {Buffer|ArrayBuffer|Uint8Array} audioBuffer - Audio bytes; non-Buffer input goes through Buffer.from.
 * @returns {Buffer} The encoded frame.
 */
function createAudioMessage(sessionId, audioBuffer) {
  const audioBytes = Buffer.isBuffer(audioBuffer) ? audioBuffer : Buffer.from(audioBuffer);
  const message = {
    type: MsgType.AUDIO_ONLY_CLIENT,
    typeFlag: MSG_TYPE_FLAG_WITH_EVENT,
    event: 200,
    sessionId,
    payload: audioBytes,
  };
  return marshal(message, { rawPayload: true });
}
|
||||
|
||||
/**
 * Builds a text-to-speech frame: event 500 carrying one text chunk.
 *
 * @param {string} sessionId - Session the text belongs to; also echoed inside the JSON body.
 * @param {{start?: boolean, end?: boolean, content?: string}} payload - Chunk descriptor;
 *   `start`/`end` are coerced to booleans and a missing `content` becomes ''.
 * @returns {Buffer} The encoded frame.
 */
function createChatTTSTextMessage(sessionId, payload) {
  const body = {
    session_id: sessionId,
    start: !!payload.start,
    end: !!payload.end,
    content: payload.content || '',
  };
  const message = {
    type: MsgType.FULL_CLIENT,
    typeFlag: MSG_TYPE_FLAG_WITH_EVENT,
    event: 500,
    sessionId,
    payload: Buffer.from(JSON.stringify(body), 'utf8'),
  };
  return marshal(message);
}
|
||||
|
||||
/**
 * Builds an external-RAG frame: event 502 carrying retrieved knowledge.
 *
 * @param {string} sessionId - Session the knowledge belongs to; also echoed inside the JSON body.
 * @param {string} externalRag - Value sent as `external_rag`; a falsy value is sent as the string '[]'.
 * @returns {Buffer} The encoded frame.
 */
function createChatRAGTextMessage(sessionId, externalRag) {
  const body = {
    session_id: sessionId,
    external_rag: externalRag || '[]',
  };
  const message = {
    type: MsgType.FULL_CLIENT,
    typeFlag: MSG_TYPE_FLAG_WITH_EVENT,
    event: 502,
    sessionId,
    payload: Buffer.from(JSON.stringify(body), 'utf8'),
  };
  return marshal(message);
}
|
||||
|
||||
module.exports = {
|
||||
MsgType,
|
||||
MSG_TYPE_FLAG_WITH_EVENT,
|
||||
unmarshal,
|
||||
createStartConnectionMessage,
|
||||
createStartSessionMessage,
|
||||
createAudioMessage,
|
||||
createChatTTSTextMessage,
|
||||
createChatRAGTextMessage,
|
||||
};
|
||||
297
test2/server/services/realtimeDialogRouting.js
Normal file
297
test2/server/services/realtimeDialogRouting.js
Normal file
@@ -0,0 +1,297 @@
|
||||
const ToolExecutor = require('./toolExecutor');
|
||||
const arkChatService = require('./arkChatService');
|
||||
const db = require('../db');
|
||||
|
||||
/**
 * Strips Markdown markup and collapses whitespace/punctuation so the text
 * can be read aloud.
 *
 * The rewrite rules are applied strictly in order; later rules rely on the
 * output of earlier ones (e.g. newlines are only folded after the
 * line-anchored Markdown rules have run).
 *
 * @param {string} text - Source text; a falsy value is treated as ''.
 * @returns {string} The cleaned, trimmed text.
 */
function normalizeTextForSpeech(text) {
  const rules = [
    [/^#{1,6}\s*/gm, ''], // heading markers
    [/\*\*([^*]*)\*\*/g, '$1'], // **bold**
    [/__([^_]*)__/g, '$1'], // __bold__
    [/\*([^*]+)\*/g, '$1'], // *italic*
    [/_([^_]+)_/g, '$1'], // _italic_
    [/~~([^~]*)~~/g, '$1'], // ~~strikethrough~~
    [/`{1,3}[^`]*`{1,3}/g, ''], // inline/fenced code is dropped entirely
    [/^[-*]{3,}\s*$/gm, ''], // horizontal rules
    [/^>\s*/gm, ''], // block quotes
    [/!\[[^\]]*\]\([^)]*\)/g, ''], // images
    [/\[([^\]]*)\]\([^)]*\)/g, '$1'], // links keep their label
    [/^[\s]*[-*+]\s+/gm, ' '], // bullet list markers
    [/^[\s]*\d+[.)]\s+/gm, ' '], // numbered list markers
    [/---\s*来源[::]/g, '来源:'],
    [/\r/g, ' '],
    [/\n{2,}/g, '。'], // paragraph breaks become sentence breaks
    [/\n/g, ' '],
    [/。{2,}/g, '。'],
    [/([!?;,])\1+/g, '$1'], // repeated punctuation
    [/([。!?;,])\s*([。!?;,])/g, '$2'], // adjacent punctuation keeps the latter
    [/\s+/g, ' '],
  ];

  let spoken = text || '';
  for (const [pattern, replacement] of rules) {
    spoken = spoken.replace(pattern, replacement);
  }
  return spoken.trim();
}
|
||||
|
||||
function splitTextForSpeech(text, maxLen = 180) {
|
||||
const content = normalizeTextForSpeech(text);
|
||||
if (!content) return [];
|
||||
if (content.length <= maxLen) return [content];
|
||||
const chunks = [];
|
||||
let remaining = content;
|
||||
while (remaining.length > maxLen) {
|
||||
const currentMaxLen = chunks.length === 0 ? Math.min(90, maxLen) : maxLen;
|
||||
let splitIndex = Math.max(
|
||||
remaining.lastIndexOf('。', currentMaxLen),
|
||||
remaining.lastIndexOf('!', currentMaxLen),
|
||||
remaining.lastIndexOf('?', currentMaxLen),
|
||||
remaining.lastIndexOf(';', currentMaxLen),
|
||||
remaining.lastIndexOf(',', currentMaxLen),
|
||||
remaining.lastIndexOf(',', currentMaxLen)
|
||||
);
|
||||
if (splitIndex < Math.floor(currentMaxLen / 2)) {
|
||||
splitIndex = currentMaxLen;
|
||||
} else {
|
||||
splitIndex += 1;
|
||||
}
|
||||
chunks.push(remaining.slice(0, splitIndex).trim());
|
||||
remaining = remaining.slice(splitIndex).trim();
|
||||
}
|
||||
if (remaining) chunks.push(remaining);
|
||||
return chunks.filter(Boolean);
|
||||
}
|
||||
|
||||
/**
 * Estimates how long the text takes to speak.
 *
 * Uses 180 ms per non-whitespace character of the normalized text and clamps
 * the result to the range 4000..60000 ms.
 *
 * @param {string} text - Text to be spoken.
 * @returns {number} Estimated duration in milliseconds.
 */
function estimateSpeechDurationMs(text) {
  const MS_PER_CHAR = 180;
  const FLOOR_MS = 4000;
  const CEILING_MS = 60000;

  const charCount = normalizeTextForSpeech(text).replace(/\s+/g, '').length;
  const estimate = charCount * MS_PER_CHAR;
  if (estimate < FLOOR_MS) return FLOOR_MS;
  if (estimate > CEILING_MS) return CEILING_MS;
  return estimate;
}
|
||||
|
||||
/**
 * Builds the chat messages that ask an LLM to pick a route for the user's utterance.
 *
 * The result is: one system prompt describing the allowed routes and the JSON
 * output contract, up to the last 6 context entries that have both a role and
 * content, then the user's text.
 *
 * @param {object} session - Reads `systemRole` and `speakingStyle` (both optional).
 * @param {Array<{role: string, content: string}>} context - Prior turns; may be null/undefined.
 * @param {string} userText - Current user utterance.
 * @returns {Array<{role: string, content: string}>} Messages in send order.
 */
function buildDirectRouteMessages(session, context, userText) {
  const persona = `${session.systemRole || '你是一个友善的智能助手。'} ${session.speakingStyle || '请使用温和、清晰的口吻。'}`;
  const promptLines = [
    '你是语音前置路由器,只负责判断当前用户问题应该走哪条链路。',
    '你必须只输出一个 JSON 对象,不要输出解释、代码块或额外文本。',
    '允许的 route 只有:chat、search_knowledge、query_weather、query_order、get_current_time、calculate。',
    '规则如下:',
    '1. 企业产品、功能、政策、售后、专业说明、品牌官方信息 -> search_knowledge。',
    '2. 天气 -> query_weather。',
    '3. 订单状态 -> query_order。',
    '4. 当前时间、日期、星期 -> get_current_time。',
    '5. 明确的数学计算 -> calculate。',
    '6. 闲聊、问候、开放式泛化交流 -> chat。',
    '输出格式示例:{"route":"chat","args":{},"reply":""}',
    '如果 route=search_knowledge,args 中必须包含 query。',
    '如果 route=query_weather,args 中必须包含 city。',
    '如果 route=query_order,args 中必须包含 order_id。',
    '如果 route=calculate,args 中必须包含 expression。',
    `当前助手设定:${persona}`,
  ];

  const history = (context || [])
    .slice(-6)
    .filter((turn) => turn && turn.role && turn.content)
    .map(({ role, content }) => ({ role, content }));

  return [
    { role: 'system', content: promptLines.join('\n') },
    ...history,
    { role: 'user', content: userText },
  ];
}
|
||||
|
||||
/**
 * Builds the chat messages for a free-form spoken reply.
 *
 * The result is: one system prompt (persona, speaking style and two
 * voice-scenario instructions), up to the last 10 context entries that have
 * both a role and content, then the user's text.
 *
 * @param {object} session - Reads `systemRole` and `speakingStyle` (both optional).
 * @param {Array<{role: string, content: string}>} context - Prior turns; may be null/undefined.
 * @param {string} userText - Current user utterance.
 * @returns {Array<{role: string, content: string}>} Messages in send order.
 */
function buildDirectChatMessages(session, context, userText) {
  const promptLines = [
    session.systemRole || '你是一个友善的智能助手。',
    session.speakingStyle || '请使用温和、清晰的口吻。',
    '这是语音对话场景,请直接给出自然、完整、适合朗读的中文回复。',
    '如果不是基于知识库或工具结果,就不要冒充官方结论。',
  ];

  const history = (context || [])
    .slice(-10)
    .filter((turn) => turn && turn.role && turn.content)
    .map(({ role, content }) => ({ role, content }));

  return [
    { role: 'system', content: promptLines.join('\n') },
    ...history,
    { role: 'user', content: userText },
  ];
}
|
||||
|
||||
/**
 * Checks whether the text mentions any term that suggests a knowledge-base question.
 *
 * @param {string} text - Text to inspect; a falsy value is treated as ''.
 * @returns {boolean} True when at least one keyword occurs anywhere in the text.
 */
function hasKnowledgeKeyword(text) {
  const keywordPattern = /(系统|平台|产品|功能|介绍|说明|规则|流程|步骤|配置|接入|开通|操作|怎么用|如何用|适合谁|区别|价格|费用|政策|售后|文档|资料|方案|一成系统)/;
  const subject = text || '';
  return keywordPattern.test(subject);
}
|
||||
|
||||
/**
 * Checks whether the text opens with a phrase typical of a follow-up question
 * (a pronoun such as "this/that/it", or a request to elaborate).
 *
 * @param {string} text - Text to inspect; surrounding whitespace is ignored.
 * @returns {boolean} True when the trimmed text starts with one of the phrases.
 */
function isKnowledgeFollowUp(text) {
  const followUpPattern = /^(这个|那个|它|该系统|这个系统|那个系统|这个功能|那个功能|这个产品|那个产品|详细|详细说说|详细查一下|展开说说|继续说|继续讲|怎么用|怎么操作|怎么配置|适合谁|有什么区别|费用多少|价格多少|怎么申请|怎么开通|是什么|什么意思)/;
  const subject = (text || '').trim();
  return followUpPattern.test(subject);
}
|
||||
|
||||
/**
 * Decides whether an utterance must be routed to the knowledge base.
 *
 * True when the utterance itself contains a knowledge keyword, or when it
 * looks like a follow-up and one of the last 6 context entries contains one.
 *
 * @param {string} userText - Current user utterance.
 * @param {Array<{content?: string}>} [context=[]] - Prior turns; non-array values are ignored.
 * @returns {boolean} Whether the knowledge route should be forced.
 */
function shouldForceKnowledgeRoute(userText, context = []) {
  const utterance = (userText || '').trim();
  if (!utterance) {
    return false;
  }
  if (hasKnowledgeKeyword(utterance)) {
    return true;
  }
  if (!isKnowledgeFollowUp(utterance)) {
    return false;
  }

  const turns = Array.isArray(context) ? context : [];
  const recentLines = turns.slice(-6).map((turn) => String(turn?.content || '').trim());
  return hasKnowledgeKeyword(recentLines.join('\n'));
}
|
||||
|
||||
/**
 * Prepends the session's hand-off summary to the context, once.
 *
 * @param {object} session - Reads `handoffSummary` and `handoffSummaryUsed`; may be null/undefined.
 * @param {Array} context - Prior turns.
 * @returns {Array} `context` itself when there is no summary or it was already
 *   used; otherwise a new array starting with an assistant message that carries
 *   the summary (a non-array `context` contributes no entries).
 */
function withHandoffSummary(session, context) {
  const summary = String(session?.handoffSummary || '').trim();
  const pending = Boolean(summary) && !session?.handoffSummaryUsed;
  if (!pending) {
    return context;
  }

  const history = Array.isArray(context) ? context : [];
  const summaryTurn = { role: 'assistant', content: `会话交接摘要:${summary}` };
  return [summaryTurn, ...history];
}
|
||||
|
||||
/**
 * Parses the router LLM's reply into a route decision.
 *
 * Code fences are stripped and the outermost {...} span is parsed as JSON.
 * A route is accepted only when its required argument is present; anything
 * else (unknown route, missing argument, invalid JSON) falls back to a
 * knowledge search using the user's text.
 *
 * @param {string} content - Raw LLM reply.
 * @param {string} userText - Original utterance, used as the fallback query.
 * @returns {{route: string, args: object}} The decision.
 */
function parseDirectRouteDecision(content, userText) {
  const raw = (content || '').trim();
  const unfenced = raw
    .replace(/^```json\s*/i, '')
    .replace(/^```\s*/i, '')
    .replace(/```$/i, '')
    .trim();
  const open = unfenced.indexOf('{');
  const close = unfenced.lastIndexOf('}');
  const candidate = open >= 0 && close > open ? unfenced.slice(open, close + 1) : unfenced;

  try {
    const parsed = JSON.parse(candidate);
    const hasArgs = parsed.args && typeof parsed.args === 'object';
    const args = hasArgs ? parsed.args : {};

    switch (parsed.route) {
      case 'chat':
        return { route: 'chat', args: {} };
      case 'search_knowledge':
        return { route: 'search_knowledge', args: { query: args.query || userText } };
      case 'query_weather':
        if (args.city) return { route: 'query_weather', args: { city: args.city } };
        break;
      case 'query_order':
        if (args.order_id) return { route: 'query_order', args: { order_id: args.order_id } };
        break;
      case 'get_current_time':
        return { route: 'get_current_time', args: {} };
      case 'calculate':
        if (args.expression) return { route: 'calculate', args: { expression: args.expression } };
        break;
      default:
        break;
    }
  } catch (error) {
    console.warn('[NativeVoice] route JSON parse failed:', error.message, 'raw=', raw);
  }

  return { route: 'search_knowledge', args: { query: userText } };
}
|
||||
|
||||
/**
 * Picks a route for the user's utterance using keyword rules only (no LLM call).
 *
 * Rules are checked in order: empty text -> chat; time/date words ->
 * get_current_time; weather words -> query_weather; order/logistics words ->
 * query_order; arithmetic -> calculate; bare greetings/acknowledgements ->
 * chat; everything else -> search_knowledge.
 *
 * @param {string} userText - Current user utterance.
 * @returns {{route: string, args: object}} The decision.
 */
function getRuleBasedDirectRouteDecision(userText) {
  const text = (userText || '').trim();
  if (!text) return { route: 'chat', args: {} };

  if (/(几点|几号|日期|星期|周几|现在时间|当前时间)/.test(text)) {
    return { route: 'get_current_time', args: {} };
  }

  if (/(天气|气温|下雨|晴天|阴天|温度)/.test(text)) {
    // Extract only the city name. A replace()-based extraction keeps the rest
    // of the sentence and returns the whole text when no city is mentioned, so
    // match() is used and the first city mentioned wins; default is 北京.
    const cityMatch = text.match(/北京|上海|广州|深圳|杭州|成都|重庆|武汉|西安|南京|苏州|天津|长沙|郑州|青岛|宁波|无锡/);
    return { route: 'query_weather', args: { city: cityMatch ? cityMatch[0] : '北京' } };
  }

  if (/(订单|物流|快递|单号)/.test(text)) {
    // NOTE(review): the whole utterance is passed as order_id — confirm the
    // order tool extracts the actual number itself.
    return { route: 'query_order', args: { order_id: text } };
  }

  if (/^[\d\s+\-*/().=%]+$/.test(text) || /(等于多少|帮我算|计算一下|算一下)/.test(text)) {
    const expression = text.replace(/(帮我算|计算一下|算一下|等于多少)/g, '').trim() || text;
    return { route: 'calculate', args: { expression } };
  }

  // A lone greeting / thanks / acknowledgement, optionally followed by modal
  // particles and punctuation.
  if (/^(喂|你好|您好|嗨|哈喽|hello|hi|在吗|在不在|早上好|中午好|下午好|晚上好|早安|晚安|谢谢|感谢|再见|拜拜|嗯|哦|好的|对|是的|没有了|没事了|可以了|行|OK|ok)[,,!。??~~\s]*[啊呀吧呢哦嗯嘛哈的了]*[!。??~~]*$/.test(text)) {
    return { route: 'chat', args: {} };
  }

  // "Hello, are you there?" style openers.
  if (/^(喂[,,\s]*)?(你好|您好)[,,!。??\s]*(在吗|请问)?[!。??]*$/.test(text)) {
    return { route: 'chat', args: {} };
  }

  return { route: 'search_knowledge', args: { query: text } };
}
|
||||
|
||||
/**
 * Turns a tool result into a sentence that can be spoken to the user.
 *
 * Knowledge searches map timeout / not-configured errors to fixed apologies
 * and otherwise join the result contents line by line. The other known tools
 * are rendered with fixed sentence templates when they succeeded. Any result
 * with an `error` field yields that error text; everything else is returned
 * as-is (strings) or JSON-encoded.
 *
 * @param {string} toolName - Name of the tool that produced the result.
 * @param {object|string} toolResult - The tool's return value.
 * @returns {string} Text for speech; '' when there is no result.
 */
function extractToolResultText(toolName, toolResult) {
  if (!toolResult) return '';

  const succeeded = !toolResult.error;

  switch (toolName) {
    case 'search_knowledge': {
      if (toolResult.errorType === 'timeout') {
        return '知识库查询超时了,请稍后重试,或换一种更具体的问法再试。';
      }
      if (toolResult.errorType === 'not_configured') {
        return '知识库当前未配置完成,请先检查知识库配置。';
      }
      const { results } = toolResult;
      if (results && Array.isArray(results)) {
        return results.map((entry) => entry.content || JSON.stringify(entry)).join('\n');
      }
      if (typeof toolResult === 'string') return toolResult;
      break;
    }
    case 'query_weather':
      if (succeeded) {
        return `${toolResult.city}今天${toolResult.weather},气温${toolResult.temp},湿度${toolResult.humidity},${toolResult.wind}。${toolResult.tips || ''}`.trim();
      }
      break;
    case 'query_order':
      if (succeeded) {
        return `订单${toolResult.order_id}当前状态是${toolResult.status},预计送达时间${toolResult.estimated_delivery},快递单号${toolResult.tracking_number}。`;
      }
      break;
    case 'get_current_time':
      if (succeeded) {
        return `现在是${toolResult.datetime},${toolResult.weekday}。`;
      }
      break;
    case 'calculate':
      if (succeeded) {
        return `${toolResult.expression} 的计算结果是 ${toolResult.formatted}。`;
      }
      break;
    default:
      break;
  }

  // Generic fallbacks shared by every tool.
  if (!succeeded) return toolResult.error;
  if (typeof toolResult === 'string') return toolResult;
  return JSON.stringify(toolResult);
}
|
||||
|
||||
/**
 * Decides how a recognized user utterance should be answered.
 *
 * The utterance is routed with keyword rules. Chat is delegated upstream
 * (`delivery: 'upstream_chat'`). Every other route runs the matching tool:
 * usable results are handed back as RAG items (`delivery: 'external_rag'`),
 * otherwise the tool's text (typically an error or "not found" message) is
 * returned for local speech synthesis (`delivery: 'local_tts'`).
 *
 * Side effect: sets `session.handoffSummaryUsed = true` before returning.
 *
 * @param {string} sessionId - Session whose history is loaded as context.
 * @param {object} session - Mutable session state (hand-off summary flags).
 * @param {string} text - Recognized user utterance; non-string values are coerced.
 * @returns {Promise<{delivery: string, speechText: string, ragItems: Array,
 *   source: string, toolName: (string|null), routeDecision: object,
 *   responseMeta: object}>} The reply plan.
 */
async function resolveReply(sessionId, session, text) {
  // History is best-effort: a failed load must not block the reply, but it
  // should at least be visible in the logs.
  const baseContext = await db.getHistoryForLLM(sessionId, 20).catch((error) => {
    console.warn('[NativeVoice] load history failed:', error?.message || error);
    return [];
  });
  const context = withHandoffSummary(session, baseContext);

  // Coerce once so a missing/non-string utterance cannot throw on trim().
  const originalText = String(text ?? '').trim();

  let routeDecision = getRuleBasedDirectRouteDecision(originalText);
  if (routeDecision.route === 'chat' && shouldForceKnowledgeRoute(originalText, context)) {
    routeDecision = { route: 'search_knowledge', args: { query: originalText } };
  }

  const baseMeta = {
    route: routeDecision.route,
    original_text: originalText,
  };

  if (routeDecision.route === 'chat') {
    session.handoffSummaryUsed = true;
    return {
      delivery: 'upstream_chat',
      speechText: '',
      ragItems: [],
      source: 'voice_bot',
      toolName: null,
      routeDecision,
      responseMeta: baseMeta,
    };
  }

  const toolName = routeDecision.route;
  const source = 'voice_tool';
  const isKnowledgeSearch = toolName === 'search_knowledge';
  // Knowledge searches ask for snippets rather than a finished answer.
  const toolArgs = isKnowledgeSearch
    ? { ...(routeDecision.args || {}), response_mode: 'snippet' }
    : routeDecision.args;

  const toolResult = await ToolExecutor.execute(toolName, toolArgs, context);
  const replyText = extractToolResultText(toolName, toolResult);

  const responseMeta = {
    ...baseMeta,
    tool_name: toolName,
    tool_args: toolArgs || {},
    source: toolResult?.source || null,
    original_query: toolResult?.original_query || routeDecision.args?.query || originalText,
    rewritten_query: toolResult?.rewritten_query || null,
    hit: typeof toolResult?.hit === 'boolean' ? toolResult.hit : null,
    reason: toolResult?.reason || null,
    error_type: toolResult?.errorType || null,
    latency_ms: toolResult?.latency_ms || null,
  };

  let ragItems = [];
  if (isKnowledgeSearch) {
    if (toolResult?.hit && Array.isArray(toolResult?.results)) {
      ragItems = toolResult.results
        .filter((item) => item && item.content)
        .map((item) => ({
          title: item.title || '知识库结果',
          content: item.content,
        }));
    }
  } else if (!toolResult?.error && replyText) {
    ragItems = [{ title: `${toolName}结果`, content: replyText }];
  }

  session.handoffSummaryUsed = true;

  if (ragItems.length > 0) {
    return {
      delivery: 'external_rag',
      speechText: '',
      ragItems,
      source,
      toolName,
      routeDecision,
      responseMeta,
    };
  }

  return {
    delivery: 'local_tts',
    speechText: normalizeTextForSpeech(replyText),
    ragItems: [],
    source,
    toolName,
    routeDecision,
    responseMeta,
  };
}
|
||||
|
||||
module.exports = {
|
||||
getRuleBasedDirectRouteDecision,
|
||||
normalizeTextForSpeech,
|
||||
splitTextForSpeech,
|
||||
estimateSpeechDurationMs,
|
||||
shouldForceKnowledgeRoute,
|
||||
resolveReply,
|
||||
};
|
||||
@@ -1,6 +1,60 @@
|
||||
const axios = require('axios');
|
||||
const arkChatService = require('./arkChatService');
|
||||
|
||||
class ToolExecutor {
|
||||
static hasCanonicalKnowledgeTerm(query) {
|
||||
return /(一成系统|PM-FitLine|PM细胞营养素|NTC营养保送系统|Activize Oxyplus|小红产品|Basics|大白产品|Restorate|小白产品|儿童倍适|火炉原理|阿育吠陀)/i.test(String(query || ''));
|
||||
}
|
||||
|
||||
static normalizeKnowledgeQueryAlias(query) {
|
||||
return String(query || '')
|
||||
.replace(/^[啊哦嗯呢呀哎诶额,。!?、\s]+/g, '')
|
||||
.replace(/[啊哦嗯呢呀哎诶额,。!?、\s]+$/g, '')
|
||||
.replace(/X{2}系统/gi, '一成系统')
|
||||
.replace(/一城系统|逸城系统|一程系统|易成系统|一诚系统|亦成系统/g, '一成系统')
|
||||
.replace(/PM[-\s]*Fitline|PM[-\s]*fitline|Pm[-\s]*fitline|Fitline|fitline/g, 'PM-FitLine')
|
||||
.replace(/PM细胞营养|PM营养素|德国PM营养素/g, 'PM细胞营养素')
|
||||
.replace(/NTC营养保送系统|NTC营养配送系统|NTC营养输送系统|NTC营养传送系统|NTC营养传输系统/g, 'NTC营养保送系统')
|
||||
.replace(/Nutrient Transport Concept/gi, 'NTC营养保送系统')
|
||||
.replace(/Activize Oxyplus|Activize/gi, 'Activize Oxyplus')
|
||||
.replace(/Restorate/gi, 'Restorate')
|
||||
.replace(/Basics/gi, 'Basics')
|
||||
.replace(/基础三合一|基础套装?|三合一基础套|大白小红小白/g, 'Basics')
|
||||
.replace(/小红产品|小红/g, '小红产品 Activize Oxyplus')
|
||||
.replace(/大白产品|大白/g, '大白产品 Basics')
|
||||
.replace(/小白产品|小白/g, '小白产品 Restorate')
|
||||
.replace(/儿童倍适|儿童产品/g, '儿童倍适')
|
||||
.replace(/火炉原理/g, '火炉原理')
|
||||
.replace(/阿育吠陀|Ayurveda/gi, '阿育吠陀')
|
||||
.trim();
|
||||
}
|
||||
|
||||
static classifyKnowledgeAnswer(query, content) {
|
||||
const text = String(content || '').trim();
|
||||
if (!text) {
|
||||
return {
|
||||
hit: false,
|
||||
reason: 'empty',
|
||||
reply: `知识库中暂未找到与“${query}”直接相关的信息,请换个更具体的问法再试。`,
|
||||
};
|
||||
}
|
||||
|
||||
const noHitPattern = /未检索到|没有检索到|没有相关内容|暂无相关内容|未找到相关信息|没有找到相关信息|知识库中没有相关内容|暂未找到与.*直接相关的信息|无法基于知识库/;
|
||||
if (noHitPattern.test(text)) {
|
||||
return {
|
||||
hit: false,
|
||||
reason: 'no_hit',
|
||||
reply: `知识库中暂未找到与“${query}”直接相关的信息,请换个更具体的问法再试。`,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
hit: true,
|
||||
reason: 'hit',
|
||||
reply: text,
|
||||
};
|
||||
}
|
||||
|
||||
static async execute(toolName, args, context = []) {
|
||||
const startTime = Date.now();
|
||||
console.log(`[ToolExecutor] Executing: ${toolName}`, args);
|
||||
@@ -79,36 +133,108 @@ class ToolExecutor {
|
||||
};
|
||||
}
|
||||
|
||||
static async searchKnowledge({ query } = {}, context = []) {
|
||||
static async searchKnowledge({ query, response_mode } = {}, context = []) {
|
||||
const startTime = Date.now();
|
||||
query = query || '';
|
||||
const responseMode = response_mode === 'snippet' ? 'snippet' : 'answer';
|
||||
console.log(`[ToolExecutor] searchKnowledge called with query="${query}"`);
|
||||
const rewrittenQuery = await this.rewriteKnowledgeQuery(query, context);
|
||||
if (rewrittenQuery && rewrittenQuery !== query) {
|
||||
console.log(`[ToolExecutor] searchKnowledge rewritten query="${rewrittenQuery}"`);
|
||||
}
|
||||
|
||||
const kbIds = process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS;
|
||||
if (kbIds && kbIds !== 'your_knowledge_base_dataset_id') {
|
||||
try {
|
||||
console.log('[ToolExecutor] Trying Ark Knowledge Search...');
|
||||
const result = await this.searchArkKnowledge(query, context);
|
||||
console.log(`[ToolExecutor] Ark KB search succeeded in ${Date.now() - startTime}ms`);
|
||||
return result;
|
||||
const result = await this.searchArkKnowledge(rewrittenQuery || query, context, responseMode);
|
||||
const latencyMs = Date.now() - startTime;
|
||||
console.log(`[ToolExecutor] Ark KB search succeeded in ${latencyMs}ms`);
|
||||
return {
|
||||
...result,
|
||||
original_query: query,
|
||||
rewritten_query: rewrittenQuery || query,
|
||||
latency_ms: latencyMs,
|
||||
};
|
||||
} catch (error) {
|
||||
const latencyMs = Date.now() - startTime;
|
||||
console.warn('[ToolExecutor] Ark Knowledge Search failed:', error.message);
|
||||
console.log('[ToolExecutor] Falling back to local Knowledge Base');
|
||||
return {
|
||||
query,
|
||||
original_query: query,
|
||||
rewritten_query: rewrittenQuery || query,
|
||||
latency_ms: latencyMs,
|
||||
errorType: error.code === 'ECONNABORTED' || /timeout/i.test(error.message) ? 'timeout' : 'request_failed',
|
||||
error: `知识库查询失败: ${error.message}`,
|
||||
source: 'ark_knowledge',
|
||||
hit: false,
|
||||
reason: 'error',
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
console.log('[ToolExecutor] Using local Knowledge Base (voice fast path)');
|
||||
const result = this.searchLocalKnowledge(query);
|
||||
console.log(`[ToolExecutor] Local KB search completed in ${Date.now() - startTime}ms`);
|
||||
return result;
|
||||
const latencyMs = Date.now() - startTime;
|
||||
console.warn('[ToolExecutor] Ark knowledge base is not configured');
|
||||
return {
|
||||
query,
|
||||
original_query: query,
|
||||
rewritten_query: rewrittenQuery || query,
|
||||
latency_ms: latencyMs,
|
||||
errorType: 'not_configured',
|
||||
error: '知识库未配置,请检查 VOLC_ARK_KNOWLEDGE_BASE_IDS',
|
||||
source: 'ark_knowledge',
|
||||
hit: false,
|
||||
reason: 'not_configured',
|
||||
};
|
||||
}
|
||||
|
||||
static async rewriteKnowledgeQuery(query, context = []) {
|
||||
const originalQuery = String(query || '').trim();
|
||||
if (!originalQuery) {
|
||||
return '';
|
||||
}
|
||||
|
||||
const normalizedQuery = this.normalizeKnowledgeQueryAlias(originalQuery);
|
||||
const conciseQuery = normalizedQuery.replace(/[,。!?、,.!?\s]+/g, '');
|
||||
const recentContext = (Array.isArray(context) ? context : [])
|
||||
.filter((item) => item && (item.role === 'user' || item.role === 'assistant') && String(item.content || '').trim())
|
||||
.slice(-6)
|
||||
.map((item) => `${item.role === 'user' ? '用户' : '助手'}:${String(item.content || '').trim()}`)
|
||||
.join('\n');
|
||||
|
||||
const isPronounFollowUp = /^(这个|那个|它|该系统|这个系统|那个系统|详细|继续|怎么|为什么|适合谁|什么意思)/.test(normalizedQuery);
|
||||
if (this.hasCanonicalKnowledgeTerm(normalizedQuery) && conciseQuery.length <= 36 && !isPronounFollowUp) {
|
||||
return normalizedQuery;
|
||||
}
|
||||
|
||||
if (!process.env.VOLC_ARK_ENDPOINT_ID || process.env.VOLC_ARK_ENDPOINT_ID === 'your_ark_endpoint_id') {
|
||||
return normalizedQuery;
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await arkChatService.chat([
|
||||
{
|
||||
role: 'system',
|
||||
content: '你是知识库检索词改写助手。你的任务是把用户当前问题改写成适合企业知识库检索的完整查询语句。必须处理三类问题:1)补全多轮对话中的省略主语;2)纠正语音识别错误、口语噪声和同音误写;3)把别名统一成知识库里的规范说法。规则:不要改变用户真实意图;不要回答问题;只输出一行最终检索词;优先保留真正的产品名、系统名、技术名。当前知识库高频规范术语包括:一成系统、PM-FitLine、PM细胞营养素、NTC营养保送系统、Activize Oxyplus、小红产品、Basics、大白产品、Restorate、小白产品、儿童倍适、火炉原理、阿育吠陀。示例:XX系统、一城系统、逸城系统、一程系统等都统一理解为一成系统;NTC营养配送系统、NTC营养输送系统统一为NTC营养保送系统;Fitline、PM fitline 统一为 PM-FitLine;小红统一为小红产品 Activize Oxyplus。',
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: `最近上下文:\n${recentContext || '无'}\n\n当前原始问题:${normalizedQuery}\n\n请输出最终检索词:`,
|
||||
},
|
||||
], []);
|
||||
const rewritten = this.normalizeKnowledgeQueryAlias(String(result.content || '').replace(/^["'“”]+|["'“”]+$/g, '').trim());
|
||||
return rewritten || normalizedQuery;
|
||||
} catch (error) {
|
||||
console.warn('[ToolExecutor] rewriteKnowledgeQuery failed:', error.message);
|
||||
return normalizedQuery;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 通过方舟 Chat Completions API + knowledge_base metadata 进行知识检索
|
||||
* 使用独立的 LLM 调用,专门用于知识库检索场景(如语音通话的工具回调)
|
||||
*/
|
||||
static async searchArkKnowledge(query, context = []) {
|
||||
static async searchArkKnowledge(query, context = [], responseMode = 'answer') {
|
||||
const endpointId = process.env.VOLC_ARK_ENDPOINT_ID;
|
||||
const authKey = process.env.VOLC_ARK_API_KEY || process.env.VOLC_ACCESS_KEY_ID;
|
||||
const kbIds = process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS;
|
||||
@@ -126,12 +252,14 @@ class ToolExecutor {
|
||||
// 提取最近 3 轮对话作为上下文(最多 6 条 user/assistant 消息)
|
||||
const recentContext = context
|
||||
.filter(m => m.role === 'user' || m.role === 'assistant')
|
||||
.slice(-6);
|
||||
.slice(responseMode === 'snippet' ? -4 : -6);
|
||||
|
||||
const messages = [
|
||||
{
|
||||
role: 'system',
|
||||
content: '你是一个知识库检索助手。请根据知识库中的内容回答用户问题。如果知识库中没有相关内容,请如实说明。回答时请引用知识库来源。',
|
||||
content: responseMode === 'snippet'
|
||||
? '你是知识库片段提取助手。请基于知识库提取与用户问题最相关的2到4条简洁知识片段,供语音系统继续组织回复。规则:只输出直接相关的中文事实片段;每条尽量简短;不要寒暄,不要解释你的任务,不要写“根据知识库”;如果没有相关内容,请明确说未找到相关内容。'
|
||||
: '你是一个知识库检索助手。请根据知识库中的内容回答用户问题。如果知识库中没有相关内容,请如实说明。回答时请引用知识库来源。',
|
||||
},
|
||||
...recentContext,
|
||||
{
|
||||
@@ -165,21 +293,24 @@ class ToolExecutor {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${authKey}`,
|
||||
},
|
||||
timeout: 15000, // 方舟知识库超时 15s(减少等待,防止 LLM 重试风暴)
|
||||
timeout: 30000,
|
||||
}
|
||||
);
|
||||
|
||||
const choice = response.data.choices?.[0];
|
||||
const content = choice?.message?.content || '未找到相关信息';
|
||||
const classified = this.classifyKnowledgeAnswer(query, content);
|
||||
|
||||
return {
|
||||
query,
|
||||
results: [{
|
||||
title: '方舟知识库检索结果',
|
||||
content: content,
|
||||
content: classified.reply,
|
||||
}],
|
||||
total: 1,
|
||||
source: 'ark_knowledge',
|
||||
hit: classified.hit,
|
||||
reason: classified.reason,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -209,7 +340,7 @@ class ToolExecutor {
|
||||
],
|
||||
stream: true,
|
||||
auto_save_history: false,
|
||||
}, { headers, timeout: 15000 });
|
||||
}, { headers, timeout: 30000 });
|
||||
|
||||
const chatData = chatRes.data?.data;
|
||||
if (!chatData?.id || !chatData?.conversation_id) {
|
||||
|
||||
@@ -1,132 +0,0 @@
|
||||
const { Signer } = require('@volcengine/openapi');
|
||||
const fetch = require('node-fetch');
|
||||
const { AccessToken, privileges } = require('../lib/token');
|
||||
|
||||
/**
 * Client for the Volcengine RTC OpenAPI voice-chat actions, plus RTC room-token
 * generation. Falls back to mock responses / placeholder tokens when the
 * credentials are not configured.
 */
class VolcengineService {
  constructor() {
    this.baseUrl = 'https://rtc.volcengineapi.com';
    this.service = 'rtc';
    this.region = 'cn-north-1';
    this.version = '2024-12-01';
  }

  /**
   * Starts a voice chat and logs a summary of the S2S / LLM configuration.
   *
   * @param {object} config - StartVoiceChat request body; reads RoomId and Config.
   * @returns {Promise<object>} The OpenAPI response.
   */
  async startVoiceChat(config) {
    console.log('[Volcengine] Starting voice chat (S2S端到端 + LLM混合, API v2024-12-01)');
    console.log('[Volcengine] RoomId:', config.RoomId);

    // ProviderParams may arrive as a JSON string or as an object.
    const rawProviderParams = config.Config.S2SConfig?.ProviderParams;
    let providerParams = rawProviderParams;
    if (typeof providerParams === 'string') {
      try {
        providerParams = JSON.parse(providerParams);
      } catch (e) {
        providerParams = {};
      }
    }

    console.log('[Volcengine] S2S AppId:', providerParams?.app?.appid);
    console.log('[Volcengine] S2S model:', providerParams?.dialog?.extra?.model);
    console.log('[Volcengine] S2S speaker:', providerParams?.tts?.speaker);
    console.log('[Volcengine] ProviderParams type:', typeof config.Config.S2SConfig?.ProviderParams);
    console.log('[Volcengine] LLM EndPointId:', config.Config.LLMConfig?.EndPointId);
    console.log('[Volcengine] Tools:', config.Config.LLMConfig?.Tools?.length || 0);
    console.log('[Volcengine] Full request body:', JSON.stringify(config, null, 2));

    const response = await this._callOpenAPI('StartVoiceChat', config);
    console.log('[Volcengine] StartVoiceChat response:', JSON.stringify(response, null, 2));
    return response;
  }

  /**
   * Updates a running voice chat.
   *
   * @param {object} params - UpdateVoiceChat request body.
   * @returns {Promise<object>} The OpenAPI response.
   */
  async updateVoiceChat(params) {
    console.log('[Volcengine] Updating voice chat (v2024-12-01)');
    console.log('[Volcengine] UpdateVoiceChat params:', JSON.stringify(params, null, 2));
    const response = await this._callOpenAPI('UpdateVoiceChat', params);
    console.log('[Volcengine] UpdateVoiceChat response:', JSON.stringify(response, null, 2));
    return response;
  }

  /**
   * Stops a voice chat.
   *
   * @param {object} params - StopVoiceChat request body; reads RoomId for logging.
   * @returns {Promise<object>} The OpenAPI response.
   */
  async stopVoiceChat(params) {
    console.log('[Volcengine] Stopping voice chat, RoomId:', params.RoomId);
    return this._callOpenAPI('StopVoiceChat', params);
  }

  /**
   * Generates an RTC room-entry token valid for 24 hours.
   * Uses the official AccessToken library:
   * https://github.com/volcengine/rtc-aigc-demo/blob/main/Server/token.js
   *
   * @param {string} roomId - Room to join.
   * @param {string} userId - User joining the room.
   * @returns {string} The serialized token, or a placeholder string when the
   *   RTC app id / key are not configured.
   */
  generateRTCToken(roomId, userId) {
    const { VOLC_RTC_APP_ID: appId, VOLC_RTC_APP_KEY: appKey } = process.env;

    const configured = appId && appKey && appKey !== 'your_rtc_app_key';
    if (!configured) {
      console.warn('[Volcengine] RTC AppKey not configured, returning placeholder token');
      return `placeholder_token_${roomId}_${userId}_${Date.now()}`;
    }

    const ONE_DAY_SECONDS = 24 * 3600;
    const accessToken = new AccessToken(appId, appKey, roomId, userId);
    accessToken.expireTime(Math.floor(Date.now() / 1000) + ONE_DAY_SECONDS);
    accessToken.addPrivilege(privileges.PrivPublishStream, 0);
    accessToken.addPrivilege(privileges.PrivSubscribeStream, 0);

    const serialized = accessToken.serialize();
    console.log(`[Volcengine] RTC Token generated for room=${roomId}, user=${userId}`);
    return serialized;
  }

  /**
   * Signs and sends one OpenAPI action.
   *
   * @param {string} action - OpenAPI action name.
   * @param {object} body - JSON request body.
   * @param {string} [versionOverride] - API version; defaults to this.version.
   * @returns {Promise<object>} The parsed response, or a mock response when the
   *   access keys are not configured.
   * @throws {Error} When the request fails or the response carries
   *   ResponseMetadata.Error.
   */
  async _callOpenAPI(action, body, versionOverride) {
    const { VOLC_ACCESS_KEY_ID: ak, VOLC_SECRET_ACCESS_KEY: sk } = process.env;
    const version = versionOverride || this.version;

    const configured = ak && sk && ak !== 'your_access_key_id';
    if (!configured) {
      console.warn(`[Volcengine] Credentials not configured, returning mock response for ${action}`);
      return this._mockResponse(action, body);
    }

    // Same signing approach as the official rtc-aigc-demo.
    const openApiRequestData = {
      region: this.region,
      method: 'POST',
      params: {
        Action: action,
        Version: version,
      },
      headers: {
        Host: 'rtc.volcengineapi.com',
        'Content-type': 'application/json',
      },
      body,
    };
    // The headers object built above is sent as-is below, after signing.
    new Signer(openApiRequestData, this.service).addAuthorization({
      accessKeyId: ak,
      secretKey: sk,
    });

    const url = `${this.baseUrl}?Action=${action}&Version=${version}`;
    console.log(`[Volcengine] ${action} calling:`, url);

    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: openApiRequestData.headers,
        body: JSON.stringify(body),
      });
      const data = await response.json();

      const apiError = data?.ResponseMetadata?.Error;
      if (apiError) {
        throw new Error(`${action} failed: ${apiError.Code} - ${apiError.Message}`);
      }
      return data;
    } catch (error) {
      console.error(`[Volcengine] ${action} error:`, error.message);
      throw error;
    }
  }

  /**
   * Mock response used during development while credentials are not configured.
   *
   * @param {string} action - OpenAPI action name.
   * @param {object} params - Request body, logged truncated to 500 characters.
   * @returns {object} A response-shaped object with a mock RequestId.
   */
  _mockResponse(action, params) {
    const preview = JSON.stringify(params, null, 2).substring(0, 500);
    console.log(`[Volcengine][MOCK] ${action} called with:`, preview);
    return {
      ResponseMetadata: { RequestId: `mock-${Date.now()}`, Action: action },
      Result: { Message: 'Mock response - credentials not configured' },
    };
  }
}
|
||||
|
||||
module.exports = new VolcengineService();
|
||||
Reference in New Issue
Block a user