560 lines
23 KiB
JavaScript
560 lines
23 KiB
JavaScript
|
|
const express = require('express');
|
|||
|
|
const router = express.Router();
|
|||
|
|
const { v4: uuidv4 } = require('uuid');
|
|||
|
|
const volcengine = require('../services/volcengine');
|
|||
|
|
const VoiceChatConfigBuilder = require('../config/voiceChatConfig');
|
|||
|
|
const ToolExecutor = require('../services/toolExecutor');
|
|||
|
|
const DEFAULT_TOOLS = require('../config/tools');
|
|||
|
|
const db = require('../db');
|
|||
|
|
|
|||
|
|
// ---------------------------------------------------------------------------
// In-memory state shared by the route handlers in this module.
// Nothing here is persisted; every Map starts empty when the module loads.
// ---------------------------------------------------------------------------

/** sessionId -> session record ({ roomId, taskId, userId, startTime, subtitles, started }) created by POST /prepare. */
const activeSessions = new Map();

/** sessionId -> { subtitles, duration, endTime } snapshot stored by POST /stop and dropped 30 minutes later. */
const completedSessions = new Map();

/** roomId -> bot user id produced by VoiceChatConfigBuilder.build in POST /start. */
const roomToBotUserId = new Map();

/** roomId -> human user id of the session, recorded in POST /start. */
const roomToHumanUserId = new Map();

/** roomId -> sessionId, recorded in POST /prepare. */
const roomToSessionId = new Map();

/** roomId -> task id; set in POST /prepare and overwritten in POST /start if the server returns a different one. */
const roomToTaskId = new Map();

/** roomId -> { text, timestamp, source? } for the most recent user utterance seen for that room. */
const latestUserSpeech = new Map();

/** callback task id -> buffer accumulating streamed tool-call chunks in POST /fc_callback. */
const toolCallBuffers = new Map();
|
|||
|
|
|
|||
|
|
/**
 * GET /config
 *
 * Returns the static option lists a client can choose from: model versions,
 * speaker voices (each tagged with the model series it is listed under) and
 * the name/description of every entry in DEFAULT_TOOLS.
 */
router.get('/config', (req, res) => {
  const models = [
    { value: '1.2.1.0', label: 'O2.0(推荐,精品音质)' },
    { value: 'O', label: 'O(基础版)' },
    { value: '2.2.0.0', label: 'SC2.0(推荐,声音复刻)' },
    { value: 'SC', label: 'SC(基础版)' },
  ];

  const speakers = [
    { value: 'zh_female_vv_jupiter_bigtts', label: 'VV(活泼女声)', series: 'O' },
    { value: 'zh_female_xiaohe_jupiter_bigtts', label: '小禾(甜美女声·台湾口音)', series: 'O' },
    { value: 'zh_male_yunzhou_jupiter_bigtts', label: '云舟(沉稳男声)', series: 'O' },
    { value: 'zh_male_xiaotian_jupiter_bigtts', label: '小天(磁性男声)', series: 'O' },
    { value: 'saturn_common_female_1', label: 'Saturn 女声1', series: 'SC2.0' },
    { value: 'saturn_common_male_1', label: 'Saturn 男声1', series: 'SC2.0' },
    { value: 'ICL_common_female_1', label: 'ICL 女声1', series: 'SC' },
    { value: 'ICL_common_male_1', label: 'ICL 男声1', series: 'SC' },
  ];

  // Expose only the public-facing fields of each tool definition.
  const tools = DEFAULT_TOOLS.map((tool) => {
    const fn = tool.function;
    return { name: fn.name, description: fn.description };
  });

  res.json({ success: true, data: { models, speakers, tools } });
});
|
|||
|
|
|
|||
|
|
/**
 * POST /prepare
 *
 * Body: { userId }
 *
 * Allocates a new session: generates a session id, derives the room id and
 * task id from it, requests an RTC token for (room, user), registers the
 * session in the in-memory maps and records it in the database (best effort).
 *
 * Responds 400 when userId is missing, 500 on any unexpected error, otherwise
 * { success, data: { sessionId, roomId, taskId, rtcToken, rtcAppId } }.
 */
router.post('/prepare', async (req, res) => {
  try {
    const { userId } = req.body;
    if (!userId) {
      res.status(400).json({ success: false, error: 'userId is required' });
      return;
    }

    const sessionId = uuidv4();
    const shortId = sessionId.slice(0, 8);
    const roomId = `room_${shortId}`;
    const taskId = `task_${shortId}_${Date.now()}`;
    const rtcToken = volcengine.generateRTCToken(roomId, userId);

    const sessionRecord = {
      roomId,
      taskId,
      userId,
      startTime: Date.now(),
      subtitles: [],
      started: false,
    };
    activeSessions.set(sessionId, sessionRecord);
    roomToTaskId.set(roomId, taskId);
    roomToSessionId.set(roomId, sessionId);
    console.log(`[Voice] Session prepared: ${sessionId}, room: ${roomId}, user: ${userId}`);

    // Persistence is best effort: a database failure must not block the call.
    try {
      await db.createSession(sessionId, userId, 'voice');
    } catch (dbErr) {
      console.warn('[DB] createSession failed:', dbErr.message);
    }

    const payload = {
      sessionId,
      roomId,
      taskId,
      rtcToken,
      rtcAppId: process.env.VOLC_RTC_APP_ID,
    };
    res.json({ success: true, data: payload });
  } catch (error) {
    console.error('[Voice] Prepare failed:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|||
|
|
|
|||
|
|
/**
 * POST /start
 *
 * Body: { sessionId, botName, systemRole, speakingStyle, modelVersion,
 *         speaker, enableWebSearch, chatHistory }
 *
 * Starts the voice-chat task for a session previously created by /prepare.
 * When the client sends no chat history, up to 20 messages are loaded from
 * the database instead (best effort). If starting fails, a stop request is
 * issued for the same room/task before the 500 response is sent.
 *
 * Responds 400 (missing sessionId), 404 (unknown session), 200 with
 * "Already started" for a repeated call, or 200 with the start result.
 */
router.post('/start', async (req, res) => {
  let session = null;

  try {
    const {
      sessionId,
      botName,
      systemRole,
      speakingStyle,
      modelVersion,
      speaker,
      enableWebSearch,
      chatHistory,
    } = req.body;

    if (!sessionId) {
      res.status(400).json({ success: false, error: 'sessionId is required' });
      return;
    }

    session = activeSessions.get(sessionId);
    if (!session) {
      res.status(404).json({ success: false, error: 'Session not found' });
      return;
    }
    if (session.started) {
      res.json({ success: true, data: { message: 'Already started' } });
      return;
    }

    // Fall back to persisted history only when the client supplied none.
    let effectiveChatHistory = chatHistory;
    const clientSentHistory = Boolean(chatHistory) && chatHistory.length !== 0;
    if (!clientSentHistory) {
      try {
        const dbHistory = await db.getHistoryForLLM(sessionId, 20);
        if (dbHistory.length > 0) {
          effectiveChatHistory = dbHistory;
          console.log(`[Voice] Loaded ${dbHistory.length} messages from DB for session ${sessionId}`);
        }
      } catch (dbErr) {
        console.warn('[DB] getHistoryForLLM failed:', dbErr.message);
      }
    }
    const historySize = effectiveChatHistory ? effectiveChatHistory.length : 'undefined';
    console.log(`[Voice] chatHistory: ${historySize} messages`);

    const built = VoiceChatConfigBuilder.build({
      roomId: session.roomId,
      taskId: session.taskId,
      userId: session.userId,
      botName,
      systemRole,
      speakingStyle,
      modelVersion,
      speaker,
      tools: DEFAULT_TOOLS,
      enableWebSearch,
      chatHistory: effectiveChatHistory,
    });
    const { config, botUserId } = built;

    session.botUserId = botUserId;
    roomToBotUserId.set(session.roomId, botUserId);
    roomToHumanUserId.set(session.roomId, session.userId);
    console.log(`[Voice] room=${session.roomId} botUserId=${botUserId} humanUserId=${session.userId}`);

    const result = await volcengine.startVoiceChat(config);
    session.started = true;

    // The server may hand back a TaskId different from the one we generated;
    // if so, adopt it everywhere so later commands address the right task.
    const serverResult = result?.Result;
    const serverTaskId = serverResult?.TaskId || serverResult?.task_id;
    if (serverTaskId && serverTaskId !== session.taskId) {
      console.log(`[Voice] Server assigned different TaskId: ${serverTaskId} (ours: ${session.taskId})`);
      roomToTaskId.set(session.roomId, serverTaskId);
      session.taskId = serverTaskId;
    }

    console.log(`[Voice] Session started: ${sessionId}, TaskId=${session.taskId}`);
    res.json({ success: true, data: { startResult: result } });
  } catch (error) {
    const detail = error.response?.data || error.message;
    console.error('[Voice] Start failed:', JSON.stringify(detail, null, 2));

    // Best-effort teardown of whatever may have been started remotely.
    if (session) {
      try {
        await volcengine.stopVoiceChat({
          AppId: process.env.VOLC_RTC_APP_ID,
          RoomId: session.roomId,
          TaskId: session.taskId,
        });
        console.log(`[Voice] Stopped failed session`);
      } catch (stopErr) {
        console.warn('[Voice] Stop failed during error handling:', stopErr.message);
      }
    }

    const errorText = typeof detail === 'string' ? detail : JSON.stringify(detail);
    res.status(500).json({ success: false, error: errorText });
  }
});
|
|||
|
|
|
|||
|
|
/**
 * POST /stop
 *
 * Body: { sessionId }
 *
 * Stops the remote voice-chat task of an active session, snapshots its
 * subtitles into completedSessions for 30 minutes (only when there are any)
 * and releases every in-memory entry tied to the session's room.
 *
 * An unknown or already-stopped session is not an error: the route answers
 * 200 with an informational message. If the remote stop call throws, the
 * session is left registered and the route answers 500.
 */
router.post('/stop', async (req, res) => {
  try {
    const { sessionId } = req.body;
    const session = activeSessions.get(sessionId);

    if (!session) {
      res.json({ success: true, data: { message: 'Session not found or already stopped' } });
      return;
    }

    await volcengine.stopVoiceChat({
      AppId: process.env.VOLC_RTC_APP_ID,
      RoomId: session.roomId,
      TaskId: session.taskId,
    });

    const duration = Math.floor((Date.now() - session.startTime) / 1000);
    console.log(`[Voice] Session stopped: ${sessionId}, duration: ${duration}s, subtitles: ${session.subtitles.length}`);

    if (session.subtitles.length > 0) {
      completedSessions.set(sessionId, {
        subtitles: session.subtitles,
        duration,
        endTime: Date.now(),
      });
      setTimeout(() => completedSessions.delete(sessionId), 30 * 60 * 1000);
    }

    const { roomId } = session;
    activeSessions.delete(sessionId);
    roomToTaskId.delete(roomId);
    roomToSessionId.delete(roomId);
    roomToBotUserId.delete(roomId);
    roomToHumanUserId.delete(roomId);

    // Also release the per-room speech cache and any tool-call buffers that
    // belong to this room. Previously these were never removed, so they
    // accumulated for the lifetime of the process, and a still-pending buffer
    // timer could fire a tool call for a room that no longer exists.
    latestUserSpeech.delete(roomId);
    for (const [bufferKey, buffer] of toolCallBuffers) {
      if (buffer.RoomID === roomId) {
        if (buffer.timer) clearTimeout(buffer.timer);
        toolCallBuffers.delete(bufferKey);
      }
    }

    res.json({
      success: true,
      data: {
        duration,
        subtitleCount: session.subtitles.length,
        subtitles: session.subtitles,
      },
    });
  } catch (error) {
    console.error('[Voice] Stop failed:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|||
|
|
|
|||
|
|
/**
 * POST /subtitle
 *
 * Body: { sessionId, roomId, text, role, definite, sequence }
 *
 * Records a finalized subtitle line. Only requests with a truthy `definite`
 * flag and non-empty `text` are processed; anything else is acknowledged and
 * ignored. A processed line is
 *   - appended to the active session's subtitle list (when the session exists),
 *   - written to the database (fire-and-forget), and
 *   - for user lines, cached as the room's latest user speech.
 *
 * Any role other than 'user' is treated as 'assistant'.
 */
router.post('/subtitle', (req, res) => {
  try {
    const { sessionId, roomId, text, role, definite, sequence } = req.body;

    // Resolve the session id from the room when the client did not send one.
    // The earlier fallback went through the session object, which can only be
    // found when sessionId is already known, so it could never take effect.
    const sid = sessionId || (roomId ? roomToSessionId.get(roomId) : undefined);
    const session = activeSessions.get(sid);

    if (definite && text) {
      const subtitleRole = role === 'user' ? 'user' : 'assistant';

      if (session) {
        session.subtitles.push({ text, role: subtitleRole, timestamp: Date.now(), sequence });
      }

      if (sid) {
        const source = subtitleRole === 'user' ? 'voice_asr' : 'voice_bot';
        // Deliberately not awaited: persistence must not delay the response.
        db.addMessage(sid, subtitleRole, text, source)
          .catch((e) => console.warn('[DB] addMessage failed:', e.message));
      }

      if (subtitleRole === 'user') {
        const rid = roomId || (session && session.roomId) || '';
        if (rid) {
          latestUserSpeech.set(rid, { text, timestamp: Date.now() });
          console.log(`[Subtitle][user][${rid}] "${text}"`);
        }
      } else {
        console.log(`[Subtitle][assistant] ${text}`);
      }
    }

    res.json({ success: true });
  } catch (error) {
    console.error('[Subtitle] Error:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|||
|
|
|
|||
|
|
/**
 * GET /subtitles/:sessionId
 *
 * Returns the subtitle list of an active session, or an empty list when no
 * active session has that id.
 */
router.get('/subtitles/:sessionId', (req, res) => {
  const { sessionId } = req.params;
  const activeSession = activeSessions.get(sessionId);

  let subtitles = [];
  if (activeSession) {
    subtitles = activeSession.subtitles;
  }

  res.json({ success: true, data: subtitles });
});
|
|||
|
|
|
|||
|
|
/**
 * Salvages human-readable words from tool-call argument fragments that could
 * not be parsed as JSON.
 *
 * The fragments are concatenated, `\uXXXX` escapes (written with either one
 * or two backslashes) are decoded, and then every run of CJK ideographs plus
 * every Latin word of two or more letters is collected. Latin words that are
 * just JSON/tool-schema keys (id, type, function, ...) are discarded.
 * CJK runs are listed before Latin words; original interleaving is not kept.
 *
 * @param {string[]|string|null|undefined} chunks - Argument fragments in
 *   order. A single string is accepted as one fragment; null/undefined is
 *   treated as no input.
 * @returns {string} The collected words joined by single spaces, or '' when
 *   nothing readable was found.
 */
function extractReadableText(chunks) {
  // Tolerate missing or non-array input instead of throwing on `.join`.
  const raw = Array.isArray(chunks) ? chunks.join('') : String(chunks ?? '');

  const decodeEscape = (_, hex) => String.fromCharCode(parseInt(hex, 16));
  // Two passes on purpose: the first handles doubly escaped sequences
  // (`\\uXXXX`), the second the ordinary single-backslash form.
  const decoded = raw
    .replace(/\\\\u([0-9a-fA-F]{4})/g, decodeEscape)
    .replace(/\\u([0-9a-fA-F]{4})/g, decodeEscape);

  const chineseChars = decoded.match(/[\u4e00-\u9fff\u3400-\u4dbf]+/g) || [];

  const skipWords = new Set(['id', 'type', 'function', 'name', 'arguments', 'query', 'object', 'string']);
  const englishWords = (decoded.match(/[a-zA-Z]{2,}/g) || [])
    .filter((word) => !skipWords.has(word.toLowerCase()));

  const result = [...chineseChars, ...englishWords].join(' ').trim();
  console.log(`[FC] extractReadableText: chinese=[${chineseChars.join(',')}] english=[${englishWords.join(',')}] → "${result}"`);
  return result;
}
|
|||
|
|
|
|||
|
|
// Monotonic counter used to label callbacks that carry no `_seq` of their own.
let fcCallbackSeq = 0;

// Timing and size limits of the function-calling flow.
const FC_CHUNK_WINDOW_MS = 500; // quiet period that closes a streamed tool call
const FC_NEW_INPUT_WINDOW_MS = 10000; // user speech newer than this lifts the cooldown
const FC_COOLDOWN_AFTER_RESULT_MS = 30000; // retries ignored this long after a result was produced
const FC_COOLDOWN_PENDING_MS = 15000; // retries ignored this long while no result exists yet
const FC_ASR_FALLBACK_WINDOW_MS = 30000; // max age of user speech used as a query fallback
const FC_MAX_CONTENT_CHARS = 1500; // tool output is cut to this many characters

/**
 * Builds the argument object for a buffered tool call.
 *
 * Tries to JSON-parse the concatenated fragments first. If that fails, falls
 * back to the room's most recent user speech (when recent enough), and
 * finally to whatever readable words can be salvaged from the fragments.
 *
 * @param {object} buf - Tool-call buffer (reads RoomID and chunks).
 * @param {string} joinedArgs - All argument fragments concatenated in order.
 * @returns {*} The parsed JSON value, or `{ query: string }` on fallback.
 */
function resolveToolArgs(buf, joinedArgs) {
  try {
    const parsed = JSON.parse(joinedArgs);
    console.log(`[FC] [FormatA] JSON.parse succeeded: ${JSON.stringify(parsed)}`);
    return parsed;
  } catch (parseErr) {
    const userSpeech = latestUserSpeech.get(buf.RoomID);
    if (userSpeech && (Date.now() - userSpeech.timestamp < FC_ASR_FALLBACK_WINDOW_MS)) {
      console.log(`[FC] [FormatA] Using ASR user speech: "${userSpeech.text}"`);
      return { query: userSpeech.text };
    }
    const extractedText = extractReadableText(buf.chunks.map((c) => c.args));
    console.log(`[FC] [FormatA] No ASR text, extracted from chunks: "${extractedText}"`);
    return { query: extractedText || '' };
  }
}

/**
 * Flattens a tool result into the plain text handed back to the model.
 *
 * `{ results: [...] }` becomes the entries' `content` (or their JSON) joined
 * by newlines, `{ error }` becomes the error text, a string is used as is,
 * and anything else falls back to its JSON form.
 *
 * @param {*} result - Value returned by the tool executor.
 * @param {string} serialized - JSON form of `result`, used as the fallback.
 * @returns {string}
 */
function toolResultToText(result, serialized) {
  if (result && Array.isArray(result.results)) {
    try {
      return result.results.map((r) => r.content || JSON.stringify(r)).join('\n');
    } catch (flattenErr) {
      console.warn('[FC] Could not flatten tool results, using raw JSON:', flattenErr.message);
      return serialized;
    }
  }
  if (result && result.error) {
    // Always return a string so the length/substring handling downstream is safe.
    return String(result.error?.message ?? (typeof result.error === 'string' ? result.error : JSON.stringify(result.error)));
  }
  if (typeof result === 'string') {
    return result;
  }
  return serialized;
}

/**
 * Sends a tool result to the voice-chat task via Command:function, retrying
 * once with the callback's own task id when the primary task id is rejected.
 *
 * @returns {Promise<boolean>} true when either attempt succeeded.
 */
async function sendFunctionResult({ appId, roomId, primaryTaskId, fallbackTaskId, toolCallId, message, contentLength }) {
  const send = (taskId) => volcengine.updateVoiceChat({
    AppId: appId,
    RoomId: roomId,
    TaskId: taskId,
    Command: 'function',
    Message: message,
  });

  try {
    console.log(`[FC] ★ Sending Command:function (ToolCallID=${toolCallId}, content=${contentLength} chars)`);
    await send(primaryTaskId);
    console.log('[FC] ✅ Command:function sent OK → LLM will generate S2S response with KB content');
    return true;
  } catch (funcErr) {
    console.error('[FC] ✖ Command:function failed:', funcErr.message);
  }

  // If the registered task id was rejected, try the id the callback arrived with.
  if (!fallbackTaskId || fallbackTaskId === primaryTaskId) {
    return false;
  }
  try {
    console.log(`[FC] Retrying Command:function with callback TaskID=${fallbackTaskId}`);
    await send(fallbackTaskId);
    console.log('[FC] ✅ Command:function retry OK');
    return true;
  } catch (retryErr) {
    console.error('[FC] ✖ Command:function retry also failed:', retryErr.message);
    return false;
  }
}

/**
 * Runs the tool call accumulated in a buffer and reports the result back to
 * the model. Invoked from a timer, so it must never reject: every failure,
 * including a failing tool, is caught and logged here.
 *
 * Flow: Command:function -> model receives the tool result -> model produces
 * the reply -> the speech pipeline reads it out. No separate interrupt or
 * external text-to-speech command is sent.
 *
 * @param {string} taskKey - Key of the buffer in toolCallBuffers.
 * @param {string} appId - App id to address the update command to.
 * @returns {Promise<void>}
 */
async function executeBufferedToolCall(taskKey, appId) {
  const buf = toolCallBuffers.get(taskKey);
  if (!buf || buf.triggered) return;
  buf.triggered = true;

  try {
    const toolName = buf.name || 'search_knowledge';
    buf.chunks.sort((left, right) => left.seq - right.seq);
    const allArgs = buf.chunks.map((c) => c.args).join('');
    console.log(`[FC] [FormatA] ${FC_CHUNK_WINDOW_MS}ms timeout, ${buf.chunks.length} chunks collected, name="${toolName}"`);

    const s2sTaskId = roomToTaskId.get(buf.RoomID) || buf.S2STaskID || taskKey;
    console.log(`[FC] TaskId resolution: roomToTaskId=${roomToTaskId.get(buf.RoomID)} callback=${buf.S2STaskID} → using=${s2sTaskId}`);

    const parsedArgs = resolveToolArgs(buf, allArgs);

    console.log('[FC] ⚡ Starting KB query (no pre-query interrupt)');
    // Awaited inside the try so a failing tool is logged rather than
    // surfacing as an unhandled promise rejection from the timer.
    const kbResult = await ToolExecutor.execute(toolName, parsedArgs);

    // JSON.stringify yields undefined for undefined/functions; normalise to ''.
    const resultStr = JSON.stringify(kbResult) ?? '';
    console.log(`[FC] Tool result (${toolName}): ${resultStr.substring(0, 500)}`);
    const contentText = toolResultToText(kbResult, resultStr);

    const dbSessionId = roomToSessionId.get(buf.RoomID);
    if (dbSessionId) {
      db.addMessage(dbSessionId, 'assistant', contentText, 'voice_tool', toolName)
        .catch((e) => console.warn('[DB] addMessage(tool) failed:', e.message));
    }
    console.log(`[FC] Knowledge base content (${contentText.length} chars): ${contentText.substring(0, 200)}${contentText.length > 200 ? '...' : ''}`);
    buf.resultSentAt = Date.now();

    const toolCallId = buf.id || 'unknown_call_id';
    const functionContent = contentText.length > FC_MAX_CONTENT_CHARS
      ? contentText.substring(0, FC_MAX_CONTENT_CHARS) + '……(内容较长,以上为主要部分)'
      : contentText;
    const funcMsg = JSON.stringify({
      ToolCallID: toolCallId,
      Content: functionContent,
    });

    const delivered = await sendFunctionResult({
      appId,
      roomId: buf.RoomID,
      primaryTaskId: s2sTaskId,
      fallbackTaskId: buf.S2STaskID,
      toolCallId,
      message: funcMsg,
      contentLength: functionContent.length,
    });

    if (delivered) {
      console.log(`[FC] Final result: Command:function sent (${functionContent.length} chars)`);
    } else {
      console.error(`[FC] Final result: Command:function NOT delivered (${functionContent.length} chars)`);
    }
  } catch (err) {
    console.error(`[FC] Tool execution failed:`, err.message);
    console.error(`[FC] Error details:`, err);
  }
}

/**
 * POST /fc_callback
 *
 * Receives function-calling callbacks. Three payload shapes are handled:
 *
 *  - Type "tool_calls" with an array Message ("FormatA"): streamed fragments
 *    of one tool call. Fragments are buffered per callback task id; once no
 *    fragment has arrived for 500 ms the tool is executed and its result is
 *    sent back with Command:function. Only the first array element is read.
 *    After a call has been triggered, further tool_calls for the same task
 *    are ignored during a cooldown unless fresh user speech was seen.
 *  - Object Message with event_type "function_calling" ("FormatB"): purely
 *    informational; acknowledged and logged.
 *  - Any other object Message: scanned for a user utterance, which is cached
 *    as the room's latest user speech.
 *
 * Responds 400 for an empty body, 500 on unexpected errors, otherwise
 * { success: true }. An unparseable Message is logged and acknowledged.
 *
 * NOTE(review): the payload's `Signature` field is not read or verified by
 * this handler — confirm whether authentication happens elsewhere.
 */
router.post('/fc_callback', async (req, res) => {
  try {
    const body = req.body;
    if (!body || typeof body !== 'object' || Object.keys(body).length === 0) {
      console.error('[FC] Empty body');
      return res.status(400).json({ success: false, error: 'Empty body' });
    }

    const { Message, Type, RoomID, TaskID, TaskType, AppID, AppId, room_id, task_id, roomId, taskId } = body;
    // The same identifiers arrive under several spellings; normalise them once.
    const effectiveRoomId = RoomID || room_id || roomId;
    const effectiveTaskId = TaskID || task_id || taskId;
    const effectiveAppId = AppID || AppId || process.env.VOLC_RTC_APP_ID;
    const seq = body._seq || ++fcCallbackSeq;
    console.log(`[FC] >>> Callback received: seq=${seq} Type="${Type}" Room=${effectiveRoomId} Task=${effectiveTaskId} TaskType=${TaskType}`);

    let msgObj = null;
    try {
      msgObj = typeof Message === 'string' ? JSON.parse(Message) : Message;
    } catch (e) {
      console.error('[FC] Failed to parse Message:', e.message);
      return res.json({ success: true });
    }

    // ---- FormatA: streamed tool_calls fragments ---------------------------
    if (Type === 'tool_calls' && Array.isArray(msgObj) && msgObj.length > 0) {
      const tc = msgObj[0];
      const chunkId = tc.id || '';
      const chunkName = tc.function?.name || '';
      const chunkArgs = tc.function?.arguments || '';

      const existing = toolCallBuffers.get(effectiveTaskId);
      if (existing && existing.triggered) {
        const userSpeech = latestUserSpeech.get(effectiveRoomId);
        const hasNewInput = userSpeech && (Date.now() - userSpeech.timestamp < FC_NEW_INPUT_WINDOW_MS);
        if (hasNewInput) {
          console.log(`[FC] [FormatA] New user input detected, clearing cooldown for room=${effectiveRoomId}`);
          toolCallBuffers.delete(effectiveTaskId);
        } else {
          // The cooldown is longer once a result exists, so the model cannot
          // keep re-issuing the same call while the previous answer is in flight.
          const cooldownMs = existing.resultSentAt ? FC_COOLDOWN_AFTER_RESULT_MS : FC_COOLDOWN_PENDING_MS;
          const elapsed = existing.resultSentAt
            ? (Date.now() - existing.resultSentAt)
            : (Date.now() - existing.createdAt);
          if (elapsed < cooldownMs) {
            console.log(`[FC] [FormatA] Cooldown active (${elapsed}ms < ${cooldownMs}ms), ignoring retry for TaskID=${effectiveTaskId}`);
            res.json({ success: true });
            return;
          }
          console.log(`[FC] [FormatA] Cooldown expired (${elapsed}ms >= ${cooldownMs}ms), allowing new call for TaskID=${effectiveTaskId}`);
          toolCallBuffers.delete(effectiveTaskId);
        }
      }

      if (!toolCallBuffers.has(effectiveTaskId)) {
        toolCallBuffers.set(effectiveTaskId, {
          id: '',
          name: '',
          chunks: [],
          triggered: false,
          RoomID: effectiveRoomId,
          AppID: effectiveAppId,
          S2STaskID: effectiveTaskId,
          createdAt: Date.now(),
          timer: null,
        });
        console.log(`[FC] [FormatA] New buffer created for TaskID=${effectiveTaskId}, room=${effectiveRoomId}`);
      }

      const buf = toolCallBuffers.get(effectiveTaskId);
      if (chunkId && !buf.id) buf.id = chunkId;
      if (chunkName && !buf.name) buf.name = chunkName;
      if (chunkArgs) {
        buf.chunks.push({ seq: tc.seq || 0, args: chunkArgs });
      }

      // Acknowledge immediately; the tool runs after the response is sent.
      res.json({ success: true });

      // Every fragment restarts the quiet-period timer.
      if (buf.timer) clearTimeout(buf.timer);
      buf.timer = setTimeout(() => {
        executeBufferedToolCall(effectiveTaskId, effectiveAppId);
      }, FC_CHUNK_WINDOW_MS);
      return;
    }

    // ---- FormatB: informational function_calling event ---------------------
    if (msgObj && typeof msgObj === 'object' && !Array.isArray(msgObj)) {
      const eventType = msgObj.event_type;
      console.log(`[FC] [FormatB] event_type="${eventType}"`);
      if (eventType === 'function_calling') {
        const funcName = msgObj.function || '';
        const toolCallId = msgObj.tool_call_id || '';
        const responseId = msgObj.response_id || '';
        console.log(`[FC] [Information] FC notification: func=${funcName} toolCallId=${toolCallId} responseId=${responseId}`);
        res.json({ success: true });
        // Nothing to do here: the tool itself is executed when the matching
        // tool_calls fragments arrive through the FormatA branch.
        console.log(`[FC] [Information] FC notification received, waiting for tool_calls`);
        return;
      }
    }

    // ---- Anything else: look for a user utterance to cache -----------------
    if (msgObj && typeof msgObj === 'object') {
      const asrText = msgObj.text || msgObj.asr_text || msgObj.content ||
        msgObj.user_text || msgObj.transcript ||
        (msgObj.data && (msgObj.data.text || msgObj.data.asr_text || msgObj.data.content));
      const role = msgObj.role || msgObj.speaker || msgObj.data?.role || '';
      const isUser = !role || role === 'user' || role === 'human';
      // Use the normalised room id so payloads that send room_id/roomId are
      // stored under the same key the tool-call branch reads from.
      if (asrText && isUser && effectiveRoomId) {
        latestUserSpeech.set(effectiveRoomId, { text: asrText, timestamp: Date.now() });
        console.log(`[FC] [ConvState] Stored user speech for ${effectiveRoomId}: "${asrText}"`);
      }
    }

    res.json({ success: true });
  } catch (error) {
    console.error('[FC] Error:', error.message);
    res.status(500).json({ success: false, error: error.message });
  }
});
|
|||
|
|
|
|||
|
|
/**
 * POST /room_message
 *
 * Body: { roomId, uid, text }
 *
 * Inspects a raw room message for a user utterance and, when one is found,
 * caches it as the room's latest user speech. The JSON payload is expected
 * to start at the first '[' or '{' inside `text`. Three shapes are handled:
 *
 *  - unparseable JSON: "text" / "userId" are pulled out with regular
 *    expressions;
 *  - { data: [...] }: each entry that is definite, non-empty and not from a
 *    user id starting with "bot_" is stored (the last one wins);
 *  - any other object: the first non-empty text-like field is stored unless
 *    `uid` starts with "bot_".
 *
 * The route always answers { success: true }, including on errors.
 */
router.post('/room_message', (req, res) => {
  const acknowledge = () => res.json({ success: true });

  try {
    const { roomId, uid, text } = req.body;
    if (!roomId || !text) {
      return acknowledge();
    }

    const jsonStart = text.search(/[\[{]/);
    if (jsonStart < 0) {
      return acknowledge();
    }

    const rememberSpeech = (spoken, source) => {
      latestUserSpeech.set(roomId, { text: spoken, timestamp: Date.now(), source });
    };

    const jsonStr = text.substring(jsonStart);
    let parsed = null;
    try {
      parsed = JSON.parse(jsonStr);
    } catch (parseErr) {
      // Payload is not valid JSON (e.g. truncated): salvage fields by pattern.
      const textMatch = jsonStr.match(/"text"\s*:\s*"([^"]+)"/);
      if (textMatch && textMatch[1]) {
        const extractedText = textMatch[1];
        const userIdMatch = jsonStr.match(/"userId"\s*:\s*"([^"]+)"/);
        const subtitleUserId = userIdMatch ? userIdMatch[1] : '';
        const isUserSpeech = subtitleUserId && !subtitleUserId.startsWith('bot_');
        if (isUserSpeech && extractedText) {
          rememberSpeech(extractedText, 'room_regex');
          console.log(`[RoomMsg] ✅ Stored user speech (regex) for ${roomId}: "${extractedText}"`);
        }
      }
      return acknowledge();
    }

    // Subtitle batch: { data: [ { text, userId, definite }, ... ] }
    if (parsed && parsed.data && Array.isArray(parsed.data)) {
      for (const sub of parsed.data) {
        const subText = sub.text || '';
        const subUserId = sub.userId || sub.user_id || '';
        const isDefinite = sub.definite === true;
        const isUserSpeech = subUserId && !subUserId.startsWith('bot_');
        if (subText && isUserSpeech && isDefinite) {
          rememberSpeech(subText, 'room_subtitle');
          console.log(`[RoomMsg] ✅ Stored user speech for ${roomId}: "${subText}"`);
        }
      }
      return acknowledge();
    }

    // Single object carrying the text in one of several possible fields.
    if (parsed && typeof parsed === 'object') {
      const dataAsText = parsed.data && typeof parsed.data === 'string' ? parsed.data : null;
      const asrText = parsed.text || parsed.asr_text || parsed.content ||
        parsed.user_text || parsed.transcript || dataAsText;
      const isBot = uid && uid.startsWith('bot_');
      if (asrText && !isBot) {
        rememberSpeech(asrText, 'room_object');
        console.log(`[RoomMsg] ✅ Stored user speech (obj) for ${roomId}: "${asrText}"`);
      }
    }

    return acknowledge();
  } catch (error) {
    console.error('[RoomMsg] Error:', error.message);
    return acknowledge();
  }
});
|
|||
|
|
|
|||
|
|
/**
 * POST /tool-callback
 *
 * Legacy endpoint kept for old callers. It logs the received body and
 * answers with a deprecation notice pointing at fc_callback.
 */
router.post('/tool-callback', async (req, res) => {
  const receivedBody = JSON.stringify(req.body);
  console.log('[ToolCallback] Legacy callback received:', receivedBody);

  const reply = { success: true, message: 'deprecated, use fc_callback instead' };
  res.json(reply);
});
|
|||
|
|
|
|||
|
|
/**
 * GET /sessions
 *
 * Lists every active session with its room, user, elapsed time in whole
 * seconds since it was prepared, and the number of subtitles recorded so far.
 */
router.get('/sessions', (req, res) => {
  const summaries = Array.from(activeSessions, ([sessionId, session]) => {
    const elapsedMs = Date.now() - session.startTime;
    return {
      sessionId,
      roomId: session.roomId,
      userId: session.userId,
      duration: Math.floor(elapsedMs / 1000),
      subtitleCount: session.subtitles.length,
    };
  });

  res.json({ success: true, data: summaries });
});
|
|||
|
|
|
|||
|
|
module.exports = router;
|