feat: 添加realtime_dialog和realtime_dialog_external_rag_test项目,更新test2项目
This commit is contained in:
@@ -35,6 +35,51 @@ class ArkChatService {
|
||||
};
|
||||
}
|
||||
|
||||
async summarizeContextForHandoff(messages, maxRounds = 3) {
|
||||
const normalizedMessages = (Array.isArray(messages) ? messages : [])
|
||||
.filter((item) => item && (item.role === 'user' || item.role === 'assistant') && String(item.content || '').trim());
|
||||
|
||||
let startIndex = 0;
|
||||
let userRounds = 0;
|
||||
for (let index = normalizedMessages.length - 1; index >= 0; index -= 1) {
|
||||
if (normalizedMessages[index].role === 'user') {
|
||||
userRounds += 1;
|
||||
startIndex = index;
|
||||
if (userRounds >= Math.max(1, maxRounds)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const recentMessages = normalizedMessages.slice(startIndex);
|
||||
|
||||
if (!recentMessages.length) {
|
||||
return '';
|
||||
}
|
||||
|
||||
const transcript = recentMessages
|
||||
.map((item, index) => `${index + 1}. ${item.role === 'user' ? '用户' : '助手'}:${String(item.content || '').trim()}`)
|
||||
.join('\n');
|
||||
|
||||
if (this._isMockMode()) {
|
||||
const lastUserMessage = [...recentMessages].reverse().find((item) => item.role === 'user');
|
||||
return lastUserMessage ? `用户当前主要在追问:${lastUserMessage.content}` : '';
|
||||
}
|
||||
|
||||
const result = await this.chat([
|
||||
{
|
||||
role: 'system',
|
||||
content: '你是对话交接摘要助手。请基于最近几轮对话生成一段简洁中文摘要,供另一个模型无缝接管会话。摘要必须同时包含:用户当前主要问题、已经确认的信息、仍待解决的问题。不要使用标题、项目符号或编号,不要虚构事实,控制在120字以内。',
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: `请总结以下最近${Math.ceil(recentMessages.length / 2)}轮对话:\n${transcript}`,
|
||||
},
|
||||
], []);
|
||||
|
||||
return String(result.content || '').trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* 非流式调用方舟 LLM
|
||||
*/
|
||||
|
||||
659
test2/server/services/nativeVoiceGateway.js
Normal file
659
test2/server/services/nativeVoiceGateway.js
Normal file
@@ -0,0 +1,659 @@
|
||||
const { WebSocket, WebSocketServer } = require('ws');
|
||||
const url = require('url');
|
||||
const db = require('../db');
|
||||
const arkChatService = require('./arkChatService');
|
||||
const {
|
||||
MsgType,
|
||||
unmarshal,
|
||||
createStartConnectionMessage,
|
||||
createStartSessionMessage,
|
||||
createAudioMessage,
|
||||
createChatTTSTextMessage,
|
||||
createChatRAGTextMessage,
|
||||
} = require('./realtimeDialogProtocol');
|
||||
const {
|
||||
getRuleBasedDirectRouteDecision,
|
||||
normalizeTextForSpeech,
|
||||
splitTextForSpeech,
|
||||
estimateSpeechDurationMs,
|
||||
resolveReply,
|
||||
} = require('./realtimeDialogRouting');
|
||||
|
||||
const sessions = new Map();
|
||||
|
||||
/**
 * Serialize `payload` as JSON and deliver it to `ws`, but only while the
 * socket is open; messages to missing/closed sockets are silently dropped.
 */
function sendJson(ws, payload) {
  const isOpen = Boolean(ws) && ws.readyState === WebSocket.OPEN;
  if (!isOpen) {
    return;
  }
  ws.send(JSON.stringify(payload));
}
|
||||
|
||||
/**
 * Assemble the StartSession payload for the upstream realtime-dialog API,
 * falling back to the gateway defaults for speaker, bot name, system role
 * and speaking style when the caller did not configure them.
 */
function buildStartSessionPayload(options) {
  const ttsConfig = {
    speaker: options.speaker || 'zh_female_vv_jupiter_bigtts',
    audio_config: {
      channel: 1,
      format: 'pcm_s16le',
      sample_rate: 24000,
    },
  };

  const dialogConfig = {
    dialog_id: '',
    bot_name: options.botName || '豆包',
    system_role: normalizeTextForSpeech(options.systemRole || '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。'),
    speaking_style: normalizeTextForSpeech(options.speakingStyle || '请使用清晰、自然、简洁的口吻。'),
    extra: {
      input_mod: 'audio',
      model: 'O',
      strict_audit: false,
      audit_response: '抱歉,这个问题我暂时无法回答。',
    },
  };

  return {
    asr: { extra: {} },
    tts: ttsConfig,
    dialog: dialogConfig,
  };
}
|
||||
|
||||
/**
 * Decode a protocol message payload (UTF-8 buffer) as JSON.
 * @returns {*} parsed value, or null when the payload is not valid JSON
 */
function parseJsonPayload(message) {
  const raw = message.payload.toString('utf8');
  try {
    return JSON.parse(raw);
  } catch (_error) {
    return null;
  }
}
|
||||
|
||||
/**
 * Pull the recognized/generated text out of an upstream JSON payload,
 * checking the known field locations in priority order.
 * @returns {string} trimmed text, '' when none of the fields is set
 */
function extractUserText(jsonPayload) {
  const candidates = [
    jsonPayload?.text,
    jsonPayload?.content,
    jsonPayload?.results?.[0]?.text,
    jsonPayload?.results?.[0]?.alternatives?.[0]?.text,
  ];
  const text = candidates.find((value) => value) || '';
  return String(text).trim();
}
|
||||
|
||||
/**
 * Decide whether an upstream ASR payload represents a final (non-interim)
 * user utterance: `is_final: true` wins outright; otherwise any result item
 * explicitly marked `is_interim: false` counts as final.
 */
function isFinalUserPayload(jsonPayload) {
  if (jsonPayload?.is_final === true) {
    return true;
  }
  const results = jsonPayload?.results;
  if (!Array.isArray(results)) {
    return false;
  }
  return results.some((item) => item && item.is_interim === false);
}
|
||||
|
||||
/**
 * Record a finalized user utterance: dedupe exact repeats within 5 seconds,
 * store it in the DB (best-effort) and push a final user subtitle to the
 * client.
 * @returns {boolean} true when the text was accepted (not blank or duplicate)
 */
function persistUserSpeech(session, text) {
  const cleanText = (text || '').trim();
  if (!cleanText) return false;

  const now = Date.now();
  const isRecentDuplicate =
    session.lastPersistedUserText === cleanText && now - (session.lastPersistedUserAt || 0) < 5000;
  if (isRecentDuplicate) return false;

  session.lastPersistedUserText = cleanText;
  session.lastPersistedUserAt = now;
  session.latestUserText = cleanText;
  // Persistence is best-effort; a DB failure must not break the voice flow.
  db.addMessage(session.sessionId, 'user', cleanText, 'voice_asr').catch((e) => console.warn('[NativeVoice][DB] add user failed:', e.message));
  sendJson(session.client, {
    type: 'subtitle',
    role: 'user',
    text: cleanText,
    isFinal: true,
    sequence: `native_user_${now}`,
  });
  return true;
}
|
||||
|
||||
/**
 * Record an assistant utterance: dedupe exact repeats within 5 seconds,
 * optionally store it in the DB, and push a final assistant subtitle to the
 * client.
 *
 * @param {object} session - per-connection state object
 * @param {string} text - assistant text to record
 * @param {object} [options]
 * @param {string} [options.source='voice_bot'] - origin tag stored/sent with the message
 * @param {string|null} [options.toolName=null] - tool that produced the text, if any
 * @param {boolean} [options.persistToDb=true] - false for transient speech (e.g. the greeting)
 * @param {object|null} [options.meta=null] - extra metadata stored with the message
 * @returns {boolean} true when the text was accepted (not blank or duplicate)
 */
function persistAssistantSpeech(session, text, { source = 'voice_bot', toolName = null, persistToDb = true, meta = null } = {}) {
  const cleanText = (text || '').trim();
  if (!cleanText) return false;

  const now = Date.now();
  const isRecentDuplicate =
    session.lastPersistedAssistantText === cleanText && now - (session.lastPersistedAssistantAt || 0) < 5000;
  if (isRecentDuplicate) return false;

  session.lastPersistedAssistantText = cleanText;
  session.lastPersistedAssistantAt = now;
  if (persistToDb) {
    // Best-effort persistence; failures are logged, never thrown.
    db.addMessage(session.sessionId, 'assistant', cleanText, source, toolName, meta).catch((e) => console.warn('[NativeVoice][DB] add assistant failed:', e.message));
  }
  sendJson(session.client, {
    type: 'subtitle',
    role: 'assistant',
    text: cleanText,
    isFinal: true,
    source,
    toolName,
    sequence: `native_assistant_${now}`,
  });
  return true;
}
|
||||
|
||||
/**
 * Accumulate one streamed assistant text chunk into the session buffer.
 * A chunk belonging to a different reply_id resets the buffer first.
 * @returns {string} buffer content so far, or '' when the chunk was empty
 */
function appendAssistantStream(session, payload) {
  const chunkText = extractUserText(payload);
  if (!chunkText) {
    return '';
  }
  const replyId = payload?.reply_id || '';
  const switchedReply = replyId && session.assistantStreamReplyId && session.assistantStreamReplyId !== replyId;
  if (switchedReply) {
    // New reply started before the previous one was flushed — drop the stale text.
    session.assistantStreamBuffer = '';
  }
  session.assistantStreamReplyId = replyId || session.assistantStreamReplyId || '';
  session.assistantStreamBuffer = `${session.assistantStreamBuffer || ''}${chunkText}`;
  return session.assistantStreamBuffer;
}
|
||||
|
||||
/**
 * Finish the current assistant stream: clear the buffer and reply id, then
 * persist the accumulated text (if any) as one assistant message.
 * @returns {boolean} whether a message was persisted
 */
function flushAssistantStream(session, { source = 'voice_bot', toolName = null, meta = null } = {}) {
  const pendingText = (session.assistantStreamBuffer || '').trim();
  session.assistantStreamBuffer = '';
  session.assistantStreamReplyId = '';
  if (!pendingText) {
    return false;
  }
  return persistAssistantSpeech(session, pendingText, { source, toolName, meta });
}
|
||||
|
||||
/**
 * Prepare a textual handoff summary of the recent session history so the
 * realtime voice bot can pick up an ongoing text conversation.
 * Failures degrade to an empty summary — they are logged, never propagated.
 */
async function loadHandoffSummaryForVoice(session) {
  session.handoffSummaryUsed = false;
  try {
    const history = await db.getHistoryForLLM(session.sessionId, 20);
    if (!history.length) {
      session.handoffSummary = '';
      return;
    }
    session.handoffSummary = await arkChatService.summarizeContextForHandoff(history, 3);
    console.log(`[NativeVoice] Handoff summary prepared for ${session.sessionId}: ${session.handoffSummary ? 'yes' : 'no'}`);
  } catch (error) {
    session.handoffSummary = '';
    console.warn('[NativeVoice] loadHandoffSummaryForVoice failed:', error.message);
  }
}
|
||||
|
||||
/**
 * Speak `speechText` locally: split it into chunks and stream them to the
 * upstream dialog engine as ChatTTSText messages (start flag on the first
 * chunk, then a final empty end-flagged chunk).
 *
 * Side effects on `session`: marks local chat_tts_text playback active via
 * `isSendingChatTTSText` / `currentTtsType` / `chatTTSUntil`, and schedules
 * `chatTTSTimer` to clear the flag once the estimated playback window ends.
 * Also tells the client to reset its TTS playback buffer first.
 */
async function sendSpeechText(session, speechText) {
  const chunks = splitTextForSpeech(speechText);
  // Nothing to do without chunks or an open upstream socket.
  if (!chunks.length || !session.upstream || session.upstream.readyState !== WebSocket.OPEN) {
    return;
  }
  console.log(`[NativeVoice] sendSpeechText start session=${session.sessionId} chunks=${chunks.length} textLen=${speechText.length}`);
  session.isSendingChatTTSText = true;
  session.currentTtsType = 'chat_tts_text';
  // Estimated playback deadline (+800ms slack) used elsewhere to suppress
  // competing upstream TTS while this text is being spoken.
  session.chatTTSUntil = Date.now() + estimateSpeechDurationMs(speechText) + 800;
  clearTimeout(session.chatTTSTimer);
  session.chatTTSTimer = setTimeout(() => {
    session.chatTTSTimer = null;
    // Only clear the flag if no later call extended the deadline meanwhile.
    if ((session.chatTTSUntil || 0) <= Date.now()) {
      session.isSendingChatTTSText = false;
    }
  }, Math.max(200, session.chatTTSUntil - Date.now() + 50));
  sendJson(session.client, { type: 'tts_reset', ttsType: 'chat_tts_text' });
  // Stream every chunk; only the first carries start=true, none carries end.
  for (let index = 0; index < chunks.length; index += 1) {
    const chunk = chunks[index];
    console.log(`[NativeVoice] sendSpeechText chunk session=${session.sessionId} index=${index + 1}/${chunks.length} len=${chunk.length} start=${index === 0} end=false text=${JSON.stringify(chunk.slice(0, 80))}`);
    session.upstream.send(createChatTTSTextMessage(session.sessionId, {
      start: index === 0,
      end: false,
      content: chunk,
    }));
  }
  console.log(`[NativeVoice] sendSpeechText end session=${session.sessionId}`);
  // Terminating frame: empty content with the end flag set.
  session.upstream.send(createChatTTSTextMessage(session.sessionId, {
    start: false,
    end: true,
    content: '',
  }));
}
|
||||
|
||||
/**
 * Play the configured greeting once per session, then signal `ready` to the
 * client. When the greeting is empty or already sent, `ready` is signalled
 * immediately. The greeting is retried once if no chat_tts_text TTS event
 * acknowledges it within 2 seconds.
 */
function sendGreeting(session) {
  const greetingText = normalizeTextForSpeech(session.greetingText || '你好,我是你的智能语音助手,有什么可以帮你的吗?');
  if (!greetingText || session.hasSentGreeting) {
    // Nothing to play — still make sure the client gets exactly one `ready`.
    if (!session.readySent) {
      session.readySent = true;
      sendJson(session.client, { type: 'ready' });
    }
    return;
  }
  session.hasSentGreeting = true;
  // Show the greeting as an assistant subtitle but do not store it in the DB.
  persistAssistantSpeech(session, greetingText, { source: 'voice_bot', persistToDb: false });
  // Defer `ready` until the greeting has (approximately) finished playing.
  clearTimeout(session.readyTimer);
  session.readyTimer = setTimeout(() => {
    session.readyTimer = null;
    if (!session.readySent) {
      session.readySent = true;
      sendJson(session.client, { type: 'ready' });
    }
  }, estimateSpeechDurationMs(greetingText) + 300);
  const playGreeting = () => {
    // Expect an ack (pendingGreetingAck cleared by the tts_event handler);
    // retry at most once when it does not arrive within 2s.
    session.pendingGreetingAck = true;
    clearTimeout(session.greetingAckTimer);
    session.greetingAckTimer = setTimeout(() => {
      session.greetingAckTimer = null;
      if (session.pendingGreetingAck && session.greetingRetryCount < 1) {
        session.greetingRetryCount += 1;
        console.warn(`[NativeVoice] greeting ack timeout, retry session=${session.sessionId}`);
        playGreeting();
      }
    }, 2000);
    sendSpeechText(session, greetingText).catch((error) => {
      // On failure, roll back state so a later attempt may play the greeting.
      session.pendingGreetingAck = false;
      clearTimeout(session.greetingAckTimer);
      session.greetingAckTimer = null;
      session.hasSentGreeting = false;
      console.warn('[NativeVoice] greeting failed:', error.message);
    });
  };
  // Small delay before playing — presumably to let the upstream session
  // finish establishing first; TODO confirm the 800ms choice.
  clearTimeout(session.greetingTimer);
  session.greetingTimer = setTimeout(() => {
    session.greetingTimer = null;
    playGreeting();
  }, 800);
}
|
||||
|
||||
/**
 * Forward knowledge-base snippets to the upstream dialog engine as one
 * external-RAG message. Items without content are dropped; nothing is sent
 * when the upstream socket is not open or no usable items remain.
 */
async function sendExternalRag(session, items) {
  const upstreamOpen = session.upstream && session.upstream.readyState === WebSocket.OPEN;
  if (!upstreamOpen) {
    return;
  }
  const ragItems = (Array.isArray(items) ? items : []).filter((item) => item && item.content);
  if (!ragItems.length) {
    return;
  }
  session.upstream.send(createChatRAGTextMessage(session.sessionId, JSON.stringify(ragItems)));
}
|
||||
|
||||
/**
 * Resolve and deliver a reply for a finalized user utterance.
 *
 * Delivery modes returned by resolveReply():
 *  - 'upstream_chat': the upstream voice bot answers; session is marked as
 *    awaiting the upstream reply with attribution metadata recorded.
 *  - 'external_rag':  RAG snippets are pushed upstream and the bot answers
 *    from them; also marks the session as awaiting the reply.
 *  - otherwise:       `speechText` is spoken locally via ChatTTSText.
 *
 * Re-entrancy: while a reply is in flight, or local TTS is still audible,
 * new user text is stashed in `session.queuedUserText` (latest wins) and
 * replayed in the `finally` block.
 */
async function processReply(session, text) {
  const cleanText = (text || '').trim();
  if (!cleanText) return;
  if (session.processingReply) {
    // Busy — keep only the latest utterance; it is replayed in `finally`.
    session.queuedUserText = cleanText;
    console.log(`[NativeVoice] processReply queued(busy) session=${session.sessionId} text=${JSON.stringify(cleanText.slice(0, 80))}`);
    return;
  }
  const now = Date.now();
  if (session.directSpeakUntil && now < session.directSpeakUntil) {
    // Local TTS still within its playback window — queue instead of talking over it.
    session.queuedUserText = cleanText;
    console.log(`[NativeVoice] processReply queued(speaking) session=${session.sessionId} waitMs=${session.directSpeakUntil - now} text=${JSON.stringify(cleanText.slice(0, 80))}`);
    return;
  }
  session.processingReply = true;
  sendJson(session.client, { type: 'assistant_pending', active: true });
  console.log(`[NativeVoice] processReply start session=${session.sessionId} text=${JSON.stringify(cleanText.slice(0, 120))}`);
  try {
    const { delivery, speechText, ragItems, source, toolName, routeDecision, responseMeta } = await resolveReply(session.sessionId, session, cleanText);
    if (delivery === 'upstream_chat') {
      // Hand the turn to the upstream bot; attribution is consumed later by
      // the upstream-reply events in handleUpstreamMessage.
      session.awaitingUpstreamReply = true;
      session.pendingAssistantSource = 'voice_bot';
      session.pendingAssistantToolName = null;
      session.pendingAssistantMeta = responseMeta;
      console.log(`[NativeVoice] processReply handoff session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=upstream_chat`);
      return;
    }
    if (delivery === 'external_rag') {
      session.awaitingUpstreamReply = true;
      session.pendingAssistantSource = source;
      session.pendingAssistantToolName = toolName;
      session.pendingAssistantMeta = responseMeta;
      console.log(`[NativeVoice] processReply handoff session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=external_rag items=${Array.isArray(ragItems) ? ragItems.length : 0}`);
      await sendExternalRag(session, ragItems);
      return;
    }
    if (!speechText) {
      // Nothing to say — make sure the local-TTS suppression flags are cleared.
      console.log(`[NativeVoice] processReply empty session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=${delivery || 'unknown'}`);
      session.isSendingChatTTSText = false;
      session.chatTTSUntil = 0;
      return;
    }
    // Local delivery: set a generous 30s suppression window up-front;
    // sendSpeechText() tightens it to the estimated duration.
    session.isSendingChatTTSText = true;
    session.chatTTSUntil = Date.now() + 30000;
    console.log(`[NativeVoice] processReply resolved session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=local_tts source=${source} tool=${toolName || 'chat'} speechLen=${speechText.length}`);
    persistAssistantSpeech(session, speechText, { source, toolName, meta: responseMeta });
    session.directSpeakUntil = Date.now() + estimateSpeechDurationMs(speechText);
    await sendSpeechText(session, speechText);
  } catch (error) {
    console.error('[NativeVoice] processReply failed:', error.message);
    sendJson(session.client, { type: 'error', error: error.message });
  } finally {
    session.processingReply = false;
    // Keep the pending indicator on when we handed off to the upstream bot.
    if (!session.awaitingUpstreamReply) {
      sendJson(session.client, { type: 'assistant_pending', active: false });
    }
    // Replay queued user text (if different from what we just handled):
    // immediately when nothing is playing, otherwise after playback ends.
    const pending = session.queuedUserText;
    session.queuedUserText = '';
    if (pending && pending !== cleanText && (!session.directSpeakUntil || Date.now() >= session.directSpeakUntil)) {
      setTimeout(() => processReply(session, pending).catch((err) => {
        console.error('[NativeVoice] queued processReply failed:', err.message);
      }), 200);
    } else if (pending && pending !== cleanText) {
      const waitMs = Math.max(200, session.directSpeakUntil - Date.now() + 200);
      clearTimeout(session.queuedReplyTimer);
      session.queuedReplyTimer = setTimeout(() => {
        session.queuedReplyTimer = null;
        // Prefer any newer queued text captured while waiting.
        const queuedText = session.queuedUserText || pending;
        session.queuedUserText = '';
        processReply(session, queuedText).catch((err) => {
          console.error('[NativeVoice] delayed queued processReply failed:', err.message);
        });
      }, waitMs);
    }
  }
}
|
||||
|
||||
/**
 * Decode and dispatch one binary frame from the upstream dialog engine.
 *
 * Handling by message type:
 *  - AUDIO_ONLY_SERVER: forward TTS audio to the client, except while a
 *    locally injected chat_tts_text utterance owns the output and the
 *    upstream is still on its 'default' TTS stream.
 *  - ERROR: log and relay to the client.
 *  - FULL_SERVER: dispatch on `message.event` (see inline comments);
 *    unrecognized events are forwarded to the client as generic `event`
 *    messages.
 */
function handleUpstreamMessage(session, data) {
  let message;
  try {
    message = unmarshal(data);
  } catch (error) {
    console.warn('[NativeVoice] unmarshal failed:', error.message);
    return;
  }

  if (message.type === MsgType.AUDIO_ONLY_SERVER) {
    // Suppress the upstream's own voice while local ChatTTSText is speaking
    // and the upstream TTS stream type is still 'default'.
    if (session.isSendingChatTTSText && session.currentTtsType === 'default') {
      return;
    }
    if (session.client && session.client.readyState === WebSocket.OPEN) {
      session.client.send(message.payload, { binary: true });
    }
    return;
  }

  const payload = parseJsonPayload(message);
  if (message.type === MsgType.ERROR) {
    console.error(`[NativeVoice] upstream error session=${session.sessionId} code=${message.event} payload=${message.payload.toString('utf8').slice(0, 200)}`);
    sendJson(session.client, { type: 'error', error: message.payload.toString('utf8') });
    return;
  }

  if (message.type !== MsgType.FULL_SERVER) {
    return;
  }

  // Event 150: upstream session established — notify the client once.
  if (message.event === 150) {
    session.upstreamReady = true;
    console.log(`[NativeVoice] upstream ready session=${session.sessionId}`);
    if (!session.readySent) {
      session.readySent = true;
      sendJson(session.client, { type: 'ready' });
    }
    return;
  }

  // Event 350: TTS stream-type change; a chat_tts_text type also acks a
  // pending greeting (see sendGreeting).
  if (message.event === 350) {
    session.currentTtsType = payload?.tts_type || '';
    if (payload?.tts_type === 'chat_tts_text' && session.pendingGreetingAck) {
      session.pendingGreetingAck = false;
      clearTimeout(session.greetingAckTimer);
      session.greetingAckTimer = null;
    }
    console.log(`[NativeVoice] upstream tts_event session=${session.sessionId} ttsType=${payload?.tts_type || ''}`);
    sendJson(session.client, { type: 'tts_event', payload });
    return;
  }

  // While a locally injected utterance is still inside its playback window,
  // upstream assistant text must not be persisted (it would duplicate the
  // text we already persisted ourselves).
  const isLocalChatTTSTextActive = !!session.isSendingChatTTSText && (session.chatTTSUntil || 0) > Date.now();

  // Event 351: complete assistant reply text.
  if (message.event === 351) {
    if (isLocalChatTTSTextActive) {
      session.assistantStreamBuffer = '';
      session.assistantStreamReplyId = '';
      return;
    }
    // Capture the attribution recorded by processReply() before clearing it,
    // so both the streamed buffer and the full text use the same values.
    const pendingAssistantSource = session.pendingAssistantSource || 'voice_bot';
    const pendingAssistantToolName = session.pendingAssistantToolName || null;
    const pendingAssistantMeta = session.pendingAssistantMeta || null;
    session.awaitingUpstreamReply = false;
    sendJson(session.client, { type: 'assistant_pending', active: false });
    flushAssistantStream(session, {
      source: pendingAssistantSource,
      toolName: pendingAssistantToolName,
      meta: pendingAssistantMeta,
    });
    session.pendingAssistantSource = null;
    session.pendingAssistantToolName = null;
    session.pendingAssistantMeta = null;
    const assistantText = extractUserText(payload);
    if (assistantText) {
      console.log(`[NativeVoice] upstream assistant session=${session.sessionId} text=${JSON.stringify(assistantText.slice(0, 120))}`);
      // persistAssistantSpeech dedupes, so this is a no-op when the streamed
      // buffer already contained the same text.
      persistAssistantSpeech(session, assistantText, {
        source: pendingAssistantSource,
        toolName: pendingAssistantToolName,
        meta: pendingAssistantMeta,
      });
      session.pendingAssistantSource = null;
      session.pendingAssistantToolName = null;
      session.pendingAssistantMeta = null;
    }
    return;
  }

  // Event 550: incremental assistant text chunk — accumulate only.
  if (message.event === 550) {
    if (isLocalChatTTSTextActive) {
      return;
    }
    if (session.awaitingUpstreamReply) {
      session.awaitingUpstreamReply = false;
      sendJson(session.client, { type: 'assistant_pending', active: false });
    }
    const fullText = appendAssistantStream(session, payload);
    if (fullText) {
      console.log(`[NativeVoice] upstream assistant chunk session=${session.sessionId} len=${fullText.length} text=${JSON.stringify(fullText.slice(0, 120))}`);
    }
    return;
  }

  // Event 559: assistant stream finished — persist the accumulated text.
  if (message.event === 559) {
    if (isLocalChatTTSTextActive) {
      session.assistantStreamBuffer = '';
      session.assistantStreamReplyId = '';
      return;
    }
    session.awaitingUpstreamReply = false;
    sendJson(session.client, { type: 'assistant_pending', active: false });
    flushAssistantStream(session, {
      source: session.pendingAssistantSource || 'voice_bot',
      toolName: session.pendingAssistantToolName || null,
      meta: session.pendingAssistantMeta || null,
    });
    session.pendingAssistantSource = null;
    session.pendingAssistantToolName = null;
    session.pendingAssistantMeta = null;
    return;
  }

  // Event 450, or interim 451: partial ASR — live subtitle only, nothing persisted.
  if (message.event === 450 || (message.event === 451 && !isFinalUserPayload(payload))) {
    const text = extractUserText(payload);
    if (text) {
      console.log(`[NativeVoice] upstream partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 120))}`);
      session.latestUserText = text;
      sendJson(session.client, {
        type: 'subtitle',
        role: 'user',
        text,
        isFinal: false,
        sequence: `native_partial_${Date.now()}`,
      });
    }
    return;
  }

  // Event 459, or final 451: finalized ASR — persist and trigger a reply.
  // User speech during local TTS playback is treated as a barge-in interrupt.
  if (message.event === 459 || (message.event === 451 && isFinalUserPayload(payload))) {
    const finalText = extractUserText(payload) || session.latestUserText || '';
    console.log(`[NativeVoice] upstream final session=${session.sessionId} text=${JSON.stringify(finalText.slice(0, 120))}`);
    if (session.directSpeakUntil && Date.now() < session.directSpeakUntil) {
      console.log(`[NativeVoice] user interrupt during speaking session=${session.sessionId}`);
      session.directSpeakUntil = 0;
      session.isSendingChatTTSText = false;
      session.chatTTSUntil = 0;
      clearTimeout(session.chatTTSTimer);
    }
    // Only react when the text was actually accepted (not a 5s duplicate).
    if (persistUserSpeech(session, finalText)) {
      processReply(session, finalText).catch((error) => {
        console.error('[NativeVoice] processReply error:', error.message);
      });
    }
    return;
  }

  // Fallback: surface unknown events to the client for inspection.
  sendJson(session.client, {
    type: 'event',
    event: message.event,
    payload,
  });
}
|
||||
|
||||
/**
 * Wire up the browser-facing WebSocket: binary frames are forwarded upstream
 * as audio; JSON frames carry control messages (`start` / `stop` / `text`).
 * On close, every session timer is cancelled, the upstream socket is closed
 * and the session is removed from the registry.
 */
function attachClientHandlers(session) {
  session.client.on('message', async (raw, isBinary) => {
    if (isBinary) {
      // Raw microphone audio — forward only once the upstream session is ready.
      if (session.upstream && session.upstream.readyState === WebSocket.OPEN && session.upstreamReady) {
        session.upstream.send(createAudioMessage(session.sessionId, raw));
      }
      return;
    }

    let parsed;
    try {
      parsed = JSON.parse(raw.toString('utf8'));
    } catch (error) {
      sendJson(session.client, { type: 'error', error: 'invalid client json' });
      return;
    }

    if (parsed.type === 'start') {
      // Apply per-session configuration, open the upstream connection and
      // asynchronously prepare a handoff summary of prior history.
      session.botName = parsed.botName || '豆包';
      session.systemRole = parsed.systemRole || '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。';
      session.speakingStyle = parsed.speakingStyle || '请使用清晰、自然、简洁的口吻。';
      session.speaker = parsed.speaker || 'zh_female_vv_jupiter_bigtts';
      session.greetingText = parsed.greetingText || session.greetingText || '你好,我是你的智能语音助手,有什么可以帮你的吗?';
      session.userId = parsed.userId || session.userId || null;
      session.upstream = createUpstreamConnection(session);
      loadHandoffSummaryForVoice(session).catch((error) => {
        console.warn('[NativeVoice] async loadHandoffSummaryForVoice failed:', error.message);
      });
      return;
    }

    if (parsed.type === 'stop') {
      session.client.close();
      return;
    }

    if (parsed.type === 'text' && parsed.text) {
      // Typed (non-voice) user input follows the same persist/reply path.
      persistUserSpeech(session, parsed.text);
      processReply(session, parsed.text).catch((error) => {
        console.error('[NativeVoice] text processReply failed:', error.message);
      });
    }
  });

  session.client.on('close', () => {
    // Cancel every pending timer — including queuedReplyTimer, which would
    // otherwise fire processReply() against a torn-down session.
    clearTimeout(session.chatTTSTimer);
    clearTimeout(session.queuedReplyTimer);
    clearTimeout(session.greetingTimer);
    clearTimeout(session.greetingAckTimer);
    clearTimeout(session.readyTimer);
    if (session.upstream && session.upstream.readyState === WebSocket.OPEN) {
      session.upstream.close();
    }
    sessions.delete(session.sessionId);
  });
}
|
||||
|
||||
/**
 * Open the upstream WebSocket to the realtime dialogue endpoint and bind it
 * to this session. Auth/app identity come from environment variables
 * (VOLC_S2S_TOKEN, VOLC_S2S_APP_ID, optional VOLC_DIALOG_APP_KEY). On open,
 * StartConnection and StartSession frames are sent immediately; binary
 * frames are decoded by handleUpstreamMessage, errors/close are relayed to
 * the client.
 */
function createUpstreamConnection(session) {
  const upstream = new WebSocket('wss://openspeech.bytedance.com/api/v3/realtime/dialogue', {
    headers: {
      'X-Api-Resource-Id': 'volc.speech.dialog',
      'X-Api-Access-Key': process.env.VOLC_S2S_TOKEN,
      'X-Api-App-Key': process.env.VOLC_DIALOG_APP_KEY || 'PlgvMymc7f3tQnJ6',
      'X-Api-App-ID': process.env.VOLC_S2S_APP_ID,
      'X-Api-Connect-Id': session.sessionId,
    },
  });

  upstream.on('open', () => {
    // Handshake: connection-level start, then the session with its config.
    upstream.send(createStartConnectionMessage());
    upstream.send(createStartSessionMessage(session.sessionId, buildStartSessionPayload(session)));
  });

  upstream.on('message', (data, isBinary) => {
    // Textual frames are passed through verbatim; binary frames are protocol
    // messages that need unmarshalling.
    if (!isBinary && typeof data === 'string') {
      sendJson(session.client, { type: 'server_text', text: data });
      return;
    }
    handleUpstreamMessage(session, Buffer.isBuffer(data) ? data : Buffer.from(data));
  });

  upstream.on('error', (error) => {
    console.error('[NativeVoice] upstream error:', error.message);
    sendJson(session.client, { type: 'error', error: error.message });
  });

  upstream.on('close', () => {
    sendJson(session.client, { type: 'closed' });
  });

  return upstream;
}
|
||||
|
||||
/**
 * Create and register the in-memory state object for one client connection.
 * The returned session tracks the client socket, the (lazily created)
 * upstream socket, TTS/greeting timers and dedupe bookkeeping.
 */
function createSession(client, sessionId) {
  const session = {
    sessionId,
    client,
    // Upstream realtime-dialog socket; created on the client's `start` message.
    upstream: null,
    upstreamReady: false,
    // Local ChatTTSText playback / reply-queue state.
    isSendingChatTTSText: false,
    latestUserText: '',
    queuedUserText: '',
    processingReply: false,
    directSpeakUntil: 0,
    queuedReplyTimer: null,
    // 5-second dedupe window for persisted assistant messages.
    lastPersistedAssistantText: '',
    lastPersistedAssistantAt: 0,
    // Streamed assistant text accumulation (events 550/559).
    assistantStreamBuffer: '',
    assistantStreamReplyId: '',
    currentTtsType: '',
    // Per-session bot configuration; overridable via the `start` message.
    botName: '豆包',
    systemRole: '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。',
    speakingStyle: '请使用清晰、自然、简洁的口吻。',
    speaker: 'zh_female_vv_jupiter_bigtts',
    greetingText: '你好,我是你的智能语音助手,有什么可以帮你的吗?',
    // Greeting / ready handshake state.
    hasSentGreeting: false,
    greetingTimer: null,
    greetingAckTimer: null,
    pendingGreetingAck: false,
    greetingRetryCount: 0,
    readyTimer: null,
    readySent: false,
    // Text-to-voice handoff summary state.
    handoffSummary: '',
    handoffSummaryUsed: false,
    // Attribution for the next upstream assistant reply (set by processReply).
    awaitingUpstreamReply: false,
    pendingAssistantSource: null,
    pendingAssistantToolName: null,
    pendingAssistantMeta: null,
  };
  sessions.set(sessionId, session);
  attachClientHandlers(session);
  return session;
}
|
||||
|
||||
/**
 * Attach the native realtime-voice WebSocket gateway to an HTTP server.
 * Clients connect to /ws/realtime-dialog?sessionId=...&userId=... ; a DB
 * session record is created (best-effort) and a `connected` ack is sent.
 *
 * @param {import('http').Server} server - HTTP server to share
 * @returns {WebSocketServer} the attached WebSocket server
 */
function setupNativeVoiceGateway(server) {
  const wss = new WebSocketServer({ server, path: '/ws/realtime-dialog' });
  wss.on('connection', async (client, req) => {
    const parsed = url.parse(req.url, true);
    // url.parse() yields string[] for repeated query keys — normalize to a
    // single string so sessionId/userId are stable Map keys / DB values.
    const pickOne = (value) => (Array.isArray(value) ? value[0] : value);
    const sessionId = pickOne(parsed.query?.sessionId);
    if (!sessionId) {
      client.close();
      return;
    }
    const userId = pickOne(parsed.query?.userId) || null;
    const session = createSession(client, sessionId);
    session.userId = userId;
    try {
      await db.createSession(sessionId, userId, 'voice');
    } catch (error) {
      // Best-effort: the voice session still works without the DB record.
      console.warn('[NativeVoice][DB] createSession failed:', error.message);
    }
    sendJson(client, { type: 'connected', sessionId });
  });
  return wss;
}
|
||||
|
||||
// Public API: mount the native voice gateway onto an existing HTTP server.
module.exports = {
  setupNativeVoiceGateway,
};
|
||||
205
test2/server/services/realtimeDialogProtocol.js
Normal file
205
test2/server/services/realtimeDialogProtocol.js
Normal file
@@ -0,0 +1,205 @@
|
||||
// --- Binary frame header constants (see buildHeader/marshal/unmarshal) ---
const HEADER_SIZE_4 = 0x1;              // header size field: 1 unit of 4 bytes (low nibble, byte 0)
const VERSION_1 = 0x10;                 // protocol version 1 (high nibble, byte 0)
const SERIALIZATION_JSON = 0x1 << 4;    // payload serialized as JSON (high nibble, byte 2)
const SERIALIZATION_RAW = 0;            // payload is raw bytes (e.g. audio)
const COMPRESSION_NONE = 0;             // no payload compression (low nibble, byte 2)
const MSG_TYPE_FLAG_WITH_EVENT = 0b100; // type-flag bit: frame carries a 4-byte event id

// Numeric message types carried in the high nibble of header byte 1.
// FULL_CLIENT frames carry JSON, AUDIO_ONLY_CLIENT carries raw audio
// (see the create*Message factories); server-side types mirror them.
const MsgType = {
  INVALID: 0,
  FULL_CLIENT: 1,
  AUDIO_ONLY_CLIENT: 2,
  FULL_SERVER: 9,
  AUDIO_ONLY_SERVER: 11,
  FRONT_END_RESULT_SERVER: 12,
  ERROR: 15,
};
|
||||
|
||||
/**
 * Reverse-lookup the symbolic name for a numeric message type.
 * @returns {string} e.g. 'FULL_SERVER'; 'INVALID' for unknown values
 */
function getMessageTypeName(value) {
  const entry = Object.entries(MsgType).find(([, typeValue]) => typeValue === value);
  return entry ? entry[0] : 'INVALID';
}
|
||||
|
||||
/** True when the message-type flag nibble has the event-id bit set. */
function containsEvent(typeFlag) {
  return Boolean(typeFlag & MSG_TYPE_FLAG_WITH_EVENT);
}
|
||||
|
||||
/**
 * Whether a frame with this event id carries a session-id field.
 * Connection-level events (1, 2, 50, 51, 52) omit it; everything else
 * includes it (see marshal/unmarshal).
 */
function shouldHandleSessionId(event) {
  const CONNECTION_LEVEL_EVENTS = [1, 2, 50, 51, 52];
  return !CONNECTION_LEVEL_EVENTS.includes(event);
}
|
||||
|
||||
/**
 * Write a 32-bit big-endian signed integer into `buffer` at `offset`.
 * @returns {number} the offset advanced past the written word
 */
function writeInt(buffer, value, offset) {
  const INT32_SIZE = 4;
  buffer.writeInt32BE(value, offset);
  return offset + INT32_SIZE;
}
|
||||
|
||||
/**
 * Write a length-prefixed UTF-8 string: a 4-byte big-endian length word
 * followed by the raw string bytes (nullish values write length 0).
 * @returns {number} the offset advanced past the written data
 */
function writeStringWithLength(buffer, value, offset) {
  const bytes = Buffer.from(value || '', 'utf8');
  buffer.writeInt32BE(bytes.length, offset);
  bytes.copy(buffer, offset + 4);
  return offset + 4 + bytes.length;
}
|
||||
|
||||
/**
 * Write a length-prefixed byte payload: a 4-byte big-endian length word
 * followed by the payload bytes (strings/nullish values are converted).
 * @returns {number} the offset advanced past the written data
 */
function writePayload(buffer, payload, offset) {
  const bytes = Buffer.isBuffer(payload) ? payload : Buffer.from(payload || '');
  buffer.writeInt32BE(bytes.length, offset);
  bytes.copy(buffer, offset + 4);
  return offset + 4 + bytes.length;
}
|
||||
|
||||
/**
 * Build the 4-byte frame header:
 *   byte 0: version | header size, byte 1: type | flags,
 *   byte 2: serialization | compression, byte 3: reserved.
 */
function buildHeader(type, typeFlag, serialization) {
  const versionAndSize = VERSION_1 | HEADER_SIZE_4;
  const typeAndFlag = ((type & 0x0f) << 4) | (typeFlag & 0x0f);
  const serializationAndCompression = serialization | COMPRESSION_NONE;
  const reserved = 0;
  return Buffer.from([versionAndSize, typeAndFlag, serializationAndCompression, reserved]);
}
|
||||
|
||||
/**
 * Serialize a protocol message into one binary frame:
 *   [4-byte header][optional 4-byte event id]
 *   [optional length-prefixed session id][length-prefixed payload].
 * The session id is included only for event-carrying frames whose event is
 * not connection-level (see shouldHandleSessionId).
 *
 * @param {{type:number, typeFlag?:number, event?:number, sessionId?:string,
 *          payload:Buffer|string}} message
 * @param {{rawPayload?:boolean}} [options] - rawPayload marks the payload as
 *   unserialized bytes (audio) instead of JSON in the header.
 * @returns {Buffer} the framed message
 */
function marshal(message, { rawPayload = false } = {}) {
  const type = message.type;
  const typeFlag = message.typeFlag || MSG_TYPE_FLAG_WITH_EVENT;
  const payload = Buffer.isBuffer(message.payload) ? message.payload : Buffer.from(message.payload || '');
  const serialization = rawPayload ? SERIALIZATION_RAW : SERIALIZATION_JSON;

  // Pre-compute the exact frame size so one allocation suffices.
  let size = 4;
  if (containsEvent(typeFlag)) {
    size += 4;
  }
  if (containsEvent(typeFlag) && shouldHandleSessionId(message.event)) {
    size += 4 + Buffer.byteLength(message.sessionId || '', 'utf8');
  }
  size += 4 + payload.length;

  // allocUnsafe is safe here: every byte is overwritten below.
  const buffer = Buffer.allocUnsafe(size);
  buildHeader(type, typeFlag, serialization).copy(buffer, 0);

  let offset = 4;
  if (containsEvent(typeFlag)) {
    offset = writeInt(buffer, message.event || 0, offset);
  }
  if (containsEvent(typeFlag) && shouldHandleSessionId(message.event)) {
    offset = writeStringWithLength(buffer, message.sessionId || '', offset);
  }
  writePayload(buffer, payload, offset);
  return buffer;
}
|
||||
|
||||
/**
 * Read a length-prefixed UTF-8 string at offsetObj.offset, advancing the
 * offset past the length word and the string bytes. Non-positive lengths
 * yield '' (only the 4-byte length word is consumed).
 */
function readStringWithLength(buffer, offsetObj) {
  const size = buffer.readInt32BE(offsetObj.offset);
  offsetObj.offset += 4;
  if (size <= 0) {
    return '';
  }
  const start = offsetObj.offset;
  offsetObj.offset = start + size;
  return buffer.subarray(start, start + size).toString('utf8');
}
|
||||
|
||||
/**
 * Read a length-prefixed byte payload at offsetObj.offset, advancing the
 * offset. Non-positive lengths yield an empty buffer (length word consumed).
 * NOTE: the returned buffer is a view into `buffer`, not a copy.
 */
function readPayload(buffer, offsetObj) {
  const size = buffer.readInt32BE(offsetObj.offset);
  offsetObj.offset += 4;
  if (size <= 0) {
    return Buffer.alloc(0);
  }
  const start = offsetObj.offset;
  offsetObj.offset = start + size;
  return buffer.subarray(start, start + size);
}
|
||||
|
||||
/**
 * Parse a binary protocol frame into
 * { type, typeName, typeFlag, event, sessionId, payload }.
 * Mirrors marshal(): after the 4-byte header come an optional 4-byte event
 * id and an optional length-prefixed session id (both only for
 * event-carrying frames), then the length-prefixed payload.
 * NOTE: message.payload is a view into `data`, not a copy.
 * @throws {Error} when the frame is shorter than the 4-byte header
 */
function unmarshal(data) {
  const buffer = Buffer.isBuffer(data) ? data : Buffer.from(data);
  if (buffer.length < 4) {
    throw new Error('protocol message too short');
  }

  // Header byte 1: high nibble = message type, low nibble = type flags.
  const typeAndFlag = buffer[1];
  const type = (typeAndFlag >> 4) & 0x0f;
  const typeFlag = typeAndFlag & 0x0f;
  const offsetObj = { offset: 4 };
  const message = {
    type,
    typeName: getMessageTypeName(type),
    typeFlag,
    event: 0,
    sessionId: '',
    payload: Buffer.alloc(0),
  };

  if (containsEvent(typeFlag)) {
    message.event = buffer.readInt32BE(offsetObj.offset);
    offsetObj.offset += 4;
  }
  // Session id is present only for non-connection-level events.
  if (containsEvent(typeFlag) && shouldHandleSessionId(message.event)) {
    message.sessionId = readStringWithLength(buffer, offsetObj);
  }
  message.payload = readPayload(buffer, offsetObj);
  return message;
}
|
||||
|
||||
/**
 * Build the StartConnection frame (event 1) sent once after the WebSocket
 * opens. The payload is an empty JSON object.
 *
 * @returns {Buffer} encoded frame
 */
function createStartConnectionMessage() {
  const emptyJson = Buffer.from('{}', 'utf8');
  return marshal({
    type: MsgType.FULL_CLIENT,
    typeFlag: MSG_TYPE_FLAG_WITH_EVENT,
    event: 1,
    payload: emptyJson,
  });
}
|
||||
|
||||
/**
 * Build the StartSession frame (event 100) that opens a dialog session.
 *
 * @param {string} sessionId - session to open
 * @param {object} payload - session configuration, JSON-encoded on the wire
 * @returns {Buffer} encoded frame
 */
function createStartSessionMessage(sessionId, payload) {
  const body = Buffer.from(JSON.stringify(payload), 'utf8');
  return marshal({
    type: MsgType.FULL_CLIENT,
    typeFlag: MSG_TYPE_FLAG_WITH_EVENT,
    event: 100,
    sessionId,
    payload: body,
  });
}
|
||||
|
||||
/**
 * Build an audio frame (event 200). The audio bytes are sent as a raw
 * payload (rawPayload=true), not JSON.
 *
 * @param {string} sessionId - owning session
 * @param {Buffer|Uint8Array} audioBuffer - PCM/encoded audio bytes
 * @returns {Buffer} encoded frame
 */
function createAudioMessage(sessionId, audioBuffer) {
  const audio = Buffer.isBuffer(audioBuffer) ? audioBuffer : Buffer.from(audioBuffer);
  return marshal(
    {
      type: MsgType.AUDIO_ONLY_CLIENT,
      typeFlag: MSG_TYPE_FLAG_WITH_EVENT,
      event: 200,
      sessionId,
      payload: audio,
    },
    { rawPayload: true }
  );
}
|
||||
|
||||
/**
 * Build a ChatTTSText frame (event 500) that streams locally-generated text
 * to the upstream TTS. start/end delimit a logical utterance.
 *
 * @param {string} sessionId - owning session
 * @param {{start?:boolean, end?:boolean, content?:string}} payload
 * @returns {Buffer} encoded frame
 */
function createChatTTSTextMessage(sessionId, payload) {
  const body = {
    session_id: sessionId,
    start: Boolean(payload.start),
    end: Boolean(payload.end),
    content: payload.content || '',
  };
  return marshal({
    type: MsgType.FULL_CLIENT,
    typeFlag: MSG_TYPE_FLAG_WITH_EVENT,
    event: 500,
    sessionId,
    payload: Buffer.from(JSON.stringify(body), 'utf8'),
  });
}
|
||||
|
||||
/**
 * Build a ChatRAGText frame (event 502) carrying externally-retrieved RAG
 * context for the upstream model.
 *
 * @param {string} sessionId - owning session
 * @param {string} externalRag - JSON-encoded RAG items; defaults to '[]'
 * @returns {Buffer} encoded frame
 */
function createChatRAGTextMessage(sessionId, externalRag) {
  const body = {
    session_id: sessionId,
    external_rag: externalRag || '[]',
  };
  return marshal({
    type: MsgType.FULL_CLIENT,
    typeFlag: MSG_TYPE_FLAG_WITH_EVENT,
    event: 502,
    sessionId,
    payload: Buffer.from(JSON.stringify(body), 'utf8'),
  });
}
|
||||
|
||||
// Public protocol surface: frame builders for each client event plus the
// decoder for server frames.
module.exports = {
  MsgType,
  MSG_TYPE_FLAG_WITH_EVENT,
  unmarshal,
  createStartConnectionMessage,
  createStartSessionMessage,
  createAudioMessage,
  createChatTTSTextMessage,
  createChatRAGTextMessage,
};
|
||||
297
test2/server/services/realtimeDialogRouting.js
Normal file
297
test2/server/services/realtimeDialogRouting.js
Normal file
@@ -0,0 +1,297 @@
|
||||
const ToolExecutor = require('./toolExecutor');
|
||||
const arkChatService = require('./arkChatService');
|
||||
const db = require('../db');
|
||||
|
||||
/**
 * Strip Markdown markup and collapse whitespace/punctuation so the text reads
 * naturally when sent to TTS. The cleanup rules are applied strictly in order;
 * reordering them changes the result.
 *
 * @param {string} text - possibly-Markdown source text (nullish tolerated)
 * @returns {string} plain, trimmed, speech-friendly text
 */
function normalizeTextForSpeech(text) {
  const cleanupRules = [
    [/^#{1,6}\s*/gm, ''],                          // headings
    [/\*\*([^*]*)\*\*/g, '$1'],                    // bold
    [/__([^_]*)__/g, '$1'],
    [/\*([^*]+)\*/g, '$1'],                        // italics
    [/_([^_]+)_/g, '$1'],
    [/~~([^~]*)~~/g, '$1'],                        // strikethrough
    [/`{1,3}[^`]*`{1,3}/g, ''],                    // inline code
    [/^[-*]{3,}\s*$/gm, ''],                       // horizontal rules
    [/^>\s*/gm, ''],                               // blockquote markers
    [/!\[[^\]]*\]\([^)]*\)/g, ''],                 // images dropped entirely
    [/\[([^\]]*)\]\([^)]*\)/g, '$1'],              // links -> label text
    [/^[\s]*[-*+]\s+/gm, ' '],                     // bullet markers
    [/^[\s]*\d+[.)]\s+/gm, ' '],                   // ordered-list markers
    [/---\s*来源[::]/g, '来源:'],
    [/\r/g, ' '],
    [/\n{2,}/g, '。'],                             // paragraph break -> period
    [/\n/g, ' '],
    [/。{2,}/g, '。'],
    [/([!?;,])\1+/g, '$1'],                    // repeated punctuation
    [/([。!?;,])\s*([。!?;,])/g, '$2'],      // adjacent punctuation: keep last
    [/\s+/g, ' '],
  ];
  let result = text || '';
  for (const [pattern, replacement] of cleanupRules) {
    result = result.replace(pattern, replacement);
  }
  return result.trim();
}
|
||||
|
||||
/**
 * Normalize text and cut it into TTS-sized chunks, preferring to break at
 * sentence punctuation. The first chunk is capped at 90 characters so speech
 * playback can start sooner.
 *
 * @param {string} text - source text (Markdown tolerated)
 * @param {number} [maxLen=180] - maximum chunk length
 * @returns {string[]} non-empty chunks in reading order
 */
function splitTextForSpeech(text, maxLen = 180) {
  const content = normalizeTextForSpeech(text);
  if (!content) return [];
  if (content.length <= maxLen) return [content];

  const breakChars = ['。', '!', '?', ';', ',', ','];
  const chunks = [];
  let rest = content;
  while (rest.length > maxLen) {
    // Shorter cap for the very first chunk to minimize time-to-first-audio.
    const limit = chunks.length === 0 ? Math.min(90, maxLen) : maxLen;
    let cut = -1;
    for (const ch of breakChars) {
      cut = Math.max(cut, rest.lastIndexOf(ch, limit));
    }
    // Only honor a punctuation break past the halfway point; otherwise
    // hard-cut at the limit to avoid producing tiny fragments.
    cut = cut < Math.floor(limit / 2) ? limit : cut + 1;
    chunks.push(rest.slice(0, cut).trim());
    rest = rest.slice(cut).trim();
  }
  if (rest) chunks.push(rest);
  return chunks.filter(Boolean);
}
|
||||
|
||||
/**
 * Rough speaking-time estimate: ~180 ms per non-whitespace character,
 * clamped to [4s, 60s] so downstream timers never become degenerate.
 *
 * @param {string} text - text that will be spoken
 * @returns {number} estimated duration in milliseconds
 */
function estimateSpeechDurationMs(text) {
  const spoken = normalizeTextForSpeech(text).replace(/\s+/g, '');
  const estimated = spoken.length * 180;
  if (estimated < 4000) return 4000;
  if (estimated > 60000) return 60000;
  return estimated;
}
|
||||
|
||||
/**
 * Build the chat-message list for the lightweight "router" LLM call that
 * decides which backend route should handle the current utterance. The
 * system prompt constrains the model to emit a single JSON object with one
 * of six allowed routes; parseDirectRouteDecision consumes that output.
 *
 * @param {{systemRole?:string, speakingStyle?:string}} session - persona bits
 *   appended to the router prompt (defaults used when unset)
 * @param {Array<{role:string, content:string}>} context - recent history;
 *   only the last 6 well-formed turns are forwarded to keep the prompt short
 * @param {string} userText - the utterance to route
 * @returns {Array<{role:string, content:string}>} messages for the LLM call
 */
function buildDirectRouteMessages(session, context, userText) {
  const messages = [];
  const systemPrompt = [
    '你是语音前置路由器,只负责判断当前用户问题应该走哪条链路。',
    '你必须只输出一个 JSON 对象,不要输出解释、代码块或额外文本。',
    '允许的 route 只有:chat、search_knowledge、query_weather、query_order、get_current_time、calculate。',
    '规则如下:',
    '1. 企业产品、功能、政策、售后、专业说明、品牌官方信息 -> search_knowledge。',
    '2. 天气 -> query_weather。',
    '3. 订单状态 -> query_order。',
    '4. 当前时间、日期、星期 -> get_current_time。',
    '5. 明确的数学计算 -> calculate。',
    '6. 闲聊、问候、开放式泛化交流 -> chat。',
    '输出格式示例:{"route":"chat","args":{},"reply":""}',
    '如果 route=search_knowledge,args 中必须包含 query。',
    '如果 route=query_weather,args 中必须包含 city。',
    '如果 route=query_order,args 中必须包含 order_id。',
    '如果 route=calculate,args 中必须包含 expression。',
    `当前助手设定:${session.systemRole || '你是一个友善的智能助手。'} ${session.speakingStyle || '请使用温和、清晰的口吻。'}`,
  ].join('\n');
  messages.push({ role: 'system', content: systemPrompt });
  // Forward at most the last 6 turns; entries missing role/content are skipped.
  (context || []).slice(-6).forEach((item) => {
    if (item && item.role && item.content) {
      messages.push({ role: item.role, content: item.content });
    }
  });
  messages.push({ role: 'user', content: userText });
  return messages;
}
|
||||
|
||||
/**
 * Build the chat-message list for a direct voice-chat LLM completion:
 * persona + tone system prompt, up to the last 10 history turns, then the
 * current user utterance.
 *
 * @param {{systemRole?:string, speakingStyle?:string}} session - persona bits
 * @param {Array<{role:string, content:string}>} context - recent history
 * @param {string} userText - current utterance
 * @returns {Array<{role:string, content:string}>} messages for the LLM call
 */
function buildDirectChatMessages(session, context, userText) {
  const persona = session.systemRole || '你是一个友善的智能助手。';
  const tone = session.speakingStyle || '请使用温和、清晰的口吻。';
  const systemPrompt = [
    persona,
    tone,
    '这是语音对话场景,请直接给出自然、完整、适合朗读的中文回复。',
    '如果不是基于知识库或工具结果,就不要冒充官方结论。',
  ].join('\n');

  // Keep at most the last 10 well-formed turns.
  const history = (context || [])
    .slice(-10)
    .filter((item) => item && item.role && item.content)
    .map((item) => ({ role: item.role, content: item.content }));

  return [
    { role: 'system', content: systemPrompt },
    ...history,
    { role: 'user', content: userText },
  ];
}
|
||||
|
||||
/**
 * True when the text mentions an enterprise/product keyword that signals a
 * knowledge-base question rather than small talk.
 *
 * @param {string} text - candidate utterance (nullish tolerated)
 * @returns {boolean}
 */
function hasKnowledgeKeyword(text) {
  const knowledgeKeywords = /(系统|平台|产品|功能|介绍|说明|规则|流程|步骤|配置|接入|开通|操作|怎么用|如何用|适合谁|区别|价格|费用|政策|售后|文档|资料|方案|一成系统)/;
  return knowledgeKeywords.test(text || '');
}
|
||||
|
||||
/**
 * True when the trimmed text looks like a pronoun-style follow-up question
 * ("这个…", "详细说说", …) whose real subject lives in earlier turns.
 *
 * @param {string} text - candidate utterance (nullish tolerated)
 * @returns {boolean}
 */
function isKnowledgeFollowUp(text) {
  const followUpLead = /^(这个|那个|它|该系统|这个系统|那个系统|这个功能|那个功能|这个产品|那个产品|详细|详细说说|详细查一下|展开说说|继续说|继续讲|怎么用|怎么操作|怎么配置|适合谁|有什么区别|费用多少|价格多少|怎么申请|怎么开通|是什么|什么意思)/;
  return followUpLead.test((text || '').trim());
}
|
||||
|
||||
/**
 * Decide whether a "chat"-routed utterance should be forced onto the
 * knowledge route: either it contains a knowledge keyword itself, or it is a
 * pronoun-style follow-up and the recent context was knowledge-related.
 *
 * @param {string} userText - current utterance
 * @param {Array<{content?:string}>} [context=[]] - recent history turns
 * @returns {boolean}
 */
function shouldForceKnowledgeRoute(userText, context = []) {
  const question = (userText || '').trim();
  if (!question) return false;
  if (hasKnowledgeKeyword(question)) return true;
  if (!isKnowledgeFollowUp(question)) return false;

  // Follow-up phrasing: look for knowledge keywords in the last 6 turns.
  const history = Array.isArray(context) ? context : [];
  const recentText = history
    .slice(-6)
    .map((item) => String(item?.content || '').trim())
    .join('\n');
  return hasKnowledgeKeyword(recentText);
}
|
||||
|
||||
/**
 * Prepend the model-handoff summary (as an assistant turn) to the context,
 * but only once per session: a blank summary or handoffSummaryUsed=true
 * returns the context untouched.
 *
 * @param {{handoffSummary?:string, handoffSummaryUsed?:boolean}|null} session
 * @param {Array|null} context - history turns (non-array treated as empty)
 * @returns {Array} context, possibly with a leading summary turn
 */
function withHandoffSummary(session, context) {
  const summary = String(session?.handoffSummary || '').trim();
  const alreadyInjected = Boolean(session?.handoffSummaryUsed);
  if (!summary || alreadyInjected) {
    return context;
  }
  const history = Array.isArray(context) ? context : [];
  return [{ role: 'assistant', content: `会话交接摘要:${summary}` }, ...history];
}
|
||||
|
||||
/**
 * Parse the router LLM's output into a validated route decision. Tolerates
 * code fences and surrounding prose by extracting the outermost {...} span.
 * Any parse failure or unknown/incomplete route falls back to
 * search_knowledge with the raw user text as the query.
 *
 * @param {string} content - raw LLM output
 * @param {string} userText - original utterance, used for fallbacks
 * @returns {{route:string, args:object}}
 */
function parseDirectRouteDecision(content, userText) {
  const raw = (content || '').trim();
  const withoutFences = raw
    .replace(/^```json\s*/i, '')
    .replace(/^```\s*/i, '')
    .replace(/```$/i, '')
    .trim();
  const open = withoutFences.indexOf('{');
  const close = withoutFences.lastIndexOf('}');
  const candidate = open >= 0 && close > open ? withoutFences.slice(open, close + 1) : withoutFences;

  try {
    const decision = JSON.parse(candidate);
    const args = decision.args && typeof decision.args === 'object' ? decision.args : {};
    switch (decision.route) {
      case 'chat':
        return { route: 'chat', args: {} };
      case 'search_knowledge':
        return { route: 'search_knowledge', args: { query: args.query || userText } };
      case 'query_weather':
        if (args.city) return { route: 'query_weather', args: { city: args.city } };
        break;
      case 'query_order':
        if (args.order_id) return { route: 'query_order', args: { order_id: args.order_id } };
        break;
      case 'get_current_time':
        return { route: 'get_current_time', args: {} };
      case 'calculate':
        if (args.expression) return { route: 'calculate', args: { expression: args.expression } };
        break;
      default:
        break;
    }
  } catch (error) {
    console.warn('[NativeVoice] route JSON parse failed:', error.message, 'raw=', raw);
  }
  return { route: 'search_knowledge', args: { query: userText } };
}
|
||||
|
||||
/**
 * Cheap regex-based router used before/instead of the LLM router. Maps an
 * utterance to one of: get_current_time, query_weather, query_order,
 * calculate, chat (greetings/acknowledgements), or search_knowledge (default).
 *
 * Fix: the weather branch previously used
 * `text.replace(/.*?(城市…)/, '$1')`, which does NOT extract the city — it
 * only strips the prefix before it, so "明天上海天气怎么样" yielded
 * city "上海天气怎么样", and the `|| '北京'` fallback was unreachable for
 * non-empty text. We now extract the city with match() and fall back to
 * 北京 when no known city is mentioned.
 *
 * @param {string} userText - raw utterance (nullish tolerated)
 * @returns {{route:string, args:object}}
 */
function getRuleBasedDirectRouteDecision(userText) {
  const text = (userText || '').trim();
  if (!text) return { route: 'chat', args: {} };
  if (/(几点|几号|日期|星期|周几|现在时间|当前时间)/.test(text)) return { route: 'get_current_time', args: {} };
  if (/(天气|气温|下雨|晴天|阴天|温度)/.test(text)) {
    // Extract the city name itself; default to 北京 when none is mentioned.
    const cityMatch = text.match(/(北京|上海|广州|深圳|杭州|成都|重庆|武汉|西安|南京|苏州|天津|长沙|郑州|青岛|宁波|无锡)/);
    return { route: 'query_weather', args: { city: cityMatch ? cityMatch[1] : '北京' } };
  }
  if (/(订单|物流|快递|单号)/.test(text)) return { route: 'query_order', args: { order_id: text } };
  if (/^[\d\s+\-*/().=%]+$/.test(text) || /(等于多少|帮我算|计算一下|算一下)/.test(text)) {
    // Strip the spoken "calculate" phrasing; keep the original text if the
    // stripped expression ends up empty.
    return { route: 'calculate', args: { expression: text.replace(/(帮我算|计算一下|算一下|等于多少)/g, '').trim() || text } };
  }
  // Greetings / acknowledgements with optional trailing particles+punctuation.
  if (/^(喂|你好|您好|嗨|哈喽|hello|hi|在吗|在不在|早上好|中午好|下午好|晚上好|早安|晚安|谢谢|感谢|再见|拜拜|嗯|哦|好的|对|是的|没有了|没事了|可以了|行|OK|ok)[,,!。??~~\s]*[啊呀吧呢哦嗯嘛哈的了]*[!。??~~]*$/.test(text)) {
    return { route: 'chat', args: {} };
  }
  if (/^(喂[,,\s]*)?(你好|您好)[,,!。??\s]*(在吗|请问)?[!。??]*$/.test(text)) {
    return { route: 'chat', args: {} };
  }
  // Default: anything non-trivial is treated as a knowledge-base question.
  return { route: 'search_knowledge', args: { query: text } };
}
|
||||
|
||||
/**
 * Convert a raw tool result into plain reply text. search_knowledge results
 * get dedicated handling (timeout / not-configured messages, joined result
 * snippets); the other tools render fixed Chinese sentence templates when
 * they succeeded. Any error string wins otherwise, and unknown shapes are
 * JSON-stringified as a last resort.
 *
 * @param {string} toolName - route/tool identifier
 * @param {object|string|null} toolResult - raw result from ToolExecutor
 * @returns {string} display/speech text ('' for a nullish result)
 */
function extractToolResultText(toolName, toolResult) {
  if (!toolResult) return '';

  if (toolName === 'search_knowledge') {
    if (toolResult.errorType === 'timeout') {
      return '知识库查询超时了,请稍后重试,或换一种更具体的问法再试。';
    }
    if (toolResult.errorType === 'not_configured') {
      return '知识库当前未配置完成,请先检查知识库配置。';
    }
    if (Array.isArray(toolResult.results)) {
      return toolResult.results
        .map((entry) => entry.content || JSON.stringify(entry))
        .join('\n');
    }
    if (typeof toolResult === 'string') return toolResult;
    if (toolResult.error) return toolResult.error;
  }

  if (!toolResult.error) {
    if (toolName === 'query_weather') {
      return `${toolResult.city}今天${toolResult.weather},气温${toolResult.temp},湿度${toolResult.humidity},${toolResult.wind}。${toolResult.tips || ''}`.trim();
    }
    if (toolName === 'query_order') {
      return `订单${toolResult.order_id}当前状态是${toolResult.status},预计送达时间${toolResult.estimated_delivery},快递单号${toolResult.tracking_number}。`;
    }
    if (toolName === 'get_current_time') {
      return `现在是${toolResult.datetime},${toolResult.weekday}。`;
    }
    if (toolName === 'calculate') {
      return `${toolResult.expression} 的计算结果是 ${toolResult.formatted}。`;
    }
  }

  if (toolResult.error) return toolResult.error;
  return typeof toolResult === 'string' ? toolResult : JSON.stringify(toolResult);
}
|
||||
|
||||
/**
 * Resolve how to answer a finalized user utterance in a realtime voice session.
 *
 * Flow:
 *   1. Load recent history (best-effort; errors become an empty context) and,
 *      at most once per session, prepend the handoff summary.
 *   2. Route via the regex rules; a "chat" decision is upgraded to the
 *      knowledge route when the text looks knowledge-related.
 *   3. delivery outcomes:
 *      - 'upstream_chat': small talk — defer entirely to the upstream model.
 *      - 'external_rag': tool/knowledge produced items to inject as RAG.
 *      - 'local_tts': speak the tool's text (or nothing) locally.
 *
 * Note: session.handoffSummaryUsed is set to true on every path, so the
 * summary injected by withHandoffSummary is consumed at most once.
 *
 * @param {string} sessionId - key for persisted history in db
 * @param {object} session - mutable session state (handoffSummary*, persona)
 * @param {string} text - the user's transcribed utterance (assumed non-null)
 * @returns {Promise<{delivery:string, speechText:string, ragItems:Array,
 *   source:string, toolName:?string, routeDecision:object, responseMeta:object}>}
 */
async function resolveReply(sessionId, session, text) {
  // Best-effort history load; a db failure degrades to an empty context.
  const baseContext = await db.getHistoryForLLM(sessionId, 20).catch(() => []);
  const context = withHandoffSummary(session, baseContext);
  const originalText = text.trim();
  let routeDecision = getRuleBasedDirectRouteDecision(text.trim());
  // Knowledge-looking "chat" turns are forced onto the knowledge route.
  if (routeDecision.route === 'chat' && shouldForceKnowledgeRoute(text.trim(), context)) {
    routeDecision = { route: 'search_knowledge', args: { query: text.trim() } };
  }
  let replyText = '';
  let source = 'voice_bot';
  let toolName = null;
  let responseMeta = {
    route: routeDecision.route,
    original_text: originalText,
  };
  if (routeDecision.route === 'chat') {
    session.handoffSummaryUsed = true;
    // Pure chat: let the upstream S2S model answer; nothing to speak locally.
    return {
      delivery: 'upstream_chat',
      speechText: '',
      ragItems: [],
      source,
      toolName,
      routeDecision,
      responseMeta,
    };
  } else {
    toolName = routeDecision.route;
    source = 'voice_tool';
    // Voice sessions ask the knowledge tool for short snippets, not essays.
    const toolArgs = toolName === 'search_knowledge'
      ? { ...(routeDecision.args || {}), response_mode: 'snippet' }
      : routeDecision.args;
    const toolResult = await ToolExecutor.execute(routeDecision.route, toolArgs, context);
    replyText = extractToolResultText(toolName, toolResult);
    // Enrich the meta with the tool's self-reported diagnostics (nullable).
    responseMeta = {
      ...responseMeta,
      tool_name: toolName,
      tool_args: toolArgs || {},
      source: toolResult?.source || null,
      original_query: toolResult?.original_query || routeDecision.args?.query || originalText,
      rewritten_query: toolResult?.rewritten_query || null,
      hit: typeof toolResult?.hit === 'boolean' ? toolResult.hit : null,
      reason: toolResult?.reason || null,
      error_type: toolResult?.errorType || null,
      latency_ms: toolResult?.latency_ms || null,
    };

    // Knowledge hits become RAG items verbatim; other successful tools are
    // wrapped as a single synthetic RAG item carrying the rendered text.
    const ragItems = toolName === 'search_knowledge'
      ? (toolResult?.hit && Array.isArray(toolResult?.results)
        ? toolResult.results
          .filter((item) => item && item.content)
          .map((item) => ({
            title: item.title || '知识库结果',
            content: item.content,
          }))
        : [])
      : (!toolResult?.error && replyText
        ? [{ title: `${toolName}结果`, content: replyText }]
        : []);

    if (ragItems.length > 0) {
      session.handoffSummaryUsed = true;
      return {
        delivery: 'external_rag',
        speechText: '',
        ragItems,
        source,
        toolName,
        routeDecision,
        responseMeta,
      };
    }
  }
  // Tool miss/error path: speak the (normalized) tool text locally, if any.
  const speechText = normalizeTextForSpeech(replyText);
  session.handoffSummaryUsed = true;
  if (!speechText) {
    return { delivery: 'local_tts', speechText: '', ragItems: [], source, toolName, routeDecision, responseMeta };
  }
  return { delivery: 'local_tts', speechText, ragItems: [], source, toolName, routeDecision, responseMeta };
}
|
||||
|
||||
// Exported routing helpers; resolveReply is the main entry point used by the
// native voice gateway, the rest are exposed for reuse/testing.
module.exports = {
  getRuleBasedDirectRouteDecision,
  normalizeTextForSpeech,
  splitTextForSpeech,
  estimateSpeechDurationMs,
  shouldForceKnowledgeRoute,
  resolveReply,
};
|
||||
@@ -1,6 +1,60 @@
|
||||
const axios = require('axios');
|
||||
const arkChatService = require('./arkChatService');
|
||||
|
||||
class ToolExecutor {
|
||||
static hasCanonicalKnowledgeTerm(query) {
|
||||
return /(一成系统|PM-FitLine|PM细胞营养素|NTC营养保送系统|Activize Oxyplus|小红产品|Basics|大白产品|Restorate|小白产品|儿童倍适|火炉原理|阿育吠陀)/i.test(String(query || ''));
|
||||
}
|
||||
|
||||
  /**
   * Normalize a raw (often ASR-transcribed) query: strip spoken filler from
   * both ends, then map known mis-hearings/aliases to the canonical product
   * and system names used by the knowledge base. The replacement order
   * matters — product-family aliases run before the 小红/大白/小白 shorthand
   * expansions so already-expanded names are not re-expanded.
   *
   * @param {string} query - raw query text (nullish tolerated)
   * @returns {string} normalized, trimmed query
   */
  static normalizeKnowledgeQueryAlias(query) {
    return String(query || '')
      // Trim spoken filler particles / punctuation from both ends.
      .replace(/^[啊哦嗯呢呀哎诶额,。!?、\s]+/g, '')
      .replace(/[啊哦嗯呢呀哎诶额,。!?、\s]+$/g, '')
      // Common ASR mis-hearings of "一成系统".
      .replace(/X{2}系统/gi, '一成系统')
      .replace(/一城系统|逸城系统|一程系统|易成系统|一诚系统|亦成系统/g, '一成系统')
      // Brand spelling/case normalization.
      .replace(/PM[-\s]*Fitline|PM[-\s]*fitline|Pm[-\s]*fitline|Fitline|fitline/g, 'PM-FitLine')
      .replace(/PM细胞营养|PM营养素|德国PM营养素/g, 'PM细胞营养素')
      .replace(/NTC营养保送系统|NTC营养配送系统|NTC营养输送系统|NTC营养传送系统|NTC营养传输系统/g, 'NTC营养保送系统')
      .replace(/Nutrient Transport Concept/gi, 'NTC营养保送系统')
      .replace(/Activize Oxyplus|Activize/gi, 'Activize Oxyplus')
      // Case-only normalization for these two product names.
      .replace(/Restorate/gi, 'Restorate')
      .replace(/Basics/gi, 'Basics')
      .replace(/基础三合一|基础套装?|三合一基础套|大白小红小白/g, 'Basics')
      // Expand colloquial shorthand to "<shorthand>产品 <brand name>".
      .replace(/小红产品|小红/g, '小红产品 Activize Oxyplus')
      .replace(/大白产品|大白/g, '大白产品 Basics')
      .replace(/小白产品|小白/g, '小白产品 Restorate')
      .replace(/儿童倍适|儿童产品/g, '儿童倍适')
      // No-op today; presumably a placeholder for future aliases — TODO confirm.
      .replace(/火炉原理/g, '火炉原理')
      .replace(/阿育吠陀|Ayurveda/gi, '阿育吠陀')
      .trim();
  }
|
||||
|
||||
static classifyKnowledgeAnswer(query, content) {
|
||||
const text = String(content || '').trim();
|
||||
if (!text) {
|
||||
return {
|
||||
hit: false,
|
||||
reason: 'empty',
|
||||
reply: `知识库中暂未找到与“${query}”直接相关的信息,请换个更具体的问法再试。`,
|
||||
};
|
||||
}
|
||||
|
||||
const noHitPattern = /未检索到|没有检索到|没有相关内容|暂无相关内容|未找到相关信息|没有找到相关信息|知识库中没有相关内容|暂未找到与.*直接相关的信息|无法基于知识库/;
|
||||
if (noHitPattern.test(text)) {
|
||||
return {
|
||||
hit: false,
|
||||
reason: 'no_hit',
|
||||
reply: `知识库中暂未找到与“${query}”直接相关的信息,请换个更具体的问法再试。`,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
hit: true,
|
||||
reason: 'hit',
|
||||
reply: text,
|
||||
};
|
||||
}
|
||||
|
||||
static async execute(toolName, args, context = []) {
|
||||
const startTime = Date.now();
|
||||
console.log(`[ToolExecutor] Executing: ${toolName}`, args);
|
||||
@@ -79,36 +133,108 @@ class ToolExecutor {
|
||||
};
|
||||
}
|
||||
|
||||
static async searchKnowledge({ query } = {}, context = []) {
|
||||
static async searchKnowledge({ query, response_mode } = {}, context = []) {
|
||||
const startTime = Date.now();
|
||||
query = query || '';
|
||||
const responseMode = response_mode === 'snippet' ? 'snippet' : 'answer';
|
||||
console.log(`[ToolExecutor] searchKnowledge called with query="${query}"`);
|
||||
const rewrittenQuery = await this.rewriteKnowledgeQuery(query, context);
|
||||
if (rewrittenQuery && rewrittenQuery !== query) {
|
||||
console.log(`[ToolExecutor] searchKnowledge rewritten query="${rewrittenQuery}"`);
|
||||
}
|
||||
|
||||
const kbIds = process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS;
|
||||
if (kbIds && kbIds !== 'your_knowledge_base_dataset_id') {
|
||||
try {
|
||||
console.log('[ToolExecutor] Trying Ark Knowledge Search...');
|
||||
const result = await this.searchArkKnowledge(query, context);
|
||||
console.log(`[ToolExecutor] Ark KB search succeeded in ${Date.now() - startTime}ms`);
|
||||
return result;
|
||||
const result = await this.searchArkKnowledge(rewrittenQuery || query, context, responseMode);
|
||||
const latencyMs = Date.now() - startTime;
|
||||
console.log(`[ToolExecutor] Ark KB search succeeded in ${latencyMs}ms`);
|
||||
return {
|
||||
...result,
|
||||
original_query: query,
|
||||
rewritten_query: rewrittenQuery || query,
|
||||
latency_ms: latencyMs,
|
||||
};
|
||||
} catch (error) {
|
||||
const latencyMs = Date.now() - startTime;
|
||||
console.warn('[ToolExecutor] Ark Knowledge Search failed:', error.message);
|
||||
console.log('[ToolExecutor] Falling back to local Knowledge Base');
|
||||
return {
|
||||
query,
|
||||
original_query: query,
|
||||
rewritten_query: rewrittenQuery || query,
|
||||
latency_ms: latencyMs,
|
||||
errorType: error.code === 'ECONNABORTED' || /timeout/i.test(error.message) ? 'timeout' : 'request_failed',
|
||||
error: `知识库查询失败: ${error.message}`,
|
||||
source: 'ark_knowledge',
|
||||
hit: false,
|
||||
reason: 'error',
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
console.log('[ToolExecutor] Using local Knowledge Base (voice fast path)');
|
||||
const result = this.searchLocalKnowledge(query);
|
||||
console.log(`[ToolExecutor] Local KB search completed in ${Date.now() - startTime}ms`);
|
||||
return result;
|
||||
const latencyMs = Date.now() - startTime;
|
||||
console.warn('[ToolExecutor] Ark knowledge base is not configured');
|
||||
return {
|
||||
query,
|
||||
original_query: query,
|
||||
rewritten_query: rewrittenQuery || query,
|
||||
latency_ms: latencyMs,
|
||||
errorType: 'not_configured',
|
||||
error: '知识库未配置,请检查 VOLC_ARK_KNOWLEDGE_BASE_IDS',
|
||||
source: 'ark_knowledge',
|
||||
hit: false,
|
||||
reason: 'not_configured',
|
||||
};
|
||||
}
|
||||
|
||||
  /**
   * Rewrite a user question into a retrieval-friendly knowledge-base query.
   *
   * Fast paths (no LLM call):
   *   - empty input returns '';
   *   - a short, non-follow-up query that already contains a canonical term
   *     is returned after alias normalization only;
   *   - when the Ark endpoint is not configured, the normalized query is used.
   * Otherwise an LLM rewrite is attempted; on any failure the normalized
   * query is returned, so this method never throws.
   *
   * @param {string} query - raw user question
   * @param {Array<{role:string, content:string}>} [context=[]] - recent turns
   *   used to resolve pronoun-style follow-ups
   * @returns {Promise<string>} the rewritten (or normalized) retrieval query
   */
  static async rewriteKnowledgeQuery(query, context = []) {
    const originalQuery = String(query || '').trim();
    if (!originalQuery) {
      return '';
    }

    const normalizedQuery = this.normalizeKnowledgeQueryAlias(originalQuery);
    // Punctuation/whitespace-free form, used only for the length check below.
    const conciseQuery = normalizedQuery.replace(/[,。!?、,.!?\s]+/g, '');
    // Last 6 well-formed turns rendered as a plain transcript for the prompt.
    const recentContext = (Array.isArray(context) ? context : [])
      .filter((item) => item && (item.role === 'user' || item.role === 'assistant') && String(item.content || '').trim())
      .slice(-6)
      .map((item) => `${item.role === 'user' ? '用户' : '助手'}:${String(item.content || '').trim()}`)
      .join('\n');

    const isPronounFollowUp = /^(这个|那个|它|该系统|这个系统|那个系统|详细|继续|怎么|为什么|适合谁|什么意思)/.test(normalizedQuery);
    // Canonical, short, self-contained queries skip the LLM rewrite entirely.
    if (this.hasCanonicalKnowledgeTerm(normalizedQuery) && conciseQuery.length <= 36 && !isPronounFollowUp) {
      return normalizedQuery;
    }

    if (!process.env.VOLC_ARK_ENDPOINT_ID || process.env.VOLC_ARK_ENDPOINT_ID === 'your_ark_endpoint_id') {
      return normalizedQuery;
    }

    try {
      const result = await arkChatService.chat([
        {
          role: 'system',
          content: '你是知识库检索词改写助手。你的任务是把用户当前问题改写成适合企业知识库检索的完整查询语句。必须处理三类问题:1)补全多轮对话中的省略主语;2)纠正语音识别错误、口语噪声和同音误写;3)把别名统一成知识库里的规范说法。规则:不要改变用户真实意图;不要回答问题;只输出一行最终检索词;优先保留真正的产品名、系统名、技术名。当前知识库高频规范术语包括:一成系统、PM-FitLine、PM细胞营养素、NTC营养保送系统、Activize Oxyplus、小红产品、Basics、大白产品、Restorate、小白产品、儿童倍适、火炉原理、阿育吠陀。示例:XX系统、一城系统、逸城系统、一程系统等都统一理解为一成系统;NTC营养配送系统、NTC营养输送系统统一为NTC营养保送系统;Fitline、PM fitline 统一为 PM-FitLine;小红统一为小红产品 Activize Oxyplus。',
        },
        {
          role: 'user',
          content: `最近上下文:\n${recentContext || '无'}\n\n当前原始问题:${normalizedQuery}\n\n请输出最终检索词:`,
        },
      ], []);
      // Strip surrounding quotes the model sometimes adds, then re-normalize
      // aliases in case the rewrite reintroduced one.
      const rewritten = this.normalizeKnowledgeQueryAlias(String(result.content || '').replace(/^["'“”]+|["'“”]+$/g, '').trim());
      return rewritten || normalizedQuery;
    } catch (error) {
      console.warn('[ToolExecutor] rewriteKnowledgeQuery failed:', error.message);
      return normalizedQuery;
    }
  }
|
||||
|
||||
/**
|
||||
* 通过方舟 Chat Completions API + knowledge_base metadata 进行知识检索
|
||||
* 使用独立的 LLM 调用,专门用于知识库检索场景(如语音通话的工具回调)
|
||||
*/
|
||||
static async searchArkKnowledge(query, context = []) {
|
||||
static async searchArkKnowledge(query, context = [], responseMode = 'answer') {
|
||||
const endpointId = process.env.VOLC_ARK_ENDPOINT_ID;
|
||||
const authKey = process.env.VOLC_ARK_API_KEY || process.env.VOLC_ACCESS_KEY_ID;
|
||||
const kbIds = process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS;
|
||||
@@ -126,12 +252,14 @@ class ToolExecutor {
|
||||
// 提取最近 3 轮对话作为上下文(最多 6 条 user/assistant 消息)
|
||||
const recentContext = context
|
||||
.filter(m => m.role === 'user' || m.role === 'assistant')
|
||||
.slice(-6);
|
||||
.slice(responseMode === 'snippet' ? -4 : -6);
|
||||
|
||||
const messages = [
|
||||
{
|
||||
role: 'system',
|
||||
content: '你是一个知识库检索助手。请根据知识库中的内容回答用户问题。如果知识库中没有相关内容,请如实说明。回答时请引用知识库来源。',
|
||||
content: responseMode === 'snippet'
|
||||
? '你是知识库片段提取助手。请基于知识库提取与用户问题最相关的2到4条简洁知识片段,供语音系统继续组织回复。规则:只输出直接相关的中文事实片段;每条尽量简短;不要寒暄,不要解释你的任务,不要写“根据知识库”;如果没有相关内容,请明确说未找到相关内容。'
|
||||
: '你是一个知识库检索助手。请根据知识库中的内容回答用户问题。如果知识库中没有相关内容,请如实说明。回答时请引用知识库来源。',
|
||||
},
|
||||
...recentContext,
|
||||
{
|
||||
@@ -165,21 +293,24 @@ class ToolExecutor {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${authKey}`,
|
||||
},
|
||||
timeout: 15000, // 方舟知识库超时 15s(减少等待,防止 LLM 重试风暴)
|
||||
timeout: 30000,
|
||||
}
|
||||
);
|
||||
|
||||
const choice = response.data.choices?.[0];
|
||||
const content = choice?.message?.content || '未找到相关信息';
|
||||
const classified = this.classifyKnowledgeAnswer(query, content);
|
||||
|
||||
return {
|
||||
query,
|
||||
results: [{
|
||||
title: '方舟知识库检索结果',
|
||||
content: content,
|
||||
content: classified.reply,
|
||||
}],
|
||||
total: 1,
|
||||
source: 'ark_knowledge',
|
||||
hit: classified.hit,
|
||||
reason: classified.reason,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -209,7 +340,7 @@ class ToolExecutor {
|
||||
],
|
||||
stream: true,
|
||||
auto_save_history: false,
|
||||
}, { headers, timeout: 15000 });
|
||||
}, { headers, timeout: 30000 });
|
||||
|
||||
const chatData = chatRes.data?.data;
|
||||
if (!chatData?.id || !chatData?.conversation_id) {
|
||||
|
||||
@@ -1,132 +0,0 @@
|
||||
const { Signer } = require('@volcengine/openapi');
|
||||
const fetch = require('node-fetch');
|
||||
const { AccessToken, privileges } = require('../lib/token');
|
||||
|
||||
/**
 * Thin client for the Volcengine RTC OpenAPI (voice-chat lifecycle) plus
 * RTC room-token generation. All OpenAPI calls go through _callOpenAPI,
 * which signs requests and falls back to a mock response when credentials
 * are not configured.
 */
class VolcengineService {
  constructor() {
    // Fixed OpenAPI endpoint / signing parameters for the RTC service.
    this.baseUrl = 'https://rtc.volcengineapi.com';
    this.service = 'rtc';
    this.region = 'cn-north-1';
    this.version = '2024-12-01';
  }

  /**
   * Start a voice-chat agent in a room (StartVoiceChat).
   * Logs a summary of the S2S/LLM config before issuing the call.
   *
   * @param {object} config - full StartVoiceChat request body
   * @returns {Promise<object>} raw OpenAPI response
   */
  async startVoiceChat(config) {
    console.log('[Volcengine] Starting voice chat (S2S端到端 + LLM混合, API v2024-12-01)');
    console.log('[Volcengine] RoomId:', config.RoomId);
    // ProviderParams may arrive either as a JSON string or as an object.
    let pp = config.Config.S2SConfig?.ProviderParams;
    if (typeof pp === 'string') {
      try { pp = JSON.parse(pp); } catch (e) { pp = {}; }
    }
    console.log('[Volcengine] S2S AppId:', pp?.app?.appid);
    console.log('[Volcengine] S2S model:', pp?.dialog?.extra?.model);
    console.log('[Volcengine] S2S speaker:', pp?.tts?.speaker);
    console.log('[Volcengine] ProviderParams type:', typeof config.Config.S2SConfig?.ProviderParams);
    console.log('[Volcengine] LLM EndPointId:', config.Config.LLMConfig?.EndPointId);
    console.log('[Volcengine] Tools:', config.Config.LLMConfig?.Tools?.length || 0);
    console.log('[Volcengine] Full request body:', JSON.stringify(config, null, 2));
    const result = await this._callOpenAPI('StartVoiceChat', config);
    console.log('[Volcengine] StartVoiceChat response:', JSON.stringify(result, null, 2));
    return result;
  }

  /**
   * Update a running voice chat (UpdateVoiceChat).
   *
   * @param {object} params - UpdateVoiceChat request body
   * @returns {Promise<object>} raw OpenAPI response
   */
  async updateVoiceChat(params) {
    console.log('[Volcengine] Updating voice chat (v2024-12-01)');
    console.log('[Volcengine] UpdateVoiceChat params:', JSON.stringify(params, null, 2));
    const result = await this._callOpenAPI('UpdateVoiceChat', params);
    console.log('[Volcengine] UpdateVoiceChat response:', JSON.stringify(result, null, 2));
    return result;
  }

  /**
   * Stop a voice chat (StopVoiceChat).
   *
   * @param {{RoomId:string}} params - StopVoiceChat request body
   * @returns {Promise<object>} raw OpenAPI response
   */
  async stopVoiceChat(params) {
    console.log('[Volcengine] Stopping voice chat, RoomId:', params.RoomId);
    return this._callOpenAPI('StopVoiceChat', params);
  }

  /**
   * Generate an RTC room-entry token.
   * Uses the official AccessToken library:
   * https://github.com/volcengine/rtc-aigc-demo/blob/main/Server/token.js
   *
   * @param {string} roomId - room to join
   * @param {string} userId - joining user
   * @returns {string} serialized token, or a placeholder when the RTC app
   *   key is not configured (dev mode)
   */
  generateRTCToken(roomId, userId) {
    const appId = process.env.VOLC_RTC_APP_ID;
    const appKey = process.env.VOLC_RTC_APP_KEY;

    if (!appId || !appKey || appKey === 'your_rtc_app_key') {
      console.warn('[Volcengine] RTC AppKey not configured, returning placeholder token');
      return `placeholder_token_${roomId}_${userId}_${Date.now()}`;
    }

    const token = new AccessToken(appId, appKey, roomId, userId);
    const expireTime = Math.floor(Date.now() / 1000) + 24 * 3600; // valid for 24 hours
    token.expireTime(expireTime);
    token.addPrivilege(privileges.PrivPublishStream, 0);
    token.addPrivilege(privileges.PrivSubscribeStream, 0);

    const serialized = token.serialize();
    console.log(`[Volcengine] RTC Token generated for room=${roomId}, user=${userId}`);
    return serialized;
  }

  /**
   * Sign and execute one OpenAPI action. Returns a mock response when the
   * access keys are not configured; otherwise throws on transport errors or
   * when the response carries ResponseMetadata.Error.
   *
   * @param {string} action - OpenAPI action name (e.g. 'StartVoiceChat')
   * @param {object} body - JSON request body
   * @param {string} [versionOverride] - overrides the default API version
   * @returns {Promise<object>} parsed JSON response
   * @throws {Error} on HTTP failure or an API-level error payload
   */
  async _callOpenAPI(action, body, versionOverride) {
    const ak = process.env.VOLC_ACCESS_KEY_ID;
    const sk = process.env.VOLC_SECRET_ACCESS_KEY;
    const version = versionOverride || this.version;

    if (!ak || !sk || ak === 'your_access_key_id') {
      console.warn(`[Volcengine] Credentials not configured, returning mock response for ${action}`);
      return this._mockResponse(action, body);
    }

    // Signing scheme identical to the official rtc-aigc-demo server.
    const openApiRequestData = {
      region: this.region,
      method: 'POST',
      params: {
        Action: action,
        Version: version,
      },
      headers: {
        Host: 'rtc.volcengineapi.com',
        'Content-type': 'application/json',
      },
      body,
    };

    // addAuthorization mutates openApiRequestData.headers in place, so the
    // signed headers are what the fetch below sends.
    const signer = new Signer(openApiRequestData, this.service);
    signer.addAuthorization({ accessKeyId: ak, secretKey: sk });

    const url = `${this.baseUrl}?Action=${action}&Version=${version}`;
    console.log(`[Volcengine] ${action} calling:`, url);

    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: openApiRequestData.headers,
        body: JSON.stringify(body),
      });
      const data = await response.json();

      // API-level errors come back 200 with an Error object in the metadata.
      if (data?.ResponseMetadata?.Error) {
        const err = data.ResponseMetadata.Error;
        throw new Error(`${action} failed: ${err.Code} - ${err.Message}`);
      }
      return data;
    } catch (error) {
      console.error(`[Volcengine] ${action} error:`, error.message);
      throw error;
    }
  }

  /**
   * Mock response used during development when credentials are not configured.
   *
   * @param {string} action - the action that was requested
   * @param {object} params - the request body (logged, truncated to 500 chars)
   * @returns {object} a response-shaped placeholder
   */
  _mockResponse(action, params) {
    console.log(`[Volcengine][MOCK] ${action} called with:`, JSON.stringify(params, null, 2).substring(0, 500));
    return {
      ResponseMetadata: { RequestId: `mock-${Date.now()}`, Action: action },
      Result: { Message: 'Mock response - credentials not configured' },
    };
  }
}
|
||||
|
||||
// Export a shared singleton instance; all callers reuse the same client.
module.exports = new VolcengineService();
|
||||
Reference in New Issue
Block a user