fix(test2): 修复语音欢迎语时序与重复回答持久化
This commit is contained in:
@@ -16,11 +16,29 @@ const {
|
|||||||
normalizeTextForSpeech,
|
normalizeTextForSpeech,
|
||||||
splitTextForSpeech,
|
splitTextForSpeech,
|
||||||
estimateSpeechDurationMs,
|
estimateSpeechDurationMs,
|
||||||
|
shouldForceKnowledgeRoute,
|
||||||
resolveReply,
|
resolveReply,
|
||||||
} = require('./realtimeDialogRouting');
|
} = require('./realtimeDialogRouting');
|
||||||
|
|
||||||
const sessions = new Map();
|
const sessions = new Map();
|
||||||
|
|
||||||
|
const IDLE_TIMEOUT_MS = 5 * 60 * 1000;
|
||||||
|
|
||||||
|
function resetIdleTimer(session) {
|
||||||
|
clearTimeout(session.idleTimer);
|
||||||
|
session.lastActivityAt = Date.now();
|
||||||
|
session.idleTimer = setTimeout(() => {
|
||||||
|
session.idleTimer = null;
|
||||||
|
console.log(`[NativeVoice] idle timeout (${IDLE_TIMEOUT_MS / 1000}s) session=${session.sessionId}`);
|
||||||
|
sendJson(session.client, { type: 'idle_timeout', timeout: IDLE_TIMEOUT_MS });
|
||||||
|
setTimeout(() => {
|
||||||
|
if (session.client && session.client.readyState === WebSocket.OPEN) {
|
||||||
|
session.client.close();
|
||||||
|
}
|
||||||
|
}, 2000);
|
||||||
|
}, IDLE_TIMEOUT_MS);
|
||||||
|
}
|
||||||
|
|
||||||
function sendJson(ws, payload) {
|
function sendJson(ws, payload) {
|
||||||
if (ws && ws.readyState === WebSocket.OPEN) {
|
if (ws && ws.readyState === WebSocket.OPEN) {
|
||||||
ws.send(JSON.stringify(payload));
|
ws.send(JSON.stringify(payload));
|
||||||
@@ -33,7 +51,7 @@ function buildStartSessionPayload(options) {
|
|||||||
extra: {},
|
extra: {},
|
||||||
},
|
},
|
||||||
tts: {
|
tts: {
|
||||||
speaker: options.speaker || 'zh_female_vv_jupiter_bigtts',
|
speaker: options.speaker || process.env.VOLC_S2S_SPEAKER_ID || 'zh_female_vv_jupiter_bigtts',
|
||||||
audio_config: {
|
audio_config: {
|
||||||
channel: 1,
|
channel: 1,
|
||||||
format: 'pcm_s16le',
|
format: 'pcm_s16le',
|
||||||
@@ -42,12 +60,12 @@ function buildStartSessionPayload(options) {
|
|||||||
},
|
},
|
||||||
dialog: {
|
dialog: {
|
||||||
dialog_id: '',
|
dialog_id: '',
|
||||||
bot_name: options.botName || '豆包',
|
bot_name: options.botName || '大沃',
|
||||||
system_role: normalizeTextForSpeech(options.systemRole || '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。'),
|
system_role: normalizeTextForSpeech(options.systemRole || '你是大沃,一个德国PM健康事业的智能语音助手。你对PM-FitLine细胞营养素产品、一成系统、招商合作非常熟悉。请优先依据 external_rag 给出的内容回答。无论是闲聊还是引用知识库内容,都要保持一样的说话风格,不要切换成朗读语气。用户进来时请自然地打个招呼,像朋友聊天一样,不要用客服话术。'),
|
||||||
speaking_style: normalizeTextForSpeech(options.speakingStyle || '请使用清晰、自然、简洁的口吻。'),
|
speaking_style: normalizeTextForSpeech(options.speakingStyle || '说话像朋友聊天一样自然轻松,语气亲切活泼,不要像客服念稿。即使引用知识库内容也要用聊天的语气说出来,不要切换成播音腔或朗读语气。'),
|
||||||
extra: {
|
extra: {
|
||||||
input_mod: 'audio',
|
input_mod: 'audio',
|
||||||
model: 'O',
|
model: options.modelVersion || 'O',
|
||||||
strict_audit: false,
|
strict_audit: false,
|
||||||
audit_response: '抱歉,这个问题我暂时无法回答。',
|
audit_response: '抱歉,这个问题我暂时无法回答。',
|
||||||
},
|
},
|
||||||
@@ -92,6 +110,7 @@ function persistUserSpeech(session, text) {
|
|||||||
session.lastPersistedUserText = cleanText;
|
session.lastPersistedUserText = cleanText;
|
||||||
session.lastPersistedUserAt = now;
|
session.lastPersistedUserAt = now;
|
||||||
session.latestUserText = cleanText;
|
session.latestUserText = cleanText;
|
||||||
|
resetIdleTimer(session);
|
||||||
db.addMessage(session.sessionId, 'user', cleanText, 'voice_asr').catch((e) => console.warn('[NativeVoice][DB] add user failed:', e.message));
|
db.addMessage(session.sessionId, 'user', cleanText, 'voice_asr').catch((e) => console.warn('[NativeVoice][DB] add user failed:', e.message));
|
||||||
sendJson(session.client, {
|
sendJson(session.client, {
|
||||||
type: 'subtitle',
|
type: 'subtitle',
|
||||||
@@ -112,6 +131,7 @@ function persistAssistantSpeech(session, text, { source = 'voice_bot', toolName
|
|||||||
}
|
}
|
||||||
session.lastPersistedAssistantText = cleanText;
|
session.lastPersistedAssistantText = cleanText;
|
||||||
session.lastPersistedAssistantAt = now;
|
session.lastPersistedAssistantAt = now;
|
||||||
|
resetIdleTimer(session);
|
||||||
if (persistToDb) {
|
if (persistToDb) {
|
||||||
db.addMessage(session.sessionId, 'assistant', cleanText, source, toolName, meta).catch((e) => console.warn('[NativeVoice][DB] add assistant failed:', e.message));
|
db.addMessage(session.sessionId, 'assistant', cleanText, source, toolName, meta).catch((e) => console.warn('[NativeVoice][DB] add assistant failed:', e.message));
|
||||||
}
|
}
|
||||||
@@ -203,48 +223,47 @@ async function sendSpeechText(session, speechText) {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
function sendGreeting(session) {
|
function sendReady(session) {
|
||||||
const greetingText = normalizeTextForSpeech(session.greetingText || '你好,我是你的智能语音助手,有什么可以帮你的吗?');
|
if (session.readySent) {
|
||||||
if (!greetingText || session.hasSentGreeting) {
|
return;
|
||||||
if (!session.readySent) {
|
}
|
||||||
session.readySent = true;
|
session.readySent = true;
|
||||||
sendJson(session.client, { type: 'ready' });
|
sendJson(session.client, { type: 'ready' });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function sendGreeting(session) {
|
||||||
|
if (session.hasSentGreeting) {
|
||||||
|
sendReady(session);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
session.hasSentGreeting = true;
|
session.hasSentGreeting = true;
|
||||||
persistAssistantSpeech(session, greetingText, { source: 'voice_bot', persistToDb: false });
|
const greetingText = session.greetingText || '嗨,你好呀!我是大沃,你的专属智能助手。关于德国PM产品、一成系统、招商合作,随时问我就好~';
|
||||||
clearTimeout(session.readyTimer);
|
console.log(`[NativeVoice] sendGreeting session=${session.sessionId} text=${JSON.stringify(greetingText.slice(0, 80))}`);
|
||||||
session.readyTimer = setTimeout(() => {
|
sendJson(session.client, {
|
||||||
session.readyTimer = null;
|
type: 'subtitle',
|
||||||
if (!session.readySent) {
|
role: 'assistant',
|
||||||
session.readySent = true;
|
text: greetingText,
|
||||||
sendJson(session.client, { type: 'ready' });
|
isFinal: true,
|
||||||
}
|
source: 'voice_bot',
|
||||||
}, estimateSpeechDurationMs(greetingText) + 300);
|
sequence: `greeting_${Date.now()}`,
|
||||||
const playGreeting = () => {
|
});
|
||||||
session.pendingGreetingAck = true;
|
persistAssistantSpeech(session, greetingText, { source: 'voice_bot' });
|
||||||
clearTimeout(session.greetingAckTimer);
|
|
||||||
session.greetingAckTimer = setTimeout(() => {
|
|
||||||
session.greetingAckTimer = null;
|
|
||||||
if (session.pendingGreetingAck && session.greetingRetryCount < 1) {
|
|
||||||
session.greetingRetryCount += 1;
|
|
||||||
console.warn(`[NativeVoice] greeting ack timeout, retry session=${session.sessionId}`);
|
|
||||||
playGreeting();
|
|
||||||
}
|
|
||||||
}, 2000);
|
|
||||||
sendSpeechText(session, greetingText).catch((error) => {
|
|
||||||
session.pendingGreetingAck = false;
|
|
||||||
clearTimeout(session.greetingAckTimer);
|
|
||||||
session.greetingAckTimer = null;
|
|
||||||
session.hasSentGreeting = false;
|
|
||||||
console.warn('[NativeVoice] greeting failed:', error.message);
|
|
||||||
});
|
|
||||||
};
|
|
||||||
clearTimeout(session.greetingTimer);
|
clearTimeout(session.greetingTimer);
|
||||||
|
clearTimeout(session.readyTimer);
|
||||||
session.greetingTimer = setTimeout(() => {
|
session.greetingTimer = setTimeout(() => {
|
||||||
session.greetingTimer = null;
|
session.greetingTimer = null;
|
||||||
playGreeting();
|
sendSpeechText(session, greetingText)
|
||||||
|
.then(() => {
|
||||||
|
session.readyTimer = setTimeout(() => {
|
||||||
|
session.readyTimer = null;
|
||||||
|
sendReady(session);
|
||||||
|
}, Math.max(1200, Math.min(estimateSpeechDurationMs(greetingText) + 300, 8000)));
|
||||||
|
})
|
||||||
|
.catch((error) => {
|
||||||
|
session.hasSentGreeting = false;
|
||||||
|
sendReady(session);
|
||||||
|
console.warn('[NativeVoice] greeting speech failed:', error.message);
|
||||||
|
});
|
||||||
}, 800);
|
}, 800);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -275,10 +294,20 @@ async function processReply(session, text) {
|
|||||||
}
|
}
|
||||||
session.processingReply = true;
|
session.processingReply = true;
|
||||||
sendJson(session.client, { type: 'assistant_pending', active: true });
|
sendJson(session.client, { type: 'assistant_pending', active: true });
|
||||||
console.log(`[NativeVoice] processReply start session=${session.sessionId} text=${JSON.stringify(cleanText.slice(0, 120))}`);
|
const isKnowledgeCandidate = shouldForceKnowledgeRoute(cleanText);
|
||||||
|
if (isKnowledgeCandidate) {
|
||||||
|
sendJson(session.client, { type: 'tts_reset', reason: 'processing' });
|
||||||
|
}
|
||||||
|
console.log(`[NativeVoice] processReply start session=${session.sessionId} text=${JSON.stringify(cleanText.slice(0, 120))} blocked=${session.blockUpstreamAudio} kbCandidate=${isKnowledgeCandidate}`);
|
||||||
try {
|
try {
|
||||||
const { delivery, speechText, ragItems, source, toolName, routeDecision, responseMeta } = await resolveReply(session.sessionId, session, cleanText);
|
const { delivery, speechText, ragItems, source, toolName, routeDecision, responseMeta } = await resolveReply(session.sessionId, session, cleanText);
|
||||||
if (delivery === 'upstream_chat') {
|
if (delivery === 'upstream_chat') {
|
||||||
|
if (isKnowledgeCandidate) {
|
||||||
|
console.log(`[NativeVoice] processReply kb-nohit retrigger session=${session.sessionId}`);
|
||||||
|
await sendExternalRag(session, [{ title: '用户问题', content: cleanText }]);
|
||||||
|
} else {
|
||||||
|
session.blockUpstreamAudio = false;
|
||||||
|
}
|
||||||
session.awaitingUpstreamReply = true;
|
session.awaitingUpstreamReply = true;
|
||||||
session.pendingAssistantSource = 'voice_bot';
|
session.pendingAssistantSource = 'voice_bot';
|
||||||
session.pendingAssistantToolName = null;
|
session.pendingAssistantToolName = null;
|
||||||
@@ -287,6 +316,10 @@ async function processReply(session, text) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (delivery === 'external_rag') {
|
if (delivery === 'external_rag') {
|
||||||
|
if (!session.blockUpstreamAudio) {
|
||||||
|
session.blockUpstreamAudio = true;
|
||||||
|
sendJson(session.client, { type: 'tts_reset', reason: 'knowledge_hit' });
|
||||||
|
}
|
||||||
session.awaitingUpstreamReply = true;
|
session.awaitingUpstreamReply = true;
|
||||||
session.pendingAssistantSource = source;
|
session.pendingAssistantSource = source;
|
||||||
session.pendingAssistantToolName = toolName;
|
session.pendingAssistantToolName = toolName;
|
||||||
@@ -301,26 +334,32 @@ async function processReply(session, text) {
|
|||||||
session.chatTTSUntil = 0;
|
session.chatTTSUntil = 0;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
session.isSendingChatTTSText = true;
|
console.log(`[NativeVoice] processReply resolved session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=local_rag source=${source} tool=${toolName || 'chat'} speechLen=${speechText.length}`);
|
||||||
session.chatTTSUntil = Date.now() + 30000;
|
session.awaitingUpstreamReply = true;
|
||||||
console.log(`[NativeVoice] processReply resolved session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=local_tts source=${source} tool=${toolName || 'chat'} speechLen=${speechText.length}`);
|
session.pendingAssistantSource = source;
|
||||||
persistAssistantSpeech(session, speechText, { source, toolName, meta: responseMeta });
|
session.pendingAssistantToolName = toolName;
|
||||||
session.directSpeakUntil = Date.now() + estimateSpeechDurationMs(speechText);
|
session.pendingAssistantMeta = responseMeta;
|
||||||
await sendSpeechText(session, speechText);
|
await sendExternalRag(session, [{ title: '回复内容', content: speechText }]);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[NativeVoice] processReply failed:', error.message);
|
console.error('[NativeVoice] processReply failed:', error.message);
|
||||||
sendJson(session.client, { type: 'error', error: error.message });
|
sendJson(session.client, { type: 'error', error: error.message });
|
||||||
} finally {
|
} finally {
|
||||||
session.processingReply = false;
|
session.processingReply = false;
|
||||||
|
if (!session.awaitingUpstreamReply) {
|
||||||
|
session.blockUpstreamAudio = false;
|
||||||
|
}
|
||||||
if (!session.awaitingUpstreamReply) {
|
if (!session.awaitingUpstreamReply) {
|
||||||
sendJson(session.client, { type: 'assistant_pending', active: false });
|
sendJson(session.client, { type: 'assistant_pending', active: false });
|
||||||
}
|
}
|
||||||
const pending = session.queuedUserText;
|
const pending = session.queuedUserText;
|
||||||
session.queuedUserText = '';
|
session.queuedUserText = '';
|
||||||
if (pending && pending !== cleanText && (!session.directSpeakUntil || Date.now() >= session.directSpeakUntil)) {
|
if (pending && pending !== cleanText && (!session.directSpeakUntil || Date.now() >= session.directSpeakUntil)) {
|
||||||
setTimeout(() => processReply(session, pending).catch((err) => {
|
setTimeout(() => {
|
||||||
console.error('[NativeVoice] queued processReply failed:', err.message);
|
session.blockUpstreamAudio = true;
|
||||||
}), 200);
|
processReply(session, pending).catch((err) => {
|
||||||
|
console.error('[NativeVoice] queued processReply failed:', err.message);
|
||||||
|
});
|
||||||
|
}, 200);
|
||||||
} else if (pending && pending !== cleanText) {
|
} else if (pending && pending !== cleanText) {
|
||||||
const waitMs = Math.max(200, session.directSpeakUntil - Date.now() + 200);
|
const waitMs = Math.max(200, session.directSpeakUntil - Date.now() + 200);
|
||||||
clearTimeout(session.queuedReplyTimer);
|
clearTimeout(session.queuedReplyTimer);
|
||||||
@@ -328,6 +367,7 @@ async function processReply(session, text) {
|
|||||||
session.queuedReplyTimer = null;
|
session.queuedReplyTimer = null;
|
||||||
const queuedText = session.queuedUserText || pending;
|
const queuedText = session.queuedUserText || pending;
|
||||||
session.queuedUserText = '';
|
session.queuedUserText = '';
|
||||||
|
session.blockUpstreamAudio = true;
|
||||||
processReply(session, queuedText).catch((err) => {
|
processReply(session, queuedText).catch((err) => {
|
||||||
console.error('[NativeVoice] delayed queued processReply failed:', err.message);
|
console.error('[NativeVoice] delayed queued processReply failed:', err.message);
|
||||||
});
|
});
|
||||||
@@ -346,9 +386,14 @@ function handleUpstreamMessage(session, data) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (message.type === MsgType.AUDIO_ONLY_SERVER) {
|
if (message.type === MsgType.AUDIO_ONLY_SERVER) {
|
||||||
if (session.isSendingChatTTSText && session.currentTtsType === 'default') {
|
if (session.blockUpstreamAudio) {
|
||||||
|
if (!session._audioBlockLogOnce) {
|
||||||
|
session._audioBlockLogOnce = true;
|
||||||
|
console.log(`[NativeVoice] audio blocked (blockUpstream) session=${session.sessionId} ttsType=${session.currentTtsType}`);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
session._audioBlockLogOnce = false;
|
||||||
if (session.client && session.client.readyState === WebSocket.OPEN) {
|
if (session.client && session.client.readyState === WebSocket.OPEN) {
|
||||||
session.client.send(message.payload, { binary: true });
|
session.client.send(message.payload, { binary: true });
|
||||||
}
|
}
|
||||||
@@ -369,10 +414,8 @@ function handleUpstreamMessage(session, data) {
|
|||||||
if (message.event === 150) {
|
if (message.event === 150) {
|
||||||
session.upstreamReady = true;
|
session.upstreamReady = true;
|
||||||
console.log(`[NativeVoice] upstream ready session=${session.sessionId}`);
|
console.log(`[NativeVoice] upstream ready session=${session.sessionId}`);
|
||||||
if (!session.readySent) {
|
resetIdleTimer(session);
|
||||||
session.readySent = true;
|
sendGreeting(session);
|
||||||
sendJson(session.client, { type: 'ready' });
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -383,6 +426,10 @@ function handleUpstreamMessage(session, data) {
|
|||||||
clearTimeout(session.greetingAckTimer);
|
clearTimeout(session.greetingAckTimer);
|
||||||
session.greetingAckTimer = null;
|
session.greetingAckTimer = null;
|
||||||
}
|
}
|
||||||
|
if (session.blockUpstreamAudio && payload?.tts_type && payload.tts_type !== 'default') {
|
||||||
|
session.blockUpstreamAudio = false;
|
||||||
|
console.log(`[NativeVoice] unblock audio on ttsType=${payload.tts_type} session=${session.sessionId}`);
|
||||||
|
}
|
||||||
console.log(`[NativeVoice] upstream tts_event session=${session.sessionId} ttsType=${payload?.tts_type || ''}`);
|
console.log(`[NativeVoice] upstream tts_event session=${session.sessionId} ttsType=${payload?.tts_type || ''}`);
|
||||||
sendJson(session.client, { type: 'tts_event', payload });
|
sendJson(session.client, { type: 'tts_event', payload });
|
||||||
return;
|
return;
|
||||||
@@ -391,7 +438,7 @@ function handleUpstreamMessage(session, data) {
|
|||||||
const isLocalChatTTSTextActive = !!session.isSendingChatTTSText && (session.chatTTSUntil || 0) > Date.now();
|
const isLocalChatTTSTextActive = !!session.isSendingChatTTSText && (session.chatTTSUntil || 0) > Date.now();
|
||||||
|
|
||||||
if (message.event === 351) {
|
if (message.event === 351) {
|
||||||
if (isLocalChatTTSTextActive) {
|
if (isLocalChatTTSTextActive || session.blockUpstreamAudio) {
|
||||||
session.assistantStreamBuffer = '';
|
session.assistantStreamBuffer = '';
|
||||||
session.assistantStreamReplyId = '';
|
session.assistantStreamReplyId = '';
|
||||||
return;
|
return;
|
||||||
@@ -401,31 +448,31 @@ function handleUpstreamMessage(session, data) {
|
|||||||
const pendingAssistantMeta = session.pendingAssistantMeta || null;
|
const pendingAssistantMeta = session.pendingAssistantMeta || null;
|
||||||
session.awaitingUpstreamReply = false;
|
session.awaitingUpstreamReply = false;
|
||||||
sendJson(session.client, { type: 'assistant_pending', active: false });
|
sendJson(session.client, { type: 'assistant_pending', active: false });
|
||||||
flushAssistantStream(session, {
|
|
||||||
source: pendingAssistantSource,
|
|
||||||
toolName: pendingAssistantToolName,
|
|
||||||
meta: pendingAssistantMeta,
|
|
||||||
});
|
|
||||||
session.pendingAssistantSource = null;
|
|
||||||
session.pendingAssistantToolName = null;
|
|
||||||
session.pendingAssistantMeta = null;
|
|
||||||
const assistantText = extractUserText(payload);
|
const assistantText = extractUserText(payload);
|
||||||
if (assistantText) {
|
if (assistantText) {
|
||||||
|
session.assistantStreamBuffer = '';
|
||||||
|
session.assistantStreamReplyId = '';
|
||||||
console.log(`[NativeVoice] upstream assistant session=${session.sessionId} text=${JSON.stringify(assistantText.slice(0, 120))}`);
|
console.log(`[NativeVoice] upstream assistant session=${session.sessionId} text=${JSON.stringify(assistantText.slice(0, 120))}`);
|
||||||
persistAssistantSpeech(session, assistantText, {
|
persistAssistantSpeech(session, assistantText, {
|
||||||
source: pendingAssistantSource,
|
source: pendingAssistantSource,
|
||||||
toolName: pendingAssistantToolName,
|
toolName: pendingAssistantToolName,
|
||||||
meta: pendingAssistantMeta,
|
meta: pendingAssistantMeta,
|
||||||
});
|
});
|
||||||
session.pendingAssistantSource = null;
|
} else {
|
||||||
session.pendingAssistantToolName = null;
|
flushAssistantStream(session, {
|
||||||
session.pendingAssistantMeta = null;
|
source: pendingAssistantSource,
|
||||||
|
toolName: pendingAssistantToolName,
|
||||||
|
meta: pendingAssistantMeta,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
session.pendingAssistantSource = null;
|
||||||
|
session.pendingAssistantToolName = null;
|
||||||
|
session.pendingAssistantMeta = null;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (message.event === 550) {
|
if (message.event === 550) {
|
||||||
if (isLocalChatTTSTextActive) {
|
if (isLocalChatTTSTextActive || session.blockUpstreamAudio) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (session.awaitingUpstreamReply) {
|
if (session.awaitingUpstreamReply) {
|
||||||
@@ -445,7 +492,14 @@ function handleUpstreamMessage(session, data) {
|
|||||||
session.assistantStreamReplyId = '';
|
session.assistantStreamReplyId = '';
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (session.blockUpstreamAudio) {
|
||||||
|
session.assistantStreamBuffer = '';
|
||||||
|
session.assistantStreamReplyId = '';
|
||||||
|
console.log(`[NativeVoice] blocked response ended (559), keeping block session=${session.sessionId}`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
session.awaitingUpstreamReply = false;
|
session.awaitingUpstreamReply = false;
|
||||||
|
session.blockUpstreamAudio = false;
|
||||||
sendJson(session.client, { type: 'assistant_pending', active: false });
|
sendJson(session.client, { type: 'assistant_pending', active: false });
|
||||||
flushAssistantStream(session, {
|
flushAssistantStream(session, {
|
||||||
source: session.pendingAssistantSource || 'voice_bot',
|
source: session.pendingAssistantSource || 'voice_bot',
|
||||||
@@ -463,6 +517,21 @@ function handleUpstreamMessage(session, data) {
|
|||||||
if (text) {
|
if (text) {
|
||||||
console.log(`[NativeVoice] upstream partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 120))}`);
|
console.log(`[NativeVoice] upstream partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 120))}`);
|
||||||
session.latestUserText = text;
|
session.latestUserText = text;
|
||||||
|
// 用户开口说话时立即打断 AI 播放
|
||||||
|
if (session.directSpeakUntil && Date.now() < session.directSpeakUntil) {
|
||||||
|
console.log(`[NativeVoice] user barge-in (partial) session=${session.sessionId}`);
|
||||||
|
session.directSpeakUntil = 0;
|
||||||
|
session.isSendingChatTTSText = false;
|
||||||
|
session.chatTTSUntil = 0;
|
||||||
|
clearTimeout(session.chatTTSTimer);
|
||||||
|
sendJson(session.client, { type: 'tts_reset', reason: 'user_bargein' });
|
||||||
|
} else if (session.isSendingChatTTSText && (session.chatTTSUntil || 0) > Date.now()) {
|
||||||
|
console.log(`[NativeVoice] user barge-in chatTTS (partial) session=${session.sessionId}`);
|
||||||
|
session.isSendingChatTTSText = false;
|
||||||
|
session.chatTTSUntil = 0;
|
||||||
|
clearTimeout(session.chatTTSTimer);
|
||||||
|
sendJson(session.client, { type: 'tts_reset', reason: 'user_bargein' });
|
||||||
|
}
|
||||||
sendJson(session.client, {
|
sendJson(session.client, {
|
||||||
type: 'subtitle',
|
type: 'subtitle',
|
||||||
role: 'user',
|
role: 'user',
|
||||||
@@ -483,8 +552,16 @@ function handleUpstreamMessage(session, data) {
|
|||||||
session.isSendingChatTTSText = false;
|
session.isSendingChatTTSText = false;
|
||||||
session.chatTTSUntil = 0;
|
session.chatTTSUntil = 0;
|
||||||
clearTimeout(session.chatTTSTimer);
|
clearTimeout(session.chatTTSTimer);
|
||||||
|
sendJson(session.client, { type: 'tts_reset', reason: 'user_bargein' });
|
||||||
|
} else if (session.isSendingChatTTSText && (session.chatTTSUntil || 0) > Date.now()) {
|
||||||
|
console.log(`[NativeVoice] user interrupt chatTTS during speaking session=${session.sessionId}`);
|
||||||
|
session.isSendingChatTTSText = false;
|
||||||
|
session.chatTTSUntil = 0;
|
||||||
|
clearTimeout(session.chatTTSTimer);
|
||||||
|
sendJson(session.client, { type: 'tts_reset', reason: 'user_bargein' });
|
||||||
}
|
}
|
||||||
if (persistUserSpeech(session, finalText)) {
|
if (persistUserSpeech(session, finalText)) {
|
||||||
|
session.blockUpstreamAudio = true;
|
||||||
processReply(session, finalText).catch((error) => {
|
processReply(session, finalText).catch((error) => {
|
||||||
console.error('[NativeVoice] processReply error:', error.message);
|
console.error('[NativeVoice] processReply error:', error.message);
|
||||||
});
|
});
|
||||||
@@ -520,8 +597,9 @@ function attachClientHandlers(session) {
|
|||||||
session.botName = parsed.botName || '豆包';
|
session.botName = parsed.botName || '豆包';
|
||||||
session.systemRole = parsed.systemRole || '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。';
|
session.systemRole = parsed.systemRole || '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。';
|
||||||
session.speakingStyle = parsed.speakingStyle || '请使用清晰、自然、简洁的口吻。';
|
session.speakingStyle = parsed.speakingStyle || '请使用清晰、自然、简洁的口吻。';
|
||||||
session.speaker = parsed.speaker || 'zh_female_vv_jupiter_bigtts';
|
session.speaker = parsed.speaker || process.env.VOLC_S2S_SPEAKER_ID || 'zh_female_vv_jupiter_bigtts';
|
||||||
session.greetingText = parsed.greetingText || session.greetingText || '你好,我是你的智能语音助手,有什么可以帮你的吗?';
|
session.modelVersion = parsed.modelVersion || 'O';
|
||||||
|
session.greetingText = parsed.greetingText || session.greetingText || '嗨,你好呀!我是大沃,你的专属智能助手。关于德国PM产品、一成系统、招商合作,随时问我就好~';
|
||||||
session.userId = parsed.userId || session.userId || null;
|
session.userId = parsed.userId || session.userId || null;
|
||||||
session.upstream = createUpstreamConnection(session);
|
session.upstream = createUpstreamConnection(session);
|
||||||
loadHandoffSummaryForVoice(session).catch((error) => {
|
loadHandoffSummaryForVoice(session).catch((error) => {
|
||||||
@@ -548,6 +626,7 @@ function attachClientHandlers(session) {
|
|||||||
clearTimeout(session.greetingTimer);
|
clearTimeout(session.greetingTimer);
|
||||||
clearTimeout(session.greetingAckTimer);
|
clearTimeout(session.greetingAckTimer);
|
||||||
clearTimeout(session.readyTimer);
|
clearTimeout(session.readyTimer);
|
||||||
|
clearTimeout(session.idleTimer);
|
||||||
if (session.upstream && session.upstream.readyState === WebSocket.OPEN) {
|
if (session.upstream && session.upstream.readyState === WebSocket.OPEN) {
|
||||||
session.upstream.close();
|
session.upstream.close();
|
||||||
}
|
}
|
||||||
@@ -580,12 +659,19 @@ function createUpstreamConnection(session) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
upstream.on('error', (error) => {
|
upstream.on('error', (error) => {
|
||||||
console.error('[NativeVoice] upstream error:', error.message);
|
console.error('[NativeVoice] upstream ws error:', error.message);
|
||||||
sendJson(session.client, { type: 'error', error: error.message });
|
sendJson(session.client, { type: 'error', error: `语音服务连接异常: ${error.message}` });
|
||||||
});
|
});
|
||||||
|
|
||||||
upstream.on('close', () => {
|
upstream.on('close', (code) => {
|
||||||
sendJson(session.client, { type: 'closed' });
|
console.log(`[NativeVoice] upstream closed session=${session.sessionId} code=${code}`);
|
||||||
|
session.upstreamReady = false;
|
||||||
|
sendJson(session.client, { type: 'upstream_closed', code });
|
||||||
|
setTimeout(() => {
|
||||||
|
if (session.client && session.client.readyState === WebSocket.OPEN) {
|
||||||
|
session.client.close();
|
||||||
|
}
|
||||||
|
}, 3000);
|
||||||
});
|
});
|
||||||
|
|
||||||
return upstream;
|
return upstream;
|
||||||
@@ -601,6 +687,7 @@ function createSession(client, sessionId) {
|
|||||||
latestUserText: '',
|
latestUserText: '',
|
||||||
queuedUserText: '',
|
queuedUserText: '',
|
||||||
processingReply: false,
|
processingReply: false,
|
||||||
|
blockUpstreamAudio: false,
|
||||||
directSpeakUntil: 0,
|
directSpeakUntil: 0,
|
||||||
queuedReplyTimer: null,
|
queuedReplyTimer: null,
|
||||||
lastPersistedAssistantText: '',
|
lastPersistedAssistantText: '',
|
||||||
@@ -608,11 +695,12 @@ function createSession(client, sessionId) {
|
|||||||
assistantStreamBuffer: '',
|
assistantStreamBuffer: '',
|
||||||
assistantStreamReplyId: '',
|
assistantStreamReplyId: '',
|
||||||
currentTtsType: '',
|
currentTtsType: '',
|
||||||
botName: '豆包',
|
botName: '大沃',
|
||||||
systemRole: '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。',
|
systemRole: '你是大沃,一个德国PM健康事业的智能语音助手。你对PM-FitLine细胞营养素产品、一成系统、招商合作非常熟悉。请优先依据 external_rag 给出的内容回答。用户进来时请自然地打个招呼,像朋友聊天一样,不要用客服话术。',
|
||||||
speakingStyle: '请使用清晰、自然、简洁的口吻。',
|
speakingStyle: '说话像朋友聊天一样自然轻松,语气亲切活泼,不要像客服念稿。',
|
||||||
speaker: 'zh_female_vv_jupiter_bigtts',
|
speaker: process.env.VOLC_S2S_SPEAKER_ID || 'zh_female_vv_jupiter_bigtts',
|
||||||
greetingText: '你好,我是你的智能语音助手,有什么可以帮你的吗?',
|
modelVersion: 'O',
|
||||||
|
greetingText: '嗨,你好呀!我是大沃,你的专属智能助手。关于德国PM产品、一成系统、招商合作,随时问我就好~',
|
||||||
hasSentGreeting: false,
|
hasSentGreeting: false,
|
||||||
greetingTimer: null,
|
greetingTimer: null,
|
||||||
greetingAckTimer: null,
|
greetingAckTimer: null,
|
||||||
@@ -626,6 +714,8 @@ function createSession(client, sessionId) {
|
|||||||
pendingAssistantSource: null,
|
pendingAssistantSource: null,
|
||||||
pendingAssistantToolName: null,
|
pendingAssistantToolName: null,
|
||||||
pendingAssistantMeta: null,
|
pendingAssistantMeta: null,
|
||||||
|
idleTimer: null,
|
||||||
|
lastActivityAt: Date.now(),
|
||||||
};
|
};
|
||||||
sessions.set(sessionId, session);
|
sessions.set(sessionId, session);
|
||||||
attachClientHandlers(session);
|
attachClientHandlers(session);
|
||||||
|
|||||||
@@ -111,11 +111,11 @@ function buildDirectChatMessages(session, context, userText) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function hasKnowledgeKeyword(text) {
|
function hasKnowledgeKeyword(text) {
|
||||||
return /(系统|平台|产品|功能|介绍|说明|规则|流程|步骤|配置|接入|开通|操作|怎么用|如何用|适合谁|区别|价格|费用|政策|售后|文档|资料|方案|一成系统)/.test(text || '');
|
return /(系统|平台|产品|功能|介绍|说明|规则|流程|步骤|配置|接入|开通|操作|怎么用|如何用|适合谁|区别|价格|费用|政策|售后|文档|资料|方案|一成系统|PM公司|德国PM|公司地址|电话|联系方式|公司实力|背景|培训|新人|起步三关|精品会议|成长上总裁|招商|代理|加盟|合作|邀约话术|小红|大白|小白|Activize|Basics|Restorate|FitLine|细胞营养素|NTC|营养保送|火炉原理|阿育吠陀|Ayurveda|儿童倍适|AI落地|ai落地|转观念|科普|营养|成分|功效|基础三合一|三合一|基础套装|套装|Ai众享|数字化工作室|盛咖学愿)/i.test(text || '');
|
||||||
}
|
}
|
||||||
|
|
||||||
function isKnowledgeFollowUp(text) {
|
function isKnowledgeFollowUp(text) {
|
||||||
return /^(这个|那个|它|该系统|这个系统|那个系统|这个功能|那个功能|这个产品|那个产品|详细|详细说说|详细查一下|展开说说|继续说|继续讲|怎么用|怎么操作|怎么配置|适合谁|有什么区别|费用多少|价格多少|怎么申请|怎么开通|是什么|什么意思)/.test((text || '').trim());
|
return /^(这个|那个|它|该系统|这个系统|那个系统|这个功能|那个功能|这个产品|那个产品|这个公司|那家公司|这个政策|那个政策|这个培训|那个培训|详细|详细说说|详细查一下|展开说说|继续说|继续讲|怎么用|怎么操作|怎么配置|适合谁|有什么区别|费用多少|价格多少|怎么申请|怎么开通|是什么|什么意思|地址在哪|电话多少|联系方式|具体政策|具体内容|怎么吃|功效是什么|有什么功效|成分是什么|有什么成分|多少钱|哪里买|怎么买|配方|原理是什么|有什么好处|怎么服用|适合什么人)/.test((text || '').trim());
|
||||||
}
|
}
|
||||||
|
|
||||||
function shouldForceKnowledgeRoute(userText, context = []) {
|
function shouldForceKnowledgeRoute(userText, context = []) {
|
||||||
@@ -180,7 +180,7 @@ function getRuleBasedDirectRouteDecision(userText) {
|
|||||||
if (/^(喂[,,\s]*)?(你好|您好)[,,!。??\s]*(在吗|请问)?[!。??]*$/.test(text)) {
|
if (/^(喂[,,\s]*)?(你好|您好)[,,!。??\s]*(在吗|请问)?[!。??]*$/.test(text)) {
|
||||||
return { route: 'chat', args: {} };
|
return { route: 'chat', args: {} };
|
||||||
}
|
}
|
||||||
return { route: 'search_knowledge', args: { query: text } };
|
return { route: 'chat', args: {} };
|
||||||
}
|
}
|
||||||
|
|
||||||
function extractToolResultText(toolName, toolResult) {
|
function extractToolResultText(toolName, toolResult) {
|
||||||
@@ -192,6 +192,9 @@ function extractToolResultText(toolName, toolResult) {
|
|||||||
if (toolResult.errorType === 'not_configured') {
|
if (toolResult.errorType === 'not_configured') {
|
||||||
return '知识库当前未配置完成,请先检查知识库配置。';
|
return '知识库当前未配置完成,请先检查知识库配置。';
|
||||||
}
|
}
|
||||||
|
if (toolResult.errorType === 'endpoint_not_configured') {
|
||||||
|
return '知识库已配置但方舟LLM端点未就绪,暂时无法检索,请稍后再试。';
|
||||||
|
}
|
||||||
if (toolResult.results && Array.isArray(toolResult.results)) {
|
if (toolResult.results && Array.isArray(toolResult.results)) {
|
||||||
return toolResult.results.map((item) => item.content || JSON.stringify(item)).join('\n');
|
return toolResult.results.map((item) => item.content || JSON.stringify(item)).join('\n');
|
||||||
}
|
}
|
||||||
@@ -207,7 +210,13 @@ function extractToolResultText(toolName, toolResult) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function resolveReply(sessionId, session, text) {
|
async function resolveReply(sessionId, session, text) {
|
||||||
const baseContext = await db.getHistoryForLLM(sessionId, 20).catch(() => []);
|
const recentMessages = await db.getRecentMessages(sessionId, 20).catch(() => []);
|
||||||
|
const scopedMessages = session?.handoffSummaryUsed
|
||||||
|
? recentMessages.filter((item) => !/^chat_/i.test(String(item?.source || '')))
|
||||||
|
: recentMessages;
|
||||||
|
const baseContext = scopedMessages
|
||||||
|
.filter((item) => item && (item.role === 'user' || item.role === 'assistant'))
|
||||||
|
.map((item) => ({ role: item.role, content: item.content }));
|
||||||
const context = withHandoffSummary(session, baseContext);
|
const context = withHandoffSummary(session, baseContext);
|
||||||
const originalText = text.trim();
|
const originalText = text.trim();
|
||||||
let routeDecision = getRuleBasedDirectRouteDecision(text.trim());
|
let routeDecision = getRuleBasedDirectRouteDecision(text.trim());
|
||||||
@@ -236,7 +245,7 @@ async function resolveReply(sessionId, session, text) {
|
|||||||
toolName = routeDecision.route;
|
toolName = routeDecision.route;
|
||||||
source = 'voice_tool';
|
source = 'voice_tool';
|
||||||
const toolArgs = toolName === 'search_knowledge'
|
const toolArgs = toolName === 'search_knowledge'
|
||||||
? { ...(routeDecision.args || {}), response_mode: 'snippet' }
|
? { ...(routeDecision.args || {}), response_mode: 'answer' }
|
||||||
: routeDecision.args;
|
: routeDecision.args;
|
||||||
const toolResult = await ToolExecutor.execute(routeDecision.route, toolArgs, context);
|
const toolResult = await ToolExecutor.execute(routeDecision.route, toolArgs, context);
|
||||||
replyText = extractToolResultText(toolName, toolResult);
|
replyText = extractToolResultText(toolName, toolResult);
|
||||||
@@ -247,6 +256,8 @@ async function resolveReply(sessionId, session, text) {
|
|||||||
source: toolResult?.source || null,
|
source: toolResult?.source || null,
|
||||||
original_query: toolResult?.original_query || routeDecision.args?.query || originalText,
|
original_query: toolResult?.original_query || routeDecision.args?.query || originalText,
|
||||||
rewritten_query: toolResult?.rewritten_query || null,
|
rewritten_query: toolResult?.rewritten_query || null,
|
||||||
|
selected_dataset_ids: toolResult?.selected_dataset_ids || null,
|
||||||
|
selected_kb_routes: toolResult?.selected_kb_routes || null,
|
||||||
hit: typeof toolResult?.hit === 'boolean' ? toolResult.hit : null,
|
hit: typeof toolResult?.hit === 'boolean' ? toolResult.hit : null,
|
||||||
reason: toolResult?.reason || null,
|
reason: toolResult?.reason || null,
|
||||||
error_type: toolResult?.errorType || null,
|
error_type: toolResult?.errorType || null,
|
||||||
@@ -278,6 +289,19 @@ async function resolveReply(sessionId, session, text) {
|
|||||||
responseMeta,
|
responseMeta,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (toolName === 'search_knowledge' && !toolResult?.hit) {
|
||||||
|
session.handoffSummaryUsed = true;
|
||||||
|
return {
|
||||||
|
delivery: 'upstream_chat',
|
||||||
|
speechText: '',
|
||||||
|
ragItems: [],
|
||||||
|
source: 'voice_bot',
|
||||||
|
toolName: null,
|
||||||
|
routeDecision,
|
||||||
|
responseMeta,
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const speechText = normalizeTextForSpeech(replyText);
|
const speechText = normalizeTextForSpeech(replyText);
|
||||||
session.handoffSummaryUsed = true;
|
session.handoffSummaryUsed = true;
|
||||||
|
|||||||
Reference in New Issue
Block a user