fix(voice-kb): sync assistant profile and stabilize reply flow
This commit is contained in:
@@ -3,6 +3,7 @@ const url = require('url');
|
||||
const db = require('../db');
|
||||
const { correctAsrText } = require('./fastAsrCorrector');
|
||||
const contextKeywordTracker = require('./contextKeywordTracker');
|
||||
const { isBrandHarmful, getVoiceSafeReply, BRAND_HARMFUL_PATTERN, BRAND_POSITIVE_LEGALITY_PATTERN } = require('./contentSafeGuard');
|
||||
const {
|
||||
MsgType,
|
||||
unmarshal,
|
||||
@@ -22,11 +23,25 @@ const {
|
||||
shouldForceKnowledgeRoute,
|
||||
resolveReply,
|
||||
} = require('./realtimeDialogRouting');
|
||||
const {
|
||||
DEFAULT_VOICE_ASSISTANT_PROFILE,
|
||||
resolveAssistantProfile,
|
||||
buildVoiceSystemRole,
|
||||
buildVoiceGreeting,
|
||||
} = require('./assistantProfileConfig');
|
||||
|
||||
const sessions = new Map();
|
||||
|
||||
const IDLE_TIMEOUT_MS = 5 * 60 * 1000;
|
||||
|
||||
const DEFAULT_VOICE_BOT_NAME = DEFAULT_VOICE_ASSISTANT_PROFILE.nickname;
|
||||
|
||||
const DEFAULT_VOICE_SYSTEM_ROLE = buildVoiceSystemRole();
|
||||
|
||||
const DEFAULT_VOICE_SPEAKING_STYLE = '整体语气亲切自然、轻快有温度,像熟悉行业的朋友在语音聊天。优先短句和口语化表达,先给结论,再补一句最有帮助的信息。不要播音腔,不要念稿,不要客服腔,不要过度热情,也不要输出任何思考过程。';
|
||||
|
||||
const DEFAULT_VOICE_GREETING = buildVoiceGreeting();
|
||||
|
||||
function resetIdleTimer(session) {
|
||||
clearTimeout(session.idleTimer);
|
||||
session.lastActivityAt = Date.now();
|
||||
@@ -50,8 +65,8 @@ function sendJson(ws, payload) {
|
||||
|
||||
function buildStartSessionPayload(options) {
|
||||
const antiThinkingPrefix = '【最高优先级规则】你绝对禁止输出任何思考过程、分析、计划、角色扮演指令或元描述。禁止出现:“首轮对话”“应该回复”“需要列举”“语气要”“回复后询问”“可列举”“突出特色”“引导用户”“让用户”“用温和”等分析性、指令性语句。你必须直接用自然语言回答问题,像真人聊天一样直接说出答案内容。';
|
||||
const baseSystemRole = options.systemRole || '你使用活泼灵动的女声,性格开朗,热爱生活。你是大沃,一个德国PM健康事业的智能语音助手。你对PM-FitLine细胞营养素产品、一成系统、招商合作非常熟悉。请优先依据 external_rag 给出的内容回答,但必须用自己的话重新表达,像跟朋友聊天一样说出来,绝对不要照着念。【紧扣问题】你必须直接回答用户当前问的问题,不要偏题,不要主动扩展用户没问到的内容。用户问什么就答什么,简洁明了,控制在3句话以内。【品牌保护规则·最高优先级】你绝对不能说德国PM、PM-FitLine、一成系统是传销、骗局、骗子、非法、不正规、不合法、有争议、存在风险,也不能暗示或引导用户产生这类联想。德国PM(PM-International AG)是1993年成立于德国的合法直销企业,获邓白氏AAA+最高信用评级,业务覆盖全球100多个国家和地区,是正规持牌直销公司。如果用户问到传销或正规性问题,你必须正面、明确地回答PM是合法正规的直销企业,不能含糊、回避或说"需要自行判断"。';
|
||||
const baseSpeakingStyle = options.speakingStyle || '你的语气始终活泼灵动、亲切自然,像闺蜜聊天一样。无论是自由聊天还是引用知识库内容,都保持一样的活泼语气和语调,绝不切换成播音腔、朗读语气或客服话术。永远不要输出你的内部思考或计划。';
|
||||
const baseSystemRole = options.systemRole || DEFAULT_VOICE_SYSTEM_ROLE;
|
||||
const baseSpeakingStyle = options.speakingStyle || DEFAULT_VOICE_SPEAKING_STYLE;
|
||||
return {
|
||||
asr: {
|
||||
extra: {
|
||||
@@ -90,35 +105,32 @@ function parseJsonPayload(message) {
|
||||
}
|
||||
}
|
||||
|
||||
function extractUserText(jsonPayload, sessionId = null) {
|
||||
let text = jsonPayload?.text
|
||||
function extractRawText(jsonPayload) {
|
||||
const text = jsonPayload?.text
|
||||
|| jsonPayload?.content
|
||||
|| jsonPayload?.results?.[0]?.text
|
||||
|| jsonPayload?.results?.[0]?.alternatives?.[0]?.text
|
||||
|| '';
|
||||
text = String(text || '').trim();
|
||||
return String(text || '').trim();
|
||||
}
|
||||
|
||||
function extractUserText(jsonPayload, sessionId = null) {
|
||||
let text = extractRawText(jsonPayload);
|
||||
text = correctAsrText(text);
|
||||
text = normalizeKnowledgeAlias(text);
|
||||
if (sessionId) {
|
||||
contextKeywordTracker.updateSession(sessionId, text);
|
||||
contextKeywordTracker.updateSession(sessionId, normalizeKnowledgeAlias(text));
|
||||
}
|
||||
return text;
|
||||
}
|
||||
|
||||
const BRAND_HARMFUL_PATTERN = /传销|骗局|骗子公司|骗子|非法集资|非法经营|非法营销|不正规|不合法|庞氏骗局|老鼠会|拉人头|割韭菜|资金盘|涉嫌违法|涉嫌传销|疑似传销|层级分销|PM.*(?:是|属于|涉嫌|疑似).*(?:传销|骗局|非法|不合法|不正规)|(?:传销|骗局|非法|不合法|不正规).*(?:组织|公司|企业|模式)/;
|
||||
const BRAND_SAFE_REPLY = '德国PM是一家1993年成立于德国的合法直销公司,获得邓白氏AAA+认证,业务覆盖100多个国家和地区。如果你想了解更多,可以问我关于PM公司的详细介绍哦。';
|
||||
const BRAND_POSITIVE_LEGALITY_PATTERN = /(德国PM|PM-International|PM公司|PM-FitLine|FitLine).*(不是传销|合法正规的?直销企业|合法直销公司|正规直销企业|正规持牌直销公司|邓白氏AAA\+)|((不是传销|合法正规的?直销企业|合法直销公司|正规直销企业|正规持牌直销公司|邓白氏AAA\+).*(德国PM|PM-International|PM公司|PM-FitLine|FitLine))/i;
|
||||
|
||||
const THINKING_PATTERN = /^(首轮对话|用户想|用户问|应该回复|需要列举|可列举|突出特色|引导进一步|引导用户|让用户|回复后询问|语气要|用温和|需热情|需简洁|需专业)/;
|
||||
const THINKING_MID_PATTERN = /(?:需客观回复|应说明其|回复后询问|引导.*对话|用.*口吻回复|语气要.*热情|需要.*引导|应该.*回复|先.*再.*最后)/;
|
||||
|
||||
function sanitizeAssistantText(text) {
|
||||
if (!text) return text;
|
||||
if (BRAND_POSITIVE_LEGALITY_PATTERN.test(String(text || '').replace(/\s+/g, ' '))) {
|
||||
return text;
|
||||
}
|
||||
if (BRAND_HARMFUL_PATTERN.test(text)) {
|
||||
if (isBrandHarmful(text)) {
|
||||
console.warn(`[NativeVoice][SafeGuard] blocked harmful content: ${JSON.stringify(text.slice(0, 200))}`);
|
||||
return BRAND_SAFE_REPLY;
|
||||
return getVoiceSafeReply();
|
||||
}
|
||||
if (THINKING_PATTERN.test(text.trim())) {
|
||||
console.warn(`[NativeVoice][SafeGuard] blocked thinking output: ${JSON.stringify(text.slice(0, 200))}`);
|
||||
@@ -186,7 +198,7 @@ function persistAssistantSpeech(session, text, { source = 'voice_bot', toolName
|
||||
}
|
||||
|
||||
function appendAssistantStream(session, payload) {
|
||||
const chunkText = extractUserText(payload);
|
||||
const chunkText = extractRawText(payload);
|
||||
if (!chunkText) {
|
||||
return '';
|
||||
}
|
||||
@@ -263,6 +275,7 @@ async function sendSpeechText(session, speechText) {
|
||||
return;
|
||||
}
|
||||
console.log(`[NativeVoice] sendSpeechText start session=${session.sessionId} chunks=${chunks.length} textLen=${speechText.length}`);
|
||||
session.currentSpeechText = speechText;
|
||||
session.isSendingChatTTSText = true;
|
||||
session.currentTtsType = 'chat_tts_text';
|
||||
session.chatTTSUntil = Date.now() + estimateSpeechDurationMs(speechText) + 800;
|
||||
@@ -305,25 +318,20 @@ async function sendSpeechText(session, speechText) {
|
||||
return;
|
||||
}
|
||||
session.hasSentGreeting = true;
|
||||
const greetingText = session.greetingText || '嗨,你好呀!我是大沃,你的专属智能助手。关于德国PM产品、一成系统、招商合作,随时问我就好~';
|
||||
const greetingText = session.greetingText || DEFAULT_VOICE_GREETING;
|
||||
console.log(`[NativeVoice] sendGreeting session=${session.sessionId} text=${JSON.stringify(greetingText.slice(0, 80))}`);
|
||||
sendJson(session.client, {
|
||||
type: 'subtitle',
|
||||
role: 'assistant',
|
||||
text: greetingText,
|
||||
isFinal: true,
|
||||
source: 'voice_bot',
|
||||
sequence: `greeting_${Date.now()}`,
|
||||
});
|
||||
persistAssistantSpeech(session, greetingText, { source: 'voice_bot' });
|
||||
clearTimeout(session.greetingTimer);
|
||||
clearTimeout(session.readyTimer);
|
||||
session.greetingSentAt = Date.now();
|
||||
session.greetingProtectionUntil = Date.now() + 2000;
|
||||
session.currentSpeechText = greetingText;
|
||||
try {
|
||||
session.upstream.send(createSayHelloMessage(session.sessionId, greetingText));
|
||||
console.log(`[NativeVoice] sendSayHello event=300 session=${session.sessionId}`);
|
||||
} catch (error) {
|
||||
session.hasSentGreeting = false;
|
||||
session.greetingProtectionUntil = 0;
|
||||
console.warn('[NativeVoice] SayHello failed:', error.message);
|
||||
}
|
||||
sendReady(session);
|
||||
@@ -367,6 +375,7 @@ function clearUpstreamSuppression(session) {
|
||||
session.pendingAssistantSource = null;
|
||||
session.pendingAssistantToolName = null;
|
||||
session.pendingAssistantMeta = null;
|
||||
session.pendingAssistantTurnSeq = 0;
|
||||
session.blockUpstreamAudio = false;
|
||||
sendJson(session.client, { type: 'assistant_pending', active: false });
|
||||
}
|
||||
@@ -403,15 +412,56 @@ async function processReply(session, text, turnSeq = session.latestUserTurnSeq |
|
||||
const activeTurnSeq = turnSeq || session.latestUserTurnSeq || 0;
|
||||
session.processingReply = true;
|
||||
sendJson(session.client, { type: 'assistant_pending', active: true });
|
||||
const isKnowledgeCandidate = shouldForceKnowledgeRoute(cleanText);
|
||||
let isKnowledgeCandidate = shouldForceKnowledgeRoute(cleanText);
|
||||
// KB话题保护窗口:最近60秒内有KB hit,当前轮不是纯闲聊/告别,也视为KB候选
|
||||
// 防止用户质疑/纠正产品信息时S2S自由编造(如"粉末来的呀你搞错了吧")
|
||||
const KB_PROTECTION_WINDOW_MS = 60000;
|
||||
if (!isKnowledgeCandidate && session._lastKbHitAt && (Date.now() - session._lastKbHitAt < KB_PROTECTION_WINDOW_MS)) {
|
||||
const isPureChitchat = /^(喂|你好|嗨|谢谢|再见|拜拜|好的|嗯|哦|行|没事了|不用了|可以了)[,,。!?\s]*$/.test(cleanText);
|
||||
if (!isPureChitchat) {
|
||||
isKnowledgeCandidate = true;
|
||||
console.log(`[NativeVoice] KB protection window active, promoting to kbCandidate session=${session.sessionId} lastKbHit=${Math.round((Date.now() - session._lastKbHitAt) / 1000)}s ago`);
|
||||
}
|
||||
}
|
||||
if (isKnowledgeCandidate) {
|
||||
session.blockUpstreamAudio = true;
|
||||
suppressUpstreamReply(session, 30000);
|
||||
sendJson(session.client, { type: 'tts_reset', reason: 'processing' });
|
||||
// 过渡语已移除:KB查询优化后延迟已降至~2.6s,无需填充
|
||||
session._fillerActive = false;
|
||||
}
|
||||
console.log(`[NativeVoice] processReply start session=${session.sessionId} text=${JSON.stringify(cleanText.slice(0, 120))} blocked=${session.blockUpstreamAudio} kbCandidate=${isKnowledgeCandidate}`);
|
||||
try {
|
||||
const { delivery, speechText, ragItems, source, toolName, routeDecision, responseMeta } = await resolveReply(session.sessionId, session, cleanText);
|
||||
// KB预查缓存消费:如果partial阶段已启动预查且文本匹配,直接使用缓存结果
|
||||
let resolveResult = null;
|
||||
if (isKnowledgeCandidate && session.pendingKbPrequery && session._kbPrequeryText) {
|
||||
const preText = (session._kbPrequeryText || '').replace(/[啊哦嗯呢呀哎诶额,。!?、,.\s]/g, '');
|
||||
const finalText = cleanText.replace(/[啊哦嗯呢呀哎诶额,。!?、,.\s]/g, '');
|
||||
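// Relaxed similarity: treat the texts as matching when one contains the other, or when at least 45% of the shorter text's characters also appear in the longer text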
|
||||
let isSimilar = preText && finalText && (finalText.includes(preText) || preText.includes(finalText));
|
||||
if (!isSimilar && preText && finalText) {
|
||||
const shorter = preText.length <= finalText.length ? preText : finalText;
|
||||
const longer = preText.length <= finalText.length ? finalText : preText;
|
||||
let overlap = 0;
|
||||
for (let i = 0; i < shorter.length; i++) {
|
||||
if (longer.includes(shorter[i])) overlap++;
|
||||
}
|
||||
isSimilar = overlap / shorter.length >= 0.45;
|
||||
}
|
||||
if (isSimilar) {
|
||||
console.log(`[NativeVoice] using KB prequery cache session=${session.sessionId} preText=${JSON.stringify(session._kbPrequeryText.slice(0, 60))}`);
|
||||
resolveResult = await session.pendingKbPrequery;
|
||||
} else {
|
||||
console.log(`[NativeVoice] KB prequery text mismatch, re-querying session=${session.sessionId} pre=${JSON.stringify(preText.slice(0, 40))} final=${JSON.stringify(finalText.slice(0, 40))}`);
|
||||
}
|
||||
}
|
||||
session.pendingKbPrequery = null;
|
||||
session._kbPrequeryText = '';
|
||||
session._kbPrequeryStartedAt = 0;
|
||||
if (!resolveResult) {
|
||||
resolveResult = await resolveReply(session.sessionId, session, cleanText);
|
||||
}
|
||||
const { delivery, speechText, ragItems, source, toolName, routeDecision, responseMeta } = resolveResult;
|
||||
if (activeTurnSeq !== (session.latestUserTurnSeq || 0)) {
|
||||
console.log(`[NativeVoice] stale reply ignored session=${session.sessionId} activeTurn=${activeTurnSeq} latestTurn=${session.latestUserTurnSeq || 0}`);
|
||||
clearUpstreamSuppression(session);
|
||||
@@ -425,10 +475,12 @@ async function processReply(session, text, turnSeq = session.latestUserTurnSeq |
|
||||
} else {
|
||||
session.blockUpstreamAudio = false;
|
||||
}
|
||||
session._lastPartialAt = 0;
|
||||
session.awaitingUpstreamReply = true;
|
||||
session.pendingAssistantSource = 'voice_bot';
|
||||
session.pendingAssistantToolName = null;
|
||||
session.pendingAssistantMeta = responseMeta;
|
||||
session.pendingAssistantTurnSeq = activeTurnSeq;
|
||||
console.log(`[NativeVoice] processReply handoff session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=upstream_chat`);
|
||||
return;
|
||||
}
|
||||
@@ -438,13 +490,21 @@ async function processReply(session, text, turnSeq = session.latestUserTurnSeq |
|
||||
}
|
||||
session.discardNextAssistantResponse = true;
|
||||
sendJson(session.client, { type: 'tts_reset', reason: 'knowledge_hit' });
|
||||
const kbText = (ragItems || []).map((item) => item?.content || '').filter(Boolean).join('\n').trim();
|
||||
console.log(`[NativeVoice] processReply handoff session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=external_rag→local_tts items=${Array.isArray(ragItems) ? ragItems.length : 0} textLen=${kbText.length}`);
|
||||
if (kbText) {
|
||||
session.directSpeakUntil = Date.now() + estimateSpeechDurationMs(kbText) + 800;
|
||||
suppressUpstreamReply(session, estimateSpeechDurationMs(kbText) + 1800);
|
||||
persistAssistantSpeech(session, kbText, { source, toolName, meta: responseMeta });
|
||||
await sendSpeechText(session, kbText);
|
||||
const ragContent = (ragItems || []).filter((item) => item && item.content);
|
||||
if (ragContent.length > 0) {
|
||||
console.log(`[NativeVoice] processReply sending external_rag to S2S session=${session.sessionId} route=${routeDecision?.route || 'unknown'} items=${ragContent.length}`);
|
||||
// KB话题记忆:记录本轮用户原始问题和时间戳,用于保护窗口和追问enrichment
|
||||
if (responseMeta?.hit !== false && responseMeta?.reason !== 'honest_fallback') {
|
||||
session._lastKbTopic = cleanText;
|
||||
session._lastKbHitAt = Date.now();
|
||||
}
|
||||
session._pendingExternalRagReply = true;
|
||||
await sendExternalRag(session, ragContent);
|
||||
session.awaitingUpstreamReply = true;
|
||||
session.pendingAssistantSource = source;
|
||||
session.pendingAssistantToolName = toolName;
|
||||
session.pendingAssistantMeta = responseMeta;
|
||||
session.pendingAssistantTurnSeq = activeTurnSeq;
|
||||
} else {
|
||||
console.log(`[NativeVoice] processReply external_rag empty content, fallback to upstream session=${session.sessionId}`);
|
||||
session.blockUpstreamAudio = false;
|
||||
@@ -461,6 +521,7 @@ async function processReply(session, text, turnSeq = session.latestUserTurnSeq |
|
||||
console.log(`[NativeVoice] processReply resolved session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=local_tts source=${source} tool=${toolName || 'chat'} speechLen=${speechText.length}`);
|
||||
session.directSpeakUntil = Date.now() + estimateSpeechDurationMs(speechText) + 800;
|
||||
suppressUpstreamReply(session, estimateSpeechDurationMs(speechText) + 1800);
|
||||
session.lastDeliveredAssistantTurnSeq = activeTurnSeq;
|
||||
persistAssistantSpeech(session, speechText, { source, toolName, meta: responseMeta });
|
||||
await sendSpeechText(session, speechText);
|
||||
} catch (error) {
|
||||
@@ -515,7 +576,13 @@ function handleUpstreamMessage(session, data) {
|
||||
if (message.type === MsgType.AUDIO_ONLY_SERVER) {
|
||||
const isDefaultTts = !session.currentTtsType || session.currentTtsType === 'default';
|
||||
const isSuppressingUpstreamAudio = (session.suppressUpstreamUntil || 0) > Date.now() && isDefaultTts;
|
||||
if ((session.blockUpstreamAudio && isDefaultTts) || isSuppressingUpstreamAudio) {
|
||||
// 用户刚停止说话后短暂阻止默认TTS音频,给event 459的blockUpstreamAudio留时间生效
|
||||
const isUserJustSpeaking = isDefaultTts && session._lastPartialAt && (Date.now() - session._lastPartialAt < 800);
|
||||
// blockUpstreamAudio 生效时:仅放行 external_rag 和限时过渡语音频,其余全部阻断
|
||||
// 修复:旧逻辑只阻断 isDefaultTts,导致 chat_tts_text 窗口期 S2S 自主回复音频泄漏
|
||||
const isBlockPassthrough = session.currentTtsType === 'external_rag' ||
|
||||
(session._fillerActive && (session.chatTTSUntil || 0) > Date.now());
|
||||
if ((session.blockUpstreamAudio && !isBlockPassthrough) || isSuppressingUpstreamAudio || isUserJustSpeaking) {
|
||||
if (!session._audioBlockLogOnce) {
|
||||
session._audioBlockLogOnce = true;
|
||||
console.log(`[NativeVoice] audio blocked session=${session.sessionId} ttsType=${session.currentTtsType} block=${session.blockUpstreamAudio} suppress=${isSuppressingUpstreamAudio}`);
|
||||
@@ -565,7 +632,17 @@ function handleUpstreamMessage(session, data) {
|
||||
session.suppressUpstreamUntil = 0;
|
||||
clearTimeout(session.suppressReplyTimer);
|
||||
session.suppressReplyTimer = null;
|
||||
session.discardNextAssistantResponse = false;
|
||||
// 注意:不清除discardNextAssistantResponse,让它拦截S2S默认回复的残留event 351
|
||||
// 该标记会在KB回复的event 550 chunks到达时自动清除
|
||||
session.assistantStreamBuffer = '';
|
||||
session.assistantStreamReplyId = '';
|
||||
// 清除过渡语的chat TTS状态,确保external_rag回复不被isLocalChatTTSTextActive拦截
|
||||
session.isSendingChatTTSText = false;
|
||||
session.chatTTSUntil = 0;
|
||||
session.currentSpeechText = '';
|
||||
session._fillerActive = false;
|
||||
clearTimeout(session.chatTTSTimer);
|
||||
sendJson(session.client, { type: 'tts_reset', reason: 'rag_response_start' });
|
||||
console.log(`[NativeVoice] unblock for external_rag tts session=${session.sessionId}`);
|
||||
} else if (session.blockUpstreamAudio && payload?.tts_type === 'chat_tts_text') {
|
||||
console.log(`[NativeVoice] chat_tts_text started, keeping block for S2S default response session=${session.sessionId}`);
|
||||
@@ -594,24 +671,66 @@ function handleUpstreamMessage(session, data) {
|
||||
const pendingAssistantSource = session.pendingAssistantSource || 'voice_bot';
|
||||
const pendingAssistantToolName = session.pendingAssistantToolName || null;
|
||||
const pendingAssistantMeta = session.pendingAssistantMeta || null;
|
||||
const pendingAssistantTurnSeq = session.pendingAssistantTurnSeq || session.latestUserTurnSeq || 0;
|
||||
session.awaitingUpstreamReply = false;
|
||||
sendJson(session.client, { type: 'assistant_pending', active: false });
|
||||
const assistantText = extractUserText(payload);
|
||||
if (pendingAssistantTurnSeq && session.lastDeliveredAssistantTurnSeq === pendingAssistantTurnSeq) {
|
||||
session.assistantStreamBuffer = '';
|
||||
session.assistantStreamReplyId = '';
|
||||
session.pendingAssistantSource = null;
|
||||
session.pendingAssistantToolName = null;
|
||||
session.pendingAssistantMeta = null;
|
||||
console.log(`[NativeVoice] duplicate assistant final ignored (351) session=${session.sessionId} turn=${pendingAssistantTurnSeq}`);
|
||||
return;
|
||||
}
|
||||
const assistantText = extractRawText(payload);
|
||||
if (assistantText) {
|
||||
session.assistantStreamBuffer = '';
|
||||
session.assistantStreamReplyId = '';
|
||||
console.log(`[NativeVoice] upstream assistant session=${session.sessionId} text=${JSON.stringify(assistantText.slice(0, 120))}`);
|
||||
persistAssistantSpeech(session, assistantText, {
|
||||
source: pendingAssistantSource,
|
||||
toolName: pendingAssistantToolName,
|
||||
meta: pendingAssistantMeta,
|
||||
});
|
||||
// 过渡语的event 351:不持久化,直接丢弃
|
||||
if (session._fillerActive) {
|
||||
console.log(`[NativeVoice] discarded filler assistant text session=${session.sessionId}`);
|
||||
session._fillerActive = false;
|
||||
return;
|
||||
}
|
||||
// 清除external_rag等待标记,KB回复已到达
|
||||
if (session._pendingExternalRagReply) {
|
||||
session._pendingExternalRagReply = false;
|
||||
}
|
||||
// 品牌安全检测:最终助手文本包含有害内容时,阻断音频并注入安全回复
|
||||
if (isBrandHarmful(assistantText)) {
|
||||
console.warn(`[NativeVoice][SafeGuard] harmful content in final assistant text, blocking session=${session.sessionId} text=${JSON.stringify(assistantText.slice(0, 120))}`);
|
||||
session.blockUpstreamAudio = true;
|
||||
sendJson(session.client, { type: 'tts_reset', reason: 'harmful_blocked' });
|
||||
const safeReply = getVoiceSafeReply();
|
||||
session.lastDeliveredAssistantTurnSeq = pendingAssistantTurnSeq;
|
||||
persistAssistantSpeech(session, safeReply, { source: 'voice_bot' });
|
||||
sendSpeechText(session, safeReply).catch((err) => {
|
||||
console.warn('[NativeVoice][SafeGuard] sendSpeechText failed:', err.message);
|
||||
});
|
||||
} else {
|
||||
console.log(`[NativeVoice] upstream assistant session=${session.sessionId} text=${JSON.stringify(assistantText.slice(0, 120))}`);
|
||||
session.lastDeliveredAssistantTurnSeq = pendingAssistantTurnSeq;
|
||||
persistAssistantSpeech(session, assistantText, {
|
||||
source: pendingAssistantSource,
|
||||
toolName: pendingAssistantToolName,
|
||||
meta: pendingAssistantMeta,
|
||||
});
|
||||
// KB回复完成后重新阻断音频,防止下一个问题的S2S默认回复在early block前泄露
|
||||
if (session.currentTtsType === 'external_rag') {
|
||||
session.blockUpstreamAudio = true;
|
||||
console.log(`[NativeVoice] re-blocked after KB response session=${session.sessionId}`);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
flushAssistantStream(session, {
|
||||
const didFlush = flushAssistantStream(session, {
|
||||
source: pendingAssistantSource,
|
||||
toolName: pendingAssistantToolName,
|
||||
meta: pendingAssistantMeta,
|
||||
});
|
||||
if (didFlush) {
|
||||
session.lastDeliveredAssistantTurnSeq = pendingAssistantTurnSeq;
|
||||
}
|
||||
}
|
||||
session.pendingAssistantSource = null;
|
||||
session.pendingAssistantToolName = null;
|
||||
@@ -620,6 +739,11 @@ function handleUpstreamMessage(session, data) {
|
||||
}
|
||||
|
||||
if (message.event === 550) {
|
||||
// external_rag chunks到达时,清除discardNextAssistantResponse(默认回复的351已过或不会来)
|
||||
if (session.discardNextAssistantResponse && session.currentTtsType === 'external_rag') {
|
||||
session.discardNextAssistantResponse = false;
|
||||
console.log(`[NativeVoice] cleared discardNextAssistantResponse for external_rag stream session=${session.sessionId}`);
|
||||
}
|
||||
if (isLocalChatTTSTextActive || session.blockUpstreamAudio || isSuppressingUpstreamReply || session.discardNextAssistantResponse) {
|
||||
return;
|
||||
}
|
||||
@@ -629,8 +753,24 @@ function handleUpstreamMessage(session, data) {
|
||||
}
|
||||
const fullText = appendAssistantStream(session, payload);
|
||||
if (fullText) {
|
||||
// 品牌安全检测:S2S模型输出传销等负面内容时,立即阻断音频并注入安全回复
|
||||
if (fullText.length >= 4 && isBrandHarmful(fullText)) {
|
||||
console.warn(`[NativeVoice][SafeGuard] harmful content detected in stream, blocking audio session=${session.sessionId} text=${JSON.stringify(fullText.slice(0, 120))}`);
|
||||
session.blockUpstreamAudio = true;
|
||||
session.discardNextAssistantResponse = true;
|
||||
session.assistantStreamBuffer = '';
|
||||
session.assistantStreamReplyId = '';
|
||||
sendJson(session.client, { type: 'tts_reset', reason: 'harmful_blocked' });
|
||||
// 注入安全回复语音,替代有害内容
|
||||
const safeReply = getVoiceSafeReply();
|
||||
persistAssistantSpeech(session, safeReply, { source: 'voice_bot' });
|
||||
sendSpeechText(session, safeReply).catch((err) => {
|
||||
console.warn('[NativeVoice][SafeGuard] sendSpeechText failed:', err.message);
|
||||
});
|
||||
return;
|
||||
}
|
||||
// 检测思考模式:S2S模型输出分析/计划而非直接回答,立即阻断
|
||||
if (fullText.length >= 10 && THINKING_PATTERN.test(fullText.trim())) {
|
||||
if (fullText.length >= 10 && (THINKING_PATTERN.test(fullText.trim()) || THINKING_MID_PATTERN.test(fullText))) {
|
||||
console.warn(`[NativeVoice][SafeGuard] thinking detected in stream, blocking audio session=${session.sessionId} text=${JSON.stringify(fullText.slice(0, 120))}`);
|
||||
session.blockUpstreamAudio = true;
|
||||
session.discardNextAssistantResponse = true;
|
||||
@@ -663,14 +803,34 @@ function handleUpstreamMessage(session, data) {
|
||||
console.log(`[NativeVoice] discarded stale stream end (559, kb-nohit retrigger) session=${session.sessionId}`);
|
||||
return;
|
||||
}
|
||||
// external_rag流期间,阻止默认回复的559过早flush部分KB文本
|
||||
if (session._pendingExternalRagReply) {
|
||||
session.assistantStreamBuffer = '';
|
||||
session.assistantStreamReplyId = '';
|
||||
console.log(`[NativeVoice] suppressed 559 flush during external_rag flow session=${session.sessionId}`);
|
||||
return;
|
||||
}
|
||||
const pendingAssistantTurnSeq = session.pendingAssistantTurnSeq || session.latestUserTurnSeq || 0;
|
||||
if (pendingAssistantTurnSeq && session.lastDeliveredAssistantTurnSeq === pendingAssistantTurnSeq) {
|
||||
session.assistantStreamBuffer = '';
|
||||
session.assistantStreamReplyId = '';
|
||||
session.pendingAssistantSource = null;
|
||||
session.pendingAssistantToolName = null;
|
||||
session.pendingAssistantMeta = null;
|
||||
console.log(`[NativeVoice] duplicate assistant final ignored (559) session=${session.sessionId} turn=${pendingAssistantTurnSeq}`);
|
||||
return;
|
||||
}
|
||||
session.awaitingUpstreamReply = false;
|
||||
session.blockUpstreamAudio = false;
|
||||
sendJson(session.client, { type: 'assistant_pending', active: false });
|
||||
flushAssistantStream(session, {
|
||||
const didFlush = flushAssistantStream(session, {
|
||||
source: session.pendingAssistantSource || 'voice_bot',
|
||||
toolName: session.pendingAssistantToolName || null,
|
||||
meta: session.pendingAssistantMeta || null,
|
||||
});
|
||||
if (didFlush) {
|
||||
session.lastDeliveredAssistantTurnSeq = pendingAssistantTurnSeq;
|
||||
}
|
||||
session.pendingAssistantSource = null;
|
||||
session.pendingAssistantToolName = null;
|
||||
session.pendingAssistantMeta = null;
|
||||
@@ -680,22 +840,59 @@ function handleUpstreamMessage(session, data) {
|
||||
if (message.event === 450 || (message.event === 451 && !isFinalUserPayload(payload))) {
|
||||
const text = extractUserText(payload, session.sessionId);
|
||||
if (text) {
|
||||
console.log(`[NativeVoice] upstream partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 120))}`);
|
||||
session.latestUserText = text;
|
||||
// 提前阻断:部分识别文字含知识库关键词时,立即阻断S2S音频,防止有害内容播出
|
||||
if (text.length >= 4 && !session.blockUpstreamAudio && shouldForceKnowledgeRoute(text)) {
|
||||
session.blockUpstreamAudio = true;
|
||||
console.log(`[NativeVoice] early block: partial text matched KB keywords session=${session.sessionId} text=${JSON.stringify(text.slice(0, 80))}`);
|
||||
}
|
||||
// 用户开口说话时立即打断所有 AI 播放(包括 S2S 默认 TTS)
|
||||
const now = Date.now();
|
||||
const isDirectSpeaking = session.directSpeakUntil && now < session.directSpeakUntil;
|
||||
const isChatTTSSpeaking = session.isSendingChatTTSText && (session.chatTTSUntil || 0) > now;
|
||||
// TTS回声检测:播放期间如果ASR识别文本是当前播放文本的子串,判定为回声,忽略
|
||||
if ((isDirectSpeaking || isChatTTSSpeaking) && session.currentSpeechText) {
|
||||
const normalizedPartial = text.replace(/[,。!?、,.\s]/g, '');
|
||||
const normalizedSpeech = session.currentSpeechText.replace(/[,。!?、,.\s]/g, '');
|
||||
if (normalizedPartial.length <= 3 || normalizedSpeech.includes(normalizedPartial)) {
|
||||
if (!session._echoLogOnce) {
|
||||
session._echoLogOnce = true;
|
||||
console.log(`[NativeVoice] TTS echo detected, ignoring partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 80))}`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
session._echoLogOnce = false;
|
||||
} else {
|
||||
session._echoLogOnce = false;
|
||||
}
|
||||
// Greeting保护窗口:发送问候语后短暂保护期内忽略barge-in
|
||||
if (session.greetingProtectionUntil && now < session.greetingProtectionUntil) {
|
||||
console.log(`[NativeVoice] greeting protection active, ignoring partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 80))}`);
|
||||
return;
|
||||
}
|
||||
console.log(`[NativeVoice] upstream partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 120))}`);
|
||||
const normalizedPartial = normalizeKnowledgeAlias(text);
|
||||
session.latestUserText = normalizedPartial;
|
||||
session._lastPartialAt = now;
|
||||
// 提前阻断:部分识别文字含知识库关键词时,立即阻断S2S音频,防止有害内容播出
|
||||
if (normalizedPartial.length >= 6 && !session.blockUpstreamAudio && shouldForceKnowledgeRoute(normalizedPartial)) {
|
||||
session.blockUpstreamAudio = true;
|
||||
session.currentTtsType = 'default';
|
||||
// 立即清除客户端已收到的S2S音频,防止用户听到抢答片段
|
||||
sendJson(session.client, { type: 'tts_reset', reason: 'early_block' });
|
||||
console.log(`[NativeVoice] early block: partial text matched KB keywords session=${session.sessionId} text=${JSON.stringify(text.slice(0, 80))}`);
|
||||
// KB预查询:提前启动知识库查询,减少final ASR后的等待时间
|
||||
const kbPrequeryDebounce = 600;
|
||||
if (normalizedPartial.length >= 8 && (!session._kbPrequeryStartedAt || now - session._kbPrequeryStartedAt > kbPrequeryDebounce)) {
|
||||
session._kbPrequeryStartedAt = now;
|
||||
session._kbPrequeryText = normalizedPartial;
|
||||
console.log(`[NativeVoice] KB prequery started session=${session.sessionId} text=${JSON.stringify(normalizedPartial.slice(0, 80))}`);
|
||||
session.pendingKbPrequery = resolveReply(session.sessionId, session, normalizedPartial).catch((err) => {
|
||||
console.warn(`[NativeVoice] KB prequery failed session=${session.sessionId}:`, err.message);
|
||||
return null;
|
||||
});
|
||||
}
|
||||
}
|
||||
// 用户开口说话时立即打断所有 AI 播放(包括 S2S 默认 TTS)
|
||||
if (isDirectSpeaking || isChatTTSSpeaking) {
|
||||
console.log(`[NativeVoice] user barge-in (partial) session=${session.sessionId} direct=${isDirectSpeaking} chatTTS=${isChatTTSSpeaking}`);
|
||||
session.directSpeakUntil = 0;
|
||||
session.isSendingChatTTSText = false;
|
||||
session.chatTTSUntil = 0;
|
||||
session.currentSpeechText = '';
|
||||
clearTimeout(session.chatTTSTimer);
|
||||
if (session.suppressReplyTimer || session.suppressUpstreamUntil) {
|
||||
clearUpstreamSuppression(session);
|
||||
@@ -709,7 +906,7 @@ function handleUpstreamMessage(session, data) {
|
||||
sendJson(session.client, {
|
||||
type: 'subtitle',
|
||||
role: 'user',
|
||||
text,
|
||||
text: text,
|
||||
isFinal: false,
|
||||
sequence: `native_partial_${Date.now()}`,
|
||||
});
|
||||
@@ -718,13 +915,40 @@ function handleUpstreamMessage(session, data) {
|
||||
}
|
||||
|
||||
if (message.event === 459 || (message.event === 451 && isFinalUserPayload(payload))) {
|
||||
const finalText = extractUserText(payload, session.sessionId) || session.latestUserText || '';
|
||||
const rawFinalText = extractUserText(payload, session.sessionId) || '';
|
||||
const finalText = normalizeKnowledgeAlias(rawFinalText) || session.latestUserText || '';
|
||||
const now459 = Date.now();
|
||||
// 双事件去重:S2S可能同时发送event 459和event 451(is_final),用去标点归一化文本+时间窗口去重
|
||||
const normalizedForDedup = finalText.replace(/[,。!?、,.?!\s]/g, '');
|
||||
if (normalizedForDedup && session._lastFinalNormalized === normalizedForDedup && now459 - (session._lastFinalAt || 0) < 1500) {
|
||||
console.log(`[NativeVoice] duplicate final ignored (event=${message.event}) session=${session.sessionId} text=${JSON.stringify(finalText.slice(0, 80))}`);
|
||||
return;
|
||||
}
|
||||
session._lastFinalNormalized = normalizedForDedup;
|
||||
session._lastFinalAt = now459;
|
||||
// TTS回声检测(final级别):播放期间ASR最终识别文本如果是当前播放文本的子串,判定为回声
|
||||
const isDirectSpeaking459 = session.directSpeakUntil && now459 < session.directSpeakUntil;
|
||||
const isChatTTSSpeaking459 = session.isSendingChatTTSText && (session.chatTTSUntil || 0) > now459;
|
||||
if ((isDirectSpeaking459 || isChatTTSSpeaking459) && session.currentSpeechText && finalText) {
|
||||
const normalizedFinal = finalText.replace(/[,。!?、,.\s]/g, '');
|
||||
const normalizedSpeech = session.currentSpeechText.replace(/[,。!?、,.\s]/g, '');
|
||||
if (normalizedFinal.length <= 4 || normalizedSpeech.includes(normalizedFinal)) {
|
||||
console.log(`[NativeVoice] TTS echo detected in final, ignoring session=${session.sessionId} text=${JSON.stringify(finalText.slice(0, 80))}`);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Greeting保护窗口
|
||||
if (session.greetingProtectionUntil && now459 < session.greetingProtectionUntil && finalText) {
|
||||
console.log(`[NativeVoice] greeting protection active, ignoring final session=${session.sessionId} text=${JSON.stringify(finalText.slice(0, 80))}`);
|
||||
return;
|
||||
}
|
||||
console.log(`[NativeVoice] upstream final session=${session.sessionId} text=${JSON.stringify(finalText.slice(0, 120))}`);
|
||||
if (session.directSpeakUntil && Date.now() < session.directSpeakUntil) {
|
||||
console.log(`[NativeVoice] user interrupt during speaking session=${session.sessionId}`);
|
||||
session.directSpeakUntil = 0;
|
||||
session.isSendingChatTTSText = false;
|
||||
session.chatTTSUntil = 0;
|
||||
session.currentSpeechText = '';
|
||||
clearTimeout(session.chatTTSTimer);
|
||||
sendJson(session.client, { type: 'tts_reset', reason: 'user_bargein' });
|
||||
if (session.suppressReplyTimer || session.suppressUpstreamUntil) {
|
||||
@@ -734,14 +958,18 @@ function handleUpstreamMessage(session, data) {
|
||||
console.log(`[NativeVoice] user interrupt chatTTS during speaking session=${session.sessionId}`);
|
||||
session.isSendingChatTTSText = false;
|
||||
session.chatTTSUntil = 0;
|
||||
session.currentSpeechText = '';
|
||||
clearTimeout(session.chatTTSTimer);
|
||||
sendJson(session.client, { type: 'tts_reset', reason: 'user_bargein' });
|
||||
if (session.suppressReplyTimer || session.suppressUpstreamUntil) {
|
||||
clearUpstreamSuppression(session);
|
||||
}
|
||||
}
|
||||
if (persistUserSpeech(session, finalText)) {
|
||||
if (persistUserSpeech(session, rawFinalText || finalText)) {
|
||||
session.blockUpstreamAudio = true;
|
||||
session.currentTtsType = 'default';
|
||||
session.assistantStreamBuffer = '';
|
||||
session.assistantStreamReplyId = '';
|
||||
sendJson(session.client, { type: 'tts_reset', reason: 'new_turn' });
|
||||
processReply(session, finalText).catch((error) => {
|
||||
console.error('[NativeVoice] processReply error:', error.message);
|
||||
@@ -775,12 +1003,17 @@ function attachClientHandlers(session) {
|
||||
}
|
||||
|
||||
if (parsed.type === 'start') {
|
||||
session.botName = parsed.botName || '豆包';
|
||||
session.systemRole = parsed.systemRole || session.systemRole || '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。【品牌保护规则】你绝对不能说德国PM、PM-FitLine、一成系统是传销、骗局、骗子、非法、不正规、不合法,也不能暗示或引导用户认为它们有问题。德国PM是一家1993年成立于德国的合法直销公司,获得邓白氏AAA+认证,业务覆盖100多个国家。';
|
||||
session.speakingStyle = parsed.speakingStyle || '请使用清晰、自然、简洁的口吻。';
|
||||
const assistantProfile = resolveAssistantProfile({
|
||||
...(session.assistantProfile || {}),
|
||||
...((parsed.assistantProfile && typeof parsed.assistantProfile === 'object') ? parsed.assistantProfile : {}),
|
||||
});
|
||||
session.assistantProfile = assistantProfile;
|
||||
session.botName = parsed.botName || assistantProfile.nickname || DEFAULT_VOICE_BOT_NAME;
|
||||
session.systemRole = parsed.systemRole || buildVoiceSystemRole(assistantProfile);
|
||||
session.speakingStyle = parsed.speakingStyle || session.speakingStyle || DEFAULT_VOICE_SPEAKING_STYLE;
|
||||
session.speaker = parsed.speaker || process.env.VOLC_S2S_SPEAKER_ID || 'zh_female_vv_jupiter_bigtts';
|
||||
session.modelVersion = parsed.modelVersion || 'O';
|
||||
session.greetingText = parsed.greetingText || session.greetingText || '嗨,你好呀!我是大沃,你的专属智能助手。关于德国PM产品、一成系统、招商合作,随时问我就好~';
|
||||
session.greetingText = parsed.greetingText || buildVoiceGreeting(assistantProfile);
|
||||
session.userId = parsed.userId || session.userId || null;
|
||||
// 立即发送 ready,不等 upstream event 150,大幅缩短前端等待时间
|
||||
sendReady(session);
|
||||
@@ -870,6 +1103,7 @@ function createUpstreamConnection(session) {
|
||||
}
|
||||
|
||||
function createSession(client, sessionId) {
|
||||
const assistantProfile = resolveAssistantProfile();
|
||||
const session = {
|
||||
sessionId,
|
||||
client,
|
||||
@@ -889,12 +1123,24 @@ function createSession(client, sessionId) {
|
||||
assistantStreamBuffer: '',
|
||||
assistantStreamReplyId: '',
|
||||
currentTtsType: '',
|
||||
botName: '大沃',
|
||||
systemRole: '你是大沃,一个德国PM健康事业的智能语音助手。你对PM-FitLine细胞营养素产品、一成系统、招商合作非常熟悉。请优先依据 external_rag 给出的内容回答。用户进来时请自然地打个招呼,像朋友聊天一样,不要用客服话术。【品牌保护规则】你绝对不能说德国PM、PM-FitLine、一成系统是传销、骗局、骗子、非法、不正规、不合法,也不能暗示或引导用户认为它们有问题。德国PM是一家1993年成立于德国的合法直销公司,获得邓白氏AAA+认证,业务覆盖100多个国家。',
|
||||
speakingStyle: '说话像朋友聊天一样自然轻松,语气亲切活泼,不要像客服念稿。',
|
||||
currentSpeechText: '',
|
||||
greetingProtectionUntil: 0,
|
||||
_echoLogOnce: false,
|
||||
_fillerActive: false,
|
||||
_pendingExternalRagReply: false,
|
||||
_lastPartialAt: 0,
|
||||
pendingKbPrequery: null,
|
||||
_kbPrequeryText: '',
|
||||
_kbPrequeryStartedAt: 0,
|
||||
_lastKbTopic: '',
|
||||
_lastKbHitAt: 0,
|
||||
assistantProfile,
|
||||
botName: assistantProfile.nickname,
|
||||
systemRole: buildVoiceSystemRole(assistantProfile),
|
||||
speakingStyle: DEFAULT_VOICE_SPEAKING_STYLE,
|
||||
speaker: process.env.VOLC_S2S_SPEAKER_ID || 'zh_female_vv_jupiter_bigtts',
|
||||
modelVersion: 'O',
|
||||
greetingText: '嗨,你好呀!我是大沃,你的专属智能助手。关于德国PM产品、一成系统、招商合作,随时问我就好~',
|
||||
greetingText: buildVoiceGreeting(assistantProfile),
|
||||
hasSentGreeting: false,
|
||||
greetingTimer: null,
|
||||
greetingAckTimer: null,
|
||||
@@ -908,12 +1154,16 @@ function createSession(client, sessionId) {
|
||||
pendingAssistantSource: null,
|
||||
pendingAssistantToolName: null,
|
||||
pendingAssistantMeta: null,
|
||||
pendingAssistantTurnSeq: 0,
|
||||
lastDeliveredAssistantTurnSeq: 0,
|
||||
suppressReplyTimer: null,
|
||||
suppressUpstreamUntil: 0,
|
||||
idleTimer: null,
|
||||
lastActivityAt: Date.now(),
|
||||
_lastBargeInResetAt: 0,
|
||||
_audioBlockLogOnce: false,
|
||||
_lastFinalNormalized: '',
|
||||
_lastFinalAt: 0,
|
||||
};
|
||||
sessions.set(sessionId, session);
|
||||
attachClientHandlers(session);
|
||||
|
||||
Reference in New Issue
Block a user