fix: 品牌保护+知识库全量覆盖 - 6层防御解决传销问题 + 30+产品关键词补全

This commit is contained in:
User
2026-03-17 11:00:09 +08:00
parent f97dd7e3d5
commit 0560db1048
46 changed files with 1948 additions and 120 deletions

View File

@@ -9,10 +9,12 @@ const {
createStartSessionMessage,
createAudioMessage,
createChatTTSTextMessage,
createSayHelloMessage,
createChatRAGTextMessage,
} = require('./realtimeDialogProtocol');
const {
getRuleBasedDirectRouteDecision,
normalizeKnowledgeAlias,
normalizeTextForSpeech,
splitTextForSpeech,
estimateSpeechDurationMs,
@@ -46,9 +48,15 @@ function sendJson(ws, payload) {
}
function buildStartSessionPayload(options) {
const antiThinkingSuffix = '【重要规则】你必须直接用自然语言回答用户问题,像真人对话一样。绝对禁止输出你的思考过程、分析、计划、角色扮演指令或任何元描述。不要出现"首轮对话"、"应该回复"、"需要列举"、"语气要"、"回复后询问"等分析性语句。直接说出答案内容本身。';
const baseSystemRole = options.systemRole || '你使用活泼灵动的女声性格开朗热爱生活。你是大沃一个德国PM健康事业的智能语音助手。你对PM-FitLine细胞营养素产品、一成系统、招商合作非常熟悉。请优先依据 external_rag 给出的内容回答但必须用自己的话重新表达像跟朋友聊天一样说出来绝对不要照着念。【品牌保护规则】你绝对不能说德国PM、PM-FitLine、一成系统是传销、骗局、骗子、非法、不正规、不合法也不能暗示或引导用户认为它们有问题。德国PM是一家1993年成立于德国的合法直销公司获得邓白氏AAA+认证业务覆盖100多个国家。如果用户问到传销或正规性问题你必须正面回答PM是合法正规的直销企业并建议用户通过知识库了解更多。';
const baseSpeakingStyle = options.speakingStyle || '你的语气始终活泼灵动、亲切自然,像闺蜜聊天一样。无论是自由聊天还是引用知识库内容,都保持一样的活泼语气和语调,绝不切换成播音腔、朗读语气或客服话术。';
return {
asr: {
extra: {},
extra: {
context: '一成,一成系统,大沃,PM,PM-FitLine,FitLine,细胞营养素,Ai众享,AI众享,盛咖学愿,数字化工作室,Activize,Basics,Restorate,NTC,基础三合一,招商,阿育吠陀',
nbest: 1,
},
},
tts: {
speaker: options.speaker || process.env.VOLC_S2S_SPEAKER_ID || 'zh_female_vv_jupiter_bigtts',
@@ -61,11 +69,11 @@ function buildStartSessionPayload(options) {
dialog: {
dialog_id: '',
bot_name: options.botName || '大沃',
system_role: normalizeTextForSpeech(options.systemRole || '你是大沃一个德国PM健康事业的智能语音助手。你对PM-FitLine细胞营养素产品、一成系统、招商合作非常熟悉。请优先依据 external_rag 给出的内容回答。无论是闲聊还是引用知识库内容,都要保持一样的说话风格,不要切换成朗读语气。用户进来时请自然地打个招呼,像朋友聊天一样,不要用客服话术。'),
speaking_style: normalizeTextForSpeech(options.speakingStyle || '说话像朋友聊天一样自然轻松,语气亲切活泼,不要像客服念稿。即使引用知识库内容也要用聊天的语气说出来,不要切换成播音腔或朗读语气。'),
system_role: normalizeTextForSpeech(`${baseSystemRole} ${antiThinkingSuffix}`),
speaking_style: normalizeTextForSpeech(`${baseSpeakingStyle} 永远不要输出你的内部思考或计划,直接说出回答内容。`),
extra: {
input_mod: 'audio',
model: options.modelVersion || 'O',
model: options.modelVersion || 'SC2.0',
strict_audit: false,
audit_response: '抱歉,这个问题我暂时无法回答。',
},
@@ -87,7 +95,19 @@ function extractUserText(jsonPayload) {
|| jsonPayload?.results?.[0]?.text
|| jsonPayload?.results?.[0]?.alternatives?.[0]?.text
|| '';
return String(text || '').trim();
return normalizeKnowledgeAlias(String(text || '').trim());
}
// Phrases that trigger the brand safeguard; a single match anywhere in the
// text causes the whole assistant reply to be replaced.
const BRAND_HARMFUL_PATTERN = /传销|骗局|骗子公司|非法集资|非法经营|不正规|不合法|庞氏骗局|老鼠会|拉人头的|割韭菜/;
// Canned reply substituted for any assistant text that trips the safeguard.
const BRAND_SAFE_REPLY = '德国PM是一家1993年成立于德国的合法直销公司获得邓白氏AAA+认证业务覆盖100多个国家和地区。如果你想了解更多可以问我关于PM公司的详细介绍哦。';

/**
 * Screens assistant text against the brand safeguard pattern.
 *
 * @param {string} text - Assistant text about to be persisted or spoken.
 * @returns {string} `BRAND_SAFE_REPLY` when the text matches
 *   `BRAND_HARMFUL_PATTERN`; otherwise the input unchanged (falsy input is
 *   handed back as-is).
 */
function sanitizeAssistantText(text) {
  const isHarmful = Boolean(text) && BRAND_HARMFUL_PATTERN.test(text);
  if (!isHarmful) {
    return text;
  }
  // Only the first 200 characters of the blocked text go into the log line.
  const preview = JSON.stringify(text.slice(0, 200));
  console.warn(`[NativeVoice][SafeGuard] blocked harmful content: ${preview}`);
  return BRAND_SAFE_REPLY;
}
function isFinalUserPayload(jsonPayload) {
@@ -123,7 +143,7 @@ function persistUserSpeech(session, text) {
}
function persistAssistantSpeech(session, text, { source = 'voice_bot', toolName = null, persistToDb = true, meta = null } = {}) {
const cleanText = (text || '').trim();
const cleanText = sanitizeAssistantText((text || '').trim());
if (!cleanText) return false;
const now = Date.now();
if (session.lastPersistedAssistantText === cleanText && now - (session.lastPersistedAssistantAt || 0) < 5000) {
@@ -250,21 +270,34 @@ async function sendSpeechText(session, speechText) {
persistAssistantSpeech(session, greetingText, { source: 'voice_bot' });
clearTimeout(session.greetingTimer);
clearTimeout(session.readyTimer);
session.greetingTimer = setTimeout(() => {
session.greetingTimer = null;
sendSpeechText(session, greetingText)
.then(() => {
session.readyTimer = setTimeout(() => {
session.readyTimer = null;
sendReady(session);
}, Math.max(1200, Math.min(estimateSpeechDurationMs(greetingText) + 300, 8000)));
})
.catch((error) => {
session.hasSentGreeting = false;
sendReady(session);
console.warn('[NativeVoice] greeting speech failed:', error.message);
});
}, 800);
session.greetingSentAt = Date.now();
try {
session.upstream.send(createSayHelloMessage(session.sessionId, greetingText));
console.log(`[NativeVoice] sendSayHello event=300 session=${session.sessionId}`);
} catch (error) {
session.hasSentGreeting = false;
console.warn('[NativeVoice] SayHello failed:', error.message);
}
sendReady(session);
}
/**
 * Re-sends the session greeting to the upstream socket as a SayHello message.
 *
 * Does nothing when the greeting text is blank, when there is no open
 * upstream socket, or when a greeting was already sent less than 6000 ms ago.
 * A failure while building or sending the message is logged, not rethrown.
 *
 * @param {object} session - Voice session; reads `greetingText`, `upstream`,
 *   `greetingSentAt`, `sessionId` and writes `greetingSentAt`,
 *   `directSpeakUntil`.
 * @returns {Promise<void>}
 */
async function replayGreeting(session) {
  const text = String(session.greetingText || '').trim();
  if (!text) {
    return;
  }
  const { upstream } = session;
  if (!upstream) {
    return;
  }
  if (upstream.readyState !== WebSocket.OPEN) {
    return;
  }

  // Throttle: ignore replay requests that arrive right after a greeting.
  const lastSentAt = session.greetingSentAt;
  if (lastSentAt && Date.now() - lastSentAt < 6000) {
    console.log(`[NativeVoice] replayGreeting skipped (too soon) session=${session.sessionId}`);
    return;
  }

  const preview = JSON.stringify(text.slice(0, 80));
  console.log(`[NativeVoice] replayGreeting session=${session.sessionId} text=${preview}`);

  // Timestamps are recorded before the send attempt, exactly as before.
  session.greetingSentAt = Date.now();
  session.directSpeakUntil = Date.now() + estimateSpeechDurationMs(text) + 800;

  try {
    const helloMessage = createSayHelloMessage(session.sessionId, text);
    upstream.send(helloMessage);
  } catch (error) {
    console.warn('[NativeVoice] replayGreeting SayHello failed:', error.message);
  }
}
async function sendExternalRag(session, items) {
@@ -278,6 +311,31 @@ async function sendExternalRag(session, items) {
session.upstream.send(createChatRAGTextMessage(session.sessionId, JSON.stringify(ragItems)));
}
/**
 * Ends upstream-reply suppression for a session.
 *
 * Cancels the pending suppression timer, resets every suppression-related
 * field on the session, and tells the client that no assistant reply is
 * pending any more.
 *
 * @param {object} session - Voice session whose suppression state is reset.
 */
function clearUpstreamSuppression(session) {
  const { suppressReplyTimer: pendingTimer, client } = session;
  clearTimeout(pendingTimer);

  Object.assign(session, {
    suppressReplyTimer: null,
    suppressUpstreamUntil: 0,
    awaitingUpstreamReply: false,
    pendingAssistantSource: null,
    pendingAssistantToolName: null,
    pendingAssistantMeta: null,
    blockUpstreamAudio: false,
  });

  sendJson(client, { type: 'assistant_pending', active: false });
}
/**
 * Starts (or restarts) suppression of upstream replies for a session.
 *
 * Marks the session as awaiting an upstream reply, records the wall-clock
 * deadline in `session.suppressUpstreamUntil`, and arms a timer that calls
 * `clearUpstreamSuppression` once that deadline has passed. Any previously
 * armed suppression timer is cancelled first.
 *
 * @param {object} session - Voice session; writes `awaitingUpstreamReply`,
 *   `suppressUpstreamUntil` and `suppressReplyTimer`.
 * @param {number} durationMs - Requested suppression window. Values below
 *   1000 ms, or values that are not finite numbers, are treated as 1000 ms.
 */
function suppressUpstreamReply(session, durationMs) {
  const MIN_SUPPRESS_MS = 1000;
  const MIN_TIMER_MS = 300;

  clearTimeout(session.suppressReplyTimer);

  // A NaN/undefined duration used to produce a NaN deadline, which made every
  // "deadline > now" check false and silently disabled suppression.
  const requestedMs = Number(durationMs);
  const windowMs = Math.max(MIN_SUPPRESS_MS, Number.isFinite(requestedMs) ? requestedMs : MIN_SUPPRESS_MS);

  session.awaitingUpstreamReply = true;
  session.suppressUpstreamUntil = Date.now() + windowMs;

  const onTimer = () => {
    session.suppressReplyTimer = null;
    const remainingMs = (session.suppressUpstreamUntil || 0) - Date.now();
    if (remainingMs > 0) {
      // The callback ran while the wall-clock deadline is still ahead (timer
      // fired marginally early, or the clock was adjusted). Re-arm instead of
      // giving up, otherwise the suppression flags would never be cleared.
      session.suppressReplyTimer = setTimeout(onTimer, Math.max(1, remainingMs));
      return;
    }
    clearUpstreamSuppression(session);
  };

  session.suppressReplyTimer = setTimeout(
    onTimer,
    Math.max(MIN_TIMER_MS, session.suppressUpstreamUntil - Date.now()),
  );
}
async function processReply(session, text) {
const cleanText = (text || '').trim();
if (!cleanText) return;
@@ -296,6 +354,8 @@ async function processReply(session, text) {
sendJson(session.client, { type: 'assistant_pending', active: true });
const isKnowledgeCandidate = shouldForceKnowledgeRoute(cleanText);
if (isKnowledgeCandidate) {
session.blockUpstreamAudio = true;
suppressUpstreamReply(session, 30000);
sendJson(session.client, { type: 'tts_reset', reason: 'processing' });
}
console.log(`[NativeVoice] processReply start session=${session.sessionId} text=${JSON.stringify(cleanText.slice(0, 120))} blocked=${session.blockUpstreamAudio} kbCandidate=${isKnowledgeCandidate}`);
@@ -304,6 +364,7 @@ async function processReply(session, text) {
if (delivery === 'upstream_chat') {
if (isKnowledgeCandidate) {
console.log(`[NativeVoice] processReply kb-nohit retrigger session=${session.sessionId}`);
session.discardNextAssistantResponse = true;
await sendExternalRag(session, [{ title: '用户问题', content: cleanText }]);
} else {
session.blockUpstreamAudio = false;
@@ -318,14 +379,20 @@ async function processReply(session, text) {
if (delivery === 'external_rag') {
if (!session.blockUpstreamAudio) {
session.blockUpstreamAudio = true;
sendJson(session.client, { type: 'tts_reset', reason: 'knowledge_hit' });
}
session.awaitingUpstreamReply = true;
session.pendingAssistantSource = source;
session.pendingAssistantToolName = toolName;
session.pendingAssistantMeta = responseMeta;
console.log(`[NativeVoice] processReply handoff session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=external_rag items=${Array.isArray(ragItems) ? ragItems.length : 0}`);
await sendExternalRag(session, ragItems);
sendJson(session.client, { type: 'tts_reset', reason: 'knowledge_hit' });
const kbText = (ragItems || []).map((item) => item?.content || '').filter(Boolean).join('\n').trim();
console.log(`[NativeVoice] processReply handoff session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=external_rag→local_tts items=${Array.isArray(ragItems) ? ragItems.length : 0} textLen=${kbText.length}`);
if (kbText) {
session.directSpeakUntil = Date.now() + estimateSpeechDurationMs(kbText) + 800;
suppressUpstreamReply(session, estimateSpeechDurationMs(kbText) + 1800);
persistAssistantSpeech(session, kbText, { source, toolName, meta: responseMeta });
await sendSpeechText(session, kbText);
} else {
console.log(`[NativeVoice] processReply external_rag empty content, fallback to upstream session=${session.sessionId}`);
session.blockUpstreamAudio = false;
clearUpstreamSuppression(session);
}
return;
}
if (!speechText) {
@@ -334,12 +401,11 @@ async function processReply(session, text) {
session.chatTTSUntil = 0;
return;
}
console.log(`[NativeVoice] processReply resolved session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=local_rag source=${source} tool=${toolName || 'chat'} speechLen=${speechText.length}`);
session.awaitingUpstreamReply = true;
session.pendingAssistantSource = source;
session.pendingAssistantToolName = toolName;
session.pendingAssistantMeta = responseMeta;
await sendExternalRag(session, [{ title: '回复内容', content: speechText }]);
console.log(`[NativeVoice] processReply resolved session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=local_tts source=${source} tool=${toolName || 'chat'} speechLen=${speechText.length}`);
session.directSpeakUntil = Date.now() + estimateSpeechDurationMs(speechText) + 800;
suppressUpstreamReply(session, estimateSpeechDurationMs(speechText) + 1800);
persistAssistantSpeech(session, speechText, { source, toolName, meta: responseMeta });
await sendSpeechText(session, speechText);
} catch (error) {
console.error('[NativeVoice] processReply failed:', error.message);
sendJson(session.client, { type: 'error', error: error.message });
@@ -386,7 +452,8 @@ function handleUpstreamMessage(session, data) {
}
if (message.type === MsgType.AUDIO_ONLY_SERVER) {
if (session.blockUpstreamAudio) {
const isSuppressingUpstreamAudio = (session.suppressUpstreamUntil || 0) > Date.now() && session.currentTtsType === 'default';
if (session.blockUpstreamAudio || isSuppressingUpstreamAudio) {
if (!session._audioBlockLogOnce) {
session._audioBlockLogOnce = true;
console.log(`[NativeVoice] audio blocked (blockUpstream) session=${session.sessionId} ttsType=${session.currentTtsType}`);
@@ -419,6 +486,11 @@ function handleUpstreamMessage(session, data) {
return;
}
if (message.event === 300) {
console.log(`[NativeVoice] SayHello response session=${session.sessionId}`);
return;
}
if (message.event === 350) {
session.currentTtsType = payload?.tts_type || '';
if (payload?.tts_type === 'chat_tts_text' && session.pendingGreetingAck) {
@@ -428,7 +500,10 @@ function handleUpstreamMessage(session, data) {
}
if (session.blockUpstreamAudio && payload?.tts_type && payload.tts_type !== 'default') {
session.blockUpstreamAudio = false;
console.log(`[NativeVoice] unblock audio on ttsType=${payload.tts_type} session=${session.sessionId}`);
session.suppressUpstreamUntil = 0;
clearTimeout(session.suppressReplyTimer);
session.suppressReplyTimer = null;
console.log(`[NativeVoice] unblock audio+suppress on ttsType=${payload.tts_type} session=${session.sessionId}`);
}
console.log(`[NativeVoice] upstream tts_event session=${session.sessionId} ttsType=${payload?.tts_type || ''}`);
sendJson(session.client, { type: 'tts_event', payload });
@@ -436,13 +511,21 @@ function handleUpstreamMessage(session, data) {
}
const isLocalChatTTSTextActive = !!session.isSendingChatTTSText && (session.chatTTSUntil || 0) > Date.now();
const isSuppressingUpstreamReply = (session.suppressUpstreamUntil || 0) > Date.now();
if (message.event === 351) {
if (isLocalChatTTSTextActive || session.blockUpstreamAudio) {
if (isLocalChatTTSTextActive || session.blockUpstreamAudio || isSuppressingUpstreamReply) {
session.assistantStreamBuffer = '';
session.assistantStreamReplyId = '';
return;
}
if (session.discardNextAssistantResponse) {
session.discardNextAssistantResponse = false;
session.assistantStreamBuffer = '';
session.assistantStreamReplyId = '';
console.log(`[NativeVoice] discarded stale assistant response (kb-nohit retrigger) session=${session.sessionId}`);
return;
}
const pendingAssistantSource = session.pendingAssistantSource || 'voice_bot';
const pendingAssistantToolName = session.pendingAssistantToolName || null;
const pendingAssistantMeta = session.pendingAssistantMeta || null;
@@ -472,7 +555,7 @@ function handleUpstreamMessage(session, data) {
}
if (message.event === 550) {
if (isLocalChatTTSTextActive || session.blockUpstreamAudio) {
if (isLocalChatTTSTextActive || session.blockUpstreamAudio || isSuppressingUpstreamReply || session.discardNextAssistantResponse) {
return;
}
if (session.awaitingUpstreamReply) {
@@ -487,7 +570,7 @@ function handleUpstreamMessage(session, data) {
}
if (message.event === 559) {
if (isLocalChatTTSTextActive) {
if (isLocalChatTTSTextActive || isSuppressingUpstreamReply) {
session.assistantStreamBuffer = '';
session.assistantStreamReplyId = '';
return;
@@ -498,6 +581,13 @@ function handleUpstreamMessage(session, data) {
console.log(`[NativeVoice] blocked response ended (559), keeping block session=${session.sessionId}`);
return;
}
if (session.discardNextAssistantResponse) {
session.discardNextAssistantResponse = false;
session.assistantStreamBuffer = '';
session.assistantStreamReplyId = '';
console.log(`[NativeVoice] discarded stale stream end (559, kb-nohit retrigger) session=${session.sessionId}`);
return;
}
session.awaitingUpstreamReply = false;
session.blockUpstreamAudio = false;
sendJson(session.client, { type: 'assistant_pending', active: false });
@@ -517,19 +607,23 @@ function handleUpstreamMessage(session, data) {
if (text) {
console.log(`[NativeVoice] upstream partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 120))}`);
session.latestUserText = text;
// 用户开口说话时立即打断 AI 播放
if (session.directSpeakUntil && Date.now() < session.directSpeakUntil) {
console.log(`[NativeVoice] user barge-in (partial) session=${session.sessionId}`);
// Interrupt all AI playback (including the S2S default TTS) as soon as the user starts speaking
const now = Date.now();
const isDirectSpeaking = session.directSpeakUntil && now < session.directSpeakUntil;
const isChatTTSSpeaking = session.isSendingChatTTSText && (session.chatTTSUntil || 0) > now;
if (isDirectSpeaking || isChatTTSSpeaking) {
console.log(`[NativeVoice] user barge-in (partial) session=${session.sessionId} direct=${isDirectSpeaking} chatTTS=${isChatTTSSpeaking}`);
session.directSpeakUntil = 0;
session.isSendingChatTTSText = false;
session.chatTTSUntil = 0;
clearTimeout(session.chatTTSTimer);
sendJson(session.client, { type: 'tts_reset', reason: 'user_bargein' });
} else if (session.isSendingChatTTSText && (session.chatTTSUntil || 0) > Date.now()) {
console.log(`[NativeVoice] user barge-in chatTTS (partial) session=${session.sessionId}`);
session.isSendingChatTTSText = false;
session.chatTTSUntil = 0;
clearTimeout(session.chatTTSTimer);
if (session.suppressReplyTimer || session.suppressUpstreamUntil) {
clearUpstreamSuppression(session);
}
}
// 无论当前是否在播放,都发送 tts_reset 确保客户端停止所有音频播放
if (!session._lastBargeInResetAt || now - session._lastBargeInResetAt > 500) {
session._lastBargeInResetAt = now;
sendJson(session.client, { type: 'tts_reset', reason: 'user_bargein' });
}
sendJson(session.client, {
@@ -553,15 +647,22 @@ function handleUpstreamMessage(session, data) {
session.chatTTSUntil = 0;
clearTimeout(session.chatTTSTimer);
sendJson(session.client, { type: 'tts_reset', reason: 'user_bargein' });
if (session.suppressReplyTimer || session.suppressUpstreamUntil) {
clearUpstreamSuppression(session);
}
} else if (session.isSendingChatTTSText && (session.chatTTSUntil || 0) > Date.now()) {
console.log(`[NativeVoice] user interrupt chatTTS during speaking session=${session.sessionId}`);
session.isSendingChatTTSText = false;
session.chatTTSUntil = 0;
clearTimeout(session.chatTTSTimer);
sendJson(session.client, { type: 'tts_reset', reason: 'user_bargein' });
if (session.suppressReplyTimer || session.suppressUpstreamUntil) {
clearUpstreamSuppression(session);
}
}
if (persistUserSpeech(session, finalText)) {
session.blockUpstreamAudio = true;
sendJson(session.client, { type: 'tts_reset', reason: 'new_turn' });
processReply(session, finalText).catch((error) => {
console.error('[NativeVoice] processReply error:', error.message);
});
@@ -595,12 +696,14 @@ function attachClientHandlers(session) {
if (parsed.type === 'start') {
session.botName = parsed.botName || '豆包';
session.systemRole = parsed.systemRole || '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。';
session.systemRole = parsed.systemRole || session.systemRole || '你是一个企业知识库语音助手,请优先依据 external_rag 给出的内容回答。【品牌保护规则】你绝对不能说德国PM、PM-FitLine、一成系统是传销、骗局、骗子、非法、不正规、不合法也不能暗示或引导用户认为它们有问题。德国PM是一家1993年成立于德国的合法直销公司获得邓白氏AAA+认证业务覆盖100多个国家。';
session.speakingStyle = parsed.speakingStyle || '请使用清晰、自然、简洁的口吻。';
session.speaker = parsed.speaker || process.env.VOLC_S2S_SPEAKER_ID || 'zh_female_vv_jupiter_bigtts';
session.modelVersion = parsed.modelVersion || 'O';
session.greetingText = parsed.greetingText || session.greetingText || '嗨你好呀我是大沃你的专属智能助手。关于德国PM产品、一成系统、招商合作随时问我就好';
session.userId = parsed.userId || session.userId || null;
// Send ready immediately instead of waiting for upstream event 150, which greatly shortens the front-end wait
sendReady(session);
session.upstream = createUpstreamConnection(session);
loadHandoffSummaryForVoice(session).catch((error) => {
console.warn('[NativeVoice] async loadHandoffSummaryForVoice failed:', error.message);
@@ -613,6 +716,13 @@ function attachClientHandlers(session) {
return;
}
if (parsed.type === 'replay_greeting') {
replayGreeting(session).catch((error) => {
console.warn('[NativeVoice] replayGreeting failed:', error.message);
});
return;
}
if (parsed.type === 'text' && parsed.text) {
persistUserSpeech(session, parsed.text);
processReply(session, parsed.text).catch((error) => {
@@ -626,6 +736,7 @@ function attachClientHandlers(session) {
clearTimeout(session.greetingTimer);
clearTimeout(session.greetingAckTimer);
clearTimeout(session.readyTimer);
clearTimeout(session.suppressReplyTimer);
clearTimeout(session.idleTimer);
if (session.upstream && session.upstream.readyState === WebSocket.OPEN) {
session.upstream.close();
@@ -696,7 +807,7 @@ function createSession(client, sessionId) {
assistantStreamReplyId: '',
currentTtsType: '',
botName: '大沃',
systemRole: '你是大沃一个德国PM健康事业的智能语音助手。你对PM-FitLine细胞营养素产品、一成系统、招商合作非常熟悉。请优先依据 external_rag 给出的内容回答。用户进来时请自然地打个招呼,像朋友聊天一样,不要用客服话术。',
systemRole: '你是大沃一个德国PM健康事业的智能语音助手。你对PM-FitLine细胞营养素产品、一成系统、招商合作非常熟悉。请优先依据 external_rag 给出的内容回答。用户进来时请自然地打个招呼,像朋友聊天一样,不要用客服话术。【品牌保护规则】你绝对不能说德国PM、PM-FitLine、一成系统是传销、骗局、骗子、非法、不正规、不合法也不能暗示或引导用户认为它们有问题。德国PM是一家1993年成立于德国的合法直销公司获得邓白氏AAA+认证业务覆盖100多个国家。',
speakingStyle: '说话像朋友聊天一样自然轻松,语气亲切活泼,不要像客服念稿。',
speaker: process.env.VOLC_S2S_SPEAKER_ID || 'zh_female_vv_jupiter_bigtts',
modelVersion: 'O',
@@ -714,8 +825,12 @@ function createSession(client, sessionId) {
pendingAssistantSource: null,
pendingAssistantToolName: null,
pendingAssistantMeta: null,
suppressReplyTimer: null,
suppressUpstreamUntil: 0,
idleTimer: null,
lastActivityAt: Date.now(),
_lastBargeInResetAt: 0,
_audioBlockLogOnce: false,
};
sessions.set(sessionId, session);
attachClientHandlers(session);