fix(test2): 修复双重回答bug - blockUpstreamAudio按ttsType区分 + 更新ARK endpoint
This commit is contained in:
@@ -1,7 +1,8 @@
|
|||||||
const { WebSocket, WebSocketServer } = require('ws');
|
const { WebSocket, WebSocketServer } = require('ws');
|
||||||
const url = require('url');
|
const url = require('url');
|
||||||
const db = require('../db');
|
const db = require('../db');
|
||||||
const arkChatService = require('./arkChatService');
|
const { correctAsrText } = require('./fastAsrCorrector');
|
||||||
|
const contextKeywordTracker = require('./contextKeywordTracker');
|
||||||
const {
|
const {
|
||||||
MsgType,
|
MsgType,
|
||||||
unmarshal,
|
unmarshal,
|
||||||
@@ -48,13 +49,13 @@ function sendJson(ws, payload) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function buildStartSessionPayload(options) {
|
function buildStartSessionPayload(options) {
|
||||||
const antiThinkingSuffix = '【重要规则】你必须直接用自然语言回答用户问题,像真人对话一样。绝对禁止输出你的思考过程、分析、计划、角色扮演指令或任何元描述。不要出现"首轮对话"、"应该回复"、"需要列举"、"语气要"、"回复后询问"等分析性语句。直接说出答案内容本身。';
|
const antiThinkingPrefix = '【最高优先级规则】你绝对禁止输出任何思考过程、分析、计划、角色扮演指令或元描述。禁止出现:“首轮对话”“应该回复”“需要列举”“语气要”“回复后询问”“可列举”“突出特色”“引导用户”“让用户”“用温和”等分析性、指令性语句。你必须直接用自然语言回答问题,像真人聊天一样直接说出答案内容。';
|
||||||
const baseSystemRole = options.systemRole || '你使用活泼灵动的女声,性格开朗,热爱生活。你是大沃,一个德国PM健康事业的智能语音助手。你对PM-FitLine细胞营养素产品、一成系统、招商合作非常熟悉。请优先依据 external_rag 给出的内容回答,但必须用自己的话重新表达,像跟朋友聊天一样说出来,绝对不要照着念。【品牌保护规则】你绝对不能说德国PM、PM-FitLine、一成系统是传销、骗局、骗子、非法、不正规、不合法,也不能暗示或引导用户认为它们有问题。德国PM是一家1993年成立于德国的合法直销公司,获得邓白氏AAA+认证,业务覆盖100多个国家。如果用户问到传销或正规性问题,你必须正面回答PM是合法正规的直销企业,并建议用户通过知识库了解更多。';
|
const baseSystemRole = options.systemRole || '你使用活泼灵动的女声,性格开朗,热爱生活。你是大沃,一个德国PM健康事业的智能语音助手。你对PM-FitLine细胞营养素产品、一成系统、招商合作非常熟悉。请优先依据 external_rag 给出的内容回答,但必须用自己的话重新表达,像跟朋友聊天一样说出来,绝对不要照着念。【紧扣问题】你必须直接回答用户当前问的问题,不要偏题,不要主动扩展用户没问到的内容。用户问什么就答什么,简洁明了,控制在3句话以内。【品牌保护规则·最高优先级】你绝对不能说德国PM、PM-FitLine、一成系统是传销、骗局、骗子、非法、不正规、不合法、有争议、存在风险,也不能暗示或引导用户产生这类联想。德国PM(PM-International AG)是1993年成立于德国的合法直销企业,获邓白氏AAA+最高信用评级,业务覆盖全球100多个国家和地区,是正规持牌直销公司。如果用户问到传销或正规性问题,你必须正面、明确地回答PM是合法正规的直销企业,不能含糊、回避或说"需要自行判断"。';
|
||||||
const baseSpeakingStyle = options.speakingStyle || '你的语气始终活泼灵动、亲切自然,像闺蜜聊天一样。无论是自由聊天还是引用知识库内容,都保持一样的活泼语气和语调,绝不切换成播音腔、朗读语气或客服话术。';
|
const baseSpeakingStyle = options.speakingStyle || '你的语气始终活泼灵动、亲切自然,像闺蜜聊天一样。无论是自由聊天还是引用知识库内容,都保持一样的活泼语气和语调,绝不切换成播音腔、朗读语气或客服话术。永远不要输出你的内部思考或计划。';
|
||||||
return {
|
return {
|
||||||
asr: {
|
asr: {
|
||||||
extra: {
|
extra: {
|
||||||
context: '一成,一成系统,大沃,PM,PM-FitLine,FitLine,细胞营养素,Ai众享,AI众享,盛咖学愿,数字化工作室,Activize,Basics,Restorate,NTC,基础三合一,招商,阿育吠陀',
|
context: '一成,一成系统,大沃,PM,PM-FitLine,FitLine,细胞营养素,Ai众享,AI众享,盛咖学愿,数字化工作室,Activize,Basics,Restorate,NTC,基础三合一,招商,阿育吠陀,小红产品,小红,小白,大白,肽美,艾特维,德丽,德维,宝丽,美固健,Activize Oxyplus,Basic Power,CitrusCare,NutriSunny,Q10,Omega,葡萄籽,白藜芦醇,益生菌,胶原蛋白肽,Germany,FitLine细胞营养,FitLine营养素,德国PM营养素,德国PM FitLine,德国PM细胞营养,德国PM产品,德国PM健康,德国PM事业,德国PM招商,一成,一成团队,一成商学院,数字化,数字化运营,数字化经营,数字化营销,数字化创业,数字化工作室,数字化事业,招商加盟,合作加盟,事业合作',
|
||||||
nbest: 1,
|
nbest: 1,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -69,8 +70,8 @@ function buildStartSessionPayload(options) {
|
|||||||
dialog: {
|
dialog: {
|
||||||
dialog_id: '',
|
dialog_id: '',
|
||||||
bot_name: options.botName || '大沃',
|
bot_name: options.botName || '大沃',
|
||||||
system_role: normalizeTextForSpeech(`${baseSystemRole} ${antiThinkingSuffix}`),
|
system_role: normalizeTextForSpeech(`${antiThinkingPrefix} ${baseSystemRole}`),
|
||||||
speaking_style: normalizeTextForSpeech(`${baseSpeakingStyle} 永远不要输出你的内部思考或计划,直接说出回答内容。`),
|
speaking_style: normalizeTextForSpeech(baseSpeakingStyle),
|
||||||
extra: {
|
extra: {
|
||||||
input_mod: 'audio',
|
input_mod: 'audio',
|
||||||
model: options.modelVersion || 'SC2.0',
|
model: options.modelVersion || 'SC2.0',
|
||||||
@@ -89,24 +90,36 @@ function parseJsonPayload(message) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Extract the user's utterance from an upstream payload and clean it up.
 *
 * The first truthy value among `text`, `content`, `results[0].text` and
 * `results[0].alternatives[0].text` is used. It is stringified, trimmed, passed
 * through `correctAsrText`, then through `normalizeKnowledgeAlias`.
 *
 * @param {object} jsonPayload - Parsed upstream payload (may be null/undefined).
 * @param {?string} [sessionId=null] - When truthy, the cleaned text is also
 *   reported to `contextKeywordTracker.updateSession` for this session.
 * @returns {string} The cleaned text as returned by `normalizeKnowledgeAlias`.
 */
function extractUserText(jsonPayload, sessionId = null) {
  const candidates = [
    jsonPayload?.text,
    jsonPayload?.content,
    jsonPayload?.results?.[0]?.text,
    jsonPayload?.results?.[0]?.alternatives?.[0]?.text,
  ];
  // First truthy candidate wins; fall back to an empty string when none is set.
  const rawText = candidates.find((candidate) => candidate) || '';
  const trimmedText = String(rawText).trim();
  const cleanedText = normalizeKnowledgeAlias(correctAsrText(trimmedText));

  if (sessionId) {
    contextKeywordTracker.updateSession(sessionId, cleanedText);
  }
  return cleanedText;
}
|
||||||
|
|
||||||
// Phrases that must never reach the user in assistant output; a match anywhere in the text triggers the canned reply.
const BRAND_HARMFUL_PATTERN = /传销|骗局|骗子公司|骗子|非法集资|非法经营|非法营销|不正规|不合法|庞氏骗局|老鼠会|拉人头|割韭菜|资金盘|涉嫌违法|涉嫌传销|疑似传销|层级分销|PM.*(?:是|属于|涉嫌|疑似).*(?:传销|骗局|非法|不合法|不正规)|(?:传销|骗局|非法|不合法|不正规).*(?:组织|公司|企业|模式)/;
// Canned reply substituted for any assistant text that matches BRAND_HARMFUL_PATTERN.
const BRAND_SAFE_REPLY = '德国PM是一家1993年成立于德国的合法直销公司,获得邓白氏AAA+认证,业务覆盖100多个国家和地区。如果你想了解更多,可以问我关于PM公司的详细介绍哦。';

// Openers that indicate the model is emitting planning/analysis instead of an answer (anchored at the start of the text).
const THINKING_PATTERN = /^(首轮对话|用户想|用户问|应该回复|需要列举|可列举|突出特色|引导进一步|引导用户|让用户|回复后询问|语气要|用温和|需热情|需简洁|需专业)/;

/**
 * Screen assistant text before it is used.
 *
 * @param {string} text - Assistant output to check.
 * @returns {?string} The input unchanged when it is falsy or passes both checks;
 *   BRAND_SAFE_REPLY when it matches BRAND_HARMFUL_PATTERN; `null` when its
 *   trimmed form starts with a THINKING_PATTERN opener.
 */
function sanitizeAssistantText(text) {
  if (!text) {
    return text;
  }

  // Only the first 200 characters are logged.
  const preview = () => JSON.stringify(text.slice(0, 200));

  // The brand check runs first, so harmful text gets the canned reply even if it also looks like planning output.
  if (BRAND_HARMFUL_PATTERN.test(text)) {
    console.warn(`[NativeVoice][SafeGuard] blocked harmful content: ${preview()}`);
    return BRAND_SAFE_REPLY;
  }

  const looksLikeThinking = THINKING_PATTERN.test(text.trim());
  if (!looksLikeThinking) {
    return text;
  }

  console.warn(`[NativeVoice][SafeGuard] blocked thinking output: ${preview()}`);
  return null;
}
|
||||||
|
|
||||||
@@ -193,13 +206,13 @@ function flushAssistantStream(session, { source = 'voice_bot', toolName = null,
|
|||||||
|
|
||||||
async function loadHandoffSummaryForVoice(session) {
|
async function loadHandoffSummaryForVoice(session) {
|
||||||
try {
|
try {
|
||||||
const history = await db.getHistoryForLLM(session.sessionId, 20);
|
const history = await db.getHistoryForLLM(session.sessionId, 10);
|
||||||
if (!history.length) {
|
if (!history.length) {
|
||||||
session.handoffSummary = '';
|
session.handoffSummary = '';
|
||||||
session.handoffSummaryUsed = false;
|
session.handoffSummaryUsed = false;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
session.handoffSummary = await arkChatService.summarizeContextForHandoff(history, 3);
|
session.handoffSummary = buildDeterministicHandoffSummary(history);
|
||||||
session.handoffSummaryUsed = false;
|
session.handoffSummaryUsed = false;
|
||||||
console.log(`[NativeVoice] Handoff summary prepared for ${session.sessionId}: ${session.handoffSummary ? 'yes' : 'no'}`);
|
console.log(`[NativeVoice] Handoff summary prepared for ${session.sessionId}: ${session.handoffSummary ? 'yes' : 'no'}`);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -209,6 +222,36 @@ async function loadHandoffSummaryForVoice(session) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Cut `text` to at most `maxUnits` UTF-16 code units without ending on a
 * dangling high surrogate (which would otherwise show up as a broken character).
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxUnits - Maximum length in UTF-16 code units (must be >= 1).
 * @returns {string} `text` itself when short enough, otherwise a prefix of it.
 */
function truncateWithoutSplittingSurrogate(text, maxUnits) {
  if (text.length <= maxUnits) {
    return text;
  }
  const lastKeptUnit = text.charCodeAt(maxUnits - 1);
  const endsOnHighSurrogate = lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff;
  // Drop the orphaned first half of a pair rather than exceed the budget.
  return text.slice(0, endsOnHighSurrogate ? maxUnits - 1 : maxUnits);
}

/**
 * Build a short handoff summary from recent chat history without calling a model.
 *
 * Only entries whose role is 'user' or 'assistant' and whose content is non-blank
 * are considered, and of those only the last 8. The summary is made of up to
 * three parts joined by ';':
 *   - the latest user message,
 *   - the user message before it (omitted when identical to the latest),
 *   - the last two assistant messages, each cut to 60 UTF-16 code units.
 *
 * @param {Array<{role: string, content: *}>} [messages=[]] - History in the order
 *   given; non-array input is treated as empty.
 * @returns {string} The summary, or '' when no usable message exists.
 */
function buildDeterministicHandoffSummary(messages = []) {
  const maxTurns = 8;
  const maxAssistantFacts = 2;
  const maxFactLength = 60;

  const trimmedContent = (item) => String(item.content || '').trim();

  const recentTurns = (Array.isArray(messages) ? messages : [])
    .filter((item) => item && (item.role === 'user' || item.role === 'assistant') && trimmedContent(item))
    .slice(-maxTurns);
  if (!recentTurns.length) {
    return '';
  }

  const userTexts = recentTurns
    .filter((item) => item.role === 'user')
    .map(trimmedContent);
  const currentQuestion = userTexts[userTexts.length - 1] || '';
  const previousQuestion = userTexts[userTexts.length - 2] || '';

  // Every remaining entry already has non-blank content, so no second emptiness filter is needed.
  const assistantFacts = recentTurns
    .filter((item) => item.role === 'assistant')
    .slice(-maxAssistantFacts)
    .map((item) => truncateWithoutSplittingSurrogate(trimmedContent(item), maxFactLength))
    .join(';');

  const parts = [];
  if (currentQuestion) {
    parts.push(`当前问题:${currentQuestion}`);
  }
  if (previousQuestion && previousQuestion !== currentQuestion) {
    parts.push(`上一轮关注:${previousQuestion}`);
  }
  if (assistantFacts) {
    parts.push(`已给信息:${assistantFacts}`);
  }
  return parts.join(';');
}
|
||||||
|
|
||||||
async function sendSpeechText(session, speechText) {
|
async function sendSpeechText(session, speechText) {
|
||||||
const chunks = splitTextForSpeech(speechText);
|
const chunks = splitTextForSpeech(speechText);
|
||||||
if (!chunks.length || !session.upstream || session.upstream.readyState !== WebSocket.OPEN) {
|
if (!chunks.length || !session.upstream || session.upstream.readyState !== WebSocket.OPEN) {
|
||||||
@@ -380,6 +423,7 @@ async function processReply(session, text) {
|
|||||||
if (!session.blockUpstreamAudio) {
|
if (!session.blockUpstreamAudio) {
|
||||||
session.blockUpstreamAudio = true;
|
session.blockUpstreamAudio = true;
|
||||||
}
|
}
|
||||||
|
session.discardNextAssistantResponse = true;
|
||||||
sendJson(session.client, { type: 'tts_reset', reason: 'knowledge_hit' });
|
sendJson(session.client, { type: 'tts_reset', reason: 'knowledge_hit' });
|
||||||
const kbText = (ragItems || []).map((item) => item?.content || '').filter(Boolean).join('\n').trim();
|
const kbText = (ragItems || []).map((item) => item?.content || '').filter(Boolean).join('\n').trim();
|
||||||
console.log(`[NativeVoice] processReply handoff session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=external_rag→local_tts items=${Array.isArray(ragItems) ? ragItems.length : 0} textLen=${kbText.length}`);
|
console.log(`[NativeVoice] processReply handoff session=${session.sessionId} route=${routeDecision?.route || 'unknown'} delivery=external_rag→local_tts items=${Array.isArray(ragItems) ? ragItems.length : 0} textLen=${kbText.length}`);
|
||||||
@@ -452,11 +496,13 @@ function handleUpstreamMessage(session, data) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (message.type === MsgType.AUDIO_ONLY_SERVER) {
|
if (message.type === MsgType.AUDIO_ONLY_SERVER) {
|
||||||
const isSuppressingUpstreamAudio = (session.suppressUpstreamUntil || 0) > Date.now() && session.currentTtsType === 'default';
|
// blockUpstreamAudio 只阻断 S2S default 音频,不阻断我们注入的 chat_tts_text 音频
|
||||||
if (session.blockUpstreamAudio || isSuppressingUpstreamAudio) {
|
const isDefaultTts = !session.currentTtsType || session.currentTtsType === 'default';
|
||||||
|
const isSuppressingUpstreamAudio = (session.suppressUpstreamUntil || 0) > Date.now() && isDefaultTts;
|
||||||
|
if ((session.blockUpstreamAudio && isDefaultTts) || isSuppressingUpstreamAudio) {
|
||||||
if (!session._audioBlockLogOnce) {
|
if (!session._audioBlockLogOnce) {
|
||||||
session._audioBlockLogOnce = true;
|
session._audioBlockLogOnce = true;
|
||||||
console.log(`[NativeVoice] audio blocked (blockUpstream) session=${session.sessionId} ttsType=${session.currentTtsType}`);
|
console.log(`[NativeVoice] audio blocked session=${session.sessionId} ttsType=${session.currentTtsType} block=${session.blockUpstreamAudio} suppress=${isSuppressingUpstreamAudio}`);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -498,12 +544,10 @@ function handleUpstreamMessage(session, data) {
|
|||||||
clearTimeout(session.greetingAckTimer);
|
clearTimeout(session.greetingAckTimer);
|
||||||
session.greetingAckTimer = null;
|
session.greetingAckTimer = null;
|
||||||
}
|
}
|
||||||
|
// 不再在此处清除 blockUpstreamAudio — 音频处理器已通过 ttsType 区分,
|
||||||
|
// 允许 chat_tts_text 音频通过,同时保持对 S2S default 响应的阻断
|
||||||
if (session.blockUpstreamAudio && payload?.tts_type && payload.tts_type !== 'default') {
|
if (session.blockUpstreamAudio && payload?.tts_type && payload.tts_type !== 'default') {
|
||||||
session.blockUpstreamAudio = false;
|
console.log(`[NativeVoice] non-default tts=${payload.tts_type} started, audio passthrough via ttsType check session=${session.sessionId}`);
|
||||||
session.suppressUpstreamUntil = 0;
|
|
||||||
clearTimeout(session.suppressReplyTimer);
|
|
||||||
session.suppressReplyTimer = null;
|
|
||||||
console.log(`[NativeVoice] unblock audio+suppress on ttsType=${payload.tts_type} session=${session.sessionId}`);
|
|
||||||
}
|
}
|
||||||
console.log(`[NativeVoice] upstream tts_event session=${session.sessionId} ttsType=${payload?.tts_type || ''}`);
|
console.log(`[NativeVoice] upstream tts_event session=${session.sessionId} ttsType=${payload?.tts_type || ''}`);
|
||||||
sendJson(session.client, { type: 'tts_event', payload });
|
sendJson(session.client, { type: 'tts_event', payload });
|
||||||
@@ -564,6 +608,16 @@ function handleUpstreamMessage(session, data) {
|
|||||||
}
|
}
|
||||||
const fullText = appendAssistantStream(session, payload);
|
const fullText = appendAssistantStream(session, payload);
|
||||||
if (fullText) {
|
if (fullText) {
|
||||||
|
// 检测思考模式:S2S模型输出分析/计划而非直接回答,立即阻断
|
||||||
|
if (fullText.length >= 10 && THINKING_PATTERN.test(fullText.trim())) {
|
||||||
|
console.warn(`[NativeVoice][SafeGuard] thinking detected in stream, blocking audio session=${session.sessionId} text=${JSON.stringify(fullText.slice(0, 120))}`);
|
||||||
|
session.blockUpstreamAudio = true;
|
||||||
|
session.discardNextAssistantResponse = true;
|
||||||
|
session.assistantStreamBuffer = '';
|
||||||
|
session.assistantStreamReplyId = '';
|
||||||
|
sendJson(session.client, { type: 'tts_reset', reason: 'thinking_blocked' });
|
||||||
|
return;
|
||||||
|
}
|
||||||
console.log(`[NativeVoice] upstream assistant chunk session=${session.sessionId} len=${fullText.length} text=${JSON.stringify(fullText.slice(0, 120))}`);
|
console.log(`[NativeVoice] upstream assistant chunk session=${session.sessionId} len=${fullText.length} text=${JSON.stringify(fullText.slice(0, 120))}`);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@@ -603,10 +657,15 @@ function handleUpstreamMessage(session, data) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (message.event === 450 || (message.event === 451 && !isFinalUserPayload(payload))) {
|
if (message.event === 450 || (message.event === 451 && !isFinalUserPayload(payload))) {
|
||||||
const text = extractUserText(payload);
|
const text = extractUserText(payload, session.sessionId);
|
||||||
if (text) {
|
if (text) {
|
||||||
console.log(`[NativeVoice] upstream partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 120))}`);
|
console.log(`[NativeVoice] upstream partial session=${session.sessionId} text=${JSON.stringify(text.slice(0, 120))}`);
|
||||||
session.latestUserText = text;
|
session.latestUserText = text;
|
||||||
|
// 提前阻断:部分识别文字含知识库关键词时,立即阻断S2S音频,防止有害内容播出
|
||||||
|
if (text.length >= 4 && !session.blockUpstreamAudio && shouldForceKnowledgeRoute(text)) {
|
||||||
|
session.blockUpstreamAudio = true;
|
||||||
|
console.log(`[NativeVoice] early block: partial text matched KB keywords session=${session.sessionId} text=${JSON.stringify(text.slice(0, 80))}`);
|
||||||
|
}
|
||||||
// 用户开口说话时立即打断所有 AI 播放(包括 S2S 默认 TTS)
|
// 用户开口说话时立即打断所有 AI 播放(包括 S2S 默认 TTS)
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
const isDirectSpeaking = session.directSpeakUntil && now < session.directSpeakUntil;
|
const isDirectSpeaking = session.directSpeakUntil && now < session.directSpeakUntil;
|
||||||
@@ -638,7 +697,7 @@ function handleUpstreamMessage(session, data) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (message.event === 459 || (message.event === 451 && isFinalUserPayload(payload))) {
|
if (message.event === 459 || (message.event === 451 && isFinalUserPayload(payload))) {
|
||||||
const finalText = extractUserText(payload) || session.latestUserText || '';
|
const finalText = extractUserText(payload, session.sessionId) || session.latestUserText || '';
|
||||||
console.log(`[NativeVoice] upstream final session=${session.sessionId} text=${JSON.stringify(finalText.slice(0, 120))}`);
|
console.log(`[NativeVoice] upstream final session=${session.sessionId} text=${JSON.stringify(finalText.slice(0, 120))}`);
|
||||||
if (session.directSpeakUntil && Date.now() < session.directSpeakUntil) {
|
if (session.directSpeakUntil && Date.now() < session.directSpeakUntil) {
|
||||||
console.log(`[NativeVoice] user interrupt during speaking session=${session.sessionId}`);
|
console.log(`[NativeVoice] user interrupt during speaking session=${session.sessionId}`);
|
||||||
|
|||||||
Reference in New Issue
Block a user