2026-03-13 13:06:46 +08:00
const { WebSocket , WebSocketServer } = require ( 'ws' ) ;
const url = require ( 'url' ) ;
const db = require ( '../db' ) ;
2026-03-18 16:32:27 +08:00
const { correctAsrText } = require ( './fastAsrCorrector' ) ;
const contextKeywordTracker = require ( './contextKeywordTracker' ) ;
2026-03-23 13:58:41 +08:00
const { isBrandHarmful , getVoiceSafeReply , BRAND _HARMFUL _PATTERN , BRAND _POSITIVE _LEGALITY _PATTERN } = require ( './contentSafeGuard' ) ;
2026-03-13 13:06:46 +08:00
const {
MsgType ,
unmarshal ,
createStartConnectionMessage ,
createStartSessionMessage ,
createAudioMessage ,
createChatTTSTextMessage ,
2026-03-17 11:00:09 +08:00
createSayHelloMessage ,
2026-03-13 13:06:46 +08:00
createChatRAGTextMessage ,
} = require ( './realtimeDialogProtocol' ) ;
const {
getRuleBasedDirectRouteDecision ,
2026-03-17 11:00:09 +08:00
normalizeKnowledgeAlias ,
2026-03-13 13:06:46 +08:00
normalizeTextForSpeech ,
splitTextForSpeech ,
estimateSpeechDurationMs ,
2026-03-16 14:43:51 +08:00
shouldForceKnowledgeRoute ,
2026-03-31 09:46:40 +08:00
isPureChitchat ,
2026-03-13 13:06:46 +08:00
resolveReply ,
} = require ( './realtimeDialogRouting' ) ;
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
const ToolExecutor = require ( './toolExecutor' ) ;
2026-03-23 13:58:41 +08:00
const {
DEFAULT _VOICE _ASSISTANT _PROFILE ,
2026-04-03 10:19:16 +08:00
DEFAULT _CONSULTANT _CONTACT ,
2026-03-23 13:58:41 +08:00
resolveAssistantProfile ,
2026-04-03 10:19:16 +08:00
getAssistantDisplayName ,
2026-03-23 13:58:41 +08:00
buildVoiceSystemRole ,
buildVoiceGreeting ,
} = require ( './assistantProfileConfig' ) ;
2026-03-24 17:19:36 +08:00
const { getAssistantProfile } = require ( './assistantProfileService' ) ;
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
const redisClient = require ( './redisClient' ) ;
2026-04-03 10:19:16 +08:00
const { checkProductLinkTrigger } = require ( './productLinkTrigger' ) ;
const { triggerSummarizeIfNeeded , persistFinalSummary } = require ( './conversationSummarizer' ) ;
2026-03-13 13:06:46 +08:00
const sessions = new Map ( ) ;
2026-04-03 10:19:16 +08:00
const CONSULTANT _REFERRAL _PATTERN = /咨询(?:专业|你的)?顾问|健康管理顾问|联系顾问|一对一指导|咨询专业|咨询医生|咨询营养师|咨询专业人士|建议.*咨询|问问医生|问问.*营养师/ ;
2026-03-16 14:43:51 +08:00
const IDLE _TIMEOUT _MS = 5 * 60 * 1000 ;
2026-03-31 09:46:40 +08:00
const AUDIO _KEEPALIVE _INTERVAL _MS = 20 * 1000 ;
// 3200 bytes ≈ 66ms of silence at 24kHz s16le mono (larger frame to ensure S2S acceptance)
const SILENT _AUDIO _FRAME = Buffer . alloc ( 3200 , 0 ) ;
2026-03-16 14:43:51 +08:00
2026-04-03 10:19:16 +08:00
const DEFAULT _VOICE _BOT _NAME = getAssistantDisplayName ( DEFAULT _VOICE _ASSISTANT _PROFILE ) ;
2026-03-23 13:58:41 +08:00
const DEFAULT _VOICE _SYSTEM _ROLE = buildVoiceSystemRole ( ) ;
const DEFAULT _VOICE _SPEAKING _STYLE = '整体语气亲切自然、轻快有温度,像熟悉行业的朋友在语音聊天。优先短句和口语化表达,先给结论,再补一句最有帮助的信息。不要播音腔,不要念稿,不要客服腔,不要过度热情,也不要输出任何思考过程。' ;
const DEFAULT _VOICE _GREETING = buildVoiceGreeting ( ) ;
2026-03-16 14:43:51 +08:00
function resetIdleTimer ( session ) {
clearTimeout ( session . idleTimer ) ;
session . lastActivityAt = Date . now ( ) ;
session . idleTimer = setTimeout ( ( ) => {
session . idleTimer = null ;
console . log ( ` [NativeVoice] idle timeout ( ${ IDLE _TIMEOUT _MS / 1000 } s) session= ${ session . sessionId } ` ) ;
sendJson ( session . client , { type : 'idle_timeout' , timeout : IDLE _TIMEOUT _MS } ) ;
setTimeout ( ( ) => {
if ( session . client && session . client . readyState === WebSocket . OPEN ) {
session . client . close ( ) ;
}
} , 2000 ) ;
} , IDLE _TIMEOUT _MS ) ;
}
2026-03-31 09:46:40 +08:00
function startAudioKeepalive ( session ) {
clearInterval ( session . _audioKeepaliveTimer ) ;
session . _audioKeepaliveTimer = setInterval ( ( ) => {
if ( session . upstream && session . upstream . readyState === WebSocket . OPEN && session . upstreamReady ) {
session . upstream . send ( createAudioMessage ( session . sessionId , SILENT _AUDIO _FRAME ) ) ;
console . log ( ` [NativeVoice] audio keepalive sent session= ${ session . sessionId } ` ) ;
} else {
console . log ( ` [NativeVoice] audio keepalive skipped session= ${ session . sessionId } ready= ${ session . upstreamReady } wsState= ${ session . upstream ? session . upstream . readyState : 'null' } ` ) ;
}
} , AUDIO _KEEPALIVE _INTERVAL _MS ) ;
}
function resetAudioKeepalive ( session ) {
if ( session . _audioKeepaliveTimer ) {
clearInterval ( session . _audioKeepaliveTimer ) ;
startAudioKeepalive ( session ) ;
}
}
2026-03-13 13:06:46 +08:00
function sendJson ( ws , payload ) {
if ( ws && ws . readyState === WebSocket . OPEN ) {
ws . send ( JSON . stringify ( payload ) ) ;
}
}
function buildStartSessionPayload ( options ) {
2026-03-18 16:32:27 +08:00
const antiThinkingPrefix = '【最高优先级规则】你绝对禁止输出任何思考过程、分析、计划、角色扮演指令或元描述。禁止出现:“首轮对话”“应该回复”“需要列举”“语气要”“回复后询问”“可列举”“突出特色”“引导用户”“让用户”“用温和”等分析性、指令性语句。你必须直接用自然语言回答问题,像真人聊天一样直接说出答案内容。' ;
2026-03-23 13:58:41 +08:00
const baseSystemRole = options . systemRole || DEFAULT _VOICE _SYSTEM _ROLE ;
const baseSpeakingStyle = options . speakingStyle || DEFAULT _VOICE _SPEAKING _STYLE ;
2026-03-13 13:06:46 +08:00
return {
asr : {
2026-03-17 11:00:09 +08:00
extra : {
2026-03-31 09:46:40 +08:00
context : '一成,一成系统,大沃,PM,PM-FitLine,FitLine,细胞营养素,Ai众享,AI众享,盛咖学愿,数字化工作室,Activize,Basics,Restorate,NTC,基础三合一,基础套装,招商,阿育吠陀,小红产品,小红,小白,大白,肽美,艾特维,德丽,德维,宝丽,美固健,Activize Oxyplus,Basic Power,CitrusCare,NutriSunny,Q10,Omega,葡萄籽,白藜芦醇,益生菌,胶原蛋白肽,Germany,FitLine细胞营养,FitLine营养素,德国PM营养素,德国PM FitLine,德国PM细胞营养,德国PM产品,德国PM健康,德国PM事业,德国PM招商,一成,一成团队,一成商学院,数字化,数字化运营,数字化经营,数字化营销,数字化创业,数字化工作室,数字化事业,招商加盟,合作加盟,事业合作,活力健,倍力健,氨基酸,乐活,排毒饮,小绿,纤萃,草本茶,发宝,乳酪煲,关节套装,细胞抗氧素,辅酵素,氧修护,CC套装,CC-Cell,Generation 50+,ProShape,D-Drink,IB5,MEN+,儿童倍适,小红精华液,PowerCocktail,PowerCocktail Junior,TopShape,Fitness-Drink,Herbal Tea,Hair+,Med Dental+,Young Care,Zellschutz,Apple Antioxy,Antioxy,BCAA,Women+,小黑,发健,口腔免疫喷雾,乳清蛋白,男士护肤,去角质,面膜,叶黄素,维适多,护理牙膏,火炉原理,暖炉原理,运动饮料,健康饮品,好转反应,整健反应,骨骼健,顾心,舒采健,衡醇饮,小粉C,异黄酮,倍适,眼霜,洁面,爽肤水' ,
boosting _table _id : 'ab4fde15-79b5-47e9-82b6-5125cca39f63' ,
2026-03-17 11:00:09 +08:00
nbest : 1 ,
} ,
2026-03-13 13:06:46 +08:00
} ,
tts : {
2026-03-16 14:43:51 +08:00
speaker : options . speaker || process . env . VOLC _S2S _SPEAKER _ID || 'zh_female_vv_jupiter_bigtts' ,
2026-03-13 13:06:46 +08:00
audio _config : {
channel : 1 ,
format : 'pcm_s16le' ,
sample _rate : 24000 ,
} ,
} ,
dialog : {
dialog _id : '' ,
2026-03-16 14:43:51 +08:00
bot _name : options . botName || '大沃' ,
2026-03-18 16:32:27 +08:00
system _role : normalizeTextForSpeech ( ` ${ antiThinkingPrefix } ${ baseSystemRole } ` ) ,
speaking _style : normalizeTextForSpeech ( baseSpeakingStyle ) ,
2026-03-13 13:06:46 +08:00
extra : {
input _mod : 'audio' ,
2026-03-17 11:00:09 +08:00
model : options . modelVersion || 'SC2.0' ,
2026-03-13 13:06:46 +08:00
strict _audit : false ,
audit _response : '抱歉,这个问题我暂时无法回答。' ,
} ,
} ,
} ;
}
function parseJsonPayload ( message ) {
try {
return JSON . parse ( message . payload . toString ( 'utf8' ) ) ;
} catch ( error ) {
return null ;
}
}
2026-03-23 13:58:41 +08:00
function extractRawText ( jsonPayload ) {
const text = jsonPayload ? . text
2026-03-13 13:06:46 +08:00
|| jsonPayload ? . content
|| jsonPayload ? . results ? . [ 0 ] ? . text
|| jsonPayload ? . results ? . [ 0 ] ? . alternatives ? . [ 0 ] ? . text
|| '' ;
2026-03-23 13:58:41 +08:00
return String ( text || '' ) . trim ( ) ;
}
function extractUserText ( jsonPayload , sessionId = null ) {
let text = extractRawText ( jsonPayload ) ;
2026-03-18 16:32:27 +08:00
text = correctAsrText ( text ) ;
if ( sessionId ) {
2026-03-23 13:58:41 +08:00
contextKeywordTracker . updateSession ( sessionId , normalizeKnowledgeAlias ( text ) ) ;
2026-03-18 16:32:27 +08:00
}
return text ;
2026-03-17 11:00:09 +08:00
}
2026-03-18 16:32:27 +08:00
const THINKING _PATTERN = /^(首轮对话|用户想|用户问|应该回复|需要列举|可列举|突出特色|引导进一步|引导用户|让用户|回复后询问|语气要|用温和|需热情|需简洁|需专业)/ ;
2026-03-23 13:58:41 +08:00
const THINKING _MID _PATTERN = /(?:需客观回复|应说明其|回复后询问|引导.*对话|用.*口吻回复|语气要.*热情|需要.*引导|应该.*回复|先.*再.*最后)/ ;
2026-03-18 16:32:27 +08:00
2026-03-17 11:00:09 +08:00
function sanitizeAssistantText ( text ) {
if ( ! text ) return text ;
2026-03-23 13:58:41 +08:00
if ( isBrandHarmful ( text ) ) {
2026-03-17 11:00:09 +08:00
console . warn ( ` [NativeVoice][SafeGuard] blocked harmful content: ${ JSON . stringify ( text . slice ( 0 , 200 ) ) } ` ) ;
2026-03-23 13:58:41 +08:00
return getVoiceSafeReply ( ) ;
2026-03-17 11:00:09 +08:00
}
2026-03-18 16:32:27 +08:00
if ( THINKING _PATTERN . test ( text . trim ( ) ) ) {
console . warn ( ` [NativeVoice][SafeGuard] blocked thinking output: ${ JSON . stringify ( text . slice ( 0 , 200 ) ) } ` ) ;
return null ;
}
2026-03-17 11:00:09 +08:00
return text ;
2026-03-13 13:06:46 +08:00
}
function isFinalUserPayload ( jsonPayload ) {
if ( jsonPayload ? . is _final === true ) {
return true ;
}
if ( Array . isArray ( jsonPayload ? . results ) ) {
return jsonPayload . results . some ( ( item ) => item && item . is _interim === false ) ;
}
return false ;
}
function persistUserSpeech ( session , text ) {
const cleanText = ( text || '' ) . trim ( ) ;
if ( ! cleanText ) return false ;
const now = Date . now ( ) ;
if ( session . lastPersistedUserText === cleanText && now - ( session . lastPersistedUserAt || 0 ) < 5000 ) {
return false ;
}
session . lastPersistedUserText = cleanText ;
session . lastPersistedUserAt = now ;
session . latestUserText = cleanText ;
2026-03-18 17:43:13 +08:00
session . latestUserTurnSeq = ( session . latestUserTurnSeq || 0 ) + 1 ;
2026-04-03 10:19:16 +08:00
session . _turnCount = ( session . _turnCount || 0 ) + 1 ;
2026-03-16 14:43:51 +08:00
resetIdleTimer ( session ) ;
2026-03-13 13:06:46 +08:00
db . addMessage ( session . sessionId , 'user' , cleanText , 'voice_asr' ) . catch ( ( e ) => console . warn ( '[NativeVoice][DB] add user failed:' , e . message ) ) ;
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
redisClient . pushMessage ( session . sessionId , { role : 'user' , content : cleanText , source : 'voice_asr' } ) . catch ( ( ) => { } ) ;
2026-03-13 13:06:46 +08:00
sendJson ( session . client , {
type : 'subtitle' ,
role : 'user' ,
text : cleanText ,
isFinal : true ,
sequence : ` native_user_ ${ now } ` ,
} ) ;
return true ;
}
function persistAssistantSpeech ( session , text , { source = 'voice_bot' , toolName = null , persistToDb = true , meta = null } = { } ) {
2026-03-17 11:00:09 +08:00
const cleanText = sanitizeAssistantText ( ( text || '' ) . trim ( ) ) ;
2026-03-13 13:06:46 +08:00
if ( ! cleanText ) return false ;
const now = Date . now ( ) ;
if ( session . lastPersistedAssistantText === cleanText && now - ( session . lastPersistedAssistantAt || 0 ) < 5000 ) {
return false ;
}
session . lastPersistedAssistantText = cleanText ;
session . lastPersistedAssistantAt = now ;
2026-03-16 14:43:51 +08:00
resetIdleTimer ( session ) ;
2026-03-13 13:06:46 +08:00
if ( persistToDb ) {
db . addMessage ( session . sessionId , 'assistant' , cleanText , source , toolName , meta ) . catch ( ( e ) => console . warn ( '[NativeVoice][DB] add assistant failed:' , e . message ) ) ;
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
redisClient . pushMessage ( session . sessionId , { role : 'assistant' , content : cleanText , source } ) . catch ( ( ) => { } ) ;
2026-03-13 13:06:46 +08:00
}
sendJson ( session . client , {
type : 'subtitle' ,
role : 'assistant' ,
text : cleanText ,
isFinal : true ,
source ,
toolName ,
sequence : ` native_assistant_ ${ now } ` ,
} ) ;
2026-04-03 10:19:16 +08:00
// 异步触发摘要检查( 每N轮)
if ( persistToDb ) {
triggerSummarizeIfNeeded ( session , session . sessionId ) ;
}
if ( CONSULTANT _REFERRAL _PATTERN . test ( cleanText ) ) {
const contact = session . consultantContact || DEFAULT _CONSULTANT _CONTACT ;
if ( contact . mobile || contact . wx _qr _code || contact . wechat _id ) {
console . log ( ` [NativeVoice] consultant referral detected session= ${ session . sessionId } text= ${ JSON . stringify ( cleanText . slice ( 0 , 80 ) ) } ` ) ;
sendJson ( session . client , {
type : 'consultant_contact' ,
name : contact . name || '大沃专业健康管理顾问' ,
mobile : contact . mobile || '' ,
wx _qr _code : contact . wx _qr _code || '' ,
wechat _id : contact . wechat _id || '' ,
message : '如需个性化健康建议,可联系大沃专业健康管理顾问' ,
} ) ;
}
}
2026-03-13 13:06:46 +08:00
return true ;
}
function appendAssistantStream ( session , payload ) {
2026-03-23 13:58:41 +08:00
const chunkText = extractRawText ( payload ) ;
2026-03-13 13:06:46 +08:00
if ( ! chunkText ) {
return '' ;
}
const replyId = payload ? . reply _id || '' ;
if ( replyId && session . assistantStreamReplyId && session . assistantStreamReplyId !== replyId ) {
session . assistantStreamBuffer = '' ;
}
session . assistantStreamReplyId = replyId || session . assistantStreamReplyId || '' ;
session . assistantStreamBuffer = ` ${ session . assistantStreamBuffer || '' } ${ chunkText } ` ;
return session . assistantStreamBuffer ;
}
function flushAssistantStream ( session , { source = 'voice_bot' , toolName = null , meta = null } = { } ) {
const fullText = ( session . assistantStreamBuffer || '' ) . trim ( ) ;
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
if ( ! fullText ) {
return false ;
}
return persistAssistantSpeech ( session , fullText , { source , toolName , meta } ) ;
}
2026-04-03 10:19:16 +08:00
2026-03-13 13:06:46 +08:00
async function loadHandoffSummaryForVoice ( session ) {
try {
2026-03-18 16:32:27 +08:00
const history = await db . getHistoryForLLM ( session . sessionId , 10 ) ;
2026-03-13 13:06:46 +08:00
if ( ! history . length ) {
session . handoffSummary = '' ;
session . handoffSummaryUsed = false ;
return ;
}
2026-03-18 16:32:27 +08:00
session . handoffSummary = buildDeterministicHandoffSummary ( history ) ;
2026-03-13 13:06:46 +08:00
session . handoffSummaryUsed = false ;
console . log ( ` [NativeVoice] Handoff summary prepared for ${ session . sessionId } : ${ session . handoffSummary ? 'yes' : 'no' } ` ) ;
} catch ( error ) {
session . handoffSummary = '' ;
session . handoffSummaryUsed = false ;
console . warn ( '[NativeVoice] loadHandoffSummaryForVoice failed:' , error . message ) ;
}
}
2026-03-18 16:32:27 +08:00
function buildDeterministicHandoffSummary ( messages = [ ] ) {
const normalizedMessages = ( Array . isArray ( messages ) ? messages : [ ] )
. filter ( ( item ) => item && ( item . role === 'user' || item . role === 'assistant' ) && String ( item . content || '' ) . trim ( ) )
. slice ( - 8 ) ;
if ( ! normalizedMessages . length ) {
return '' ;
}
const userMessages = normalizedMessages . filter ( ( item ) => item . role === 'user' ) ;
const currentQuestion = String ( userMessages [ userMessages . length - 1 ] ? . content || '' ) . trim ( ) ;
const previousQuestion = String ( userMessages [ userMessages . length - 2 ] ? . content || '' ) . trim ( ) ;
const assistantFacts = normalizedMessages
. filter ( ( item ) => item . role === 'assistant' )
. slice ( - 2 )
. map ( ( item ) => String ( item . content || '' ) . trim ( ) )
. filter ( Boolean )
. map ( ( item ) => item . slice ( 0 , 60 ) )
. join ( '; ' ) ;
const parts = [ ] ;
if ( currentQuestion ) {
parts . push ( ` 当前问题: ${ currentQuestion } ` ) ;
}
if ( previousQuestion && previousQuestion !== currentQuestion ) {
parts . push ( ` 上一轮关注: ${ previousQuestion } ` ) ;
}
if ( assistantFacts ) {
parts . push ( ` 已给信息: ${ assistantFacts } ` ) ;
}
return parts . join ( '; ' ) ;
}
2026-03-13 13:06:46 +08:00
async function sendSpeechText ( session , speechText ) {
const chunks = splitTextForSpeech ( speechText ) ;
if ( ! chunks . length || ! session . upstream || session . upstream . readyState !== WebSocket . OPEN ) {
return ;
}
console . log ( ` [NativeVoice] sendSpeechText start session= ${ session . sessionId } chunks= ${ chunks . length } textLen= ${ speechText . length } ` ) ;
2026-03-23 13:58:41 +08:00
session . currentSpeechText = speechText ;
2026-03-13 13:06:46 +08:00
session . isSendingChatTTSText = true ;
session . currentTtsType = 'chat_tts_text' ;
session . chatTTSUntil = Date . now ( ) + estimateSpeechDurationMs ( speechText ) + 800 ;
clearTimeout ( session . chatTTSTimer ) ;
session . chatTTSTimer = setTimeout ( ( ) => {
session . chatTTSTimer = null ;
if ( ( session . chatTTSUntil || 0 ) <= Date . now ( ) ) {
session . isSendingChatTTSText = false ;
}
} , Math . max ( 200 , session . chatTTSUntil - Date . now ( ) + 50 ) ) ;
sendJson ( session . client , { type : 'tts_reset' , ttsType : 'chat_tts_text' } ) ;
for ( let index = 0 ; index < chunks . length ; index += 1 ) {
const chunk = chunks [ index ] ;
console . log ( ` [NativeVoice] sendSpeechText chunk session= ${ session . sessionId } index= ${ index + 1 } / ${ chunks . length } len= ${ chunk . length } start= ${ index === 0 } end=false text= ${ JSON . stringify ( chunk . slice ( 0 , 80 ) ) } ` ) ;
session . upstream . send ( createChatTTSTextMessage ( session . sessionId , {
start : index === 0 ,
end : false ,
content : chunk ,
} ) ) ;
}
console . log ( ` [NativeVoice] sendSpeechText end session= ${ session . sessionId } ` ) ;
session . upstream . send ( createChatTTSTextMessage ( session . sessionId , {
start : false ,
end : true ,
content : '' ,
} ) ) ;
}
2026-03-16 14:43:51 +08:00
function sendReady ( session ) {
if ( session . readySent ) {
return ;
}
session . readySent = true ;
sendJson ( session . client , { type : 'ready' } ) ;
}
function sendGreeting ( session ) {
if ( session . hasSentGreeting ) {
sendReady ( session ) ;
2026-03-13 13:06:46 +08:00
return ;
}
session . hasSentGreeting = true ;
2026-03-23 13:58:41 +08:00
const greetingText = session . greetingText || DEFAULT _VOICE _GREETING ;
2026-03-16 14:43:51 +08:00
console . log ( ` [NativeVoice] sendGreeting session= ${ session . sessionId } text= ${ JSON . stringify ( greetingText . slice ( 0 , 80 ) ) } ` ) ;
persistAssistantSpeech ( session , greetingText , { source : 'voice_bot' } ) ;
2026-03-13 13:06:46 +08:00
clearTimeout ( session . greetingTimer ) ;
2026-03-16 14:43:51 +08:00
clearTimeout ( session . readyTimer ) ;
2026-03-17 11:00:09 +08:00
session . greetingSentAt = Date . now ( ) ;
2026-03-23 13:58:41 +08:00
session . greetingProtectionUntil = Date . now ( ) + 2000 ;
session . currentSpeechText = greetingText ;
2026-03-17 11:00:09 +08:00
try {
session . upstream . send ( createSayHelloMessage ( session . sessionId , greetingText ) ) ;
console . log ( ` [NativeVoice] sendSayHello event=300 session= ${ session . sessionId } ` ) ;
} catch ( error ) {
session . hasSentGreeting = false ;
2026-03-23 13:58:41 +08:00
session . greetingProtectionUntil = 0 ;
2026-03-17 11:00:09 +08:00
console . warn ( '[NativeVoice] SayHello failed:' , error . message ) ;
}
sendReady ( session ) ;
}
async function replayGreeting ( session ) {
const greetingText = String ( session . greetingText || '' ) . trim ( ) ;
if ( ! greetingText || ! session . upstream || session . upstream . readyState !== WebSocket . OPEN ) {
return ;
}
if ( session . greetingSentAt && Date . now ( ) - session . greetingSentAt < 6000 ) {
console . log ( ` [NativeVoice] replayGreeting skipped (too soon) session= ${ session . sessionId } ` ) ;
return ;
}
console . log ( ` [NativeVoice] replayGreeting session= ${ session . sessionId } text= ${ JSON . stringify ( greetingText . slice ( 0 , 80 ) ) } ` ) ;
session . greetingSentAt = Date . now ( ) ;
session . directSpeakUntil = Date . now ( ) + estimateSpeechDurationMs ( greetingText ) + 800 ;
try {
session . upstream . send ( createSayHelloMessage ( session . sessionId , greetingText ) ) ;
} catch ( error ) {
console . warn ( '[NativeVoice] replayGreeting SayHello failed:' , error . message ) ;
}
2026-03-13 13:06:46 +08:00
}
async function sendExternalRag ( session , items ) {
if ( ! session . upstream || session . upstream . readyState !== WebSocket . OPEN ) {
return ;
}
const ragItems = Array . isArray ( items ) ? items . filter ( ( item ) => item && item . content ) : [ ] ;
if ( ! ragItems . length ) {
return ;
}
session . upstream . send ( createChatRAGTextMessage ( session . sessionId , JSON . stringify ( ragItems ) ) ) ;
}
2026-03-17 11:00:09 +08:00
function clearUpstreamSuppression ( session ) {
clearTimeout ( session . suppressReplyTimer ) ;
session . suppressReplyTimer = null ;
session . suppressUpstreamUntil = 0 ;
session . awaitingUpstreamReply = false ;
session . pendingAssistantSource = null ;
session . pendingAssistantToolName = null ;
session . pendingAssistantMeta = null ;
2026-04-03 10:19:16 +08:00
session . _pendingEvidencePack = null ;
2026-03-23 13:58:41 +08:00
session . pendingAssistantTurnSeq = 0 ;
2026-03-17 11:00:09 +08:00
session . blockUpstreamAudio = false ;
sendJson ( session . client , { type : 'assistant_pending' , active : false } ) ;
}
function suppressUpstreamReply ( session , durationMs ) {
clearTimeout ( session . suppressReplyTimer ) ;
session . awaitingUpstreamReply = true ;
session . suppressUpstreamUntil = Date . now ( ) + Math . max ( 1000 , durationMs ) ;
session . suppressReplyTimer = setTimeout ( ( ) => {
session . suppressReplyTimer = null ;
if ( ( session . suppressUpstreamUntil || 0 ) > Date . now ( ) ) {
return ;
}
clearUpstreamSuppression ( session ) ;
} , Math . max ( 300 , session . suppressUpstreamUntil - Date . now ( ) ) ) ;
}
2026-03-18 17:43:13 +08:00
async function processReply ( session , text , turnSeq = session . latestUserTurnSeq || 0 ) {
2026-03-13 13:06:46 +08:00
const cleanText = ( text || '' ) . trim ( ) ;
if ( ! cleanText ) return ;
if ( session . processingReply ) {
session . queuedUserText = cleanText ;
2026-03-18 17:43:13 +08:00
session . queuedUserTurnSeq = turnSeq ;
2026-03-13 13:06:46 +08:00
console . log ( ` [NativeVoice] processReply queued(busy) session= ${ session . sessionId } text= ${ JSON . stringify ( cleanText . slice ( 0 , 80 ) ) } ` ) ;
return ;
}
const now = Date . now ( ) ;
if ( session . directSpeakUntil && now < session . directSpeakUntil ) {
session . queuedUserText = cleanText ;
2026-03-18 17:43:13 +08:00
session . queuedUserTurnSeq = turnSeq ;
2026-03-13 13:06:46 +08:00
console . log ( ` [NativeVoice] processReply queued(speaking) session= ${ session . sessionId } waitMs= ${ session . directSpeakUntil - now } text= ${ JSON . stringify ( cleanText . slice ( 0 , 80 ) ) } ` ) ;
return ;
}
2026-03-18 17:43:13 +08:00
const activeTurnSeq = turnSeq || session . latestUserTurnSeq || 0 ;
2026-03-13 13:06:46 +08:00
session . processingReply = true ;
sendJson ( session . client , { type : 'assistant_pending' , active : true } ) ;
2026-03-31 09:46:40 +08:00
// KB-First: 所有非闲聊查询都视为KB候选, 阻断S2S音频等待KB结果
let isKnowledgeCandidate = ! isPureChitchat ( cleanText ) ;
2026-03-16 14:43:51 +08:00
if ( isKnowledgeCandidate ) {
2026-03-17 11:00:09 +08:00
session . blockUpstreamAudio = true ;
suppressUpstreamReply ( session , 30000 ) ;
2026-03-16 14:43:51 +08:00
sendJson ( session . client , { type : 'tts_reset' , reason : 'processing' } ) ;
2026-03-23 13:58:41 +08:00
// 过渡语已移除: KB查询优化后延迟已降至~2.6s,无需填充
session . _fillerActive = false ;
2026-03-16 14:43:51 +08:00
}
console . log ( ` [NativeVoice] processReply start session= ${ session . sessionId } text= ${ JSON . stringify ( cleanText . slice ( 0 , 120 ) ) } blocked= ${ session . blockUpstreamAudio } kbCandidate= ${ isKnowledgeCandidate } ` ) ;
2026-03-13 13:06:46 +08:00
try {
2026-03-23 13:58:41 +08:00
// KB预查缓存消费: 如果partial阶段已启动预查且文本匹配, 直接使用缓存结果
let resolveResult = null ;
if ( isKnowledgeCandidate && session . pendingKbPrequery && session . _kbPrequeryText ) {
const preText = ( session . _kbPrequeryText || '' ) . replace ( /[啊哦嗯呢呀哎诶额,。!?、,.\s]/g , '' ) ;
const finalText = cleanText . replace ( /[啊哦嗯呢呀哎诶额,。!?、,.\s]/g , '' ) ;
// 放宽相似度:子串包含 或 重叠字符占比>=60% 即视为匹配
let isSimilar = preText && finalText && ( finalText . includes ( preText ) || preText . includes ( finalText ) ) ;
if ( ! isSimilar && preText && finalText ) {
const shorter = preText . length <= finalText . length ? preText : finalText ;
const longer = preText . length <= finalText . length ? finalText : preText ;
let overlap = 0 ;
for ( let i = 0 ; i < shorter . length ; i ++ ) {
if ( longer . includes ( shorter [ i ] ) ) overlap ++ ;
}
isSimilar = overlap / shorter . length >= 0.45 ;
}
if ( isSimilar ) {
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
const prequeryResult = await session . pendingKbPrequery ;
// 只复用 hit 结果; no-hit 可能因 partial 文本路由不完整,用完整文本 re-search
if ( prequeryResult && prequeryResult . delivery !== 'upstream_chat' ) {
console . log ( ` [NativeVoice] using KB prequery cache (hit) session= ${ session . sessionId } preText= ${ JSON . stringify ( session . _kbPrequeryText . slice ( 0 , 60 ) ) } ` ) ;
resolveResult = prequeryResult ;
} else {
console . log ( ` [NativeVoice] prequery no-hit, re-searching with full text session= ${ session . sessionId } preText= ${ JSON . stringify ( ( session . _kbPrequeryText || '' ) . slice ( 0 , 40 ) ) } finalText= ${ JSON . stringify ( cleanText . slice ( 0 , 40 ) ) } ` ) ;
}
2026-03-23 13:58:41 +08:00
} else {
console . log ( ` [NativeVoice] KB prequery text mismatch, re-querying session= ${ session . sessionId } pre= ${ JSON . stringify ( preText . slice ( 0 , 40 ) ) } final= ${ JSON . stringify ( finalText . slice ( 0 , 40 ) ) } ` ) ;
}
}
session . pendingKbPrequery = null ;
session . _kbPrequeryText = '' ;
session . _kbPrequeryStartedAt = 0 ;
if ( ! resolveResult ) {
resolveResult = await resolveReply ( session . sessionId , session , cleanText ) ;
}
2026-04-03 10:19:16 +08:00
const { delivery , speechText , ragItems , source , toolName , routeDecision , responseMeta , evidencePack } = resolveResult ;
// 产品链接触发检测:用户请求查看产品详情时推送对应链接
const productLinkResult = checkProductLinkTrigger ( cleanText ) ;
if ( productLinkResult . triggered && productLinkResult . product ) {
console . log ( ` [NativeVoice] product link triggered session= ${ session . sessionId } product= ${ productLinkResult . product . name } ` ) ;
sendJson ( session . client , {
type : 'product_link' ,
product : productLinkResult . product . name ,
link : productLinkResult . product . link ,
description : productLinkResult . product . description ,
} ) ;
}
2026-03-18 17:43:13 +08:00
if ( activeTurnSeq !== ( session . latestUserTurnSeq || 0 ) ) {
console . log ( ` [NativeVoice] stale reply ignored session= ${ session . sessionId } activeTurn= ${ activeTurnSeq } latestTurn= ${ session . latestUserTurnSeq || 0 } ` ) ;
clearUpstreamSuppression ( session ) ;
return ;
}
2026-03-13 13:06:46 +08:00
if ( delivery === 'upstream_chat' ) {
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
// kbCandidate 但 S2S 未调工具 → 放开 S2S 自然回复
// 依赖: 1) system prompt 品牌保护指令引导 S2S 调工具 2) isBrandHarmful 流式拦截兜底
2026-03-16 14:43:51 +08:00
if ( isKnowledgeCandidate ) {
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
console . log ( ` [NativeVoice] processReply kbCandidate+upstream_chat, unblock S2S session= ${ session . sessionId } ` ) ;
2026-03-16 14:43:51 +08:00
}
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
session . blockUpstreamAudio = false ;
2026-03-23 13:58:41 +08:00
session . _lastPartialAt = 0 ;
2026-04-03 10:19:16 +08:00
session . _pendingEvidencePack = null ;
2026-03-13 13:06:46 +08:00
session . awaitingUpstreamReply = true ;
session . pendingAssistantSource = 'voice_bot' ;
session . pendingAssistantToolName = null ;
session . pendingAssistantMeta = responseMeta ;
2026-03-23 13:58:41 +08:00
session . pendingAssistantTurnSeq = activeTurnSeq ;
2026-03-13 13:06:46 +08:00
console . log ( ` [NativeVoice] processReply handoff session= ${ session . sessionId } route= ${ routeDecision ? . route || 'unknown' } delivery=upstream_chat ` ) ;
return ;
}
if ( delivery === 'external_rag' ) {
2026-03-16 14:43:51 +08:00
if ( ! session . blockUpstreamAudio ) {
session . blockUpstreamAudio = true ;
}
2026-03-18 16:32:27 +08:00
session . discardNextAssistantResponse = true ;
2026-03-17 11:00:09 +08:00
sendJson ( session . client , { type : 'tts_reset' , reason : 'knowledge_hit' } ) ;
2026-04-03 10:19:16 +08:00
const ragContent = ( ragItems || [ ] ) . filter ( ( item ) => item && item . content && item . kind !== 'context' ) ;
2026-03-23 13:58:41 +08:00
if ( ragContent . length > 0 ) {
console . log ( ` [NativeVoice] processReply sending external_rag to S2S session= ${ session . sessionId } route= ${ routeDecision ? . route || 'unknown' } items= ${ ragContent . length } ` ) ;
// KB话题记忆: 记录本轮用户原始问题和时间戳, 用于保护窗口和追问enrichment
if ( responseMeta ? . hit !== false && responseMeta ? . reason !== 'honest_fallback' ) {
session . _lastKbTopic = cleanText ;
session . _lastKbHitAt = Date . now ( ) ;
}
2026-04-03 10:19:16 +08:00
session . _pendingEvidencePack = evidencePack || null ;
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
// 不提前发KB原文作字幕: 等S2S event 351返回实际语音文本后再更新字幕
// 这样字幕和语音保持一致( S2S会基于RAG内容生成自然口语化的回答)
2026-03-23 13:58:41 +08:00
session . _pendingExternalRagReply = true ;
await sendExternalRag ( session , ragContent ) ;
session . awaitingUpstreamReply = true ;
session . pendingAssistantSource = source ;
session . pendingAssistantToolName = toolName ;
session . pendingAssistantMeta = responseMeta ;
session . pendingAssistantTurnSeq = activeTurnSeq ;
2026-03-17 11:00:09 +08:00
} else {
console . log ( ` [NativeVoice] processReply external_rag empty content, fallback to upstream session= ${ session . sessionId } ` ) ;
session . blockUpstreamAudio = false ;
clearUpstreamSuppression ( session ) ;
}
2026-03-13 13:06:46 +08:00
return ;
}
if ( ! speechText ) {
console . log ( ` [NativeVoice] processReply empty session= ${ session . sessionId } route= ${ routeDecision ? . route || 'unknown' } delivery= ${ delivery || 'unknown' } ` ) ;
session . isSendingChatTTSText = false ;
session . chatTTSUntil = 0 ;
return ;
}
2026-03-17 11:00:09 +08:00
console . log ( ` [NativeVoice] processReply resolved session= ${ session . sessionId } route= ${ routeDecision ? . route || 'unknown' } delivery=local_tts source= ${ source } tool= ${ toolName || 'chat' } speechLen= ${ speechText . length } ` ) ;
session . directSpeakUntil = Date . now ( ) + estimateSpeechDurationMs ( speechText ) + 800 ;
suppressUpstreamReply ( session , estimateSpeechDurationMs ( speechText ) + 1800 ) ;
2026-03-23 13:58:41 +08:00
session . lastDeliveredAssistantTurnSeq = activeTurnSeq ;
2026-03-17 11:00:09 +08:00
persistAssistantSpeech ( session , speechText , { source , toolName , meta : responseMeta } ) ;
await sendSpeechText ( session , speechText ) ;
2026-03-13 13:06:46 +08:00
} catch ( error ) {
console . error ( '[NativeVoice] processReply failed:' , error . message ) ;
sendJson ( session . client , { type : 'error' , error : error . message } ) ;
} finally {
session . processingReply = false ;
2026-03-16 14:43:51 +08:00
if ( ! session . awaitingUpstreamReply ) {
session . blockUpstreamAudio = false ;
}
2026-03-13 13:06:46 +08:00
if ( ! session . awaitingUpstreamReply ) {
sendJson ( session . client , { type : 'assistant_pending' , active : false } ) ;
}
const pending = session . queuedUserText ;
2026-03-18 17:43:13 +08:00
const pendingTurnSeq = session . queuedUserTurnSeq || 0 ;
2026-03-13 13:06:46 +08:00
session . queuedUserText = '' ;
2026-03-18 17:43:13 +08:00
session . queuedUserTurnSeq = 0 ;
if ( pending && pendingTurnSeq && pendingTurnSeq !== activeTurnSeq && ( ! session . directSpeakUntil || Date . now ( ) >= session . directSpeakUntil ) ) {
2026-03-16 14:43:51 +08:00
setTimeout ( ( ) => {
session . blockUpstreamAudio = true ;
2026-03-18 17:43:13 +08:00
processReply ( session , pending , pendingTurnSeq ) . catch ( ( err ) => {
2026-03-16 14:43:51 +08:00
console . error ( '[NativeVoice] queued processReply failed:' , err . message ) ;
} ) ;
} , 200 ) ;
2026-03-18 17:43:13 +08:00
} else if ( pending && pendingTurnSeq && pendingTurnSeq !== activeTurnSeq ) {
2026-03-13 13:06:46 +08:00
const waitMs = Math . max ( 200 , session . directSpeakUntil - Date . now ( ) + 200 ) ;
clearTimeout ( session . queuedReplyTimer ) ;
session . queuedReplyTimer = setTimeout ( ( ) => {
session . queuedReplyTimer = null ;
const queuedText = session . queuedUserText || pending ;
2026-03-18 17:43:13 +08:00
const queuedTurnSeq = session . queuedUserTurnSeq || pendingTurnSeq ;
2026-03-13 13:06:46 +08:00
session . queuedUserText = '' ;
2026-03-18 17:43:13 +08:00
session . queuedUserTurnSeq = 0 ;
2026-03-16 14:43:51 +08:00
session . blockUpstreamAudio = true ;
2026-03-18 17:43:13 +08:00
processReply ( session , queuedText , queuedTurnSeq ) . catch ( ( err ) => {
2026-03-13 13:06:46 +08:00
console . error ( '[NativeVoice] delayed queued processReply failed:' , err . message ) ;
} ) ;
} , waitMs ) ;
}
}
}
function handleUpstreamMessage ( session , data ) {
let message ;
try {
message = unmarshal ( data ) ;
} catch ( error ) {
console . warn ( '[NativeVoice] unmarshal failed:' , error . message ) ;
return ;
}
if ( message . type === MsgType . AUDIO _ONLY _SERVER ) {
2026-03-18 16:32:27 +08:00
const isDefaultTts = ! session . currentTtsType || session . currentTtsType === 'default' ;
const isSuppressingUpstreamAudio = ( session . suppressUpstreamUntil || 0 ) > Date . now ( ) && isDefaultTts ;
2026-03-23 13:58:41 +08:00
// 用户刚停止说话后短暂阻止默认TTS音频, 给event 459的blockUpstreamAudio留时间生效
const isUserJustSpeaking = isDefaultTts && session . _lastPartialAt && ( Date . now ( ) - session . _lastPartialAt < 800 ) ;
// blockUpstreamAudio 生效时:仅放行 external_rag 和限时过渡语音频,其余全部阻断
// 修复:旧逻辑只阻断 isDefaultTts, 导致 chat_tts_text 窗口期 S2S 自主回复音频泄漏
const isBlockPassthrough = session . currentTtsType === 'external_rag' ||
( session . _fillerActive && ( session . chatTTSUntil || 0 ) > Date . now ( ) ) ;
if ( ( session . blockUpstreamAudio && ! isBlockPassthrough ) || isSuppressingUpstreamAudio || isUserJustSpeaking ) {
2026-03-16 14:43:51 +08:00
if ( ! session . _audioBlockLogOnce ) {
session . _audioBlockLogOnce = true ;
2026-03-18 16:32:27 +08:00
console . log ( ` [NativeVoice] audio blocked session= ${ session . sessionId } ttsType= ${ session . currentTtsType } block= ${ session . blockUpstreamAudio } suppress= ${ isSuppressingUpstreamAudio } ` ) ;
2026-03-16 14:43:51 +08:00
}
2026-03-13 13:06:46 +08:00
return ;
}
2026-03-16 14:43:51 +08:00
session . _audioBlockLogOnce = false ;
2026-03-13 13:06:46 +08:00
if ( session . client && session . client . readyState === WebSocket . OPEN ) {
session . client . send ( message . payload , { binary : true } ) ;
}
return ;
}
const payload = parseJsonPayload ( message ) ;
if ( message . type === MsgType . ERROR ) {
console . error ( ` [NativeVoice] upstream error session= ${ session . sessionId } code= ${ message . event } payload= ${ message . payload . toString ( 'utf8' ) . slice ( 0 , 200 ) } ` ) ;
sendJson ( session . client , { type : 'error' , error : message . payload . toString ( 'utf8' ) } ) ;
return ;
}
if ( message . type !== MsgType . FULL _SERVER ) {
return ;
}
if ( message . event === 150 ) {
session . upstreamReady = true ;
console . log ( ` [NativeVoice] upstream ready session= ${ session . sessionId } ` ) ;
2026-03-16 14:43:51 +08:00
resetIdleTimer ( session ) ;
2026-03-31 09:46:40 +08:00
startAudioKeepalive ( session ) ;
2026-03-16 14:43:51 +08:00
sendGreeting ( session ) ;
2026-03-13 13:06:46 +08:00
return ;
}
2026-03-17 11:00:09 +08:00
if ( message . event === 300 ) {
console . log ( ` [NativeVoice] SayHello response session= ${ session . sessionId } ` ) ;
return ;
}
2026-03-13 13:06:46 +08:00
if ( message . event === 350 ) {
session . currentTtsType = payload ? . tts _type || '' ;
if ( payload ? . tts _type === 'chat_tts_text' && session . pendingGreetingAck ) {
session . pendingGreetingAck = false ;
clearTimeout ( session . greetingAckTimer ) ;
session . greetingAckTimer = null ;
}
2026-03-18 17:43:13 +08:00
if ( session . blockUpstreamAudio && payload ? . tts _type === 'external_rag' ) {
session . blockUpstreamAudio = false ;
session . suppressUpstreamUntil = 0 ;
clearTimeout ( session . suppressReplyTimer ) ;
session . suppressReplyTimer = null ;
2026-03-23 13:58:41 +08:00
// 注意: 不清除discardNextAssistantResponse, 让它拦截S2S默认回复的残留event 351
// 该标记会在KB回复的event 550 chunks到达时自动清除
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
// 清除过渡语的chat TTS状态, 确保external_rag回复不被isLocalChatTTSTextActive拦截
session . isSendingChatTTSText = false ;
session . chatTTSUntil = 0 ;
session . currentSpeechText = '' ;
session . _fillerActive = false ;
clearTimeout ( session . chatTTSTimer ) ;
sendJson ( session . client , { type : 'tts_reset' , reason : 'rag_response_start' } ) ;
2026-03-18 17:43:13 +08:00
console . log ( ` [NativeVoice] unblock for external_rag tts session= ${ session . sessionId } ` ) ;
} else if ( session . blockUpstreamAudio && payload ? . tts _type === 'chat_tts_text' ) {
console . log ( ` [NativeVoice] chat_tts_text started, keeping block for S2S default response session= ${ session . sessionId } ` ) ;
2026-03-16 14:43:51 +08:00
}
2026-03-13 13:06:46 +08:00
console . log ( ` [NativeVoice] upstream tts_event session= ${ session . sessionId } ttsType= ${ payload ? . tts _type || '' } ` ) ;
sendJson ( session . client , { type : 'tts_event' , payload } ) ;
return ;
}
const isLocalChatTTSTextActive = ! ! session . isSendingChatTTSText && ( session . chatTTSUntil || 0 ) > Date . now ( ) ;
2026-03-17 11:00:09 +08:00
const isSuppressingUpstreamReply = ( session . suppressUpstreamUntil || 0 ) > Date . now ( ) ;
2026-03-13 13:06:46 +08:00
if ( message . event === 351 ) {
2026-03-17 11:00:09 +08:00
if ( isLocalChatTTSTextActive || session . blockUpstreamAudio || isSuppressingUpstreamReply ) {
2026-03-13 13:06:46 +08:00
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
return ;
}
2026-03-17 11:00:09 +08:00
if ( session . discardNextAssistantResponse ) {
session . discardNextAssistantResponse = false ;
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
console . log ( ` [NativeVoice] discarded stale assistant response (kb-nohit retrigger) session= ${ session . sessionId } ` ) ;
return ;
}
2026-03-13 13:06:46 +08:00
const pendingAssistantSource = session . pendingAssistantSource || 'voice_bot' ;
const pendingAssistantToolName = session . pendingAssistantToolName || null ;
const pendingAssistantMeta = session . pendingAssistantMeta || null ;
2026-03-23 13:58:41 +08:00
const pendingAssistantTurnSeq = session . pendingAssistantTurnSeq || session . latestUserTurnSeq || 0 ;
2026-03-13 13:06:46 +08:00
session . awaitingUpstreamReply = false ;
sendJson ( session . client , { type : 'assistant_pending' , active : false } ) ;
2026-03-23 13:58:41 +08:00
if ( pendingAssistantTurnSeq && session . lastDeliveredAssistantTurnSeq === pendingAssistantTurnSeq ) {
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
session . pendingAssistantSource = null ;
session . pendingAssistantToolName = null ;
session . pendingAssistantMeta = null ;
2026-04-03 10:19:16 +08:00
session . _pendingEvidencePack = null ;
2026-03-23 13:58:41 +08:00
console . log ( ` [NativeVoice] duplicate assistant final ignored (351) session= ${ session . sessionId } turn= ${ pendingAssistantTurnSeq } ` ) ;
return ;
}
const assistantText = extractRawText ( payload ) ;
2026-03-13 13:06:46 +08:00
if ( assistantText ) {
2026-03-16 14:43:51 +08:00
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
2026-03-23 13:58:41 +08:00
// 过渡语的event 351: 不持久化, 直接丢弃
if ( session . _fillerActive ) {
console . log ( ` [NativeVoice] discarded filler assistant text session= ${ session . sessionId } ` ) ;
session . _fillerActive = false ;
return ;
}
// 清除external_rag等待标记, KB回复已到达
if ( session . _pendingExternalRagReply ) {
session . _pendingExternalRagReply = false ;
}
2026-04-03 10:19:16 +08:00
console . log ( ` [NativeVoice] upstream assistant session= ${ session . sessionId } text= ${ JSON . stringify ( assistantText . slice ( 0 , 120 ) ) } ` ) ;
session . lastDeliveredAssistantTurnSeq = pendingAssistantTurnSeq ;
persistAssistantSpeech ( session , assistantText , {
source : pendingAssistantSource ,
toolName : pendingAssistantToolName ,
meta : pendingAssistantMeta ,
} ) ;
// KB回复完成后重新阻断音频, 防止下一个问题的S2S默认回复在early block前泄露
if ( session . currentTtsType === 'external_rag' ) {
2026-03-23 13:58:41 +08:00
session . blockUpstreamAudio = true ;
2026-04-03 10:19:16 +08:00
console . log ( ` [NativeVoice] re-blocked after KB response session= ${ session . sessionId } ` ) ;
2026-03-23 13:58:41 +08:00
}
2026-03-16 14:43:51 +08:00
} else {
2026-03-23 13:58:41 +08:00
const didFlush = flushAssistantStream ( session , {
2026-03-16 14:43:51 +08:00
source : pendingAssistantSource ,
toolName : pendingAssistantToolName ,
meta : pendingAssistantMeta ,
} ) ;
2026-03-23 13:58:41 +08:00
if ( didFlush ) {
session . lastDeliveredAssistantTurnSeq = pendingAssistantTurnSeq ;
}
2026-03-13 13:06:46 +08:00
}
2026-03-16 14:43:51 +08:00
session . pendingAssistantSource = null ;
session . pendingAssistantToolName = null ;
session . pendingAssistantMeta = null ;
2026-04-03 10:19:16 +08:00
session . _pendingEvidencePack = null ;
2026-03-13 13:06:46 +08:00
return ;
}
if ( message . event === 550 ) {
2026-03-23 13:58:41 +08:00
// external_rag chunks到达时, 清除discardNextAssistantResponse( 默认回复的351已过或不会来)
if ( session . discardNextAssistantResponse && session . currentTtsType === 'external_rag' ) {
session . discardNextAssistantResponse = false ;
console . log ( ` [NativeVoice] cleared discardNextAssistantResponse for external_rag stream session= ${ session . sessionId } ` ) ;
}
2026-03-17 11:00:09 +08:00
if ( isLocalChatTTSTextActive || session . blockUpstreamAudio || isSuppressingUpstreamReply || session . discardNextAssistantResponse ) {
2026-03-13 13:06:46 +08:00
return ;
}
if ( session . awaitingUpstreamReply ) {
session . awaitingUpstreamReply = false ;
sendJson ( session . client , { type : 'assistant_pending' , active : false } ) ;
}
const fullText = appendAssistantStream ( session , payload ) ;
if ( fullText ) {
2026-03-23 13:58:41 +08:00
// 品牌安全检测: S2S模型输出传销等负面内容时, 立即阻断音频并注入安全回复
if ( fullText . length >= 4 && isBrandHarmful ( fullText ) ) {
console . warn ( ` [NativeVoice][SafeGuard] harmful content detected in stream, blocking audio session= ${ session . sessionId } text= ${ JSON . stringify ( fullText . slice ( 0 , 120 ) ) } ` ) ;
session . blockUpstreamAudio = true ;
session . discardNextAssistantResponse = true ;
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
sendJson ( session . client , { type : 'tts_reset' , reason : 'harmful_blocked' } ) ;
// 注入安全回复语音,替代有害内容
const safeReply = getVoiceSafeReply ( ) ;
persistAssistantSpeech ( session , safeReply , { source : 'voice_bot' } ) ;
sendSpeechText ( session , safeReply ) . catch ( ( err ) => {
console . warn ( '[NativeVoice][SafeGuard] sendSpeechText failed:' , err . message ) ;
} ) ;
return ;
}
2026-03-18 16:32:27 +08:00
// 检测思考模式: S2S模型输出分析/计划而非直接回答,立即阻断
2026-03-23 13:58:41 +08:00
if ( fullText . length >= 10 && ( THINKING _PATTERN . test ( fullText . trim ( ) ) || THINKING _MID _PATTERN . test ( fullText ) ) ) {
2026-03-18 16:32:27 +08:00
console . warn ( ` [NativeVoice][SafeGuard] thinking detected in stream, blocking audio session= ${ session . sessionId } text= ${ JSON . stringify ( fullText . slice ( 0 , 120 ) ) } ` ) ;
session . blockUpstreamAudio = true ;
session . discardNextAssistantResponse = true ;
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
sendJson ( session . client , { type : 'tts_reset' , reason : 'thinking_blocked' } ) ;
return ;
}
2026-03-13 13:06:46 +08:00
console . log ( ` [NativeVoice] upstream assistant chunk session= ${ session . sessionId } len= ${ fullText . length } text= ${ JSON . stringify ( fullText . slice ( 0 , 120 ) ) } ` ) ;
}
return ;
}
if ( message . event === 559 ) {
2026-03-17 11:00:09 +08:00
if ( isLocalChatTTSTextActive || isSuppressingUpstreamReply ) {
2026-03-13 13:06:46 +08:00
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
return ;
}
2026-03-16 14:43:51 +08:00
if ( session . blockUpstreamAudio ) {
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
console . log ( ` [NativeVoice] blocked response ended (559), keeping block session= ${ session . sessionId } ` ) ;
return ;
}
2026-03-17 11:00:09 +08:00
if ( session . discardNextAssistantResponse ) {
session . discardNextAssistantResponse = false ;
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
console . log ( ` [NativeVoice] discarded stale stream end (559, kb-nohit retrigger) session= ${ session . sessionId } ` ) ;
return ;
}
2026-03-23 13:58:41 +08:00
// external_rag流期间, 阻止默认回复的559过早flush部分KB文本
if ( session . _pendingExternalRagReply ) {
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
console . log ( ` [NativeVoice] suppressed 559 flush during external_rag flow session= ${ session . sessionId } ` ) ;
return ;
}
const pendingAssistantTurnSeq = session . pendingAssistantTurnSeq || session . latestUserTurnSeq || 0 ;
if ( pendingAssistantTurnSeq && session . lastDeliveredAssistantTurnSeq === pendingAssistantTurnSeq ) {
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
session . pendingAssistantSource = null ;
session . pendingAssistantToolName = null ;
session . pendingAssistantMeta = null ;
2026-04-03 10:19:16 +08:00
session . _pendingEvidencePack = null ;
2026-03-23 13:58:41 +08:00
console . log ( ` [NativeVoice] duplicate assistant final ignored (559) session= ${ session . sessionId } turn= ${ pendingAssistantTurnSeq } ` ) ;
return ;
}
2026-03-13 13:06:46 +08:00
session . awaitingUpstreamReply = false ;
2026-03-16 14:43:51 +08:00
session . blockUpstreamAudio = false ;
2026-03-13 13:06:46 +08:00
sendJson ( session . client , { type : 'assistant_pending' , active : false } ) ;
2026-03-23 13:58:41 +08:00
const didFlush = flushAssistantStream ( session , {
2026-03-13 13:06:46 +08:00
source : session . pendingAssistantSource || 'voice_bot' ,
toolName : session . pendingAssistantToolName || null ,
meta : session . pendingAssistantMeta || null ,
} ) ;
2026-03-23 13:58:41 +08:00
if ( didFlush ) {
session . lastDeliveredAssistantTurnSeq = pendingAssistantTurnSeq ;
}
2026-03-13 13:06:46 +08:00
session . pendingAssistantSource = null ;
session . pendingAssistantToolName = null ;
session . pendingAssistantMeta = null ;
2026-04-03 10:19:16 +08:00
session . _pendingEvidencePack = null ;
2026-03-13 13:06:46 +08:00
return ;
}
if ( message . event === 450 || ( message . event === 451 && ! isFinalUserPayload ( payload ) ) ) {
2026-03-18 16:32:27 +08:00
const text = extractUserText ( payload , session . sessionId ) ;
2026-03-13 13:06:46 +08:00
if ( text ) {
2026-03-23 13:58:41 +08:00
const now = Date . now ( ) ;
const isDirectSpeaking = session . directSpeakUntil && now < session . directSpeakUntil ;
const isChatTTSSpeaking = session . isSendingChatTTSText && ( session . chatTTSUntil || 0 ) > now ;
// TTS回声检测: 播放期间如果ASR识别文本是当前播放文本的子串, 判定为回声, 忽略
if ( ( isDirectSpeaking || isChatTTSSpeaking ) && session . currentSpeechText ) {
const normalizedPartial = text . replace ( /[,。!?、,.\s]/g , '' ) ;
const normalizedSpeech = session . currentSpeechText . replace ( /[,。!?、,.\s]/g , '' ) ;
if ( normalizedPartial . length <= 3 || normalizedSpeech . includes ( normalizedPartial ) ) {
if ( ! session . _echoLogOnce ) {
session . _echoLogOnce = true ;
console . log ( ` [NativeVoice] TTS echo detected, ignoring partial session= ${ session . sessionId } text= ${ JSON . stringify ( text . slice ( 0 , 80 ) ) } ` ) ;
}
return ;
}
session . _echoLogOnce = false ;
} else {
session . _echoLogOnce = false ;
}
// Greeting保护窗口: 发送问候语后短暂保护期内忽略barge-in
if ( session . greetingProtectionUntil && now < session . greetingProtectionUntil ) {
console . log ( ` [NativeVoice] greeting protection active, ignoring partial session= ${ session . sessionId } text= ${ JSON . stringify ( text . slice ( 0 , 80 ) ) } ` ) ;
return ;
}
2026-03-13 13:06:46 +08:00
console . log ( ` [NativeVoice] upstream partial session= ${ session . sessionId } text= ${ JSON . stringify ( text . slice ( 0 , 120 ) ) } ` ) ;
2026-03-23 13:58:41 +08:00
const normalizedPartial = normalizeKnowledgeAlias ( text ) ;
session . latestUserText = normalizedPartial ;
session . _lastPartialAt = now ;
2026-03-31 09:46:40 +08:00
// KB-First: 非闲聊文本一律提前阻断S2S音频, 防止有害内容播出
if ( normalizedPartial . length >= 6 && ! session . blockUpstreamAudio && ! isPureChitchat ( normalizedPartial ) ) {
2026-03-18 16:32:27 +08:00
session . blockUpstreamAudio = true ;
2026-03-23 13:58:41 +08:00
session . currentTtsType = 'default' ;
sendJson ( session . client , { type : 'tts_reset' , reason : 'early_block' } ) ;
2026-03-31 09:46:40 +08:00
console . log ( ` [NativeVoice] early block: non-chitchat partial session= ${ session . sessionId } text= ${ JSON . stringify ( text . slice ( 0 , 80 ) ) } ` ) ;
2026-03-23 13:58:41 +08:00
// KB预查询: 提前启动知识库查询, 减少final ASR后的等待时间
const kbPrequeryDebounce = 600 ;
if ( normalizedPartial . length >= 8 && ( ! session . _kbPrequeryStartedAt || now - session . _kbPrequeryStartedAt > kbPrequeryDebounce ) ) {
session . _kbPrequeryStartedAt = now ;
session . _kbPrequeryText = normalizedPartial ;
console . log ( ` [NativeVoice] KB prequery started session= ${ session . sessionId } text= ${ JSON . stringify ( normalizedPartial . slice ( 0 , 80 ) ) } ` ) ;
session . pendingKbPrequery = resolveReply ( session . sessionId , session , normalizedPartial ) . catch ( ( err ) => {
console . warn ( ` [NativeVoice] KB prequery failed session= ${ session . sessionId } : ` , err . message ) ;
return null ;
} ) ;
}
2026-03-18 16:32:27 +08:00
}
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
// 用户开口说话时立即打断所有 AI 播放(包括 S2S external_rag 音频)
const isS2SAudioPlaying = ! session . blockUpstreamAudio && session . currentTtsType === 'external_rag' ;
if ( isDirectSpeaking || isChatTTSSpeaking || isS2SAudioPlaying ) {
console . log ( ` [NativeVoice] user barge-in (partial) session= ${ session . sessionId } direct= ${ isDirectSpeaking } chatTTS= ${ isChatTTSSpeaking } s2sRag= ${ isS2SAudioPlaying } ` ) ;
2026-03-16 14:43:51 +08:00
session . directSpeakUntil = 0 ;
session . isSendingChatTTSText = false ;
session . chatTTSUntil = 0 ;
2026-03-23 13:58:41 +08:00
session . currentSpeechText = '' ;
2026-03-16 14:43:51 +08:00
clearTimeout ( session . chatTTSTimer ) ;
2026-03-17 11:00:09 +08:00
if ( session . suppressReplyTimer || session . suppressUpstreamUntil ) {
clearUpstreamSuppression ( session ) ;
}
feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异
- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写
- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery
- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优
- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式
- app.js: 健康检查新增 redis/reranker/kbRetrievalMode
- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00
// 阻断 S2S 音频转发,防止用户打断后仍听到残留音频
session . blockUpstreamAudio = true ;
2026-03-17 11:00:09 +08:00
}
// 无论当前是否在播放,都发送 tts_reset 确保客户端停止所有音频播放
if ( ! session . _lastBargeInResetAt || now - session . _lastBargeInResetAt > 500 ) {
session . _lastBargeInResetAt = now ;
2026-03-16 14:43:51 +08:00
sendJson ( session . client , { type : 'tts_reset' , reason : 'user_bargein' } ) ;
}
2026-03-13 13:06:46 +08:00
sendJson ( session . client , {
type : 'subtitle' ,
role : 'user' ,
2026-03-23 13:58:41 +08:00
text : text ,
2026-03-13 13:06:46 +08:00
isFinal : false ,
sequence : ` native_partial_ ${ Date . now ( ) } ` ,
} ) ;
}
return ;
}
if ( message . event === 459 || ( message . event === 451 && isFinalUserPayload ( payload ) ) ) {
2026-03-23 13:58:41 +08:00
const rawFinalText = extractUserText ( payload , session . sessionId ) || '' ;
const finalText = normalizeKnowledgeAlias ( rawFinalText ) || session . latestUserText || '' ;
const now459 = Date . now ( ) ;
// 双事件去重: S2S可能同时发送event 459和event 451(is_final),用去标点归一化文本+时间窗口去重
const normalizedForDedup = finalText . replace ( /[,。!?、,.?!\s]/g , '' ) ;
if ( normalizedForDedup && session . _lastFinalNormalized === normalizedForDedup && now459 - ( session . _lastFinalAt || 0 ) < 1500 ) {
console . log ( ` [NativeVoice] duplicate final ignored (event= ${ message . event } ) session= ${ session . sessionId } text= ${ JSON . stringify ( finalText . slice ( 0 , 80 ) ) } ` ) ;
return ;
}
session . _lastFinalNormalized = normalizedForDedup ;
session . _lastFinalAt = now459 ;
// TTS回声检测( final级别) : 播放期间ASR最终识别文本如果是当前播放文本的子串, 判定为回声
const isDirectSpeaking459 = session . directSpeakUntil && now459 < session . directSpeakUntil ;
const isChatTTSSpeaking459 = session . isSendingChatTTSText && ( session . chatTTSUntil || 0 ) > now459 ;
if ( ( isDirectSpeaking459 || isChatTTSSpeaking459 ) && session . currentSpeechText && finalText ) {
const normalizedFinal = finalText . replace ( /[,。!?、,.\s]/g , '' ) ;
const normalizedSpeech = session . currentSpeechText . replace ( /[,。!?、,.\s]/g , '' ) ;
if ( normalizedFinal . length <= 4 || normalizedSpeech . includes ( normalizedFinal ) ) {
console . log ( ` [NativeVoice] TTS echo detected in final, ignoring session= ${ session . sessionId } text= ${ JSON . stringify ( finalText . slice ( 0 , 80 ) ) } ` ) ;
return ;
}
}
// Greeting保护窗口
if ( session . greetingProtectionUntil && now459 < session . greetingProtectionUntil && finalText ) {
console . log ( ` [NativeVoice] greeting protection active, ignoring final session= ${ session . sessionId } text= ${ JSON . stringify ( finalText . slice ( 0 , 80 ) ) } ` ) ;
return ;
}
2026-03-13 13:06:46 +08:00
console . log ( ` [NativeVoice] upstream final session= ${ session . sessionId } text= ${ JSON . stringify ( finalText . slice ( 0 , 120 ) ) } ` ) ;
if ( session . directSpeakUntil && Date . now ( ) < session . directSpeakUntil ) {
console . log ( ` [NativeVoice] user interrupt during speaking session= ${ session . sessionId } ` ) ;
session . directSpeakUntil = 0 ;
session . isSendingChatTTSText = false ;
session . chatTTSUntil = 0 ;
2026-03-23 13:58:41 +08:00
session . currentSpeechText = '' ;
2026-03-13 13:06:46 +08:00
clearTimeout ( session . chatTTSTimer ) ;
2026-03-16 14:43:51 +08:00
sendJson ( session . client , { type : 'tts_reset' , reason : 'user_bargein' } ) ;
2026-03-17 11:00:09 +08:00
if ( session . suppressReplyTimer || session . suppressUpstreamUntil ) {
clearUpstreamSuppression ( session ) ;
}
2026-03-16 14:43:51 +08:00
} else if ( session . isSendingChatTTSText && ( session . chatTTSUntil || 0 ) > Date . now ( ) ) {
console . log ( ` [NativeVoice] user interrupt chatTTS during speaking session= ${ session . sessionId } ` ) ;
session . isSendingChatTTSText = false ;
session . chatTTSUntil = 0 ;
2026-03-23 13:58:41 +08:00
session . currentSpeechText = '' ;
2026-03-16 14:43:51 +08:00
clearTimeout ( session . chatTTSTimer ) ;
sendJson ( session . client , { type : 'tts_reset' , reason : 'user_bargein' } ) ;
2026-03-17 11:00:09 +08:00
if ( session . suppressReplyTimer || session . suppressUpstreamUntil ) {
clearUpstreamSuppression ( session ) ;
}
2026-03-13 13:06:46 +08:00
}
2026-03-23 13:58:41 +08:00
if ( persistUserSpeech ( session , rawFinalText || finalText ) ) {
2026-03-16 14:43:51 +08:00
session . blockUpstreamAudio = true ;
2026-03-23 13:58:41 +08:00
session . currentTtsType = 'default' ;
session . assistantStreamBuffer = '' ;
session . assistantStreamReplyId = '' ;
2026-03-17 11:00:09 +08:00
sendJson ( session . client , { type : 'tts_reset' , reason : 'new_turn' } ) ;
2026-03-13 13:06:46 +08:00
processReply ( session , finalText ) . catch ( ( error ) => {
console . error ( '[NativeVoice] processReply error:' , error . message ) ;
} ) ;
}
return ;
}
sendJson ( session . client , {
type : 'event' ,
event : message . event ,
payload ,
} ) ;
}
function attachClientHandlers ( session ) {
session . client . on ( 'message' , async ( raw , isBinary ) => {
if ( isBinary ) {
if ( session . upstream && session . upstream . readyState === WebSocket . OPEN && session . upstreamReady ) {
session . upstream . send ( createAudioMessage ( session . sessionId , raw ) ) ;
2026-03-31 09:46:40 +08:00
resetAudioKeepalive ( session ) ;
2026-03-13 13:06:46 +08:00
}
return ;
}
let parsed ;
try {
parsed = JSON . parse ( raw . toString ( 'utf8' ) ) ;
} catch ( error ) {
sendJson ( session . client , { type : 'error' , error : 'invalid client json' } ) ;
return ;
}
if ( parsed . type === 'start' ) {
2026-03-24 17:19:36 +08:00
session . userId = parsed . userId || session . userId || null ;
const remoteProfileResult = await getAssistantProfile ( { userId : session . userId } ) ;
2026-03-23 13:58:41 +08:00
const assistantProfile = resolveAssistantProfile ( {
2026-03-24 17:19:36 +08:00
... ( remoteProfileResult . profile || { } ) ,
2026-03-23 13:58:41 +08:00
... ( session . assistantProfile || { } ) ,
... ( ( parsed . assistantProfile && typeof parsed . assistantProfile === 'object' ) ? parsed . assistantProfile : { } ) ,
} ) ;
session . assistantProfile = assistantProfile ;
2026-04-03 10:19:16 +08:00
session . botName = parsed . botName || getAssistantDisplayName ( assistantProfile ) || DEFAULT _VOICE _BOT _NAME ;
2026-03-31 09:46:40 +08:00
session . systemRole = buildVoiceSystemRole ( assistantProfile ) ;
2026-03-23 13:58:41 +08:00
session . speakingStyle = parsed . speakingStyle || session . speakingStyle || DEFAULT _VOICE _SPEAKING _STYLE ;
2026-03-16 14:43:51 +08:00
session . speaker = parsed . speaker || process . env . VOLC _S2S _SPEAKER _ID || 'zh_female_vv_jupiter_bigtts' ;
session . modelVersion = parsed . modelVersion || 'O' ;
2026-03-31 09:46:40 +08:00
session . greetingText = buildVoiceGreeting ( assistantProfile ) ;
2026-03-17 11:00:09 +08:00
// 立即发送 ready, 不等 upstream event 150, 大幅缩短前端等待时间
sendReady ( session ) ;
2026-03-13 13:06:46 +08:00
session . upstream = createUpstreamConnection ( session ) ;
loadHandoffSummaryForVoice ( session ) . catch ( ( error ) => {
console . warn ( '[NativeVoice] async loadHandoffSummaryForVoice failed:' , error . message ) ;
} ) ;
return ;
}
if ( parsed . type === 'stop' ) {
session . client . close ( ) ;
return ;
}
2026-03-17 11:00:09 +08:00
if ( parsed . type === 'replay_greeting' ) {
replayGreeting ( session ) . catch ( ( error ) => {
console . warn ( '[NativeVoice] replayGreeting failed:' , error . message ) ;
} ) ;
return ;
}
2026-03-13 13:06:46 +08:00
if ( parsed . type === 'text' && parsed . text ) {
2026-03-18 17:43:13 +08:00
if ( persistUserSpeech ( session , parsed . text ) ) {
processReply ( session , parsed . text , session . latestUserTurnSeq ) . catch ( ( error ) => {
console . error ( '[NativeVoice] text processReply failed:' , error . message ) ;
} ) ;
}
2026-03-13 13:06:46 +08:00
}
} ) ;
session . client . on ( 'close' , ( ) => {
clearTimeout ( session . chatTTSTimer ) ;
clearTimeout ( session . greetingTimer ) ;
clearTimeout ( session . greetingAckTimer ) ;
clearTimeout ( session . readyTimer ) ;
2026-03-17 11:00:09 +08:00
clearTimeout ( session . suppressReplyTimer ) ;
2026-03-16 14:43:51 +08:00
clearTimeout ( session . idleTimer ) ;
2026-03-31 09:46:40 +08:00
clearInterval ( session . _audioKeepaliveTimer ) ;
2026-03-13 13:06:46 +08:00
if ( session . upstream && session . upstream . readyState === WebSocket . OPEN ) {
session . upstream . close ( ) ;
}
2026-04-03 10:19:16 +08:00
// 会话结束时持久化摘要到 MySQL
persistFinalSummary ( session ) . catch ( ( err ) => {
console . warn ( '[NativeVoice] persistFinalSummary failed:' , err . message ) ;
} ) ;
2026-03-13 13:06:46 +08:00
sessions . delete ( session . sessionId ) ;
} ) ;
}
function createUpstreamConnection ( session ) {
const upstream = new WebSocket ( 'wss://openspeech.bytedance.com/api/v3/realtime/dialogue' , {
headers : {
'X-Api-Resource-Id' : 'volc.speech.dialog' ,
'X-Api-Access-Key' : process . env . VOLC _S2S _TOKEN ,
'X-Api-App-Key' : process . env . VOLC _DIALOG _APP _KEY || 'PlgvMymc7f3tQnJ6' ,
'X-Api-App-ID' : process . env . VOLC _S2S _APP _ID ,
'X-Api-Connect-Id' : session . sessionId ,
} ,
} ) ;
upstream . on ( 'open' , ( ) => {
upstream . send ( createStartConnectionMessage ( ) ) ;
upstream . send ( createStartSessionMessage ( session . sessionId , buildStartSessionPayload ( session ) ) ) ;
} ) ;
upstream . on ( 'message' , ( data , isBinary ) => {
if ( ! isBinary && typeof data === 'string' ) {
sendJson ( session . client , { type : 'server_text' , text : data } ) ;
return ;
}
handleUpstreamMessage ( session , Buffer . isBuffer ( data ) ? data : Buffer . from ( data ) ) ;
} ) ;
upstream . on ( 'error' , ( error ) => {
2026-03-16 14:43:51 +08:00
console . error ( '[NativeVoice] upstream ws error:' , error . message ) ;
sendJson ( session . client , { type : 'error' , error : ` 语音服务连接异常: ${ error . message } ` } ) ;
2026-03-13 13:06:46 +08:00
} ) ;
2026-03-16 14:43:51 +08:00
upstream . on ( 'close' , ( code ) => {
console . log ( ` [NativeVoice] upstream closed session= ${ session . sessionId } code= ${ code } ` ) ;
session . upstreamReady = false ;
2026-03-31 09:46:40 +08:00
clearInterval ( session . _audioKeepaliveTimer ) ;
session . _audioKeepaliveTimer = null ;
2026-03-16 14:43:51 +08:00
sendJson ( session . client , { type : 'upstream_closed' , code } ) ;
setTimeout ( ( ) => {
if ( session . client && session . client . readyState === WebSocket . OPEN ) {
session . client . close ( ) ;
}
} , 3000 ) ;
2026-03-13 13:06:46 +08:00
} ) ;
return upstream ;
}
function createSession ( client , sessionId ) {
2026-03-23 13:58:41 +08:00
const assistantProfile = resolveAssistantProfile ( ) ;
2026-03-13 13:06:46 +08:00
const session = {
sessionId ,
client ,
upstream : null ,
upstreamReady : false ,
isSendingChatTTSText : false ,
latestUserText : '' ,
2026-03-18 17:43:13 +08:00
latestUserTurnSeq : 0 ,
2026-03-13 13:06:46 +08:00
queuedUserText : '' ,
2026-03-18 17:43:13 +08:00
queuedUserTurnSeq : 0 ,
2026-03-13 13:06:46 +08:00
processingReply : false ,
2026-03-16 14:43:51 +08:00
blockUpstreamAudio : false ,
2026-03-13 13:06:46 +08:00
directSpeakUntil : 0 ,
queuedReplyTimer : null ,
lastPersistedAssistantText : '' ,
lastPersistedAssistantAt : 0 ,
assistantStreamBuffer : '' ,
assistantStreamReplyId : '' ,
currentTtsType : '' ,
2026-03-23 13:58:41 +08:00
currentSpeechText : '' ,
greetingProtectionUntil : 0 ,
_echoLogOnce : false ,
_fillerActive : false ,
_pendingExternalRagReply : false ,
2026-04-03 10:19:16 +08:00
_pendingEvidencePack : null ,
2026-03-23 13:58:41 +08:00
_lastPartialAt : 0 ,
pendingKbPrequery : null ,
_kbPrequeryText : '' ,
_kbPrequeryStartedAt : 0 ,
_lastKbTopic : '' ,
_lastKbHitAt : 0 ,
assistantProfile ,
2026-04-03 10:19:16 +08:00
botName : getAssistantDisplayName ( assistantProfile ) ,
2026-03-23 13:58:41 +08:00
systemRole : buildVoiceSystemRole ( assistantProfile ) ,
speakingStyle : DEFAULT _VOICE _SPEAKING _STYLE ,
2026-03-16 14:43:51 +08:00
speaker : process . env . VOLC _S2S _SPEAKER _ID || 'zh_female_vv_jupiter_bigtts' ,
modelVersion : 'O' ,
2026-03-23 13:58:41 +08:00
greetingText : buildVoiceGreeting ( assistantProfile ) ,
2026-03-13 13:06:46 +08:00
hasSentGreeting : false ,
greetingTimer : null ,
greetingAckTimer : null ,
pendingGreetingAck : false ,
greetingRetryCount : 0 ,
readyTimer : null ,
readySent : false ,
handoffSummary : '' ,
handoffSummaryUsed : false ,
awaitingUpstreamReply : false ,
pendingAssistantSource : null ,
pendingAssistantToolName : null ,
pendingAssistantMeta : null ,
2026-03-23 13:58:41 +08:00
pendingAssistantTurnSeq : 0 ,
lastDeliveredAssistantTurnSeq : 0 ,
2026-03-17 11:00:09 +08:00
suppressReplyTimer : null ,
suppressUpstreamUntil : 0 ,
2026-03-16 14:43:51 +08:00
idleTimer : null ,
lastActivityAt : Date . now ( ) ,
2026-03-17 11:00:09 +08:00
_lastBargeInResetAt : 0 ,
_audioBlockLogOnce : false ,
2026-03-23 13:58:41 +08:00
_lastFinalNormalized : '' ,
_lastFinalAt : 0 ,
2026-03-31 09:46:40 +08:00
_audioKeepaliveTimer : null ,
2026-04-03 10:19:16 +08:00
_turnCount : 0 ,
2026-03-13 13:06:46 +08:00
} ;
sessions . set ( sessionId , session ) ;
attachClientHandlers ( session ) ;
return session ;
}
function setupNativeVoiceGateway ( server ) {
const wss = new WebSocketServer ( { server , path : '/ws/realtime-dialog' } ) ;
wss . on ( 'connection' , async ( client , req ) => {
const parsed = url . parse ( req . url , true ) ;
const sessionId = parsed . query ? . sessionId ;
if ( ! sessionId ) {
client . close ( ) ;
return ;
}
const userId = parsed . query ? . userId || null ;
const session = createSession ( client , sessionId ) ;
session . userId = userId ;
try {
await db . createSession ( sessionId , userId , 'voice' ) ;
} catch ( error ) {
console . warn ( '[NativeVoice][DB] createSession failed:' , error . message ) ;
}
sendJson ( client , { type : 'connected' , sessionId } ) ;
} ) ;
return wss ;
}
module . exports = {
setupNativeVoiceGateway ,
} ;