feat(s2s): add S2S text dialog via /ws/realtime-text + event 501 ChatTextQuery
Dual-channel S2S architecture with full isolation between voice and text links: Backend (Java): - VolcRealtimeProtocol: add createChatTextQueryMessage (event 501) - VoiceSessionState: add textMode / playAudioReply / disableGreeting fields - VoiceWebSocketConfig: register second path /ws/realtime-text (same handler) - VoiceWebSocketHandler: detect text mode from URL path - VoiceGatewayService: * afterConnectionEstablished: overload with textMode flag * handleStart: parse playAudioReply / disableGreeting from client * buildStartSessionPayload: inject input_mod=text for text mode * handleDirectText: text mode sends event 501 directly, skip processReply * handleBinaryMessage: reject client audio in text mode * handleUpstreamBinary: drop S2S audio if text mode + no playback * startAudioKeepalive: skip entirely in text mode (no audio channel) * sendGreeting: skip greeting if disableGreeting=true Frontend (test2 + delivery): - nativeVoiceService: connect accepts clientMode/playAudioReply/disableGreeting * resolveWebSocketUrl accepts wsPath param * Text mode: no microphone capture, no playback context (unless playAudioReply) * New sendText() method for event 501 payload * handleAudioMessage drops audio in text mode without playback * Export NativeVoiceService class for multi-instance usage - ChatPanel (test2): new useS2S / playAudioReply props * useS2S=true: creates NativeVoiceService instance, connects to /ws/realtime-text * subtitle events drive streaming UI, assistant_pending drives loading state * handleSend routes to WebSocket in S2S mode, HTTP/SSE in Coze mode * Voice link code path zero-changed Verification: mvn test VoiceGatewaySmokeTest 20/20 pass, voice link regression-free
This commit is contained in:
@@ -2,8 +2,9 @@ import { useState, useRef, useEffect, useCallback } from 'react';
|
||||
import { Send, Bot, User, Loader2, ArrowLeft, Sparkles, Wrench, StopCircle } from 'lucide-react';
|
||||
import { startChatSession, sendMessageStream } from '../services/chatApi';
|
||||
import { getSessionHistory } from '../services/voiceApi';
|
||||
import { NativeVoiceService } from '../services/nativeVoiceService';
|
||||
|
||||
export default function ChatPanel({ sessionId, voiceSubtitles, settings, onBack, onMessagesChange }) {
|
||||
export default function ChatPanel({ sessionId, voiceSubtitles, settings, onBack, onMessagesChange, useS2S = false, playAudioReply = false }) {
|
||||
const [messages, setMessages] = useState([]);
|
||||
const [input, setInput] = useState('');
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
@@ -14,51 +15,143 @@ export default function ChatPanel({ sessionId, voiceSubtitles, settings, onBack,
|
||||
const scrollRef = useRef(null);
|
||||
const inputRef = useRef(null);
|
||||
const abortRef = useRef(null);
|
||||
const s2sServiceRef = useRef(null);
|
||||
const s2sStreamingIdRef = useRef(null);
|
||||
|
||||
// S2S text mode: open a dedicated WebSocket (/ws/realtime-text) through a
// private NativeVoiceService instance and let its events drive the chat UI.
// The effect re-runs (tearing down the previous connection first) whenever the
// session or any connection-relevant setting changes.
useEffect(() => {
  if (!useS2S || !sessionId) {
    return undefined;
  }

  // Cleared by the cleanup below. Every callback checks it so that a service
  // that has already been torn down (unmount, or a dependency change that
  // started a newer connection) can no longer touch component state.
  let isActive = true;

  const svc = new NativeVoiceService();
  s2sServiceRef.current = svc;

  // Leave the "assistant is answering" state.
  const clearPendingReply = () => {
    setIsLoading(false);
    setStreamingId(null);
    s2sStreamingIdRef.current = null;
  };

  svc.on('onSubtitle', (data) => {
    if (!isActive || !data || !data.role) return;
    // A user subtitle only echoes the message handleSend already inserted.
    if (data.role === 'user') return;

    // Assistant subtitle: each event carries the text to display; isFinal
    // marks the last one for the current reply.
    const assistantId = s2sStreamingIdRef.current;
    if (!assistantId) return;

    setMessages((prev) => prev.map((m) => (
      m.id === assistantId
        ? { ...m, content: data.text || '', streaming: !data.isFinal }
        : m
    )));

    if (data.isFinal) {
      clearPendingReply();
      inputRef.current?.focus();
    }
  });

  svc.on('onAssistantPending', (active) => {
    if (!isActive) return;
    setIsLoading(!!active);
  });

  svc.on('onError', (err) => {
    if (!isActive) return;
    setError(err?.message || 'S2S 文字模式错误');
    clearPendingReply();
  });

  svc.on('onIdleTimeout', () => {
    if (!isActive) return;
    setError('S2S 连接超时,已断开。请刷新页面重连');
    setIsInitialized(false);
  });

  svc.on('onConnectionStateChange', (state) => {
    if (!isActive) return;
    // 'connected' needs no handling here: readiness is signalled by the
    // connect() promise resolving below.
    if (state === 'disconnected' || state === 'error') {
      setIsInitialized(false);
    }
  });

  (async () => {
    try {
      await svc.connect({
        sessionId,
        userId: settings?.userId || '',
        botName: settings?.botName || '大沃',
        speaker: settings?.speaker || 'zh_female_vv_jupiter_bigtts',
        modelVersion: settings?.modelVersion || 'O',
        clientMode: 'text',
        playAudioReply: !!playAudioReply,
        disableGreeting: true,
      });
      // Ignore a connect that finished after this effect was cleaned up.
      if (isActive) {
        setIsInitialized(true);
      }
    } catch (e) {
      // A failure of an already-abandoned connection is not the user's problem.
      if (isActive) {
        setError(`S2S 连接失败:${e?.message || e}`);
      }
    }
  })();

  return () => {
    isActive = false;
    // Best-effort teardown: a failed disconnect must not break unmounting,
    // but it is logged rather than silently discarded.
    svc.disconnect().catch((e) => {
      console.warn('[ChatPanel] S2S disconnect failed:', e?.message || e);
    });
    s2sServiceRef.current = null;
    s2sStreamingIdRef.current = null;
  };
}, [useS2S, sessionId, settings?.userId, settings?.botName, settings?.speaker, settings?.modelVersion, playAudioReply]);
|
||||
|
||||
// 初始化:创建聊天会话,优先从数据库加载完整历史
|
||||
useEffect(() => {
|
||||
if (useS2S) {
|
||||
// S2S mode handles init in its own effect
|
||||
return;
|
||||
}
|
||||
async function init() {
|
||||
// 1. 从数据库加载历史(独立于 Coze 会话,不受其失败影响)
|
||||
let historyMsgs = [];
|
||||
try {
|
||||
// 启动后端聊天会话(后端会从 DB 加载历史注入 Coze 上下文)
|
||||
await startChatSession(sessionId, voiceSubtitles);
|
||||
setIsInitialized(true);
|
||||
|
||||
// 从数据库加载完整对话历史(包含语音通话中的工具结果)
|
||||
let historyMsgs = [];
|
||||
try {
|
||||
const historyData = await getSessionHistory(sessionId, 20);
|
||||
if (historyData?.messages?.length > 0) {
|
||||
historyMsgs = historyData.messages.map((m, i) => ({
|
||||
id: `history-${i}`,
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
fromVoice: true,
|
||||
}));
|
||||
console.log(`[ChatPanel] Loaded ${historyMsgs.length} messages from DB`);
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn('[ChatPanel] DB history load failed, falling back to subtitles:', e.message);
|
||||
}
|
||||
|
||||
// 如果数据库没有历史,回退到 voiceSubtitles
|
||||
if (historyMsgs.length === 0 && voiceSubtitles && voiceSubtitles.length > 0) {
|
||||
historyMsgs = voiceSubtitles.map((s, i) => ({
|
||||
id: `voice-${i}`,
|
||||
role: s.role === 'user' ? 'user' : 'assistant',
|
||||
content: s.text,
|
||||
const historyData = await getSessionHistory(sessionId, 20);
|
||||
if (historyData?.messages?.length > 0) {
|
||||
historyMsgs = historyData.messages.map((m, i) => ({
|
||||
id: `history-${i}`,
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
fromVoice: true,
|
||||
}));
|
||||
console.log(`[ChatPanel] Loaded ${historyMsgs.length} messages from DB`);
|
||||
}
|
||||
|
||||
if (historyMsgs.length > 0) {
|
||||
setMessages(historyMsgs);
|
||||
}
|
||||
|
||||
inputRef.current?.focus();
|
||||
} catch (err) {
|
||||
console.error('[ChatPanel] Init failed:', err);
|
||||
setError('聊天会话初始化失败');
|
||||
} catch (e) {
|
||||
console.warn('[ChatPanel] DB history load failed, falling back to subtitles:', e.message);
|
||||
}
|
||||
|
||||
// 如果数据库没有历史,回退到 voiceSubtitles
|
||||
if (historyMsgs.length === 0 && voiceSubtitles && voiceSubtitles.length > 0) {
|
||||
historyMsgs = voiceSubtitles.map((s, i) => ({
|
||||
id: `voice-${i}`,
|
||||
role: s.role === 'user' ? 'user' : 'assistant',
|
||||
content: s.text,
|
||||
fromVoice: true,
|
||||
}));
|
||||
}
|
||||
|
||||
if (historyMsgs.length > 0) {
|
||||
setMessages(historyMsgs);
|
||||
}
|
||||
|
||||
// 2. 启动后端聊天会话(自动重试3次,间隔2秒)
|
||||
let initOk = false;
|
||||
for (let attempt = 0; attempt < 3 && !initOk; attempt++) {
|
||||
try {
|
||||
if (attempt > 0) {
|
||||
console.log(`[ChatPanel] Retrying init (attempt ${attempt + 1}/3)...`);
|
||||
await new Promise(r => setTimeout(r, 2000));
|
||||
}
|
||||
await startChatSession(sessionId, voiceSubtitles);
|
||||
initOk = true;
|
||||
setIsInitialized(true);
|
||||
setError(null);
|
||||
} catch (err) {
|
||||
console.error(`[ChatPanel] Init attempt ${attempt + 1} failed:`, err.message);
|
||||
if (attempt === 2) {
|
||||
setError('聊天会话初始化失败,点击重试');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inputRef.current?.focus();
|
||||
}
|
||||
init();
|
||||
}, [sessionId, voiceSubtitles]);
|
||||
@@ -108,6 +201,20 @@ export default function ChatPanel({ sessionId, voiceSubtitles, settings, onBack,
|
||||
// 先插入一个空的 assistant 消息用于流式填充
|
||||
setMessages((prev) => [...prev, { id: assistantId, role: 'assistant', content: '', streaming: true }]);
|
||||
|
||||
// S2S text mode: send via WebSocket, subtitle/pending events drive UI
|
||||
if (useS2S) {
|
||||
const svc = s2sServiceRef.current;
|
||||
if (!svc) {
|
||||
setError('S2S 服务未就绪');
|
||||
setIsLoading(false);
|
||||
setStreamingId(null);
|
||||
return;
|
||||
}
|
||||
s2sStreamingIdRef.current = assistantId;
|
||||
svc.sendText(text);
|
||||
return;
|
||||
}
|
||||
|
||||
const abort = sendMessageStream(sessionId, text, {
|
||||
onChunk: (chunk) => {
|
||||
setMessages((prev) =>
|
||||
@@ -117,6 +224,12 @@ export default function ChatPanel({ sessionId, voiceSubtitles, settings, onBack,
|
||||
onToolCall: (tools) => {
|
||||
setToolsInUse(tools);
|
||||
},
|
||||
onStreamReset: () => {
|
||||
// 内容安全拦截:清空已累积的流式文本,等待 done 事件的安全回复
|
||||
setMessages((prev) =>
|
||||
prev.map((m) => (m.id === assistantId ? { ...m, content: '' } : m))
|
||||
);
|
||||
},
|
||||
onDone: (fullContent) => {
|
||||
setMessages((prev) =>
|
||||
prev.map((m) => (m.id === assistantId ? { ...m, content: fullContent, streaming: false } : m))
|
||||
@@ -137,7 +250,7 @@ export default function ChatPanel({ sessionId, voiceSubtitles, settings, onBack,
|
||||
});
|
||||
|
||||
abortRef.current = abort;
|
||||
}, [input, isLoading, sessionId]);
|
||||
}, [input, isLoading, sessionId, useS2S]);
|
||||
|
||||
const handleKeyDown = (e) => {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
@@ -164,7 +277,9 @@ export default function ChatPanel({ sessionId, voiceSubtitles, settings, onBack,
|
||||
</div>
|
||||
<div>
|
||||
<h3 className="text-sm font-semibold text-white leading-tight">{settings.botName}</h3>
|
||||
<p className="text-[10px] text-slate-500 leading-tight">文字对话模式 · 方舟 LLM</p>
|
||||
<p className="text-[10px] text-slate-500 leading-tight">
|
||||
{useS2S ? '文字对话模式 · S2S' : '文字对话模式 · 方舟 LLM'}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -262,6 +377,17 @@ export default function ChatPanel({ sessionId, voiceSubtitles, settings, onBack,
|
||||
{error && (
|
||||
<div className="mx-4 mb-2 px-3 py-1.5 rounded-lg bg-red-500/10 border border-red-500/20 text-red-400 text-xs">
|
||||
{error}
|
||||
{!isInitialized && (
|
||||
<button onClick={async () => {
|
||||
setError(null);
|
||||
try {
|
||||
await startChatSession(sessionId, voiceSubtitles);
|
||||
setIsInitialized(true);
|
||||
} catch (e) {
|
||||
setError('重试失败,请检查网络后再试');
|
||||
}
|
||||
}} className="ml-2 underline hover:text-red-300 font-medium">重试</button>
|
||||
)}
|
||||
<button onClick={() => setError(null)} className="ml-2 underline hover:text-red-300">关闭</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -25,7 +25,7 @@ class NativeVoiceService {
|
||||
};
|
||||
}
|
||||
|
||||
resolveWebSocketUrl(sessionId, userId) {
|
||||
resolveWebSocketUrl(sessionId, userId, wsPath = '/ws/realtime-dialog') {
|
||||
const query = new URLSearchParams({
|
||||
sessionId,
|
||||
userId: userId || '',
|
||||
@@ -43,16 +43,16 @@ class NativeVoiceService {
|
||||
} else if (base.endsWith('/api')) {
|
||||
base = base.slice(0, -'/api'.length);
|
||||
}
|
||||
return `${base}/ws/realtime-dialog?${query.toString()}`;
|
||||
return `${base}${wsPath}?${query.toString()}`;
|
||||
}
|
||||
const hostname = window.location.hostname;
|
||||
const port = window.location.port;
|
||||
const isLocalHost = hostname === 'localhost' || hostname === '127.0.0.1';
|
||||
if ((window.location.protocol === 'file:' || isLocalHost) && port !== '3012') {
|
||||
return `ws://${hostname || '127.0.0.1'}:3012/ws/realtime-dialog?${query.toString()}`;
|
||||
return `ws://${hostname || '127.0.0.1'}:3012${wsPath}?${query.toString()}`;
|
||||
}
|
||||
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
||||
return `${protocol}//${window.location.host}/ws/realtime-dialog?${query.toString()}`;
|
||||
return `${protocol}//${window.location.host}${wsPath}?${query.toString()}`;
|
||||
}
|
||||
|
||||
emitConnectionState(state) {
|
||||
@@ -80,43 +80,66 @@ class NativeVoiceService {
|
||||
}
|
||||
}
|
||||
|
||||
async connect({ sessionId, userId, botName, systemRole, speakingStyle, modelVersion, speaker, greetingText }) {
|
||||
async connect({
|
||||
sessionId,
|
||||
userId,
|
||||
botName,
|
||||
systemRole,
|
||||
speakingStyle,
|
||||
modelVersion,
|
||||
speaker,
|
||||
greetingText,
|
||||
clientMode = 'voice',
|
||||
playAudioReply = false,
|
||||
disableGreeting = false,
|
||||
} = {}) {
|
||||
await this.disconnect();
|
||||
const wsUrl = this.resolveWebSocketUrl(sessionId, userId);
|
||||
this.clientMode = clientMode;
|
||||
this.playAudioReply = playAudioReply;
|
||||
const wsPath = clientMode === 'text' ? '/ws/realtime-text' : '/ws/realtime-dialog';
|
||||
const wsUrl = this.resolveWebSocketUrl(sessionId, userId, wsPath);
|
||||
this.emitConnectionState('connecting');
|
||||
this.playbackContext = new (window.AudioContext || window.webkitAudioContext)();
|
||||
if (this.playbackContext.state === 'suspended') {
|
||||
await this.playbackContext.resume().catch(() => {});
|
||||
}
|
||||
this.playbackTime = this.playbackContext.currentTime;
|
||||
|
||||
// 安全上下文检查: getUserMedia 需要 HTTPS 或 localhost
|
||||
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
|
||||
const errMsg = window.isSecureContext === false
|
||||
? '麦克风访问需要 HTTPS 连接,请使用 https:// 地址访问'
|
||||
: '当前浏览器不支持麦克风访问';
|
||||
this.emitConnectionState('error', errMsg);
|
||||
throw new Error(errMsg);
|
||||
}
|
||||
|
||||
// 并行: 同时预获取麦克风和建立WS连接,节省500ms+
|
||||
const micPromise = navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
channelCount: 1,
|
||||
noiseSuppression: true,
|
||||
echoCancellation: true,
|
||||
autoGainControl: true,
|
||||
},
|
||||
video: false,
|
||||
}).catch((err) => {
|
||||
console.warn('[NativeVoice] Pre-fetch getUserMedia failed:', err.name, err.message);
|
||||
if (err.name === 'NotAllowedError' || err.message?.includes('Permission denied')) {
|
||||
const msg = '麦克风权限被拒绝,请在浏览器设置中允许本站访问麦克风后重试';
|
||||
this.emitConnectionState('error', msg);
|
||||
throw new Error(msg);
|
||||
// Audio playback context: only needed if we will receive audio (voice mode or text+playAudio)
|
||||
const needsPlayback = clientMode !== 'text' || playAudioReply;
|
||||
if (needsPlayback) {
|
||||
this.playbackContext = new (window.AudioContext || window.webkitAudioContext)();
|
||||
if (this.playbackContext.state === 'suspended') {
|
||||
await this.playbackContext.resume().catch(() => {});
|
||||
}
|
||||
return null;
|
||||
});
|
||||
this.playbackTime = this.playbackContext.currentTime;
|
||||
}
|
||||
|
||||
// Microphone capture: only needed in voice mode
|
||||
let micPromise = Promise.resolve(null);
|
||||
if (clientMode !== 'text') {
|
||||
// 安全上下文检查: getUserMedia 需要 HTTPS 或 localhost
|
||||
if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
|
||||
const errMsg = window.isSecureContext === false
|
||||
? '麦克风访问需要 HTTPS 连接,请使用 https:// 地址访问'
|
||||
: '当前浏览器不支持麦克风访问';
|
||||
this.emitConnectionState('error', errMsg);
|
||||
throw new Error(errMsg);
|
||||
}
|
||||
// 并行: 同时预获取麦克风和建立WS连接,节省500ms+
|
||||
micPromise = navigator.mediaDevices.getUserMedia({
|
||||
audio: {
|
||||
channelCount: 1,
|
||||
noiseSuppression: true,
|
||||
echoCancellation: true,
|
||||
autoGainControl: true,
|
||||
},
|
||||
video: false,
|
||||
}).catch((err) => {
|
||||
console.warn('[NativeVoice] Pre-fetch getUserMedia failed:', err.name, err.message);
|
||||
if (err.name === 'NotAllowedError' || err.message?.includes('Permission denied')) {
|
||||
const msg = '麦克风权限被拒绝,请在浏览器设置中允许本站访问麦克风后重试';
|
||||
this.emitConnectionState('error', msg);
|
||||
throw new Error(msg);
|
||||
}
|
||||
return null;
|
||||
});
|
||||
}
|
||||
|
||||
const CONNECTION_TIMEOUT_MS = 12000;
|
||||
|
||||
@@ -151,6 +174,9 @@ class NativeVoiceService {
|
||||
modelVersion,
|
||||
speaker,
|
||||
greetingText,
|
||||
clientMode,
|
||||
playAudioReply,
|
||||
disableGreeting: clientMode === 'text' ? (disableGreeting !== false) : disableGreeting,
|
||||
}));
|
||||
};
|
||||
|
||||
@@ -187,11 +213,21 @@ class NativeVoiceService {
|
||||
};
|
||||
});
|
||||
|
||||
// 文字模式:不启动麦克风采集
|
||||
if (clientMode === 'text') {
|
||||
return;
|
||||
}
|
||||
// 使用预获取的mediaStream(已并行获取),避免重复申请
|
||||
const preFetchedStream = await micPromise;
|
||||
await this.startCapture(preFetchedStream);
|
||||
}
|
||||
|
||||
sendText(text) {
|
||||
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
|
||||
this.ws.send(JSON.stringify({ type: 'text', text: String(text || '') }));
|
||||
}
|
||||
}
|
||||
|
||||
handleJsonMessage(raw) {
|
||||
try {
|
||||
const msg = JSON.parse(raw);
|
||||
@@ -246,6 +282,10 @@ class NativeVoiceService {
|
||||
}
|
||||
|
||||
handleAudioMessage(arrayBuffer) {
|
||||
// Text mode without playAudioReply: drop all incoming audio silently
|
||||
if (this.clientMode === 'text' && !this.playAudioReply) {
|
||||
return;
|
||||
}
|
||||
if (!this.playbackContext) {
|
||||
return;
|
||||
}
|
||||
@@ -441,3 +481,4 @@ class NativeVoiceService {
|
||||
|
||||
const nativeVoiceService = new NativeVoiceService();
|
||||
export default nativeVoiceService;
|
||||
export { NativeVoiceService };
|
||||
|
||||
Reference in New Issue
Block a user