fix(voice-gateway): S2S idle timeout + upstream send lock + iOS AudioContext suspended + port 3012→3013

- P0: S2S DialogAudioIdleTimeoutError now notifies client instead of force-closing, sets upstreamReady=false and cancels keepalive
- P0: Reduce audioKeepaliveIntervalMs from 20s to 8s to prevent S2S idle timeout
- P1: Add upstreamSendLock to prevent concurrent IllegalStateException: Send pending
- P1: iOS AudioContext suspended handling - buffer audio chunks and try resume after user interaction
- P1: disconnect() clears pendingAudioChunks and _resuming to prevent memory leak
- Fix: Frontend hardcoded port 3012→3013 in videoApi.js and vite.config.js
- Add complete Java backend source code to git tracking
This commit is contained in:
User
2026-04-16 19:16:11 +08:00
parent fe25229de7
commit ff6a63147b
93 changed files with 10557 additions and 23 deletions

View File

@@ -0,0 +1,122 @@
import axios from 'axios';
/**
 * Picks the base URL for an API group.
 *
 * Resolution order:
 *  1. an explicitly configured value wins;
 *  2. without a `window` global the bare path is returned;
 *  3. pages loaded via `file:` target http://127.0.0.1:3013;
 *  4. pages on no port / 80 / 443 / 3013 use the relative path;
 *  5. any other port is redirected to port 3013 on the same host.
 *
 * @param {string|undefined} configured - value from build-time configuration
 * @param {string} path - API path prefix such as '/api/chat'
 * @returns {string} base URL (absolute or relative)
 */
function resolveApiBaseURL(configured, path) {
  if (configured) {
    return configured;
  }
  if (typeof window === 'undefined') {
    return path;
  }

  const { hostname, port, protocol: pageProtocol } = window.location;

  if (pageProtocol === 'file:') {
    return `http://127.0.0.1:3013${path}`;
  }

  const relativePorts = ['80', '443', '3013'];
  if (!port || relativePorts.includes(port)) {
    return path;
  }

  const scheme = pageProtocol === 'https:' ? 'https:' : 'http:';
  const host = hostname || '127.0.0.1';
  return `${scheme}//${host}:3013${path}`;
}
// Base URL for chat endpoints: VITE_CHAT_API_BASE_URL when set, otherwise
// whatever resolveApiBaseURL derives for '/api/chat'.
const chatApiBaseURL = resolveApiBaseURL(
  import.meta.env.VITE_CHAT_API_BASE_URL,
  '/api/chat',
);

// Shared axios client for the chat endpoints (30 s request timeout).
const api = axios.create({ baseURL: chatApiBaseURL, timeout: 30000 });
/**
 * POSTs to '/start' to open a chat session.
 *
 * @param {string} sessionId
 * @param {Array} [voiceSubtitles=[]] - sent as-is in the request body
 * @param {string} [systemPrompt=''] - sent as-is in the request body
 * @returns {Promise<*>} the response body
 */
export async function startChatSession(sessionId, voiceSubtitles = [], systemPrompt = '') {
  const payload = { sessionId, voiceSubtitles, systemPrompt };
  const response = await api.post('/start', payload);
  return response.data;
}
/**
 * POSTs a single chat message to '/send' and returns the response body.
 *
 * @param {string} sessionId
 * @param {string} message
 * @returns {Promise<*>} the response body
 */
export async function sendMessage(sessionId, message) {
  const response = await api.post('/send', { sessionId, message });
  return response.data;
}
/**
 * Sends a message and consumes the reply as a server-sent-event stream,
 * invoking a callback for each event as it arrives.
 *
 * Each event is expected to be a `data: <json>` block whose JSON carries a
 * `type` of 'chunk', 'tool_call', 'stream_reset', 'done' or 'error'. Blocks
 * that do not start with `data: ` or whose JSON cannot be parsed are skipped.
 *
 * @param {string} sessionId
 * @param {string} message
 * @param {object} [callbacks]
 * @param {function(*): void} [callbacks.onChunk] - receives `content` of a 'chunk' event
 * @param {function(*): void} [callbacks.onToolCall] - receives `tools` of a 'tool_call' event
 * @param {function(*): void} [callbacks.onStreamReset] - receives `reason` of a 'stream_reset' event
 * @param {function(*): void} [callbacks.onDone] - receives `content` of a 'done' event
 * @param {function(*): void} [callbacks.onError] - receives an error message (HTTP failure,
 *   'error' event, or a network/stream failure other than an abort)
 * @returns {function(): void} abort - call to cancel the request
 */
export function sendMessageStream(sessionId, message, { onChunk, onToolCall, onStreamReset, onDone, onError } = {}) {
  const controller = new AbortController();
  // A blank line ends an event; accept both LF and CRLF line endings.
  const eventSeparator = /\r?\n\r?\n/;

  // Parses one raw event block and routes it to the matching callback.
  const dispatchEvent = (rawEvent) => {
    const trimmed = rawEvent.trim();
    if (!trimmed.startsWith('data: ')) {
      return;
    }
    try {
      const data = JSON.parse(trimmed.slice(6));
      switch (data.type) {
        case 'chunk':
          onChunk?.(data.content);
          break;
        case 'tool_call':
          onToolCall?.(data.tools);
          break;
        case 'stream_reset':
          onStreamReset?.(data.reason);
          break;
        case 'done':
          onDone?.(data.content);
          break;
        case 'error':
          onError?.(data.error);
          break;
      }
    } catch (e) {
      // skip malformed SSE
    }
  };

  (async () => {
    try {
      const response = await fetch(`${chatApiBaseURL}/send-stream`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ sessionId, message }),
        signal: controller.signal,
      });
      if (!response.ok) {
        const err = await response.json().catch(() => ({ error: response.statusText }));
        onError?.(err.error || 'Request failed');
        return;
      }
      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      let buffer = '';
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        buffer += decoder.decode(value, { stream: true });
        const events = buffer.split(eventSeparator);
        // The last piece may be an incomplete event; keep it for the next read.
        buffer = events.pop() || '';
        events.forEach(dispatchEvent);
      }
      // Flush bytes still held by the decoder and deliver a final event that
      // was not followed by a blank line before the stream closed.
      buffer += decoder.decode();
      if (buffer.trim()) {
        dispatchEvent(buffer);
      }
    } catch (err) {
      if (err.name !== 'AbortError') {
        onError?.(err.message || 'Stream failed');
      }
    }
  })();
  return () => controller.abort();
}
/**
 * Fetches the stored history of a chat session.
 *
 * @param {string} sessionId
 * @returns {Promise<*>} the `data` field of the response body
 */
export async function getChatHistory(sessionId) {
  // Encode the id so characters such as '/', '?' or '#' cannot alter the path.
  const { data } = await api.get(`/history/${encodeURIComponent(sessionId)}`);
  return data.data;
}
/**
 * Deletes a chat session.
 *
 * @param {string} sessionId
 * @returns {Promise<*>} the response body
 */
export async function deleteChatSession(sessionId) {
  // Encode the id so characters such as '/', '?' or '#' cannot alter the path.
  const { data } = await api.delete(`/${encodeURIComponent(sessionId)}`);
  return data;
}

View File

@@ -0,0 +1,426 @@
/**
 * Browser client for the realtime voice dialog WebSocket endpoint
 * (`/ws/realtime-dialog`).
 *
 * - Captures microphone audio, downsamples it to 16 kHz and streams it as
 *   16-bit PCM frames of 320 samples.
 * - Plays binary frames received from the server as 24 kHz mono 16-bit PCM.
 * - Forwards JSON control messages to the callbacks registered via `on()`.
 */
class NativeVoiceService {
  constructor() {
    this.ws = null;
    this.mediaStream = null;
    this.captureContext = null;
    this.captureSource = null;
    this.captureProcessor = null;
    this.captureSilenceGain = null;
    this.playbackContext = null;
    // AudioContext time at which the next received chunk should start playing.
    this.playbackTime = 0;
    this.activeSources = new Set();
    // Downsampled microphone samples not yet packed into a 320-sample frame.
    this.pendingSamples = [];
    // Received audio held back while the playback context is suspended.
    this.pendingAudioChunks = [];
    this._resuming = false;
    // Settle functions of the in-flight connect() handshake (null when none).
    this.readyResolver = null;
    this.readyRejector = null;
    this.callbacks = {
      onSubtitle: null,
      onConnectionStateChange: null,
      onError: null,
      onAssistantPending: null,
      onDiagnostic: null,
      onIdleTimeout: null,
      onProductLink: null,
    };
  }

  /**
   * Builds the WebSocket URL for a session.
   *
   * An absolute VITE_VOICE_WS_BASE_URL / VITE_VOICE_API_BASE_URL is converted
   * from http(s) to ws(s) and stripped of a trailing '/api/voice' or '/api'.
   * Otherwise local pages (file: or localhost/127.0.0.1 on a port other than
   * 3013) connect to port 3013, and everything else uses the page's own host.
   *
   * @param {string} sessionId
   * @param {string} [userId]
   * @returns {string}
   */
  resolveWebSocketUrl(sessionId, userId) {
    const query = new URLSearchParams({
      sessionId,
      userId: userId || '',
    });
    const configuredBase = import.meta.env.VITE_VOICE_WS_BASE_URL || import.meta.env.VITE_VOICE_API_BASE_URL || '';
    if (configuredBase && !configuredBase.startsWith('/')) {
      let base = configuredBase.replace(/\/$/, '');
      if (base.startsWith('https://')) {
        base = `wss://${base.slice('https://'.length)}`;
      } else if (base.startsWith('http://')) {
        base = `ws://${base.slice('http://'.length)}`;
      }
      if (base.endsWith('/api/voice')) {
        base = base.slice(0, -'/api/voice'.length);
      } else if (base.endsWith('/api')) {
        base = base.slice(0, -'/api'.length);
      }
      return `${base}/ws/realtime-dialog?${query.toString()}`;
    }
    const hostname = window.location.hostname;
    const port = window.location.port;
    const isLocalHost = hostname === 'localhost' || hostname === '127.0.0.1';
    if ((window.location.protocol === 'file:' || isLocalHost) && port !== '3013') {
      return `ws://${hostname || '127.0.0.1'}:3013/ws/realtime-dialog?${query.toString()}`;
    }
    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
    return `${protocol}//${window.location.host}/ws/realtime-dialog?${query.toString()}`;
  }

  /** Reports a connection state string to the onConnectionStateChange callback. */
  emitConnectionState(state) {
    this.callbacks.onConnectionStateChange?.(state);
  }

  /** Reports a timestamped diagnostic record to the onDiagnostic callback. */
  emitDiagnostic(type, payload) {
    this.callbacks.onDiagnostic?.({ type, payload, timestamp: Date.now() });
  }

  /** Stops every scheduled playback source and restarts the playback clock. */
  resetPlaybackQueue() {
    this.activeSources.forEach((source) => {
      try {
        source.stop();
      } catch (_) {}
      try {
        source.disconnect();
      } catch (_) {}
    });
    this.activeSources.clear();
    if (this.playbackContext) {
      this.playbackTime = this.playbackContext.currentTime + 0.02;
    } else {
      this.playbackTime = 0;
    }
  }

  /**
   * Tears down any previous connection, opens the WebSocket, sends the
   * 'start' message, waits for the server's 'ready' message (or a 12 s
   * timeout, which proceeds anyway) and then starts microphone capture.
   *
   * @param {object} options - fields forwarded in the 'start' message
   * @returns {Promise<void>}
   * @throws {Error} when the socket errors or closes before 'ready', when
   *   disconnect() is called while connecting, or when capture cannot start
   */
  async connect({ sessionId, userId, botName, systemRole, speakingStyle, modelVersion, speaker, greetingText }) {
    await this.disconnect();
    const wsUrl = this.resolveWebSocketUrl(sessionId, userId);
    this.emitConnectionState('connecting');
    this.playbackContext = new (window.AudioContext || window.webkitAudioContext)();
    if (this.playbackContext.state === 'suspended') {
      await this.playbackContext.resume().catch(() => {});
    }
    this.playbackTime = this.playbackContext.currentTime;
    // Request the microphone while the WebSocket connects so the two waits
    // overlap. A failure here is tolerated; startCapture() asks again.
    const micPromise = navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: 1,
        noiseSuppression: true,
        echoCancellation: true,
        autoGainControl: true,
      },
      video: false,
    }).catch((err) => {
      console.warn('[NativeVoice] Pre-fetch getUserMedia failed:', err.message);
      return null;
    });
    const CONNECTION_TIMEOUT_MS = 12000;
    let socket = null;
    let preFetchedStream = null;
    try {
      await new Promise((resolve, reject) => {
        const ws = new WebSocket(wsUrl);
        ws.binaryType = 'arraybuffer';
        socket = ws;
        this.ws = ws;
        // Events from a socket that has been replaced or disconnected must
        // not touch state that now belongs to a newer connection.
        const isCurrent = () => this.ws === ws;
        // Timeout fallback: avoid waiting forever for 'ready'.
        const timeoutId = setTimeout(() => {
          if (!isCurrent() || !this.readyResolver) {
            return;
          }
          console.warn(`[NativeVoice] Connection timeout (${CONNECTION_TIMEOUT_MS}ms), forcing ready`);
          this.readyResolver();
          this.readyResolver = null;
          this.readyRejector = null;
        }, CONNECTION_TIMEOUT_MS);
        // Whoever settles the handshake also cancels the timeout.
        this.readyResolver = () => {
          clearTimeout(timeoutId);
          resolve();
        };
        this.readyRejector = (error) => {
          clearTimeout(timeoutId);
          reject(error);
        };
        ws.onopen = () => {
          if (!isCurrent()) {
            return;
          }
          this.emitConnectionState('connected');
          ws.send(JSON.stringify({
            type: 'start',
            sessionId,
            userId,
            botName,
            systemRole,
            speakingStyle,
            modelVersion,
            speaker,
            greetingText,
          }));
        };
        ws.onerror = () => {
          if (!isCurrent()) {
            return;
          }
          const error = new Error('WebSocket connection failed');
          this.callbacks.onError?.(error);
          const rejectReady = this.readyRejector;
          this.readyResolver = null;
          this.readyRejector = null;
          rejectReady?.(error);
        };
        ws.onclose = () => {
          if (!isCurrent()) {
            return;
          }
          this.emitConnectionState('disconnected');
          const rejectReady = this.readyRejector;
          this.readyResolver = null;
          this.readyRejector = null;
          rejectReady?.(new Error('WebSocket closed before ready'));
        };
        ws.onmessage = (event) => {
          if (!isCurrent()) {
            return;
          }
          if (typeof event.data === 'string') {
            this.handleJsonMessage(event.data);
            return;
          }
          this.handleAudioMessage(event.data);
        };
      });
      // Reuse the stream requested in parallel instead of prompting again.
      preFetchedStream = await micPromise;
      if (this.ws !== socket) {
        throw new Error('Voice connection was closed before capture started');
      }
    } catch (error) {
      // The handshake failed: release the pre-fetched microphone so it does
      // not stay live with nothing consuming it.
      const orphanStream = preFetchedStream || (await micPromise);
      orphanStream?.getTracks().forEach((track) => track.stop());
      throw error;
    }
    await this.startCapture(preFetchedStream);
  }

  /**
   * Parses a text frame and dispatches it by `type`. Unknown types are
   * reported as 'ws_message' diagnostics; text that fails to parse (or whose
   * handling throws) is reported as a 'ws_raw_text' diagnostic.
   *
   * @param {string} raw
   */
  handleJsonMessage(raw) {
    try {
      const msg = JSON.parse(raw);
      if (msg.type === 'ready') {
        this.readyResolver?.();
        this.readyResolver = null;
        this.readyRejector = null;
        return;
      }
      if (msg.type === 'subtitle') {
        this.callbacks.onSubtitle?.({
          text: msg.text,
          role: msg.role,
          isFinal: !!msg.isFinal,
          sequence: msg.sequence,
        });
        return;
      }
      if (msg.type === 'tts_reset') {
        this.resetPlaybackQueue();
        this.emitDiagnostic('tts_reset', msg);
        return;
      }
      if (msg.type === 'assistant_pending') {
        this.callbacks.onAssistantPending?.(!!msg.active);
        return;
      }
      if (msg.type === 'idle_timeout') {
        this.callbacks.onIdleTimeout?.(msg.timeout || 300000);
        return;
      }
      if (msg.type === 'product_link') {
        this.callbacks.onProductLink?.({
          product: msg.product,
          link: msg.link,
          description: msg.description,
        });
        return;
      }
      if (msg.type === 'upstream_closed') {
        this.callbacks.onError?.(new Error('语音服务已断开,请重新开始通话'));
        return;
      }
      if (msg.type === 'error') {
        this.callbacks.onError?.(new Error(msg.error || 'native voice error'));
        return;
      }
      this.emitDiagnostic('ws_message', msg);
    } catch (error) {
      this.emitDiagnostic('ws_raw_text', raw);
    }
  }

  /**
   * Plays a binary audio frame, or queues it and attempts to resume the
   * playback context when that context is suspended.
   *
   * @param {ArrayBuffer} arrayBuffer
   */
  handleAudioMessage(arrayBuffer) {
    if (!this.playbackContext) {
      return;
    }
    if (this.playbackContext.state === 'suspended') {
      this.pendingAudioChunks.push(arrayBuffer);
      this._tryResumePlayback();
      return;
    }
    this._playPcm(arrayBuffer);
  }

  /**
   * Schedules one 16-bit PCM chunk (treated as 24 kHz mono) directly after
   * the previously scheduled chunk. Failures are logged, not thrown.
   *
   * @param {ArrayBuffer} arrayBuffer
   */
  _playPcm(arrayBuffer) {
    try {
      const pcm16 = new Int16Array(arrayBuffer);
      if (!pcm16.length) {
        return;
      }
      const audioBuffer = this.playbackContext.createBuffer(1, pcm16.length, 24000);
      const channel = audioBuffer.getChannelData(0);
      for (let i = 0; i < pcm16.length; i += 1) {
        channel[i] = pcm16[i] / 32768;
      }
      const source = this.playbackContext.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(this.playbackContext.destination);
      this.activeSources.add(source);
      source.onended = () => {
        this.activeSources.delete(source);
        try {
          source.disconnect();
        } catch (_) {}
      };
      const now = this.playbackContext.currentTime;
      // If the queue has run dry, restart slightly in the future.
      if (this.playbackTime < now) {
        this.playbackTime = now + 0.02;
      }
      source.start(this.playbackTime);
      this.playbackTime += audioBuffer.duration;
      this.emitDiagnostic('audio_chunk', { samples: pcm16.length, duration: audioBuffer.duration });
    } catch (err) {
      console.warn('[NativeVoice] playPcm failed:', err.message);
    }
  }

  /**
   * Resumes the playback context and then plays every queued chunk in
   * arrival order. Calls made while a resume is in flight are ignored.
   */
  async _tryResumePlayback() {
    if (this._resuming) return;
    this._resuming = true;
    try {
      await this.playbackContext.resume();
      while (this.pendingAudioChunks.length > 0) {
        this._playPcm(this.pendingAudioChunks.shift());
      }
    } catch (e) {
      console.warn('[NativeVoice] resume failed:', e.message);
    } finally {
      this._resuming = false;
    }
  }

  /**
   * Starts microphone capture and streams 320-sample 16-bit PCM frames over
   * the WebSocket while it is open.
   *
   * @param {MediaStream|null} [preFetchedStream] - stream obtained earlier;
   *   when absent the microphone is requested here
   */
  async startCapture(preFetchedStream) {
    this.mediaStream = preFetchedStream || await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: 1,
        noiseSuppression: true,
        echoCancellation: true,
        autoGainControl: true,
      },
      video: false,
    });
    this.captureContext = new (window.AudioContext || window.webkitAudioContext)();
    this.captureSource = this.captureContext.createMediaStreamSource(this.mediaStream);
    this.captureProcessor = this.captureContext.createScriptProcessor(4096, 1, 1);
    // Zero-gain node between the processor and the destination, so the
    // captured signal reaches the output at gain 0.
    this.captureSilenceGain = this.captureContext.createGain();
    this.captureSilenceGain.gain.value = 0;
    this.captureProcessor.onaudioprocess = (event) => {
      const input = event.inputBuffer.getChannelData(0);
      const downsampled = this.downsampleBuffer(input, this.captureContext.sampleRate, 16000);
      for (let i = 0; i < downsampled.length; i += 1) {
        this.pendingSamples.push(downsampled[i]);
      }
      while (this.pendingSamples.length >= 320) {
        const chunk = this.pendingSamples.splice(0, 320);
        const pcm = new Int16Array(chunk.length);
        for (let i = 0; i < chunk.length; i += 1) {
          const sample = Math.max(-1, Math.min(1, chunk[i]));
          pcm[i] = sample < 0 ? sample * 32768 : sample * 32767;
        }
        if (this.ws && this.ws.readyState === WebSocket.OPEN) {
          this.ws.send(pcm.buffer);
        }
      }
    };
    this.captureSource.connect(this.captureProcessor);
    this.captureProcessor.connect(this.captureSilenceGain);
    this.captureSilenceGain.connect(this.captureContext.destination);
  }

  /**
   * Downsamples by averaging the input samples that fall into each output
   * sample's window. Returns a plain copy when no downsampling is needed.
   *
   * @param {ArrayLike<number>} buffer
   * @param {number} inputRate
   * @param {number} outputRate
   * @returns {number[]}
   */
  downsampleBuffer(buffer, inputRate, outputRate) {
    if (outputRate >= inputRate) {
      return Array.from(buffer);
    }
    const sampleRateRatio = inputRate / outputRate;
    const newLength = Math.round(buffer.length / sampleRateRatio);
    const result = new Array(newLength);
    let offsetResult = 0;
    let offsetBuffer = 0;
    while (offsetResult < result.length) {
      const nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
      let accum = 0;
      let count = 0;
      for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i += 1) {
        accum += buffer[i];
        count += 1;
      }
      result[offsetResult] = count > 0 ? accum / count : 0;
      offsetResult += 1;
      offsetBuffer = nextOffsetBuffer;
    }
    return result;
  }

  /** Enables or disables the microphone's audio tracks. */
  async setMuted(muted) {
    this.mediaStream?.getAudioTracks().forEach((track) => {
      track.enabled = !muted;
    });
  }

  /** Sends a 'replay_greeting' message if the socket is open. */
  requestGreetingReplay() {
    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify({ type: 'replay_greeting' }));
      this.emitDiagnostic('replay_greeting', { sent: true });
    }
  }

  /**
   * Releases capture, socket and playback resources, rejects a connect()
   * that is still waiting for 'ready', and reports 'disconnected'.
   */
  async disconnect() {
    if (this.captureProcessor) {
      this.captureProcessor.disconnect();
      this.captureProcessor.onaudioprocess = null;
      this.captureProcessor = null;
    }
    if (this.captureSource) {
      this.captureSource.disconnect();
      this.captureSource = null;
    }
    if (this.captureSilenceGain) {
      this.captureSilenceGain.disconnect();
      this.captureSilenceGain = null;
    }
    if (this.captureContext) {
      await this.captureContext.close().catch(() => {});
      this.captureContext = null;
    }
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }
    if (this.ws) {
      const ws = this.ws;
      // Detach first so late events from this socket cannot affect a
      // connection opened afterwards.
      this.ws = null;
      ws.onopen = null;
      ws.onmessage = null;
      ws.onerror = null;
      ws.onclose = null;
      try {
        if (ws.readyState === WebSocket.OPEN) {
          ws.send(JSON.stringify({ type: 'stop' }));
        }
      } catch (_) {}
      try {
        // Also close sockets that are still connecting; otherwise they would
        // finish opening after the service has been torn down.
        if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {
          ws.close();
        }
      } catch (_) {}
    }
    // Fail a connect() that is still waiting for 'ready' instead of leaving
    // it pending until the timeout.
    const rejectReady = this.readyRejector;
    this.readyResolver = null;
    this.readyRejector = null;
    rejectReady?.(new Error('WebSocket closed before ready'));
    if (this.playbackContext) {
      this.resetPlaybackQueue();
      await this.playbackContext.close().catch(() => {});
      this.playbackContext = null;
    }
    this.playbackTime = 0;
    this.pendingSamples = [];
    this.pendingAudioChunks = [];
    this._resuming = false;
    this.emitConnectionState('disconnected');
  }

  /** Registers `callback` for a known event name; unknown names are ignored. */
  on(event, callback) {
    if (event in this.callbacks) {
      this.callbacks[event] = callback;
    }
  }

  /** Clears the callback of a known event name. */
  off(event) {
    if (event in this.callbacks) {
      this.callbacks[event] = null;
    }
  }
}
// Single shared NativeVoiceService instance, created at module load and
// exposed as the module's default export.
const nativeVoiceService = new NativeVoiceService();
export default nativeVoiceService;

View File

@@ -0,0 +1,80 @@
import axios from 'axios';
/**
 * Resolves the base URL used for an API group.
 *
 * A configured value is returned untouched. Without a `window` global the
 * relative path is returned. In a browser, `file:` pages use
 * http://127.0.0.1:3013, pages with no port or port 80/443/3013 use the
 * relative path, and pages on any other port are pointed at port 3013 of
 * the same host.
 *
 * @param {string|undefined} configured - explicit base URL, if any
 * @param {string} path - API path prefix, e.g. '/api/video'
 * @returns {string}
 */
function resolveApiBaseURL(configured, path) {
  if (configured) return configured;
  if (typeof window === 'undefined') return path;

  const loc = window.location;
  if (loc.protocol === 'file:') {
    return `http://127.0.0.1:3013${path}`;
  }

  const usesRelativePath = !loc.port || ['80', '443', '3013'].includes(loc.port);
  if (usesRelativePath) {
    return path;
  }

  const scheme = loc.protocol === 'https:' ? 'https:' : 'http:';
  return `${scheme}//${loc.hostname || '127.0.0.1'}:3013${path}`;
}
// Base URL for video endpoints: VITE_VIDEO_API_BASE_URL when set, otherwise
// whatever resolveApiBaseURL derives for '/api/video'.
const videoApiBaseURL = resolveApiBaseURL(
  import.meta.env.VITE_VIDEO_API_BASE_URL,
  '/api/video',
);

// Shared axios client for the video endpoints (60 s request timeout).
const api = axios.create({ baseURL: videoApiBaseURL, timeout: 60000 });
/**
 * Submits a video generation task as multipart/form-data.
 *
 * Only truthy fields are added to the form; `seconds` is sent as a string.
 *
 * @param {object} options
 * @param {string} [options.prompt]
 * @param {string} [options.product]
 * @param {string} [options.username]
 * @param {string} [options.template]
 * @param {string} [options.size]
 * @param {number|string} [options.seconds]
 * @param {*} [options.image] - appended under the 'image' field
 * @returns {Promise<*>} the response body
 */
export async function generateVideo({ prompt, product, username, template, size, seconds, image }) {
  // Field order matches the order in which the form entries are appended.
  const fields = [
    ['prompt', prompt],
    ['product', product],
    ['username', username],
    ['template', template],
    ['size', size],
    ['seconds', seconds ? String(seconds) : seconds],
    ['image', image],
  ];

  const formData = new FormData();
  for (const [name, value] of fields) {
    if (value) {
      formData.append(name, value);
    }
  }

  const requestConfig = { headers: { 'Content-Type': 'multipart/form-data' } };
  const response = await api.post('/generate', formData, requestConfig);
  return response.data;
}
/**
 * Queries the status of a video generation task.
 *
 * @param {string} taskId
 * @returns {Promise<*>} the response body
 */
export async function getTaskStatus(taskId) {
  // Encode the id so characters such as '/', '?' or '#' cannot alter the path.
  const { data } = await api.get(`/task/${encodeURIComponent(taskId)}`);
  return data;
}
/**
 * Fetches the video history.
 *
 * @param {object} [options]
 * @param {string} [options.username] - sent as a query parameter only when truthy
 * @param {number} [options.limit=20]
 * @param {number} [options.offset=0]
 * @returns {Promise<*>} the response body
 */
export async function getVideoHistory({ username, limit = 20, offset = 0 } = {}) {
  const params = {
    limit,
    offset,
    ...(username ? { username } : {}),
  };
  const response = await api.get('/history', { params });
  return response.data;
}
/**
 * Fetches the admin configuration.
 *
 * @returns {Promise<*>} the response body
 */
export async function getAdminConfig() {
  const response = await api.get('/admin/config');
  return response.data;
}
/**
 * Updates the model configuration.
 *
 * @param {*} model - sent as the `model` field of the request body
 * @returns {Promise<*>} the response body
 */
export async function updateAdminConfig(model) {
  const payload = { model };
  const response = await api.post('/admin/config', payload);
  return response.data;
}

View File

@@ -0,0 +1,61 @@
import axios from 'axios';
/**
 * Determines which base URL an API client should use.
 *
 * - A configured value is used as-is.
 * - With no `window` global, the relative path is used.
 * - On a `file:` page, http://127.0.0.1:3013 is used.
 * - On a page with no port, or port 80, 443 or 3013, the relative path is used.
 * - Otherwise requests go to port 3013 on the page's host, over https when
 *   the page is https and http in every other case.
 *
 * @param {string|undefined} configured - explicit base URL, if any
 * @param {string} path - API path prefix, e.g. '/api/voice'
 * @returns {string}
 */
function resolveApiBaseURL(configured, path) {
  if (configured) {
    return configured;
  }
  if (typeof window === 'undefined') {
    return path;
  }

  const { protocol: pageProtocol, hostname, port } = window.location;
  const isFilePage = pageProtocol === 'file:';
  if (isFilePage) {
    return `http://127.0.0.1:3013${path}`;
  }

  switch (port) {
    case '80':
    case '443':
    case '3013':
      return path;
    default:
      if (!port) {
        return path;
      }
  }

  const scheme = pageProtocol === 'https:' ? 'https:' : 'http:';
  const targetHost = hostname || '127.0.0.1';
  return `${scheme}//${targetHost}:3013${path}`;
}
// Base URLs for the voice and session endpoints; each uses its VITE_* value
// when set and otherwise whatever resolveApiBaseURL derives for the path.
const voiceApiBaseURL = resolveApiBaseURL(
  import.meta.env.VITE_VOICE_API_BASE_URL,
  '/api/voice',
);
const sessionApiBaseURL = resolveApiBaseURL(
  import.meta.env.VITE_SESSION_API_BASE_URL,
  '/api/session',
);

// Shared axios client for the voice endpoints (10 s request timeout).
const api = axios.create({ baseURL: voiceApiBaseURL, timeout: 10000 });
/**
 * Fetches the voice configuration.
 *
 * @returns {Promise<*>} the `data` field of the response body
 */
export async function getVoiceConfig() {
  const response = await api.get('/config');
  return response.data.data;
}
// ========== Session history API ==========
// Shared axios client for the session endpoints (10 s request timeout).
const sessionApi = axios.create({ baseURL: sessionApiBaseURL, timeout: 10000 });
/**
 * Fetches the history of a session.
 *
 * @param {string} sessionId
 * @param {number} [limit=20] - sent as the `limit` query parameter
 * @returns {Promise<*>} the `data` field of the response body
 */
export async function getSessionHistory(sessionId, limit = 20) {
  // Encode the id so characters such as '/', '?' or '#' cannot alter the path.
  const { data } = await sessionApi.get(`/${encodeURIComponent(sessionId)}/history`, { params: { limit } });
  return data.data;
}
/**
 * Requests a mode switch for a session.
 *
 * @param {string} sessionId
 * @param {*} targetMode - sent as the `targetMode` field of the request body
 * @returns {Promise<*>} the `data` field of the response body
 */
export async function switchSessionMode(sessionId, targetMode) {
  // Encode the id so characters such as '/', '?' or '#' cannot alter the path.
  const { data } = await sessionApi.post(`/${encodeURIComponent(sessionId)}/switch`, { targetMode });
  return data.data;
}
/**
 * Lists sessions.
 *
 * @param {string} [userId] - sent as a query parameter only when truthy
 * @param {number} [limit=50] - sent as the `limit` query parameter
 * @returns {Promise<*>} the `data` field of the response body
 */
export async function getSessionList(userId, limit = 50) {
  const params = {
    limit,
    ...(userId ? { userId } : {}),
  };
  const response = await sessionApi.get('/list', { params });
  return response.data.data;
}
/**
 * Deletes a session.
 *
 * @param {string} sessionId
 * @returns {Promise<*>} the response body
 */
export async function deleteSessionById(sessionId) {
  // Encode the id so characters such as '/', '?' or '#' cannot alter the path.
  const { data } = await sessionApi.delete(`/${encodeURIComponent(sessionId)}`);
  return data;
}

View File

@@ -0,0 +1,27 @@
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';
import tailwindcss from '@tailwindcss/vite';
// Backend that the dev server proxies API and WebSocket traffic to.
const backendTarget = 'http://localhost:3013';

// Builds one proxy rule aimed at the backend; `extra` adds per-route options.
const proxyToBackend = (extra = {}) => ({
  target: backendTarget,
  changeOrigin: true,
  ...extra,
});

export default defineConfig({
  plugins: [react(), tailwindcss()],
  build: { outDir: 'dist', sourcemap: false },
  server: {
    port: 5174,
    proxy: {
      '/api': proxyToBackend(),
      // WebSocket upgrade requests under /ws are proxied as well.
      '/ws': proxyToBackend({ ws: true }),
    },
  },
});