feat: 添加realtime_dialog和realtime_dialog_external_rag_test项目,更新test2项目

This commit is contained in:
User
2026-03-13 13:06:46 +08:00
parent 9dab61345c
commit 5521b673f5
215 changed files with 7626 additions and 1876 deletions

View File

@@ -1,7 +1,26 @@
import axios from 'axios';
/**
 * Pick the base URL for an HTTP API client.
 *
 * Resolution order:
 *  1. An explicit configured value (e.g. from a Vite env var) always wins.
 *  2. Outside a browser (no `window`), fall back to the relative path.
 *  3. When the page is opened from `file:` or a localhost origin that is
 *     NOT already on port 3012, target the local backend on port 3012.
 *  4. Otherwise keep the relative path (same-origin / reverse proxy).
 *
 * @param {string|undefined} configured - explicit base URL override
 * @param {string} path - relative API prefix, e.g. '/api/chat'
 * @returns {string} base URL to hand to axios
 */
function resolveApiBaseURL(configured, path) {
  if (configured) return configured;
  if (typeof window === 'undefined') return path;

  const { hostname, port, protocol: pageProtocol } = window.location;
  const onLocalMachine = hostname === 'localhost' || hostname === '127.0.0.1';
  const isDevOrigin = pageProtocol === 'file:' || onLocalMachine;
  if (!isDevOrigin || port === '3012') {
    return path;
  }
  // file: pages report protocol 'file:', so anything non-https maps to http.
  const scheme = pageProtocol === 'https:' ? 'https:' : 'http:';
  return `${scheme}//${hostname || '127.0.0.1'}:3012${path}`;
}
// Resolve the chat API base URL once at module load (env override wins).
const chatApiBaseURL = resolveApiBaseURL(import.meta.env.VITE_CHAT_API_BASE_URL, '/api/chat');

// Shared axios instance for chat REST endpoints.
// Fix: removed the stale duplicate `baseURL: '/api/chat'` key — with
// duplicate object keys the last one silently wins, so the literal was
// dead code left over from the previous version and only misleading.
const api = axios.create({
  baseURL: chatApiBaseURL,
  timeout: 30000,
});
@@ -27,7 +46,7 @@ export function sendMessageStream(sessionId, message, { onChunk, onToolCall, onD
(async () => {
try {
const response = await fetch('/api/chat/send-stream', {
const response = await fetch(`${chatApiBaseURL}/send-stream`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ sessionId, message }),

View File

@@ -0,0 +1,332 @@
/**
 * Browser-side realtime voice client built on a raw WebSocket + Web Audio.
 *
 * Uplink: microphone audio is captured via getUserMedia, downsampled to
 * 16 kHz, converted to 16-bit PCM and sent to the server in fixed
 * 320-sample (20 ms) binary frames. Downlink: binary WebSocket frames are
 * treated as 24 kHz 16-bit mono PCM and scheduled back-to-back for gapless
 * playback. JSON text frames carry control messages: 'ready', 'subtitle',
 * 'tts_reset', 'assistant_pending', 'error'.
 */
class NativeVoiceService {
  constructor() {
    this.ws = null;                 // active WebSocket, or null
    this.mediaStream = null;        // getUserMedia microphone stream
    this.captureContext = null;     // AudioContext used for mic capture
    this.captureSource = null;      // MediaStreamAudioSourceNode
    this.captureProcessor = null;   // ScriptProcessorNode producing PCM
    this.captureSilenceGain = null; // zero-gain sink keeping the processor pulled
    this.playbackContext = null;    // AudioContext used for downlink playback
    // Absolute AudioContext time at which the next downlink chunk starts.
    this.playbackTime = 0;
    // Sources currently scheduled/playing, so tts_reset can stop them all.
    this.activeSources = new Set();
    // Uplink sample FIFO; flushed in fixed 320-sample frames.
    this.pendingSamples = [];
    // Settle handlers for the connect() promise until the server says 'ready'.
    this.readyResolver = null;
    this.readyRejector = null;
    this.callbacks = {
      onSubtitle: null,
      onConnectionStateChange: null,
      onError: null,
      onAssistantPending: null,
      onDiagnostic: null,
    };
  }

  /**
   * Build the realtime-dialog WebSocket URL.
   * Priority: absolute env override (http(s) rewritten to ws(s), trailing
   * '/api/voice' or '/api' stripped) → local dev fallback to port 3012 →
   * same-origin '/ws/realtime-dialog'.
   */
  resolveWebSocketUrl(sessionId, userId) {
    const query = new URLSearchParams({
      sessionId,
      userId: userId || '',
    });
    const configuredBase = import.meta.env.VITE_VOICE_WS_BASE_URL || import.meta.env.VITE_VOICE_API_BASE_URL || '';
    if (configuredBase && !configuredBase.startsWith('/')) {
      let base = configuredBase.replace(/\/$/, '');
      if (base.startsWith('https://')) {
        base = `wss://${base.slice('https://'.length)}`;
      } else if (base.startsWith('http://')) {
        base = `ws://${base.slice('http://'.length)}`;
      }
      // The env var may point at the REST prefix; strip it to reach the WS root.
      if (base.endsWith('/api/voice')) {
        base = base.slice(0, -'/api/voice'.length);
      } else if (base.endsWith('/api')) {
        base = base.slice(0, -'/api'.length);
      }
      return `${base}/ws/realtime-dialog?${query.toString()}`;
    }
    const hostname = window.location.hostname;
    const port = window.location.port;
    const isLocalHost = hostname === 'localhost' || hostname === '127.0.0.1';
    // Local dev (file:// or localhost, not already on 3012): talk to the
    // backend dev server on port 3012 directly.
    if ((window.location.protocol === 'file:' || isLocalHost) && port !== '3012') {
      return `ws://${hostname || '127.0.0.1'}:3012/ws/realtime-dialog?${query.toString()}`;
    }
    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
    return `${protocol}//${window.location.host}/ws/realtime-dialog?${query.toString()}`;
  }

  // Notify the registered listener of a connection-state transition
  // ('connecting' | 'connected' | 'disconnected').
  emitConnectionState(state) {
    this.callbacks.onConnectionStateChange?.(state);
  }

  // Emit a timestamped diagnostic event for debugging/telemetry consumers.
  emitDiagnostic(type, payload) {
    this.callbacks.onDiagnostic?.({ type, payload, timestamp: Date.now() });
  }

  /**
   * Stop and release every scheduled playback source (used on 'tts_reset'
   * and on disconnect), then rebase the playback clock slightly into the
   * future so the next chunk starts cleanly.
   */
  resetPlaybackQueue() {
    this.activeSources.forEach((source) => {
      // stop()/disconnect() throw if the source already ended; ignore.
      try {
        source.stop();
      } catch (_) {}
      try {
        source.disconnect();
      } catch (_) {}
    });
    this.activeSources.clear();
    if (this.playbackContext) {
      this.playbackTime = this.playbackContext.currentTime + 0.02;
    } else {
      this.playbackTime = 0;
    }
  }

  /**
   * Open the dialog WebSocket, send the 'start' control frame, wait for the
   * server's 'ready' message, then begin microphone capture.
   * Any previous session is torn down first.
   * Rejects if the socket errors or closes before 'ready' arrives.
   */
  async connect({ sessionId, userId, botName, systemRole, speakingStyle, modelVersion, speaker }) {
    await this.disconnect();
    const wsUrl = this.resolveWebSocketUrl(sessionId, userId);
    this.emitConnectionState('connecting');
    this.playbackContext = new (window.AudioContext || window.webkitAudioContext)();
    // Autoplay policies may start the context suspended; best-effort resume.
    if (this.playbackContext.state === 'suspended') {
      await this.playbackContext.resume().catch(() => {});
    }
    this.playbackTime = this.playbackContext.currentTime;
    await new Promise((resolve, reject) => {
      // resolve/reject are also stored so handleJsonMessage ('ready') and
      // onclose can settle the promise outside this scope.
      this.readyResolver = resolve;
      this.readyRejector = reject;
      const ws = new WebSocket(wsUrl);
      ws.binaryType = 'arraybuffer';
      this.ws = ws;
      ws.onopen = () => {
        this.emitConnectionState('connected');
        ws.send(JSON.stringify({
          type: 'start',
          sessionId,
          userId,
          botName,
          systemRole,
          speakingStyle,
          modelVersion,
          speaker,
        }));
      };
      ws.onerror = () => {
        const error = new Error('WebSocket connection failed');
        this.callbacks.onError?.(error);
        // NOTE(review): reject is invoked both via readyRejector and directly
        // below; the second call on a settled promise is a no-op.
        this.readyRejector?.(error);
        this.readyResolver = null;
        this.readyRejector = null;
        reject(error);
      };
      ws.onclose = () => {
        this.emitConnectionState('disconnected');
        if (this.readyRejector) {
          this.readyRejector(new Error('WebSocket closed before ready'));
          this.readyResolver = null;
          this.readyRejector = null;
        }
      };
      ws.onmessage = (event) => {
        // Text frames are JSON control messages; binary frames are PCM audio.
        if (typeof event.data === 'string') {
          this.handleJsonMessage(event.data);
          return;
        }
        this.handleAudioMessage(event.data);
      };
    });
    await this.startCapture();
  }

  /**
   * Dispatch one JSON control frame from the server.
   * Unknown-but-parsable messages are surfaced as 'ws_message' diagnostics;
   * unparsable text is surfaced as 'ws_raw_text'.
   */
  handleJsonMessage(raw) {
    try {
      const msg = JSON.parse(raw);
      if (msg.type === 'ready') {
        // Server is ready: settle the connect() promise.
        this.readyResolver?.();
        this.readyResolver = null;
        this.readyRejector = null;
        return;
      }
      if (msg.type === 'subtitle') {
        this.callbacks.onSubtitle?.({
          text: msg.text,
          role: msg.role,
          isFinal: !!msg.isFinal,
          sequence: msg.sequence,
        });
        return;
      }
      if (msg.type === 'tts_reset') {
        // Server interrupted TTS (e.g. barge-in): drop queued audio.
        this.resetPlaybackQueue();
        this.emitDiagnostic('tts_reset', msg);
        return;
      }
      if (msg.type === 'assistant_pending') {
        this.callbacks.onAssistantPending?.(!!msg.active);
        return;
      }
      if (msg.type === 'error') {
        this.callbacks.onError?.(new Error(msg.error || 'native voice error'));
        return;
      }
      this.emitDiagnostic('ws_message', msg);
    } catch (error) {
      this.emitDiagnostic('ws_raw_text', raw);
    }
  }

  /**
   * Queue one binary audio frame (interpreted as 16-bit mono PCM at 24 kHz)
   * for gapless playback: each chunk is scheduled at this.playbackTime and
   * the clock advances by the chunk's duration.
   */
  handleAudioMessage(arrayBuffer) {
    if (!this.playbackContext) {
      return;
    }
    const pcm16 = new Int16Array(arrayBuffer);
    if (!pcm16.length) {
      return;
    }
    const audioBuffer = this.playbackContext.createBuffer(1, pcm16.length, 24000);
    const channel = audioBuffer.getChannelData(0);
    // Convert int16 → float32 in [-1, 1).
    for (let i = 0; i < pcm16.length; i += 1) {
      channel[i] = pcm16[i] / 32768;
    }
    const source = this.playbackContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(this.playbackContext.destination);
    this.activeSources.add(source);
    source.onended = () => {
      this.activeSources.delete(source);
      try {
        source.disconnect();
      } catch (_) {}
    };
    const now = this.playbackContext.currentTime;
    // If the queue drained (clock fell behind), restart 20 ms ahead of now.
    if (this.playbackTime < now) {
      this.playbackTime = now + 0.02;
    }
    source.start(this.playbackTime);
    this.playbackTime += audioBuffer.duration;
    this.emitDiagnostic('audio_chunk', { samples: pcm16.length, duration: audioBuffer.duration });
  }

  /**
   * Start microphone capture and stream 16 kHz 16-bit PCM frames to the
   * server. Uses a ScriptProcessorNode routed through a zero-gain node to
   * the destination so the audio graph keeps pulling data without being
   * audible locally.
   */
  async startCapture() {
    this.mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: 1,
        noiseSuppression: true,
        echoCancellation: true,
        autoGainControl: true,
      },
      video: false,
    });
    this.captureContext = new (window.AudioContext || window.webkitAudioContext)();
    this.captureSource = this.captureContext.createMediaStreamSource(this.mediaStream);
    this.captureProcessor = this.captureContext.createScriptProcessor(4096, 1, 1);
    this.captureSilenceGain = this.captureContext.createGain();
    this.captureSilenceGain.gain.value = 0;
    this.captureProcessor.onaudioprocess = (event) => {
      const input = event.inputBuffer.getChannelData(0);
      const downsampled = this.downsampleBuffer(input, this.captureContext.sampleRate, 16000);
      for (let i = 0; i < downsampled.length; i += 1) {
        this.pendingSamples.push(downsampled[i]);
      }
      // Flush in fixed 320-sample frames (20 ms at 16 kHz).
      while (this.pendingSamples.length >= 320) {
        const chunk = this.pendingSamples.splice(0, 320);
        const pcm = new Int16Array(chunk.length);
        for (let i = 0; i < chunk.length; i += 1) {
          // Clamp then scale float [-1, 1] → int16 (asymmetric range).
          const sample = Math.max(-1, Math.min(1, chunk[i]));
          pcm[i] = sample < 0 ? sample * 32768 : sample * 32767;
        }
        if (this.ws && this.ws.readyState === WebSocket.OPEN) {
          this.ws.send(pcm.buffer);
        }
      }
    };
    this.captureSource.connect(this.captureProcessor);
    this.captureProcessor.connect(this.captureSilenceGain);
    this.captureSilenceGain.connect(this.captureContext.destination);
  }

  /**
   * Downsample a Float32 buffer from inputRate to outputRate by averaging
   * each window of source samples (simple box filter).
   * Returns a plain number[]; passthrough copy when no downsampling needed.
   */
  downsampleBuffer(buffer, inputRate, outputRate) {
    if (outputRate >= inputRate) {
      return Array.from(buffer);
    }
    const sampleRateRatio = inputRate / outputRate;
    const newLength = Math.round(buffer.length / sampleRateRatio);
    const result = new Array(newLength);
    let offsetResult = 0;
    let offsetBuffer = 0;
    while (offsetResult < result.length) {
      const nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
      let accum = 0;
      let count = 0;
      for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i += 1) {
        accum += buffer[i];
        count += 1;
      }
      result[offsetResult] = count > 0 ? accum / count : 0;
      offsetResult += 1;
      offsetBuffer = nextOffsetBuffer;
    }
    return result;
  }

  // Mute/unmute the microphone by toggling the audio tracks; capture graph
  // keeps running so unmuting is instant.
  async setMuted(muted) {
    this.mediaStream?.getAudioTracks().forEach((track) => {
      track.enabled = !muted;
    });
  }

  /**
   * Tear down capture, the WebSocket (sending a 'stop' frame first when
   * open) and playback, releasing all audio resources. Safe to call when
   * nothing is connected.
   */
  async disconnect() {
    if (this.captureProcessor) {
      this.captureProcessor.disconnect();
      this.captureProcessor.onaudioprocess = null;
      this.captureProcessor = null;
    }
    if (this.captureSource) {
      this.captureSource.disconnect();
      this.captureSource = null;
    }
    if (this.captureSilenceGain) {
      this.captureSilenceGain.disconnect();
      this.captureSilenceGain = null;
    }
    if (this.captureContext) {
      await this.captureContext.close().catch(() => {});
      this.captureContext = null;
    }
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }
    if (this.ws) {
      try {
        // NOTE(review): a socket still in CONNECTING state is dropped
        // without close() here — confirm that is intentional.
        if (this.ws.readyState === WebSocket.OPEN) {
          this.ws.send(JSON.stringify({ type: 'stop' }));
          this.ws.close();
        }
      } catch (_) {}
      this.ws = null;
    }
    if (this.playbackContext) {
      this.resetPlaybackQueue();
      await this.playbackContext.close().catch(() => {});
      this.playbackContext = null;
    }
    this.playbackTime = 0;
    this.pendingSamples = [];
    this.emitConnectionState('disconnected');
  }

  // Register a callback for a known event name (see this.callbacks keys).
  on(event, callback) {
    if (event in this.callbacks) {
      this.callbacks[event] = callback;
    }
  }

  // Remove the callback for a known event name.
  off(event) {
    if (event in this.callbacks) {
      this.callbacks[event] = null;
    }
  }
}

// Module-level singleton: all consumers share one voice session.
const nativeVoiceService = new NativeVoiceService();
export default nativeVoiceService;

View File

@@ -1,323 +0,0 @@
/**
 * Volcano Engine RTC SDK wrapper.
 * Handles establishing and managing WebRTC audio streams: engine lifecycle,
 * room join/leave, mic capture, subtitle events, room/user messages
 * (function-calling payloads) and diagnostic listeners.
 */
class RTCService {
  constructor() {
    this.engine = null;   // RTC engine instance, created in init()
    this.joined = false;  // true while inside a room
    this.callbacks = {
      onSubtitle: null,
      onAudioStatus: null,
      onConnectionStateChange: null,
      onError: null,
      onUserJoined: null,
      onUserLeft: null,
      onToolCall: null,
      onRoomMessage: null,
    };
  }

  /**
   * Lazily import '@volcengine/rtc', create the engine for `appId` and wire
   * all event listeners (subtitles, user join/leave, errors, function-call
   * message channels, diagnostics). Destroys any previous engine first.
   * Optional SDK events are guarded with existence checks because event
   * names vary across SDK versions.
   * @param {string} appId - Volcano Engine RTC application id
   * @returns {Promise<boolean>} true on success
   * @throws when the SDK cannot be loaded or engine creation fails
   */
  async init(appId) {
    if (this.engine) {
      this.destroy();
    }
    try {
      const VERTC = await import('@volcengine/rtc');
      // SDK may expose createEngine/events on the default export or the namespace.
      const createEngine = VERTC.default?.createEngine || VERTC.createEngine;
      const events = VERTC.default?.events || VERTC.events;
      if (!createEngine) {
        throw new Error('Failed to load RTC SDK: createEngine not found');
      }
      this.engine = createEngine(appId);
      // NOTE(review): this.events is set here but never declared in the
      // constructor; it is kept for later use by listeners.
      this.events = events;
      this.engine.on(events.onConnectionStateChanged, (state) => {
        console.log('[RTC] Connection state:', state);
        this.callbacks.onConnectionStateChange?.(state);
      });
      if (events.onSubtitleStateChanged) {
        this.engine.on(events.onSubtitleStateChanged, (state) => {
          console.log('[RTC] Subtitle state changed:', state);
        });
      }
      if (events.onSubtitleMessageReceived) {
        this.engine.on(events.onSubtitleMessageReceived, (subtitles) => {
          console.log('[RTC] Subtitle received:', subtitles.length, 'items');
          subtitles.forEach((sub) => {
            // Bot userIds start with 'bot_'; a missing userId or a 'bot_'
            // prefix both mean the subtitle belongs to the assistant.
            const isBot = !sub.userId || sub.userId.startsWith('bot_');
            this.callbacks.onSubtitle?.({
              text: sub.text,
              role: isBot ? 'assistant' : 'user',
              isFinal: sub.definite,
              sequence: sub.sequence,
            });
          });
        });
      }
      this.engine.on(events.onUserJoined, (info) => {
        console.log('[RTC] User joined:', info.userInfo?.userId);
        this.callbacks.onUserJoined?.(info);
      });
      this.engine.on(events.onUserLeave, (info) => {
        console.log('[RTC] User left:', info.userInfo?.userId);
        this.callbacks.onUserLeft?.(info);
      });
      this.engine.on(events.onError, (error) => {
        console.error('[RTC] Error:', error);
        this.callbacks.onError?.(error);
      });
      // === Function Calling: listen for room messages (the SDK callback
      // receives a single event object) ===
      if (events.onRoomBinaryMessageReceived) {
        this.engine.on(events.onRoomBinaryMessageReceived, (event) => {
          try {
            const uid = event.uid || event.userId || 'unknown';
            const raw = event.message;
            // Binary payloads are UTF-8 decoded before JSON parsing.
            const text = typeof raw === 'string' ? raw : new TextDecoder().decode(raw);
            console.log('[RTC][FC] Room binary from', uid, ':', text.substring(0, 500));
            this.callbacks.onRoomMessage?.({ uid, text });
            const parsed = JSON.parse(text);
            this._handleRoomMessage(uid, parsed);
          } catch (e) {
            console.log('[RTC][FC] Room binary (non-JSON):', e.message);
          }
        });
      }
      if (events.onRoomMessageReceived) {
        this.engine.on(events.onRoomMessageReceived, (event) => {
          const uid = event.uid || event.userId || 'unknown';
          const msg = event.message || '';
          console.log('[RTC][FC] Room text from', uid, ':', String(msg).substring(0, 500));
          this.callbacks.onRoomMessage?.({ uid, text: String(msg) });
          try {
            const parsed = JSON.parse(msg);
            this._handleRoomMessage(uid, parsed);
          } catch (e) {
            console.log('[RTC][FC] Room text (non-JSON):', e.message);
          }
        });
      }
      if (events.onUserBinaryMessageReceived) {
        this.engine.on(events.onUserBinaryMessageReceived, (event) => {
          try {
            const uid = event.uid || event.userId || 'unknown';
            const raw = event.message;
            const text = typeof raw === 'string' ? raw : new TextDecoder().decode(raw);
            console.log('[RTC][FC] User binary from', uid, ':', text.substring(0, 500));
            this.callbacks.onRoomMessage?.({ uid, text });
            const parsed = JSON.parse(text);
            this._handleRoomMessage(uid, parsed);
          } catch (e) {
            console.log('[RTC][FC] User binary (non-JSON):', e.message);
          }
        });
      }
      if (events.onUserMessageReceived) {
        this.engine.on(events.onUserMessageReceived, (event) => {
          const uid = event.uid || event.userId || 'unknown';
          const msg = event.message || '';
          console.log('[RTC][FC] User text from', uid, ':', String(msg).substring(0, 500));
          this.callbacks.onRoomMessage?.({ uid, text: String(msg) });
          try {
            const parsed = JSON.parse(msg);
            this._handleRoomMessage(uid, parsed);
          } catch (e) {
            console.log('[RTC][FC] User text (non-JSON):', e.message);
          }
        });
      }
      // === Diagnostic events ===
      if (events.onUserPublishStream) {
        this.engine.on(events.onUserPublishStream, (info) => {
          console.log('[RTC][DIAG] Remote user published stream:', info.userId, 'mediaType:', info.mediaType);
        });
      }
      if (events.onUserUnpublishStream) {
        this.engine.on(events.onUserUnpublishStream, (info) => {
          console.log('[RTC][DIAG] Remote user unpublished stream:', info.userId, 'mediaType:', info.mediaType);
        });
      }
      if (events.onAutoplayFailed) {
        this.engine.on(events.onAutoplayFailed, (info) => {
          console.error('[RTC][DIAG] ❌ Autoplay FAILED! Audio blocked by browser:', info);
        });
      }
      if (events.onPlayerEvent) {
        this.engine.on(events.onPlayerEvent, (info) => {
          console.log('[RTC][DIAG] Player event:', info);
        });
      }
      if (events.onRemoteStreamStats) {
        this.engine.on(events.onRemoteStreamStats, (stats) => {
          if (stats.audioRecvBytes > 0) {
            console.log('[RTC][DIAG] Receiving audio from:', stats.uid, 'bytes:', stats.audioRecvBytes);
          }
        });
      }
      // Enable audio-properties reporting to detect whether remote audio
      // is actually arriving (and whether the local mic is live).
      try {
        this.engine.enableAudioPropertiesReport?.({ interval: 3000 });
        if (events.onRemoteAudioPropertiesReport) {
          this.engine.on(events.onRemoteAudioPropertiesReport, (infos) => {
            infos?.forEach((info) => {
              if (info.audioPropertiesInfo?.linearVolume > 0) {
                console.log('[RTC][DIAG] 🔊 Remote audio detected! user:', info.streamKey?.userId, 'volume:', info.audioPropertiesInfo.linearVolume);
              }
            });
          });
        }
        if (events.onLocalAudioPropertiesReport) {
          this.engine.on(events.onLocalAudioPropertiesReport, (infos) => {
            infos?.forEach((info) => {
              if (info.audioPropertiesInfo?.linearVolume > 0) {
                console.log('[RTC][DIAG] 🎤 Local mic active, volume:', info.audioPropertiesInfo.linearVolume);
              }
            });
          });
        }
      } catch (e) {
        console.warn('[RTC][DIAG] enableAudioPropertiesReport not available:', e.message);
      }
      console.log('[RTC] Engine initialized with diagnostic listeners');
      console.log('[RTC] Available events:', Object.keys(events).filter(k => k.startsWith('on')).join(', '));
      return true;
    } catch (error) {
      console.error('[RTC] Init failed:', error);
      throw error;
    }
  }

  /**
   * Join an RTC room (auto-publish, auto-subscribe audio only), start mic
   * capture and enable subtitles.
   * @param {string} roomId
   * @param {string} userId
   * @param {string} token - room-scoped auth token
   * @throws if init() has not been called
   */
  async joinRoom(roomId, userId, token) {
    if (!this.engine) throw new Error('Engine not initialized');
    await this.engine.joinRoom(
      token,
      roomId,
      { userId },
      {
        isAutoPublish: true,
        isAutoSubscribeAudio: true,
        isAutoSubscribeVideo: false,
      }
    );
    await this.engine.startAudioCapture();
    // Activate subtitle reception (must be called after joinRoom).
    try {
      await this.engine.startSubtitle({});
      console.log('[RTC] Subtitle enabled');
    } catch (e) {
      console.warn('[RTC] startSubtitle failed:', e.message || e);
    }
    this.joined = true;
    console.log(`[RTC] Joined room ${roomId} as ${userId}`);
  }

  // Stop capture and leave the room; no-op when not joined. Errors are
  // logged but not rethrown (best-effort teardown).
  async leaveRoom() {
    if (!this.engine || !this.joined) return;
    try {
      await this.engine.stopAudioCapture();
      await this.engine.leaveRoom();
      this.joined = false;
      console.log('[RTC] Left room');
    } catch (e) {
      console.warn('[RTC] Leave room error:', e);
    }
  }

  // Mute by stopping audio capture entirely; unmute restarts it.
  async setMuted(muted) {
    if (!this.engine) return;
    if (muted) {
      await this.engine.stopAudioCapture();
    } else {
      await this.engine.startAudioCapture();
    }
  }

  /**
   * Inspect a parsed room/user message and extract tool calls, tolerating
   * several payload shapes observed from the platform. Each detected call
   * is forwarded to the onToolCall callback as
   * { tool_call_id, function_name, arguments }.
   */
  _handleRoomMessage(uid, parsed) {
    console.log('[RTC][FC] Parsed message type:', parsed.type || parsed.event || 'unknown', 'from:', uid);
    // Try multiple possible tool-call message formats.
    let toolCalls = null;
    // Format 1: { type: "function_call", data: { tool_calls: [...] } }
    if (parsed.type === 'function_call' && parsed.data?.tool_calls) {
      toolCalls = parsed.data.tool_calls;
    }
    // Format 2: { event: "function_call", tool_calls: [...] }
    else if (parsed.event === 'function_call' && parsed.tool_calls) {
      toolCalls = parsed.tool_calls;
    }
    // Format 3: { type: "conversation", data: { event: "function_call", ... } }
    else if (parsed.type === 'conversation' && parsed.data?.event === 'function_call') {
      toolCalls = parsed.data.tool_calls || [parsed.data];
    }
    // Format 4: a bare tool_calls array on the payload
    else if (parsed.tool_calls) {
      toolCalls = parsed.tool_calls;
    }
    // Format 5: a single function_call object
    else if (parsed.function?.name || parsed.function_name) {
      toolCalls = [parsed];
    }
    if (toolCalls && toolCalls.length > 0) {
      console.log('[RTC][FC] ✅ Tool calls detected:', toolCalls.length);
      toolCalls.forEach((tc) => {
        const callId = tc.id || tc.tool_call_id || `tc_${Date.now()}`;
        const funcName = tc.function?.name || tc.function_name || 'unknown';
        const args = tc.function?.arguments || tc.arguments || '{}';
        console.log(`[RTC][FC] Tool call: ${funcName}(${args}), id=${callId}`);
        this.callbacks.onToolCall?.({ tool_call_id: callId, function_name: funcName, arguments: args });
      });
    } else {
      console.log('[RTC][FC] Message is not a tool call, full payload:', JSON.stringify(parsed).substring(0, 300));
    }
  }

  // Register a callback for a known event name (see this.callbacks keys).
  on(event, callback) {
    if (event in this.callbacks) {
      this.callbacks[event] = callback;
    }
  }

  // Remove the callback for a known event name.
  off(event) {
    if (event in this.callbacks) {
      this.callbacks[event] = null;
    }
  }

  // Best-effort engine teardown: leave the room if joined, destroy the
  // engine and reset local state. Safe to call repeatedly.
  destroy() {
    if (this.engine) {
      try {
        if (this.joined) {
          // Fire-and-forget: teardown must not block or throw.
          this.engine.stopAudioCapture().catch(() => {});
          this.engine.leaveRoom().catch(() => {});
        }
        this.engine.destroyEngine?.();
      } catch (e) {
        console.warn('[RTC] Destroy error:', e);
      }
      this.engine = null;
      this.joined = false;
    }
  }
}

// Module-level singleton: the app uses a single RTC engine.
const rtcService = new RTCService();
export default rtcService;

View File

@@ -1,7 +1,27 @@
import axios from 'axios';
/**
 * Choose the base URL for an API client.
 * An explicit configured value wins; outside a browser the relative path is
 * returned; a local dev page (file:// or localhost not already on port 3012)
 * is redirected to the backend on port 3012; otherwise the relative path is
 * kept for same-origin requests.
 *
 * @param {string|undefined} configured - env-provided base URL override
 * @param {string} path - relative API prefix, e.g. '/api/voice'
 * @returns {string} base URL for axios
 */
function resolveApiBaseURL(configured, path) {
  if (configured) {
    return configured;
  }
  if (typeof window === 'undefined') {
    return path;
  }
  const loc = window.location;
  const host = loc.hostname;
  const isDevHost = loc.protocol === 'file:' || host === 'localhost' || host === '127.0.0.1';
  if (isDevHost && loc.port !== '3012') {
    // Non-https dev pages (including file://) get a plain http scheme.
    const scheme = loc.protocol === 'https:' ? 'https:' : 'http:';
    return `${scheme}//${host || '127.0.0.1'}:3012${path}`;
  }
  return path;
}
// Resolve both base URLs once at module load (env overrides win).
const voiceApiBaseURL = resolveApiBaseURL(import.meta.env.VITE_VOICE_API_BASE_URL, '/api/voice');
const sessionApiBaseURL = resolveApiBaseURL(import.meta.env.VITE_SESSION_API_BASE_URL, '/api/session');

// Shared axios instance for voice REST endpoints.
// Fix: removed the stale duplicate `baseURL: '/api/voice'` key — with
// duplicate object keys the last one silently wins, so the literal was
// dead code left over from the previous version and only misleading.
const api = axios.create({
  baseURL: voiceApiBaseURL,
  timeout: 10000,
});
@@ -10,64 +30,9 @@ export async function getVoiceConfig() {
return data.data;
}
/** POST /prepare — ask the backend to prepare a voice chat session. */
export async function prepareVoiceChat(params) {
  const response = await api.post('/prepare', params);
  return response.data;
}
/** POST /start — start a prepared voice chat session. */
export async function startVoiceChat(params) {
  const response = await api.post('/start', params);
  return response.data;
}
/** POST /stop — terminate the given voice chat session. */
export async function stopVoiceChat(sessionId) {
  const payload = { sessionId };
  const response = await api.post('/stop', payload);
  return response.data;
}
/** POST /subtitle — push a subtitle record to the backend. */
export async function sendSubtitle(params) {
  const response = await api.post('/subtitle', params);
  return response.data;
}
/** GET /subtitles/:sessionId — fetch subtitles; unwraps the `data` envelope. */
export async function getSubtitles(sessionId) {
  const response = await api.get(`/subtitles/${sessionId}`);
  return response.data.data;
}
/** GET /sessions — list active voice sessions; unwraps the `data` envelope. */
export async function getActiveSessions() {
  const response = await api.get('/sessions');
  return response.data.data;
}
/** POST /room_message — forward a raw room message to the backend. */
export async function forwardRoomMessage({ roomId, uid, text }) {
  const payload = { roomId, uid, text };
  const response = await api.post('/room_message', payload);
  return response.data;
}
/**
 * POST /tool-callback — report a tool-call result.
 * Note: the backend expects snake_case keys, hence the explicit mapping.
 */
export async function executeToolCall({ sessionId, toolCallId, functionName, arguments: args }) {
  const payload = {
    sessionId,
    tool_call_id: toolCallId,
    function_name: functionName,
    arguments: args,
  };
  const response = await api.post('/tool-callback', payload);
  return response.data;
}
/**
 * POST /fc_callback — send a function-calling callback.
 * Note: this endpoint expects PascalCase keys, hence the explicit mapping.
 */
export async function executeFcCallback({ roomId, taskId, type, message }) {
  const payload = {
    RoomID: roomId,
    TaskID: taskId,
    Type: type,
    Message: message,
  };
  const response = await api.post('/fc_callback', payload);
  return response.data;
}
// ========== Session history API ==========
// Separate axios instance for session-history endpoints.
// Fix: removed the stale duplicate `baseURL: '/api/session'` key — with
// duplicate object keys the last one silently wins, so the literal was
// dead code left over from the previous version and only misleading.
const sessionApi = axios.create({
  baseURL: sessionApiBaseURL,
  timeout: 10000,
});
@@ -80,3 +45,15 @@ export async function switchSessionMode(sessionId, targetMode) {
const { data } = await sessionApi.post(`/${sessionId}/switch`, { targetMode });
return data.data;
}
/** GET /list — fetch recent sessions, optionally filtered by user id. */
export async function getSessionList(userId, limit = 50) {
  const query = userId ? { limit, userId } : { limit };
  const response = await sessionApi.get('/list', { params: query });
  return response.data.data;
}
/** DELETE /:sessionId — remove one session; returns the raw API envelope. */
export async function deleteSessionById(sessionId) {
  const response = await sessionApi.delete(`/${sessionId}`);
  return response.data;
}