feat: 添加realtime_dialog和realtime_dialog_external_rag_test项目,更新test2项目
This commit is contained in:
@@ -1,12 +1,14 @@
|
||||
import { useState, useEffect, useCallback, useRef } from 'react';
|
||||
import { Settings2, Zap, Mic, MessageSquare } from 'lucide-react';
|
||||
import { Settings2, Zap, Mic, MessageSquare, History, Plus } from 'lucide-react';
|
||||
import VoicePanel from './components/VoicePanel';
|
||||
import ChatPanel from './components/ChatPanel';
|
||||
import SettingsPanel from './components/SettingsPanel';
|
||||
import { getVoiceConfig } from './services/voiceApi';
|
||||
import SessionHistoryPanel from './components/SessionHistoryPanel';
|
||||
|
||||
export default function App() {
|
||||
const [showSettings, setShowSettings] = useState(false);
|
||||
const [showHistory, setShowHistory] = useState(false);
|
||||
const [voiceConfig, setVoiceConfig] = useState(null);
|
||||
// 'voice' | 'chat'
|
||||
const [mode, setMode] = useState('voice');
|
||||
@@ -20,6 +22,7 @@ export default function App() {
|
||||
botName: '小智',
|
||||
systemRole: '你是一个友善的智能助手,名叫小智。你擅长帮用户解答各类问题。',
|
||||
speakingStyle: '请使用温和、清晰的口吻。',
|
||||
greetingText: '你好,我是你的智能语音助手,有什么可以帮你的吗?',
|
||||
modelVersion: '1.2.1.0',
|
||||
speaker: 'zh_female_vv_jupiter_bigtts',
|
||||
enableWebSearch: false,
|
||||
@@ -51,16 +54,47 @@ export default function App() {
|
||||
setMode('voice');
|
||||
}, [currentSessionId]);
|
||||
|
||||
// 直接进入文字模式(新会话)
|
||||
// 切换到文字模式(复用已有 sessionId,没有时新建)
|
||||
const handleStartChat = useCallback(() => {
|
||||
const newSid = `chat_${Date.now().toString(36)}`;
|
||||
setCurrentSessionId(newSid);
|
||||
const sid = currentSessionId || `chat_${Date.now().toString(36)}`;
|
||||
setCurrentSessionId(sid);
|
||||
setHandoff({
|
||||
sessionId: newSid,
|
||||
sessionId: sid,
|
||||
subtitles: [],
|
||||
});
|
||||
setMode('chat');
|
||||
console.log(`[App] New chat session: ${newSid}`);
|
||||
console.log(`[App] Switch to chat, sessionId=${sid}`);
|
||||
}, [currentSessionId]);
|
||||
|
||||
// 语音会话创建时同步 sessionId 到 App 状态
|
||||
const handleSessionCreated = useCallback((sessionId) => {
|
||||
if (sessionId && sessionId !== currentSessionId) {
|
||||
setCurrentSessionId(sessionId);
|
||||
console.log(`[App] Voice session synced: ${sessionId}`);
|
||||
}
|
||||
}, [currentSessionId]);
|
||||
|
||||
// 新建会话:重置所有状态
|
||||
const handleNewSession = useCallback(() => {
|
||||
setCurrentSessionId(null);
|
||||
setHandoff(null);
|
||||
setChatMessages([]);
|
||||
setMode('voice');
|
||||
console.log('[App] New session created');
|
||||
}, []);
|
||||
|
||||
// 从历史记录中选择会话
|
||||
const handleSelectSession = useCallback((session) => {
|
||||
const sid = session.id;
|
||||
setCurrentSessionId(sid);
|
||||
setChatMessages([]);
|
||||
// 根据会话最后的模式决定打开方式,默认用文字模式查看历史
|
||||
setHandoff({
|
||||
sessionId: sid,
|
||||
subtitles: [],
|
||||
});
|
||||
setMode('chat');
|
||||
console.log(`[App] Selected session: ${sid}, mode: ${session.mode}`);
|
||||
}, []);
|
||||
|
||||
return (
|
||||
@@ -78,7 +112,7 @@ export default function App() {
|
||||
</h1>
|
||||
<p className="text-[11px] text-slate-400 leading-tight">
|
||||
{mode === 'voice'
|
||||
? '混合编排模式 · OutputMode=1'
|
||||
? '直连 S2S 语音 · ChatTTSText'
|
||||
: handoff?.subtitles?.length > 0
|
||||
? '语音转接 · 上下文已延续'
|
||||
: '方舟 LLM · Function Calling'}
|
||||
@@ -86,6 +120,22 @@ export default function App() {
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-1.5">
|
||||
{/* History button */}
|
||||
<button
|
||||
onClick={() => setShowHistory(true)}
|
||||
className="p-2 rounded-lg hover:bg-slate-700/50 text-slate-400 hover:text-white transition-colors mr-1"
|
||||
title="会话历史"
|
||||
>
|
||||
<History className="w-4 h-4" />
|
||||
</button>
|
||||
{/* New session button */}
|
||||
<button
|
||||
onClick={handleNewSession}
|
||||
className="p-2 rounded-lg hover:bg-slate-700/50 text-slate-400 hover:text-white transition-colors"
|
||||
title="新建会话"
|
||||
>
|
||||
<Plus className="w-4 h-4" />
|
||||
</button>
|
||||
{/* Mode toggle buttons */}
|
||||
<div className="flex items-center bg-slate-800/60 rounded-lg border border-slate-700/40 p-0.5 mr-2">
|
||||
<button
|
||||
@@ -136,23 +186,23 @@ export default function App() {
|
||||
{mode === 'voice' ? (
|
||||
<>
|
||||
{/* Voice Panel */}
|
||||
<VoicePanel settings={settings} onVoiceEnd={handleVoiceEnd} chatHistory={chatMessages} sessionId={currentSessionId} />
|
||||
<VoicePanel settings={settings} onVoiceEnd={handleVoiceEnd} chatHistory={chatMessages} sessionId={currentSessionId} onSessionCreated={handleSessionCreated} />
|
||||
|
||||
{/* Architecture Info */}
|
||||
<div className="mt-6 p-4 rounded-xl bg-slate-800/40 border border-slate-700/40">
|
||||
<h3 className="text-xs font-semibold text-slate-400 uppercase tracking-wider mb-3">方案B 混合编排架构</h3>
|
||||
<h3 className="text-xs font-semibold text-slate-400 uppercase tracking-wider mb-3">RTC 直路由语音架构</h3>
|
||||
<div className="grid grid-cols-1 md:grid-cols-3 gap-3 text-xs">
|
||||
<div className="p-3 rounded-lg bg-slate-700/30 border border-slate-600/30">
|
||||
<div className="text-emerald-400 font-medium mb-1">闲聊场景</div>
|
||||
<div className="text-slate-400">端到端模型直接回复 · ~300-800ms</div>
|
||||
<div className="text-emerald-400 font-medium mb-1">上行链路</div>
|
||||
<div className="text-slate-400">浏览器 RTC 麦克风 → 房间字幕/消息 → 后端前置路由</div>
|
||||
</div>
|
||||
<div className="p-3 rounded-lg bg-slate-700/30 border border-slate-600/30">
|
||||
<div className="text-amber-400 font-medium mb-1">工具调用场景</div>
|
||||
<div className="text-slate-400">LLM 决策 + Function Calling · ~1-2s</div>
|
||||
<div className="text-amber-400 font-medium mb-1">应答链路</div>
|
||||
<div className="text-slate-400">知识库/工具结果 → ExternalTextToSpeech → 语音播报</div>
|
||||
</div>
|
||||
<div className="p-3 rounded-lg bg-slate-700/30 border border-slate-600/30">
|
||||
<div className="text-violet-400 font-medium mb-1">自动切换</div>
|
||||
<div className="text-slate-400">系统自动判断走 S2S 或 LLM 分支</div>
|
||||
<div className="text-violet-400 font-medium mb-1">当前目标</div>
|
||||
<div className="text-slate-400">彻底绕开原生链纯 S2S 抢答,保证知识库结果能播报</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -161,6 +211,7 @@ export default function App() {
|
||||
/* Chat Panel */
|
||||
handoff && (
|
||||
<ChatPanel
|
||||
key={handoff.sessionId}
|
||||
sessionId={handoff.sessionId}
|
||||
voiceSubtitles={handoff.subtitles}
|
||||
settings={settings}
|
||||
@@ -170,6 +221,16 @@ export default function App() {
|
||||
)
|
||||
)}
|
||||
</main>
|
||||
|
||||
{/* Session History Sidebar */}
|
||||
{showHistory && (
|
||||
<SessionHistoryPanel
|
||||
currentSessionId={currentSessionId}
|
||||
onSelectSession={handleSelectSession}
|
||||
onNewSession={handleNewSession}
|
||||
onClose={() => setShowHistory(false)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
170
test2/client/src/components/SessionHistoryPanel.jsx
Normal file
170
test2/client/src/components/SessionHistoryPanel.jsx
Normal file
@@ -0,0 +1,170 @@
|
||||
import { useState, useEffect, useCallback } from 'react';
|
||||
import { X, Plus, Trash2, MessageSquare, Mic, Clock, Loader2 } from 'lucide-react';
|
||||
import { getSessionList, deleteSessionById } from '../services/voiceApi';
|
||||
|
||||
export default function SessionHistoryPanel({ currentSessionId, onSelectSession, onNewSession, onClose }) {
|
||||
const [sessions, setSessions] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [deletingId, setDeletingId] = useState(null);
|
||||
|
||||
const loadSessions = useCallback(async () => {
|
||||
setLoading(true);
|
||||
try {
|
||||
const list = await getSessionList(null, 50);
|
||||
setSessions(list || []);
|
||||
} catch (err) {
|
||||
console.warn('[SessionHistory] Load failed:', err.message);
|
||||
setSessions([]);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
loadSessions();
|
||||
}, [loadSessions]);
|
||||
|
||||
const handleDelete = async (e, sessionId) => {
|
||||
e.stopPropagation();
|
||||
if (deletingId) return;
|
||||
setDeletingId(sessionId);
|
||||
try {
|
||||
await deleteSessionById(sessionId);
|
||||
setSessions((prev) => prev.filter((s) => s.id !== sessionId));
|
||||
} catch (err) {
|
||||
console.warn('[SessionHistory] Delete failed:', err.message);
|
||||
} finally {
|
||||
setDeletingId(null);
|
||||
}
|
||||
};
|
||||
|
||||
const formatTime = (timestamp) => {
|
||||
if (!timestamp) return '';
|
||||
const date = new Date(typeof timestamp === 'number' ? timestamp : parseInt(timestamp));
|
||||
const now = new Date();
|
||||
const diffMs = now - date;
|
||||
const diffMin = Math.floor(diffMs / 60000);
|
||||
if (diffMin < 1) return '刚刚';
|
||||
if (diffMin < 60) return `${diffMin}分钟前`;
|
||||
const diffHour = Math.floor(diffMin / 60);
|
||||
if (diffHour < 24) return `${diffHour}小时前`;
|
||||
const diffDay = Math.floor(diffHour / 24);
|
||||
if (diffDay < 7) return `${diffDay}天前`;
|
||||
return `${date.getMonth() + 1}/${date.getDate()}`;
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="fixed inset-0 z-50 flex">
|
||||
{/* Backdrop */}
|
||||
<div className="absolute inset-0 bg-black/50 backdrop-blur-sm" onClick={onClose} />
|
||||
|
||||
{/* Sidebar */}
|
||||
<div className="relative w-80 max-w-[85vw] bg-slate-900 border-r border-slate-700/50 flex flex-col h-full shadow-2xl animate-slide-in">
|
||||
{/* Header */}
|
||||
<div className="px-4 py-3 border-b border-slate-700/40 flex items-center justify-between flex-shrink-0">
|
||||
<div className="flex items-center gap-2">
|
||||
<Clock className="w-4 h-4 text-slate-400" />
|
||||
<h2 className="text-sm font-semibold text-white">会话历史</h2>
|
||||
</div>
|
||||
<button
|
||||
onClick={onClose}
|
||||
className="p-1.5 rounded-lg hover:bg-slate-700/50 text-slate-400 hover:text-white transition-colors"
|
||||
>
|
||||
<X className="w-4 h-4" />
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* New Session Button */}
|
||||
<div className="px-3 py-2 border-b border-slate-700/30 flex-shrink-0">
|
||||
<button
|
||||
onClick={() => {
|
||||
onNewSession();
|
||||
onClose();
|
||||
}}
|
||||
className="w-full flex items-center gap-2 px-3 py-2.5 rounded-xl bg-gradient-to-r from-violet-500/20 to-indigo-500/20 border border-violet-500/30 text-violet-300 hover:from-violet-500/30 hover:to-indigo-500/30 transition-all text-sm font-medium"
|
||||
>
|
||||
<Plus className="w-4 h-4" />
|
||||
新建会话
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* Session List */}
|
||||
<div className="flex-1 overflow-y-auto px-3 py-2 space-y-1">
|
||||
{loading ? (
|
||||
<div className="flex items-center justify-center py-12 text-slate-500">
|
||||
<Loader2 className="w-5 h-5 animate-spin mr-2" />
|
||||
<span className="text-sm">加载中...</span>
|
||||
</div>
|
||||
) : sessions.length === 0 ? (
|
||||
<div className="text-center py-12 text-slate-500 text-sm">
|
||||
暂无会话记录
|
||||
</div>
|
||||
) : (
|
||||
sessions.map((session) => {
|
||||
const isActive = session.id === currentSessionId;
|
||||
return (
|
||||
<div
|
||||
key={session.id}
|
||||
onClick={() => {
|
||||
onSelectSession(session);
|
||||
onClose();
|
||||
}}
|
||||
className={`group relative flex items-start gap-2.5 px-3 py-2.5 rounded-xl cursor-pointer transition-all ${
|
||||
isActive
|
||||
? 'bg-violet-500/15 border border-violet-500/30'
|
||||
: 'hover:bg-slate-800/60 border border-transparent'
|
||||
}`}
|
||||
>
|
||||
{/* Mode icon */}
|
||||
<div className={`w-7 h-7 rounded-full flex items-center justify-center flex-shrink-0 mt-0.5 ${
|
||||
session.mode === 'voice' ? 'bg-emerald-500/15' : 'bg-indigo-500/15'
|
||||
}`}>
|
||||
{session.mode === 'voice' ? (
|
||||
<Mic className={`w-3.5 h-3.5 ${isActive ? 'text-emerald-400' : 'text-emerald-500/70'}`} />
|
||||
) : (
|
||||
<MessageSquare className={`w-3.5 h-3.5 ${isActive ? 'text-indigo-400' : 'text-indigo-500/70'}`} />
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Content */}
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="flex items-center justify-between gap-1">
|
||||
<span className={`text-xs font-medium truncate ${isActive ? 'text-violet-300' : 'text-slate-300'}`}>
|
||||
{session.lastMessage || (session.mode === 'voice' ? '语音会话' : '文字会话')}
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-2 mt-0.5">
|
||||
<span className="text-[10px] text-slate-500">
|
||||
{formatTime(session.updatedAt)}
|
||||
</span>
|
||||
{session.messageCount > 0 && (
|
||||
<span className="text-[10px] text-slate-600">
|
||||
{session.messageCount} 条消息
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Delete button */}
|
||||
{!isActive && (
|
||||
<button
|
||||
onClick={(e) => handleDelete(e, session.id)}
|
||||
className="opacity-0 group-hover:opacity-100 p-1 rounded-md hover:bg-red-500/20 text-slate-600 hover:text-red-400 transition-all flex-shrink-0"
|
||||
title="删除会话"
|
||||
>
|
||||
{deletingId === session.id ? (
|
||||
<Loader2 className="w-3.5 h-3.5 animate-spin" />
|
||||
) : (
|
||||
<Trash2 className="w-3.5 h-3.5" />
|
||||
)}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -80,6 +80,16 @@ export default function SettingsPanel({ settings, onChange, voiceConfig, onClose
|
||||
className="w-full px-3 py-1.5 rounded-lg bg-slate-700/50 border border-slate-600/40 text-sm text-white placeholder-slate-500 focus:outline-none focus:border-violet-500/50"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="block text-xs text-slate-500 mb-1">欢迎语</label>
|
||||
<textarea
|
||||
value={settings.greetingText}
|
||||
onChange={(e) => update('greetingText', e.target.value)}
|
||||
rows={2}
|
||||
className="w-full px-3 py-1.5 rounded-lg bg-slate-700/50 border border-slate-600/40 text-sm text-white placeholder-slate-500 focus:outline-none focus:border-violet-500/50 resize-none"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* 音色 & 模型 */}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import { Mic, MicOff, Phone, PhoneOff, Loader2, MessageSquare } from 'lucide-react';
|
||||
import { useVoiceChat } from '../hooks/useVoiceChat';
|
||||
import { useNativeVoiceChat } from '../hooks/useNativeVoiceChat';
|
||||
import SubtitleDisplay from './SubtitleDisplay';
|
||||
|
||||
export default function VoicePanel({ settings, onVoiceEnd, chatHistory = [], sessionId: parentSessionId }) {
|
||||
export default function VoicePanel({ settings, onVoiceEnd, chatHistory = [], sessionId: parentSessionId, onSessionCreated }) {
|
||||
const {
|
||||
isActive,
|
||||
isMuted,
|
||||
@@ -15,7 +15,7 @@ export default function VoicePanel({ settings, onVoiceEnd, chatHistory = [], ses
|
||||
stop,
|
||||
toggleMute,
|
||||
clearError,
|
||||
} = useVoiceChat();
|
||||
} = useNativeVoiceChat();
|
||||
|
||||
const formatTime = (s) => {
|
||||
const m = Math.floor(s / 60);
|
||||
@@ -23,17 +23,21 @@ export default function VoicePanel({ settings, onVoiceEnd, chatHistory = [], ses
|
||||
return `${m.toString().padStart(2, '0')}:${sec.toString().padStart(2, '0')}`;
|
||||
};
|
||||
|
||||
const handleStart = () => {
|
||||
start({
|
||||
const handleStart = async () => {
|
||||
const result = await start({
|
||||
botName: settings.botName,
|
||||
systemRole: settings.systemRole,
|
||||
speakingStyle: settings.speakingStyle,
|
||||
greetingText: settings.greetingText,
|
||||
modelVersion: settings.modelVersion,
|
||||
speaker: settings.speaker,
|
||||
enableWebSearch: settings.enableWebSearch,
|
||||
chatHistory: chatHistory.length > 0 ? chatHistory.slice(-10) : undefined,
|
||||
parentSessionId,
|
||||
});
|
||||
if (result?.sessionId && onSessionCreated) {
|
||||
onSessionCreated(result.sessionId);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
|
||||
148
test2/client/src/hooks/useNativeVoiceChat.js
Normal file
148
test2/client/src/hooks/useNativeVoiceChat.js
Normal file
@@ -0,0 +1,148 @@
|
||||
import { useState, useCallback, useRef, useEffect } from 'react';
|
||||
import nativeVoiceService from '../services/nativeVoiceService';
|
||||
|
||||
export function useNativeVoiceChat() {
|
||||
const [isActive, setIsActive] = useState(false);
|
||||
const [isMuted, setIsMuted] = useState(false);
|
||||
const [isConnecting, setIsConnecting] = useState(false);
|
||||
const [subtitles, setSubtitles] = useState([]);
|
||||
const [connectionState, setConnectionState] = useState('disconnected');
|
||||
const [error, setError] = useState(null);
|
||||
const [duration, setDuration] = useState(0);
|
||||
const sessionRef = useRef(null);
|
||||
const timerRef = useRef(null);
|
||||
const greetingUtteranceRef = useRef(null);
|
||||
|
||||
const stopGreeting = useCallback(() => {
|
||||
if (typeof window !== 'undefined' && 'speechSynthesis' in window) {
|
||||
window.speechSynthesis.cancel();
|
||||
}
|
||||
greetingUtteranceRef.current = null;
|
||||
}, []);
|
||||
|
||||
const playGreeting = useCallback((text) => {
|
||||
const greetingText = String(text || '').trim();
|
||||
if (!greetingText || typeof window === 'undefined' || !('speechSynthesis' in window) || typeof window.SpeechSynthesisUtterance === 'undefined') {
|
||||
return;
|
||||
}
|
||||
stopGreeting();
|
||||
const utterance = new window.SpeechSynthesisUtterance(greetingText);
|
||||
utterance.lang = 'zh-CN';
|
||||
utterance.rate = 1;
|
||||
utterance.pitch = 1;
|
||||
greetingUtteranceRef.current = utterance;
|
||||
window.speechSynthesis.speak(utterance);
|
||||
}, [stopGreeting]);
|
||||
|
||||
useEffect(() => {
|
||||
nativeVoiceService.on('onSubtitle', (subtitle) => {
|
||||
setSubtitles((prev) => {
|
||||
if (subtitle.isFinal) {
|
||||
const isDup = prev.some((s) => s.isFinal && s.role === subtitle.role && s.text === subtitle.text);
|
||||
if (isDup) return prev;
|
||||
return [...prev.filter((s) => s.isFinal || s.role !== subtitle.role), subtitle];
|
||||
}
|
||||
const finals = prev.filter((s) => s.isFinal);
|
||||
return [...finals, subtitle];
|
||||
});
|
||||
});
|
||||
|
||||
nativeVoiceService.on('onConnectionStateChange', setConnectionState);
|
||||
nativeVoiceService.on('onError', (err) => setError(err?.message || 'Native voice error'));
|
||||
|
||||
return () => {
|
||||
stopGreeting();
|
||||
nativeVoiceService.disconnect();
|
||||
if (timerRef.current) clearInterval(timerRef.current);
|
||||
};
|
||||
}, [stopGreeting]);
|
||||
|
||||
const start = useCallback(async (options = {}) => {
|
||||
setError(null);
|
||||
setIsConnecting(true);
|
||||
|
||||
try {
|
||||
const userId = `user_${Date.now().toString(36)}`;
|
||||
const sessionId = options.parentSessionId || `native_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 6)}`;
|
||||
sessionRef.current = { sessionId, userId };
|
||||
|
||||
await nativeVoiceService.connect({
|
||||
sessionId,
|
||||
userId,
|
||||
botName: options.botName,
|
||||
systemRole: options.systemRole,
|
||||
speakingStyle: options.speakingStyle,
|
||||
modelVersion: options.modelVersion,
|
||||
speaker: options.speaker,
|
||||
});
|
||||
|
||||
setIsActive(true);
|
||||
setSubtitles([]);
|
||||
setDuration(0);
|
||||
playGreeting(options.greetingText);
|
||||
timerRef.current = setInterval(() => {
|
||||
setDuration((d) => d + 1);
|
||||
}, 1000);
|
||||
return { sessionId };
|
||||
} catch (err) {
|
||||
console.error('[useNativeVoiceChat] Start failed:', err);
|
||||
setError(err.message || 'Failed to start native voice chat');
|
||||
await nativeVoiceService.disconnect();
|
||||
sessionRef.current = null;
|
||||
return null;
|
||||
} finally {
|
||||
setIsConnecting(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
const stop = useCallback(async () => {
|
||||
let result = { sessionId: null, subtitles: [] };
|
||||
try {
|
||||
const localFinalSubtitles = subtitles.filter((s) => s.isFinal);
|
||||
if (sessionRef.current) {
|
||||
result = {
|
||||
sessionId: sessionRef.current.sessionId,
|
||||
subtitles: localFinalSubtitles,
|
||||
};
|
||||
sessionRef.current = null;
|
||||
}
|
||||
|
||||
await nativeVoiceService.disconnect();
|
||||
stopGreeting();
|
||||
|
||||
if (timerRef.current) {
|
||||
clearInterval(timerRef.current);
|
||||
timerRef.current = null;
|
||||
}
|
||||
|
||||
setIsActive(false);
|
||||
setIsMuted(false);
|
||||
setConnectionState('disconnected');
|
||||
} catch (err) {
|
||||
console.error('[useNativeVoiceChat] Stop failed:', err);
|
||||
}
|
||||
return result;
|
||||
}, [stopGreeting, subtitles]);
|
||||
|
||||
const toggleMute = useCallback(async () => {
|
||||
const next = !isMuted;
|
||||
await nativeVoiceService.setMuted(next);
|
||||
setIsMuted(next);
|
||||
}, [isMuted]);
|
||||
|
||||
const clearError = useCallback(() => setError(null), []);
|
||||
|
||||
return {
|
||||
isActive,
|
||||
isMuted,
|
||||
isConnecting,
|
||||
subtitles,
|
||||
connectionState,
|
||||
error,
|
||||
duration,
|
||||
start,
|
||||
stop,
|
||||
toggleMute,
|
||||
clearError,
|
||||
};
|
||||
}
|
||||
@@ -1,198 +0,0 @@
|
||||
import { useState, useCallback, useRef, useEffect } from 'react';
|
||||
import rtcService from '../services/rtcService';
|
||||
import { prepareVoiceChat, startVoiceChat, stopVoiceChat, executeToolCall, executeFcCallback, sendSubtitle, forwardRoomMessage } from '../services/voiceApi';
|
||||
|
||||
export function useVoiceChat() {
|
||||
const [isActive, setIsActive] = useState(false);
|
||||
const [isMuted, setIsMuted] = useState(false);
|
||||
const [isConnecting, setIsConnecting] = useState(false);
|
||||
const [subtitles, setSubtitles] = useState([]);
|
||||
const [connectionState, setConnectionState] = useState('disconnected');
|
||||
const [error, setError] = useState(null);
|
||||
const [duration, setDuration] = useState(0);
|
||||
const sessionRef = useRef(null);
|
||||
const timerRef = useRef(null);
|
||||
|
||||
useEffect(() => {
|
||||
rtcService.on('onSubtitle', (subtitle) => {
|
||||
setSubtitles((prev) => {
|
||||
if (subtitle.isFinal) {
|
||||
return [...prev.filter((s) => s.sequence !== subtitle.sequence), subtitle];
|
||||
}
|
||||
const idx = prev.findIndex((s) => s.sequence === subtitle.sequence && !s.isFinal);
|
||||
if (idx >= 0) {
|
||||
const updated = [...prev];
|
||||
updated[idx] = subtitle;
|
||||
return updated;
|
||||
}
|
||||
return [...prev, subtitle];
|
||||
});
|
||||
|
||||
// 方案B:将用户最终字幕转发到后端,供 FC 回调时作为知识库查询
|
||||
if (subtitle.isFinal && subtitle.role === 'user' && subtitle.text) {
|
||||
const session = sessionRef.current;
|
||||
if (session) {
|
||||
sendSubtitle({
|
||||
sessionId: session.sessionId,
|
||||
roomId: session.roomId,
|
||||
text: subtitle.text,
|
||||
role: 'user',
|
||||
definite: true,
|
||||
sequence: subtitle.sequence,
|
||||
}).catch((err) => console.warn('[useVoiceChat] Send subtitle failed:', err.message));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
rtcService.on('onToolCall', async (toolCall) => {
|
||||
const session = sessionRef.current;
|
||||
if (!session) {
|
||||
console.warn('[useVoiceChat] Tool call received but no active session');
|
||||
return;
|
||||
}
|
||||
console.log(`[useVoiceChat] Tool call: ${toolCall.function_name}, session: ${session.sessionId}`);
|
||||
try {
|
||||
// 构建FC回调消息格式
|
||||
const message = JSON.stringify([{
|
||||
id: toolCall.tool_call_id,
|
||||
function: {
|
||||
name: toolCall.function_name,
|
||||
arguments: toolCall.arguments
|
||||
},
|
||||
seq: 1
|
||||
}]);
|
||||
|
||||
// 调用fc_callback端点,传递必要的参数
|
||||
const result = await executeFcCallback({
|
||||
roomId: session.roomId,
|
||||
taskId: session.taskId || session.sessionId,
|
||||
type: 'tool_calls',
|
||||
message: message
|
||||
});
|
||||
console.log('[useVoiceChat] FC callback result:', result);
|
||||
} catch (err) {
|
||||
console.error('[useVoiceChat] FC callback failed:', err);
|
||||
}
|
||||
});
|
||||
|
||||
// 方案B:转发所有 RTC 房间消息到后端(可能包含 ASR/会话状态数据)
|
||||
rtcService.on('onRoomMessage', (msg) => {
|
||||
const session = sessionRef.current;
|
||||
if (session && msg.text) {
|
||||
forwardRoomMessage({
|
||||
roomId: session.roomId,
|
||||
uid: msg.uid,
|
||||
text: msg.text,
|
||||
}).catch(() => {}); // 静默失败,不影响主流程
|
||||
}
|
||||
});
|
||||
|
||||
rtcService.on('onConnectionStateChange', setConnectionState);
|
||||
rtcService.on('onError', (err) => setError(err?.message || 'RTC error'));
|
||||
|
||||
return () => {
|
||||
rtcService.destroy();
|
||||
if (timerRef.current) clearInterval(timerRef.current);
|
||||
};
|
||||
}, []);
|
||||
|
||||
const start = useCallback(async (options = {}) => {
|
||||
setError(null);
|
||||
setIsConnecting(true);
|
||||
|
||||
try {
|
||||
const userId = `user_${Date.now().toString(36)}`;
|
||||
const { parentSessionId, ...startOptions } = options;
|
||||
|
||||
// 第一步:准备房间,获取 token
|
||||
const prepareRes = await prepareVoiceChat({ userId });
|
||||
if (!prepareRes.success) throw new Error(prepareRes.error);
|
||||
|
||||
const { sessionId, roomId, taskId, rtcToken, rtcAppId } = prepareRes.data;
|
||||
sessionRef.current = { sessionId, roomId, taskId, parentSessionId };
|
||||
|
||||
// 第二步:用户先进房
|
||||
await rtcService.init(rtcAppId);
|
||||
await rtcService.joinRoom(roomId, userId, rtcToken);
|
||||
console.log('[useVoiceChat] User joined room, now starting AI...');
|
||||
|
||||
// 第三步:用户已在房间内,启动 AI 语音对话
|
||||
const startRes = await startVoiceChat({ sessionId, ...startOptions });
|
||||
if (!startRes.success) throw new Error(startRes.error);
|
||||
|
||||
setIsActive(true);
|
||||
setSubtitles([]);
|
||||
setDuration(0);
|
||||
timerRef.current = setInterval(() => {
|
||||
setDuration((d) => d + 1);
|
||||
}, 1000);
|
||||
} catch (err) {
|
||||
console.error('[useVoiceChat] Start failed:', err);
|
||||
setError(err.message || 'Failed to start voice chat');
|
||||
rtcService.destroy();
|
||||
if (sessionRef.current) {
|
||||
stopVoiceChat(sessionRef.current.sessionId).catch(() => {});
|
||||
sessionRef.current = null;
|
||||
}
|
||||
} finally {
|
||||
setIsConnecting(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
const stop = useCallback(async () => {
|
||||
let result = { sessionId: null, subtitles: [] };
|
||||
try {
|
||||
// 在离开房间前,先从前端 state 中提取已确认的字幕
|
||||
const localFinalSubtitles = subtitles.filter((s) => s.isFinal);
|
||||
|
||||
await rtcService.leaveRoom();
|
||||
|
||||
if (sessionRef.current) {
|
||||
const sid = sessionRef.current.sessionId;
|
||||
const response = await stopVoiceChat(sid);
|
||||
const backendSubtitles = response?.data?.subtitles || [];
|
||||
|
||||
// 优先使用前端本地字幕(RTC 直接接收,更完整),后端字幕作为 fallback
|
||||
result = {
|
||||
sessionId: sid,
|
||||
subtitles: localFinalSubtitles.length > 0 ? localFinalSubtitles : backendSubtitles,
|
||||
};
|
||||
sessionRef.current = null;
|
||||
}
|
||||
|
||||
if (timerRef.current) {
|
||||
clearInterval(timerRef.current);
|
||||
timerRef.current = null;
|
||||
}
|
||||
|
||||
setIsActive(false);
|
||||
setIsMuted(false);
|
||||
setConnectionState('disconnected');
|
||||
} catch (err) {
|
||||
console.error('[useVoiceChat] Stop failed:', err);
|
||||
}
|
||||
return result;
|
||||
}, [subtitles]);
|
||||
|
||||
const toggleMute = useCallback(async () => {
|
||||
const next = !isMuted;
|
||||
await rtcService.setMuted(next);
|
||||
setIsMuted(next);
|
||||
}, [isMuted]);
|
||||
|
||||
const clearError = useCallback(() => setError(null), []);
|
||||
|
||||
return {
|
||||
isActive,
|
||||
isMuted,
|
||||
isConnecting,
|
||||
subtitles,
|
||||
connectionState,
|
||||
error,
|
||||
duration,
|
||||
start,
|
||||
stop,
|
||||
toggleMute,
|
||||
clearError,
|
||||
};
|
||||
}
|
||||
@@ -17,3 +17,11 @@ body {
|
||||
background: #334155;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
@keyframes slide-in {
|
||||
from { transform: translateX(-100%); }
|
||||
to { transform: translateX(0); }
|
||||
}
|
||||
.animate-slide-in {
|
||||
animation: slide-in 0.2s ease-out;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,26 @@
|
||||
import axios from 'axios';
|
||||
|
||||
function resolveApiBaseURL(configured, path) {
|
||||
if (configured) {
|
||||
return configured;
|
||||
}
|
||||
if (typeof window === 'undefined') {
|
||||
return path;
|
||||
}
|
||||
const hostname = window.location.hostname;
|
||||
const port = window.location.port;
|
||||
const protocol = window.location.protocol === 'https:' ? 'https:' : 'http:';
|
||||
const isLocalHost = hostname === 'localhost' || hostname === '127.0.0.1';
|
||||
if ((window.location.protocol === 'file:' || isLocalHost) && port !== '3012') {
|
||||
return `${protocol}//${hostname || '127.0.0.1'}:3012${path}`;
|
||||
}
|
||||
return path;
|
||||
}
|
||||
|
||||
const chatApiBaseURL = resolveApiBaseURL(import.meta.env.VITE_CHAT_API_BASE_URL, '/api/chat');
|
||||
|
||||
const api = axios.create({
|
||||
baseURL: '/api/chat',
|
||||
baseURL: chatApiBaseURL,
|
||||
timeout: 30000,
|
||||
});
|
||||
|
||||
@@ -27,7 +46,7 @@ export function sendMessageStream(sessionId, message, { onChunk, onToolCall, onD
|
||||
|
||||
(async () => {
|
||||
try {
|
||||
const response = await fetch('/api/chat/send-stream', {
|
||||
const response = await fetch(`${chatApiBaseURL}/send-stream`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ sessionId, message }),
|
||||
|
||||
332
test2/client/src/services/nativeVoiceService.js
Normal file
332
test2/client/src/services/nativeVoiceService.js
Normal file
@@ -0,0 +1,332 @@
|
||||
class NativeVoiceService {
|
||||
constructor() {
|
||||
this.ws = null;
|
||||
this.mediaStream = null;
|
||||
this.captureContext = null;
|
||||
this.captureSource = null;
|
||||
this.captureProcessor = null;
|
||||
this.captureSilenceGain = null;
|
||||
this.playbackContext = null;
|
||||
this.playbackTime = 0;
|
||||
this.activeSources = new Set();
|
||||
this.pendingSamples = [];
|
||||
this.readyResolver = null;
|
||||
this.readyRejector = null;
|
||||
this.callbacks = {
|
||||
onSubtitle: null,
|
||||
onConnectionStateChange: null,
|
||||
onError: null,
|
||||
onAssistantPending: null,
|
||||
onDiagnostic: null,
|
||||
};
|
||||
}
|
||||
|
||||
resolveWebSocketUrl(sessionId, userId) {
|
||||
const query = new URLSearchParams({
|
||||
sessionId,
|
||||
userId: userId || '',
|
||||
});
|
||||
const configuredBase = import.meta.env.VITE_VOICE_WS_BASE_URL || import.meta.env.VITE_VOICE_API_BASE_URL || '';
|
||||
if (configuredBase && !configuredBase.startsWith('/')) {
|
||||
let base = configuredBase.replace(/\/$/, '');
|
||||
if (base.startsWith('https://')) {
|
||||
base = `wss://${base.slice('https://'.length)}`;
|
||||
} else if (base.startsWith('http://')) {
|
||||
base = `ws://${base.slice('http://'.length)}`;
|
||||
}
|
||||
if (base.endsWith('/api/voice')) {
|
||||
base = base.slice(0, -'/api/voice'.length);
|
||||
} else if (base.endsWith('/api')) {
|
||||
base = base.slice(0, -'/api'.length);
|
||||
}
|
||||
return `${base}/ws/realtime-dialog?${query.toString()}`;
|
||||
}
|
||||
const hostname = window.location.hostname;
|
||||
const port = window.location.port;
|
||||
const isLocalHost = hostname === 'localhost' || hostname === '127.0.0.1';
|
||||
if ((window.location.protocol === 'file:' || isLocalHost) && port !== '3012') {
|
||||
return `ws://${hostname || '127.0.0.1'}:3012/ws/realtime-dialog?${query.toString()}`;
|
||||
}
|
||||
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
||||
return `${protocol}//${window.location.host}/ws/realtime-dialog?${query.toString()}`;
|
||||
}
|
||||
|
||||
emitConnectionState(state) {
|
||||
this.callbacks.onConnectionStateChange?.(state);
|
||||
}
|
||||
|
||||
emitDiagnostic(type, payload) {
|
||||
this.callbacks.onDiagnostic?.({ type, payload, timestamp: Date.now() });
|
||||
}
|
||||
|
||||
resetPlaybackQueue() {
|
||||
this.activeSources.forEach((source) => {
|
||||
try {
|
||||
source.stop();
|
||||
} catch (_) {}
|
||||
try {
|
||||
source.disconnect();
|
||||
} catch (_) {}
|
||||
});
|
||||
this.activeSources.clear();
|
||||
if (this.playbackContext) {
|
||||
this.playbackTime = this.playbackContext.currentTime + 0.02;
|
||||
} else {
|
||||
this.playbackTime = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Open the realtime-dialog WebSocket and begin microphone capture.
 *
 * Tears down any previous session first, creates the 24 kHz playback
 * AudioContext, then resolves only after the server answers the 'start'
 * message with a 'ready' frame (see handleJsonMessage). Rejects if the
 * socket errors or closes before 'ready'.
 *
 * @param {Object} opts - session identity plus bot persona fields that are
 *   forwarded verbatim in the 'start' message.
 */
async connect({ sessionId, userId, botName, systemRole, speakingStyle, modelVersion, speaker }) {
  // Ensure a clean slate: closes any prior socket and audio contexts.
  await this.disconnect();
  const wsUrl = this.resolveWebSocketUrl(sessionId, userId);
  this.emitConnectionState('connecting');
  // webkitAudioContext fallback for older Safari.
  this.playbackContext = new (window.AudioContext || window.webkitAudioContext)();
  // Autoplay policy may start the context suspended; best-effort resume.
  if (this.playbackContext.state === 'suspended') {
    await this.playbackContext.resume().catch(() => {});
  }
  this.playbackTime = this.playbackContext.currentTime;

  // Resolved by the 'ready' server message; rejected on error/early close.
  await new Promise((resolve, reject) => {
    this.readyResolver = resolve;
    this.readyRejector = reject;
    const ws = new WebSocket(wsUrl);
    ws.binaryType = 'arraybuffer';
    this.ws = ws;

    ws.onopen = () => {
      this.emitConnectionState('connected');
      // Handshake: announce the session and bot persona to the backend.
      ws.send(JSON.stringify({
        type: 'start',
        sessionId,
        userId,
        botName,
        systemRole,
        speakingStyle,
        modelVersion,
        speaker,
      }));
    };

    ws.onerror = () => {
      const error = new Error('WebSocket connection failed');
      this.callbacks.onError?.(error);
      // readyRejector is this promise's reject; calling both is harmless
      // (a promise settles only once) but keeps the resolver slots cleared.
      this.readyRejector?.(error);
      this.readyResolver = null;
      this.readyRejector = null;
      reject(error);
    };

    ws.onclose = () => {
      this.emitConnectionState('disconnected');
      // Close before 'ready' means the handshake never completed.
      if (this.readyRejector) {
        this.readyRejector(new Error('WebSocket closed before ready'));
        this.readyResolver = null;
        this.readyRejector = null;
      }
    };

    ws.onmessage = (event) => {
      // Text frames carry JSON control messages; binary frames carry PCM audio.
      if (typeof event.data === 'string') {
        this.handleJsonMessage(event.data);
        return;
      }
      this.handleAudioMessage(event.data);
    };
  });

  // Only start the microphone once the server is ready to receive audio.
  await this.startCapture();
}
|
||||
|
||||
handleJsonMessage(raw) {
|
||||
try {
|
||||
const msg = JSON.parse(raw);
|
||||
if (msg.type === 'ready') {
|
||||
this.readyResolver?.();
|
||||
this.readyResolver = null;
|
||||
this.readyRejector = null;
|
||||
return;
|
||||
}
|
||||
if (msg.type === 'subtitle') {
|
||||
this.callbacks.onSubtitle?.({
|
||||
text: msg.text,
|
||||
role: msg.role,
|
||||
isFinal: !!msg.isFinal,
|
||||
sequence: msg.sequence,
|
||||
});
|
||||
return;
|
||||
}
|
||||
if (msg.type === 'tts_reset') {
|
||||
this.resetPlaybackQueue();
|
||||
this.emitDiagnostic('tts_reset', msg);
|
||||
return;
|
||||
}
|
||||
if (msg.type === 'assistant_pending') {
|
||||
this.callbacks.onAssistantPending?.(!!msg.active);
|
||||
return;
|
||||
}
|
||||
if (msg.type === 'error') {
|
||||
this.callbacks.onError?.(new Error(msg.error || 'native voice error'));
|
||||
return;
|
||||
}
|
||||
this.emitDiagnostic('ws_message', msg);
|
||||
} catch (error) {
|
||||
this.emitDiagnostic('ws_raw_text', raw);
|
||||
}
|
||||
}
|
||||
|
||||
handleAudioMessage(arrayBuffer) {
|
||||
if (!this.playbackContext) {
|
||||
return;
|
||||
}
|
||||
const pcm16 = new Int16Array(arrayBuffer);
|
||||
if (!pcm16.length) {
|
||||
return;
|
||||
}
|
||||
const audioBuffer = this.playbackContext.createBuffer(1, pcm16.length, 24000);
|
||||
const channel = audioBuffer.getChannelData(0);
|
||||
for (let i = 0; i < pcm16.length; i += 1) {
|
||||
channel[i] = pcm16[i] / 32768;
|
||||
}
|
||||
const source = this.playbackContext.createBufferSource();
|
||||
source.buffer = audioBuffer;
|
||||
source.connect(this.playbackContext.destination);
|
||||
this.activeSources.add(source);
|
||||
source.onended = () => {
|
||||
this.activeSources.delete(source);
|
||||
try {
|
||||
source.disconnect();
|
||||
} catch (_) {}
|
||||
};
|
||||
const now = this.playbackContext.currentTime;
|
||||
if (this.playbackTime < now) {
|
||||
this.playbackTime = now + 0.02;
|
||||
}
|
||||
source.start(this.playbackTime);
|
||||
this.playbackTime += audioBuffer.duration;
|
||||
this.emitDiagnostic('audio_chunk', { samples: pcm16.length, duration: audioBuffer.duration });
|
||||
}
|
||||
|
||||
/**
 * Start microphone capture and stream 16 kHz PCM to the server.
 *
 * Builds the graph: mic -> ScriptProcessor (capture tap) -> muted gain ->
 * destination. The zero-gain node keeps the processor "pulled" by the graph
 * without echoing the mic to the speakers.
 *
 * NOTE(review): ScriptProcessorNode is deprecated in the Web Audio spec;
 * AudioWorklet is the modern replacement — candidate for a follow-up.
 * @throws if the user denies microphone permission (getUserMedia rejects).
 */
async startCapture() {
  this.mediaStream = await navigator.mediaDevices.getUserMedia({
    audio: {
      channelCount: 1,
      noiseSuppression: true,
      echoCancellation: true,
      autoGainControl: true,
    },
    video: false,
  });
  this.captureContext = new (window.AudioContext || window.webkitAudioContext)();
  this.captureSource = this.captureContext.createMediaStreamSource(this.mediaStream);
  // 4096-frame buffer, mono in / mono out.
  this.captureProcessor = this.captureContext.createScriptProcessor(4096, 1, 1);
  this.captureSilenceGain = this.captureContext.createGain();
  // Gain 0 so the mic signal never reaches the speakers.
  this.captureSilenceGain.gain.value = 0;
  this.captureProcessor.onaudioprocess = (event) => {
    const input = event.inputBuffer.getChannelData(0);
    // Hardware rate (typically 44.1/48 kHz) -> 16 kHz expected by the server.
    const downsampled = this.downsampleBuffer(input, this.captureContext.sampleRate, 16000);
    for (let i = 0; i < downsampled.length; i += 1) {
      this.pendingSamples.push(downsampled[i]);
    }
    // Ship fixed 320-sample frames (20 ms at 16 kHz) as int16 PCM.
    while (this.pendingSamples.length >= 320) {
      const chunk = this.pendingSamples.splice(0, 320);
      const pcm = new Int16Array(chunk.length);
      for (let i = 0; i < chunk.length; i += 1) {
        // Clamp, then scale asymmetrically so both -1 and +1 fit in int16.
        const sample = Math.max(-1, Math.min(1, chunk[i]));
        pcm[i] = sample < 0 ? sample * 32768 : sample * 32767;
      }
      // Drop frames silently while the socket is not open.
      if (this.ws && this.ws.readyState === WebSocket.OPEN) {
        this.ws.send(pcm.buffer);
      }
    }
  };
  this.captureSource.connect(this.captureProcessor);
  this.captureProcessor.connect(this.captureSilenceGain);
  this.captureSilenceGain.connect(this.captureContext.destination);
}
|
||||
|
||||
downsampleBuffer(buffer, inputRate, outputRate) {
|
||||
if (outputRate >= inputRate) {
|
||||
return Array.from(buffer);
|
||||
}
|
||||
const sampleRateRatio = inputRate / outputRate;
|
||||
const newLength = Math.round(buffer.length / sampleRateRatio);
|
||||
const result = new Array(newLength);
|
||||
let offsetResult = 0;
|
||||
let offsetBuffer = 0;
|
||||
while (offsetResult < result.length) {
|
||||
const nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
|
||||
let accum = 0;
|
||||
let count = 0;
|
||||
for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i += 1) {
|
||||
accum += buffer[i];
|
||||
count += 1;
|
||||
}
|
||||
result[offsetResult] = count > 0 ? accum / count : 0;
|
||||
offsetResult += 1;
|
||||
offsetBuffer = nextOffsetBuffer;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
async setMuted(muted) {
|
||||
this.mediaStream?.getAudioTracks().forEach((track) => {
|
||||
track.enabled = !muted;
|
||||
});
|
||||
}
|
||||
|
||||
async disconnect() {
|
||||
if (this.captureProcessor) {
|
||||
this.captureProcessor.disconnect();
|
||||
this.captureProcessor.onaudioprocess = null;
|
||||
this.captureProcessor = null;
|
||||
}
|
||||
if (this.captureSource) {
|
||||
this.captureSource.disconnect();
|
||||
this.captureSource = null;
|
||||
}
|
||||
if (this.captureSilenceGain) {
|
||||
this.captureSilenceGain.disconnect();
|
||||
this.captureSilenceGain = null;
|
||||
}
|
||||
if (this.captureContext) {
|
||||
await this.captureContext.close().catch(() => {});
|
||||
this.captureContext = null;
|
||||
}
|
||||
if (this.mediaStream) {
|
||||
this.mediaStream.getTracks().forEach((track) => track.stop());
|
||||
this.mediaStream = null;
|
||||
}
|
||||
if (this.ws) {
|
||||
try {
|
||||
if (this.ws.readyState === WebSocket.OPEN) {
|
||||
this.ws.send(JSON.stringify({ type: 'stop' }));
|
||||
this.ws.close();
|
||||
}
|
||||
} catch (_) {}
|
||||
this.ws = null;
|
||||
}
|
||||
if (this.playbackContext) {
|
||||
this.resetPlaybackQueue();
|
||||
await this.playbackContext.close().catch(() => {});
|
||||
this.playbackContext = null;
|
||||
}
|
||||
this.playbackTime = 0;
|
||||
this.pendingSamples = [];
|
||||
this.emitConnectionState('disconnected');
|
||||
}
|
||||
|
||||
on(event, callback) {
|
||||
if (event in this.callbacks) {
|
||||
this.callbacks[event] = callback;
|
||||
}
|
||||
}
|
||||
|
||||
off(event) {
|
||||
if (event in this.callbacks) {
|
||||
this.callbacks[event] = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Module-level singleton: the whole app shares one native voice session.
const nativeVoiceService = new NativeVoiceService();
export default nativeVoiceService;
|
||||
@@ -1,323 +0,0 @@
|
||||
/**
 * Volcengine RTC SDK wrapper.
 * Responsible for establishing and managing WebRTC audio streams, relaying
 * subtitles, room/user messages (function calling), and diagnostics to the
 * app via the callbacks registered with on()/off().
 */
class RTCService {
  constructor() {
    // Lazily created by init(); null when not initialized.
    this.engine = null;
    // True between a successful joinRoom() and leaveRoom()/destroy().
    this.joined = false;
    // One slot per supported event; on()/off() only touch known keys.
    this.callbacks = {
      onSubtitle: null,
      onAudioStatus: null,
      onConnectionStateChange: null,
      onError: null,
      onUserJoined: null,
      onUserLeft: null,
      onToolCall: null,
      onRoomMessage: null,
    };
  }

  /**
   * Dynamically import the RTC SDK, create the engine for `appId`, and wire
   * up all event listeners (subtitles, room messages, diagnostics).
   * Re-initializing destroys any existing engine first.
   * @returns {Promise<boolean>} true on success.
   * @throws when the SDK cannot be loaded or engine creation fails.
   */
  async init(appId) {
    if (this.engine) {
      this.destroy();
    }

    try {
      // Dynamic import keeps the SDK out of the initial bundle.
      const VERTC = await import('@volcengine/rtc');
      // Handle both default and named export shapes of the SDK.
      const createEngine = VERTC.default?.createEngine || VERTC.createEngine;
      const events = VERTC.default?.events || VERTC.events;

      if (!createEngine) {
        throw new Error('Failed to load RTC SDK: createEngine not found');
      }

      this.engine = createEngine(appId);
      this.events = events;

      this.engine.on(events.onConnectionStateChanged, (state) => {
        console.log('[RTC] Connection state:', state);
        this.callbacks.onConnectionStateChange?.(state);
      });

      // Feature-detect optional events: older SDK versions may lack them.
      if (events.onSubtitleStateChanged) {
        this.engine.on(events.onSubtitleStateChanged, (state) => {
          console.log('[RTC] Subtitle state changed:', state);
        });
      }

      if (events.onSubtitleMessageReceived) {
        this.engine.on(events.onSubtitleMessageReceived, (subtitles) => {
          console.log('[RTC] Subtitle received:', subtitles.length, 'items');
          subtitles.forEach((sub) => {
            // Bot userIds start with 'bot_'; a missing userId is also treated as the assistant.
            const isBot = !sub.userId || sub.userId.startsWith('bot_');
            this.callbacks.onSubtitle?.({
              text: sub.text,
              role: isBot ? 'assistant' : 'user',
              isFinal: sub.definite,
              sequence: sub.sequence,
            });
          });
        });
      }

      this.engine.on(events.onUserJoined, (info) => {
        console.log('[RTC] User joined:', info.userInfo?.userId);
        this.callbacks.onUserJoined?.(info);
      });

      this.engine.on(events.onUserLeave, (info) => {
        console.log('[RTC] User left:', info.userInfo?.userId);
        this.callbacks.onUserLeft?.(info);
      });

      this.engine.on(events.onError, (error) => {
        console.error('[RTC] Error:', error);
        this.callbacks.onError?.(error);
      });

      // === Function Calling: listen for room messages (the SDK callback
      // receives a single event object). Four channels are watched because
      // the tool-call payload may arrive as room/user, binary/text.
      if (events.onRoomBinaryMessageReceived) {
        this.engine.on(events.onRoomBinaryMessageReceived, (event) => {
          try {
            const uid = event.uid || event.userId || 'unknown';
            const raw = event.message;
            const text = typeof raw === 'string' ? raw : new TextDecoder().decode(raw);
            console.log('[RTC][FC] Room binary from', uid, ':', text.substring(0, 500));
            this.callbacks.onRoomMessage?.({ uid, text });
            const parsed = JSON.parse(text);
            this._handleRoomMessage(uid, parsed);
          } catch (e) {
            console.log('[RTC][FC] Room binary (non-JSON):', e.message);
          }
        });
      }
      if (events.onRoomMessageReceived) {
        this.engine.on(events.onRoomMessageReceived, (event) => {
          const uid = event.uid || event.userId || 'unknown';
          const msg = event.message || '';
          console.log('[RTC][FC] Room text from', uid, ':', String(msg).substring(0, 500));
          this.callbacks.onRoomMessage?.({ uid, text: String(msg) });
          try {
            const parsed = JSON.parse(msg);
            this._handleRoomMessage(uid, parsed);
          } catch (e) {
            console.log('[RTC][FC] Room text (non-JSON):', e.message);
          }
        });
      }
      if (events.onUserBinaryMessageReceived) {
        this.engine.on(events.onUserBinaryMessageReceived, (event) => {
          try {
            const uid = event.uid || event.userId || 'unknown';
            const raw = event.message;
            const text = typeof raw === 'string' ? raw : new TextDecoder().decode(raw);
            console.log('[RTC][FC] User binary from', uid, ':', text.substring(0, 500));
            this.callbacks.onRoomMessage?.({ uid, text });
            const parsed = JSON.parse(text);
            this._handleRoomMessage(uid, parsed);
          } catch (e) {
            console.log('[RTC][FC] User binary (non-JSON):', e.message);
          }
        });
      }
      if (events.onUserMessageReceived) {
        this.engine.on(events.onUserMessageReceived, (event) => {
          const uid = event.uid || event.userId || 'unknown';
          const msg = event.message || '';
          console.log('[RTC][FC] User text from', uid, ':', String(msg).substring(0, 500));
          this.callbacks.onRoomMessage?.({ uid, text: String(msg) });
          try {
            const parsed = JSON.parse(msg);
            this._handleRoomMessage(uid, parsed);
          } catch (e) {
            console.log('[RTC][FC] User text (non-JSON):', e.message);
          }
        });
      }

      // === Diagnostic events (log-only; help debug missing audio) ===
      if (events.onUserPublishStream) {
        this.engine.on(events.onUserPublishStream, (info) => {
          console.log('[RTC][DIAG] Remote user published stream:', info.userId, 'mediaType:', info.mediaType);
        });
      }
      if (events.onUserUnpublishStream) {
        this.engine.on(events.onUserUnpublishStream, (info) => {
          console.log('[RTC][DIAG] Remote user unpublished stream:', info.userId, 'mediaType:', info.mediaType);
        });
      }
      if (events.onAutoplayFailed) {
        this.engine.on(events.onAutoplayFailed, (info) => {
          console.error('[RTC][DIAG] ❌ Autoplay FAILED! Audio blocked by browser:', info);
        });
      }
      if (events.onPlayerEvent) {
        this.engine.on(events.onPlayerEvent, (info) => {
          console.log('[RTC][DIAG] Player event:', info);
        });
      }
      if (events.onRemoteStreamStats) {
        this.engine.on(events.onRemoteStreamStats, (stats) => {
          if (stats.audioRecvBytes > 0) {
            console.log('[RTC][DIAG] Receiving audio from:', stats.uid, 'bytes:', stats.audioRecvBytes);
          }
        });
      }

      // Enable audio-property reporting to detect whether remote audio is present.
      try {
        this.engine.enableAudioPropertiesReport?.({ interval: 3000 });
        if (events.onRemoteAudioPropertiesReport) {
          this.engine.on(events.onRemoteAudioPropertiesReport, (infos) => {
            infos?.forEach((info) => {
              if (info.audioPropertiesInfo?.linearVolume > 0) {
                console.log('[RTC][DIAG] 🔊 Remote audio detected! user:', info.streamKey?.userId, 'volume:', info.audioPropertiesInfo.linearVolume);
              }
            });
          });
        }
        if (events.onLocalAudioPropertiesReport) {
          this.engine.on(events.onLocalAudioPropertiesReport, (infos) => {
            infos?.forEach((info) => {
              if (info.audioPropertiesInfo?.linearVolume > 0) {
                console.log('[RTC][DIAG] 🎤 Local mic active, volume:', info.audioPropertiesInfo.linearVolume);
              }
            });
          });
        }
      } catch (e) {
        console.warn('[RTC][DIAG] enableAudioPropertiesReport not available:', e.message);
      }

      console.log('[RTC] Engine initialized with diagnostic listeners');
      console.log('[RTC] Available events:', Object.keys(events).filter(k => k.startsWith('on')).join(', '));
      return true;
    } catch (error) {
      console.error('[RTC] Init failed:', error);
      throw error;
    }
  }

  /**
   * Join an RTC room, start mic capture, and enable subtitle reception.
   * @throws when init() has not been called.
   */
  async joinRoom(roomId, userId, token) {
    if (!this.engine) throw new Error('Engine not initialized');

    await this.engine.joinRoom(
      token,
      roomId,
      { userId },
      {
        isAutoPublish: true,
        isAutoSubscribeAudio: true,
        isAutoSubscribeVideo: false,
      }
    );

    await this.engine.startAudioCapture();

    // Activate subtitle reception (must be called after joinRoom).
    try {
      await this.engine.startSubtitle({});
      console.log('[RTC] Subtitle enabled');
    } catch (e) {
      console.warn('[RTC] startSubtitle failed:', e.message || e);
    }

    this.joined = true;
    console.log(`[RTC] Joined room ${roomId} as ${userId}`);
  }

  // Stop capture and leave the room; no-op when not joined.
  async leaveRoom() {
    if (!this.engine || !this.joined) return;
    try {
      await this.engine.stopAudioCapture();
      await this.engine.leaveRoom();
      this.joined = false;
      console.log('[RTC] Left room');
    } catch (e) {
      console.warn('[RTC] Leave room error:', e);
    }
  }

  // Mute by stopping capture entirely (not just disabling the track).
  async setMuted(muted) {
    if (!this.engine) return;
    if (muted) {
      await this.engine.stopAudioCapture();
    } else {
      await this.engine.startAudioCapture();
    }
  }

  /**
   * Inspect a parsed room/user message and, when it looks like a tool call,
   * normalize it and invoke the onToolCall callback once per call.
   * Several payload shapes are probed because the exact server format varies.
   */
  _handleRoomMessage(uid, parsed) {
    console.log('[RTC][FC] Parsed message type:', parsed.type || parsed.event || 'unknown', 'from:', uid);

    // Try several possible tool-call message formats.
    let toolCalls = null;

    // Format 1: { type: "function_call", data: { tool_calls: [...] } }
    if (parsed.type === 'function_call' && parsed.data?.tool_calls) {
      toolCalls = parsed.data.tool_calls;
    }
    // Format 2: { event: "function_call", tool_calls: [...] }
    else if (parsed.event === 'function_call' && parsed.tool_calls) {
      toolCalls = parsed.tool_calls;
    }
    // Format 3: { type: "conversation", data: { event: "function_call", ... } }
    else if (parsed.type === 'conversation' && parsed.data?.event === 'function_call') {
      toolCalls = parsed.data.tool_calls || [parsed.data];
    }
    // Format 4: a bare tool_calls array on the payload
    else if (parsed.tool_calls) {
      toolCalls = parsed.tool_calls;
    }
    // Format 5: a single function_call object
    else if (parsed.function?.name || parsed.function_name) {
      toolCalls = [parsed];
    }

    if (toolCalls && toolCalls.length > 0) {
      console.log('[RTC][FC] ✅ Tool calls detected:', toolCalls.length);
      toolCalls.forEach((tc) => {
        // Normalize id / name / arguments across the format variants above.
        const callId = tc.id || tc.tool_call_id || `tc_${Date.now()}`;
        const funcName = tc.function?.name || tc.function_name || 'unknown';
        const args = tc.function?.arguments || tc.arguments || '{}';
        console.log(`[RTC][FC] Tool call: ${funcName}(${args}), id=${callId}`);
        this.callbacks.onToolCall?.({ tool_call_id: callId, function_name: funcName, arguments: args });
      });
    } else {
      console.log('[RTC][FC] Message is not a tool call, full payload:', JSON.stringify(parsed).substring(0, 300));
    }
  }

  // Register a callback for a known event name; unknown names are ignored.
  on(event, callback) {
    if (event in this.callbacks) {
      this.callbacks[event] = callback;
    }
  }

  // Remove the callback for a known event name; unknown names are ignored.
  off(event) {
    if (event in this.callbacks) {
      this.callbacks[event] = null;
    }
  }

  // Best-effort teardown: leave the room if joined, then destroy the engine.
  destroy() {
    if (this.engine) {
      try {
        if (this.joined) {
          // Fire-and-forget: teardown must not throw.
          this.engine.stopAudioCapture().catch(() => {});
          this.engine.leaveRoom().catch(() => {});
        }
        this.engine.destroyEngine?.();
      } catch (e) {
        console.warn('[RTC] Destroy error:', e);
      }
      this.engine = null;
      this.joined = false;
    }
  }
}
|
||||
|
||||
// Module-level singleton: the whole app shares one RTC engine wrapper.
const rtcService = new RTCService();
export default rtcService;
|
||||
@@ -1,7 +1,27 @@
|
||||
import axios from 'axios';
|
||||
|
||||
/**
 * Resolve the base URL for an HTTP API client.
 * Precedence: explicit env override > dev fallback to the backend on
 * port 3012 (when served from file:// or localhost on another port) >
 * the relative proxy path.
 */
function resolveApiBaseURL(configured, path) {
  if (configured) {
    return configured;
  }
  // SSR / non-browser context: only the relative path makes sense.
  if (typeof window === 'undefined') {
    return path;
  }
  const { hostname, port } = window.location;
  const protocol = window.location.protocol === 'https:' ? 'https:' : 'http:';
  const isLocalHost = hostname === 'localhost' || hostname === '127.0.0.1';
  // Local page not already served by the backend (port 3012) — talk to it directly.
  if ((window.location.protocol === 'file:' || isLocalHost) && port !== '3012') {
    return `${protocol}//${hostname || '127.0.0.1'}:3012${path}`;
  }
  return path;
}
|
||||
|
||||
// Base URLs default to the Vite dev-proxy paths; override with env vars
// (VITE_VOICE_API_BASE_URL / VITE_SESSION_API_BASE_URL) for cross-origin deploys.
const voiceApiBaseURL = resolveApiBaseURL(import.meta.env.VITE_VOICE_API_BASE_URL, '/api/voice');
const sessionApiBaseURL = resolveApiBaseURL(import.meta.env.VITE_SESSION_API_BASE_URL, '/api/session');
|
||||
|
||||
// Axios client for the voice API.
// Fix: removed the duplicate `baseURL` key — the stale '/api/voice' literal
// was dead code (later keys in an object literal overwrite earlier ones).
const api = axios.create({
  baseURL: voiceApiBaseURL,
  timeout: 10000,
});
|
||||
|
||||
@@ -10,64 +30,9 @@ export async function getVoiceConfig() {
|
||||
return data.data;
|
||||
}
|
||||
|
||||
/** POST /prepare — ask the backend to provision a voice session. */
export async function prepareVoiceChat(params) {
  const response = await api.post('/prepare', params);
  return response.data;
}
|
||||
|
||||
/** POST /start — begin a prepared voice session on the backend. */
export async function startVoiceChat(params) {
  const response = await api.post('/start', params);
  return response.data;
}
|
||||
|
||||
/** POST /stop — terminate the voice session identified by sessionId. */
export async function stopVoiceChat(sessionId) {
  const response = await api.post('/stop', { sessionId });
  return response.data;
}
|
||||
|
||||
/** POST /subtitle — push a subtitle record to the backend. */
export async function sendSubtitle(params) {
  const response = await api.post('/subtitle', params);
  return response.data;
}
|
||||
|
||||
/** GET /subtitles/:sessionId — fetch subtitles; unwraps the `data` envelope. */
export async function getSubtitles(sessionId) {
  const response = await api.get(`/subtitles/${sessionId}`);
  return response.data.data;
}
|
||||
|
||||
/** GET /sessions — list active sessions; unwraps the `data` envelope. */
export async function getActiveSessions() {
  const response = await api.get('/sessions');
  return response.data.data;
}
|
||||
|
||||
/** POST /room_message — relay a captured RTC room message to the backend. */
export async function forwardRoomMessage({ roomId, uid, text }) {
  const response = await api.post('/room_message', { roomId, uid, text });
  return response.data;
}
|
||||
|
||||
/**
 * POST /tool-callback — report a tool-call result for the given session.
 * Note: camelCase params are mapped to the snake_case keys the backend expects.
 */
export async function executeToolCall({ sessionId, toolCallId, functionName, arguments: args }) {
  const payload = {
    sessionId,
    tool_call_id: toolCallId,
    function_name: functionName,
    arguments: args,
  };
  const response = await api.post('/tool-callback', payload);
  return response.data;
}
|
||||
|
||||
/**
 * POST /fc_callback — deliver a function-calling callback.
 * Note: the backend expects PascalCase field names (RoomID, TaskID, ...).
 */
export async function executeFcCallback({ roomId, taskId, type, message }) {
  const payload = {
    RoomID: roomId,
    TaskID: taskId,
    Type: type,
    Message: message,
  };
  const response = await api.post('/fc_callback', payload);
  return response.data;
}
|
||||
|
||||
// ========== Session-history API ==========
// Fix: removed the duplicate `baseURL` key — the stale '/api/session'
// literal was dead code (later keys in an object literal overwrite earlier ones).
const sessionApi = axios.create({
  baseURL: sessionApiBaseURL,
  timeout: 10000,
});
|
||||
|
||||
/** POST /:sessionId/switch — switch a session between voice and chat modes. */
export async function switchSessionMode(sessionId, targetMode) {
  const response = await sessionApi.post(`/${sessionId}/switch`, { targetMode });
  return response.data.data;
}
|
||||
|
||||
/**
 * GET /list — fetch up to `limit` sessions, optionally filtered by userId.
 * A falsy userId is omitted from the query entirely.
 */
export async function getSessionList(userId, limit = 50) {
  const params = { limit };
  if (userId) {
    params.userId = userId;
  }
  const response = await sessionApi.get('/list', { params });
  return response.data.data;
}
|
||||
|
||||
/** DELETE /:sessionId — remove a stored session. */
export async function deleteSessionById(sessionId) {
  const response = await sessionApi.delete(`/${sessionId}`);
  return response.data;
}
|
||||
|
||||
@@ -2,6 +2,8 @@ import { defineConfig } from 'vite';
|
||||
import react from '@vitejs/plugin-react';
|
||||
import tailwindcss from '@tailwindcss/vite';
|
||||
|
||||
const backendTarget = 'http://localhost:3012';
|
||||
|
||||
export default defineConfig({
|
||||
plugins: [react(), tailwindcss()],
|
||||
build: {
|
||||
@@ -12,9 +14,14 @@ export default defineConfig({
|
||||
port: 5174,
|
||||
proxy: {
|
||||
'/api': {
|
||||
target: 'http://localhost:3012',
|
||||
target: backendTarget,
|
||||
changeOrigin: true,
|
||||
},
|
||||
'/ws': {
|
||||
target: backendTarget,
|
||||
changeOrigin: true,
|
||||
ws: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user