Files
bigwo/test2/server/services/realtimeDialogRouting.js

397 lines
20 KiB
JavaScript
Raw Normal View History

const ToolExecutor = require('./toolExecutor');
const arkChatService = require('./arkChatService');
const db = require('../db');
/**
 * Strip Markdown markup and tidy whitespace/punctuation so text can be read
 * aloud by a TTS engine.
 *
 * Steps, in order: remove heading markers, emphasis/strikethrough markers,
 * inline/fenced code, horizontal rules, block-quote markers, images and link
 * targets (link text is kept); flatten list bullets; turn blank lines into a
 * sentence break ("。") and single newlines into spaces; finally collapse
 * repeated or adjacent punctuation and runs of whitespace.
 *
 * @param {*} text - Raw text; falsy values are treated as an empty string and
 *   other non-string values are coerced with String().
 * @returns {string} The cleaned, trimmed text (possibly empty).
 */
function normalizeTextForSpeech(text) {
  return String(text || '')
    .replace(/^#{1,6}\s*/gm, '')
    .replace(/\*\*([^*]*)\*\*/g, '$1')
    .replace(/__([^_]*)__/g, '$1')
    .replace(/\*([^*]+)\*/g, '$1')
    .replace(/_([^_]+)_/g, '$1')
    .replace(/~~([^~]*)~~/g, '$1')
    .replace(/`{1,3}[^`]*`{1,3}/g, '')
    .replace(/^[-*]{3,}\s*$/gm, '')
    .replace(/^>\s*/gm, '')
    .replace(/!\[[^\]]*\]\([^)]*\)/g, '')
    .replace(/\[([^\]]*)\]\([^)]*\)/g, '$1')
    .replace(/^[\s]*[-*+]\s+/gm, ' ')
    .replace(/^[\s]*\d+[.)]\s+/gm, ' ')
    .replace(/---\s*来源[:]/g, '来源:')
    .replace(/\r/g, ' ')
    .replace(/\n{2,}/g, '。')
    .replace(/\n/g, ' ')
    .replace(/。{2,}/g, '。')
    // Collapse runs of the same punctuation mark ("!!!" -> "!"). The class
    // mirrors the set used by the next step; an empty class `[]` never
    // matches, which previously made this step a no-op.
    .replace(/([。!?;,])\1+/g, '$1')
    // When two different marks are adjacent, keep only the second one.
    .replace(/([。!?;,])\s*([。!?;,])/g, '$2')
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Normalise text for speech and cut it into chunks no longer than `maxLen`
 * characters, preferring to break right after sentence/clause punctuation.
 *
 * The first chunk is capped at 90 characters (or `maxLen` if smaller); later
 * chunks use the full `maxLen`. A punctuation break is only used when it lies
 * in the second half of the window; otherwise the text is cut hard at the
 * window size.
 *
 * @param {*} text - Raw text, passed through normalizeTextForSpeech first.
 * @param {number} [maxLen=180] - Maximum chunk length. Values below 1 are
 *   clamped to 1 so the loop always makes progress; NaN falls back to 180.
 * @returns {string[]} Non-empty trimmed chunks in order; [] for empty input.
 */
function splitTextForSpeech(text, maxLen = 180) {
  const content = normalizeTextForSpeech(text);
  if (!content) return [];

  const requested = Number(maxLen);
  const limit = Number.isNaN(requested) ? 180 : Math.max(1, Math.floor(requested));
  if (content.length <= limit) return [content];

  // Break candidates. Searching for an empty string with lastIndexOf returns
  // the start index itself, which would defeat the punctuation search, so
  // every entry here must be a real character.
  const breakChars = ['。', '!', '?', ';', ',', '!', '?', ';', ','];

  const chunks = [];
  let remaining = content;
  while (remaining.length > limit) {
    const currentMaxLen = chunks.length === 0 ? Math.min(90, limit) : limit;
    // Look only inside the window [0, currentMaxLen - 1] so that a chunk,
    // including its trailing punctuation mark, never exceeds the limit.
    const lastBreak = Math.max(
      ...breakChars.map((ch) => remaining.lastIndexOf(ch, currentMaxLen - 1)),
    );
    const splitIndex = lastBreak >= Math.floor(currentMaxLen / 2)
      ? lastBreak + 1
      : currentMaxLen;
    chunks.push(remaining.slice(0, splitIndex).trim());
    remaining = remaining.slice(splitIndex).trim();
  }
  if (remaining) chunks.push(remaining);
  return chunks.filter(Boolean);
}
/**
 * Estimate how long the given text takes to speak.
 *
 * Uses 180 ms per non-whitespace character of the speech-normalised text and
 * clamps the result to the range 4000..60000 ms.
 *
 * @param {*} text - Raw text to be spoken.
 * @returns {number} Estimated duration in milliseconds.
 */
function estimateSpeechDurationMs(text) {
  const MS_PER_CHAR = 180;
  const FLOOR_MS = 4000;
  const CEILING_MS = 60000;

  const spoken = normalizeTextForSpeech(text);
  const charCount = spoken.replace(/\s+/g, '').length;
  const estimate = charCount * MS_PER_CHAR;

  if (estimate < FLOOR_MS) return FLOOR_MS;
  if (estimate > CEILING_MS) return CEILING_MS;
  return estimate;
}
/**
 * Ask the chat model to rewrite a knowledge-base answer in a conversational,
 * speech-friendly style.
 *
 * The call is raced against a 3 second timeout. Any failure (timeout, service
 * error, unusable output) is logged and reported as `null` so the caller can
 * fall back to the unpolished text.
 *
 * @param {*} rawText - The original answer to polish.
 * @param {*} userQuestion - The user question the answer responds to.
 * @returns {Promise<string|null>} The polished text when the model returns at
 *   least 10 characters, otherwise null.
 */
async function polishForSpeech(rawText, userQuestion) {
  const POLISH_TIMEOUT_MS = 3000;
  let timeoutId;
  try {
    const messages = [
      {
        role: 'system',
        content: '你是一个语音播报润色助手。请将下面的知识库回答改写为自然、亲切的口语风格像朋友聊天一样。要求1) 保留所有关键信息和数据不得编造2) 去掉"根据知识库信息"等机械前缀3) 适合语音朗读简洁流畅4) 控制在120字以内5) 只输出改写后的文本,不要加引号或解释。',
      },
      {
        role: 'user',
        content: `用户问题:${userQuestion}\n\n原始回答:${rawText}`,
      },
    ];
    const timeout = new Promise((_, reject) => {
      timeoutId = setTimeout(() => reject(new Error('polish timeout')), POLISH_TIMEOUT_MS);
    });
    const result = await Promise.race([
      arkChatService.chat(messages, [], { useKnowledgeBase: false }),
      timeout,
    ]);
    const polished = (result?.content || '').trim();
    if (polished && polished.length >= 10) {
      // Coerce before measuring: a non-string rawText must not throw here and
      // discard an otherwise successful result via the catch below.
      const originalLength = String(rawText ?? '').length;
      console.log(`[RealtimeRouting] polishForSpeech ok len=${polished.length} original=${originalLength}`);
      return polished;
    }
  } catch (err) {
    console.warn('[RealtimeRouting] polishForSpeech failed:', err.message);
  } finally {
    // Always cancel the race timer so it does not linger after the call
    // settles (clearTimeout ignores an undefined id).
    clearTimeout(timeoutId);
  }
  return null;
}
/**
 * Build the message list for the routing model: a system prompt describing
 * the allowed routes and output format, up to six recent context turns, and
 * the current user utterance.
 *
 * @param {object} session - Session settings; `systemRole` and
 *   `speakingStyle` are read, with built-in defaults when falsy.
 * @param {Array|null|undefined} context - Prior turns; only entries with a
 *   truthy `role` and `content` among the last six are kept.
 * @param {*} userText - The current user utterance.
 * @returns {Array<{role: string, content: *}>} Messages for the chat API.
 */
function buildDirectRouteMessages(session, context, userText) {
  const persona = `当前助手设定:${session.systemRole || '你是一个友善的智能助手。'} ${session.speakingStyle || '请使用温和、清晰的口吻。'}`;
  const promptLines = [
    '你是语音前置路由器,只负责判断当前用户问题应该走哪条链路。',
    '你必须只输出一个 JSON 对象,不要输出解释、代码块或额外文本。',
    '允许的 route 只有chat、search_knowledge、query_weather、query_order、get_current_time、calculate。',
    '规则如下:',
    '1. 企业产品、功能、政策、售后、专业说明、品牌官方信息 -> search_knowledge。',
    '2. 天气 -> query_weather。',
    '3. 订单状态 -> query_order。',
    '4. 当前时间、日期、星期 -> get_current_time。',
    '5. 明确的数学计算 -> calculate。',
    '6. 闲聊、问候、开放式泛化交流 -> chat。',
    '输出格式示例:{"route":"chat","args":{},"reply":""}',
    '如果 route=search_knowledgeargs 中必须包含 query。',
    '如果 route=query_weatherargs 中必须包含 city。',
    '如果 route=query_orderargs 中必须包含 order_id。',
    '如果 route=calculateargs 中必须包含 expression。',
    persona,
  ];

  // Keep only well-formed turns from the six most recent entries.
  const history = (context || [])
    .slice(-6)
    .filter((turn) => turn && turn.role && turn.content)
    .map((turn) => ({ role: turn.role, content: turn.content }));

  return [
    { role: 'system', content: promptLines.join('\n') },
    ...history,
    { role: 'user', content: userText },
  ];
}
/**
 * Build the message list for a direct chat completion in the voice scenario:
 * a persona/system prompt, up to ten recent context turns, and the current
 * user utterance.
 *
 * @param {object} session - Session settings; `systemRole` and
 *   `speakingStyle` are read, with built-in defaults when falsy.
 * @param {Array|null|undefined} context - Prior turns; only entries with a
 *   truthy `role` and `content` among the last ten are kept.
 * @param {*} userText - The current user utterance.
 * @returns {Array<{role: string, content: *}>} Messages for the chat API.
 */
function buildDirectChatMessages(session, context, userText) {
  const roleLine = session.systemRole || '你是一个友善的智能助手。';
  const styleLine = session.speakingStyle || '请使用温和、清晰的口吻。';
  const systemContent = [
    roleLine,
    styleLine,
    '这是语音对话场景,请直接给出自然、完整、适合朗读的中文回复。',
    '如果不是基于知识库或工具结果,就不要冒充官方结论。',
  ].join('\n');

  const result = [{ role: 'system', content: systemContent }];
  for (const turn of (context || []).slice(-10)) {
    if (!turn || !turn.role || !turn.content) continue;
    result.push({ role: turn.role, content: turn.content });
  }
  result.push({ role: 'user', content: userText });
  return result;
}
/**
 * Rewrite common mis-transcriptions and alternative spellings of product and
 * brand names to their canonical form.
 *
 * The rules are applied one after another, in the order listed.
 *
 * @param {*} text - Input text; falsy values become an empty string, other
 *   values are coerced with String().
 * @returns {string} Text with known aliases replaced.
 */
function normalizeKnowledgeAlias(text) {
  // [pattern, canonical replacement] pairs; order matters.
  const aliasRules = [
    [/X{2}系统/gi, '一成系统'],
    [/一城系统|逸城系统|一程系统|易成系统|一诚系统|亦成系统|艺成系统|溢成系统|义成系统|毅成系统|怡成系统|以成系统|已成系统|亿成系统|忆成系统|益成系统/g, '一成系统'],
    [/(?<![一\u4e00-\u9fff])(一城|逸城|一程|易成|一诚|亦成|艺成|溢成|义成|毅成|怡成|以成|已成|亿成|忆成|益成)(?=系统)/g, '一成'],
    [/大窝|大握|大我|大卧/g, '大沃'],
    [/盛咖学院|圣咖学愿|盛咖学院|圣咖学院|盛卡学愿/g, '盛咖学愿'],
    [/AI众享|Ai众享|爱众享|艾众享|哎众享/gi, 'Ai众享'],
    [/暖炉原理/g, '火炉原理'],
  ];

  let normalized = String(text || '');
  for (const [pattern, canonical] of aliasRules) {
    normalized = normalized.replace(pattern, canonical);
  }
  return normalized;
}
/**
 * Tell whether the text mentions any knowledge-base keyword (product names,
 * brand terms, business/compliance topics, usage questions).
 *
 * Aliases are canonicalised with normalizeKnowledgeAlias before matching;
 * matching is case-insensitive.
 *
 * @param {*} text - Text to inspect.
 * @returns {boolean} True when at least one keyword occurs.
 */
function hasKnowledgeKeyword(text) {
  const keywordPattern = /(一成系统|Ai众享|AI众享|数字化工作室|盛咖学愿|四大AI生态|四大Ai生态|三大平台|PM公司|德国PM|PM-FitLine|FitLine|PM细胞营养素|细胞营养素|小红|大白|小白|Activize|Basics|Restorate|儿童倍适|NTC|营养保送|火炉原理|暖炉原理|阿育吠陀|Ayurveda|基础三合一|三合一|基础套装|基础二合一|二合一|招商合作|招商|代理|加盟|事业机会|邀约话术|起步三关|精品会议|成长上总裁|AI落地|ai落地|转观念|好转反应|整应反应|排毒反应|副作用|不良反应|皮肤发痒|促销活动|促销|优惠|活动分数|5\+1|CC套装|CC胶囊|IB5|口腔免疫喷雾|Q10|辅酵素|Women\+|乐活|乳清蛋白|蛋白粉|乳酪煲|乳酪饮品|乳酪|倍力健|关节套装|关节舒缓|男士乳霜|去角质|面膜|发宝|叶黄素|奶昔|健康饮品|传销|骗局|骗子|正规吗|合法吗|正不正规|合不合法|是不是传销|直销还是传销|层级分销|非法集资|拉人头|下线|发展下线|报单|人头费|怎么吃|怎么服用|吃多少|服用方法|搭配|功效|成分|原料)/i;
  const canonicalText = normalizeKnowledgeAlias(text);
  return keywordPattern.test(canonicalText);
}
/**
 * Detect short follow-up utterances ("tell me more", "how much is it",
 * "this product's ingredients") that only make sense in relation to an
 * earlier topic.
 *
 * Trailing punctuation/whitespace and a leading filler prefix are stripped
 * first. The remainder must either be one of the bare follow-up phrases, or a
 * demonstrative reference optionally followed by "的" and a follow-up phrase.
 *
 * @param {*} text - The user utterance.
 * @returns {boolean} True when the utterance looks like a follow-up.
 */
function isKnowledgeFollowUp(text) {
  const trimmed = String(text || '').trim();
  const withoutTail = trimmed.replace(/[,。!??~\s]+$/g, '');
  const core = withoutTail.replace(/^(那你|那再|那|你再|再来|再|麻烦你|帮我)[,、\s]*/g, '');
  if (!core) return false;

  const barePhrase = /^(详细|详细说说|详细查一下|展开说说|继续说|继续讲|介绍一下|给我介绍一下|详细介绍一下|继续介绍一下|怎么用|怎么操作|怎么配置|适合谁|有什么区别|费用多少|价格多少|怎么申请|怎么开通|是什么|什么意思|地址在哪|公司地址在哪|电话多少|公司电话多少|联系方式|公司联系方式|具体政策|具体内容|怎么吃|功效是什么|有什么功效|成分是什么|有什么成分|多少钱|哪里买|怎么买|配方|原理是什么|有什么好处|怎么服用|适合什么人)$/;
  const referencePhrase = /^(这个|那个|它|该系统|这个系统|那个系统|这个功能|那个功能|这个产品|那个产品|这个公司|那家公司|这个政策|那个政策|这个培训|那个培训)(的)?(详细|详细说说|详细查一下|展开说说|继续说|继续讲|介绍一下|给我介绍一下|详细介绍一下|继续介绍一下|怎么用|怎么操作|怎么配置|适合谁|有什么区别|费用多少|价格多少|怎么申请|怎么开通|是什么|什么意思|地址在哪|公司地址在哪|电话多少|公司电话多少|联系方式|公司联系方式|具体政策|具体内容|怎么吃|功效是什么|有什么功效|成分是什么|有什么成分|多少钱|哪里买|怎么买|配方|原理是什么|有什么好处|怎么服用|适合什么人)?$/;

  return barePhrase.test(core) || referencePhrase.test(core);
}
/**
 * Decide whether an utterance should be forced onto the knowledge-base route.
 *
 * True when the utterance itself contains a knowledge keyword, or when it is
 * a follow-up phrase and the last six context entries contain one.
 *
 * @param {string|null|undefined} userText - The current user utterance.
 * @param {Array} [context=[]] - Prior turns; each entry's `content` is
 *   inspected. Non-array values are treated as empty.
 * @returns {boolean} True when the knowledge route should be used.
 */
function shouldForceKnowledgeRoute(userText, context = []) {
  const utterance = (userText || '').trim();
  if (!utterance) return false;
  if (hasKnowledgeKeyword(utterance)) return true;

  if (isKnowledgeFollowUp(utterance)) {
    const history = Array.isArray(context) ? context : [];
    const recentLines = history
      .slice(-6)
      .map((turn) => String(turn?.content || '').trim());
    return hasKnowledgeKeyword(recentLines.join('\n'));
  }
  return false;
}
/**
 * Prepend the session's hand-off summary to the context as an assistant
 * message, unless there is no summary or it has already been used.
 *
 * @param {object|null|undefined} session - Reads `handoffSummary` and
 *   `handoffSummaryUsed`.
 * @param {*} context - Prior turns. Returned unchanged (same reference) when
 *   nothing is prepended; a non-array value is treated as empty otherwise.
 * @returns {*} Either `context` itself or a new array starting with the
 *   summary message.
 */
function withHandoffSummary(session, context) {
  const handoffText = String(session?.handoffSummary || '').trim();
  const alreadyUsed = session?.handoffSummaryUsed;
  if (handoffText && !alreadyUsed) {
    const merged = [{ role: 'assistant', content: `会话交接摘要:${handoffText}` }];
    if (Array.isArray(context)) {
      merged.push(...context);
    }
    return merged;
  }
  return context;
}
/**
 * Parse the routing model's reply into a route decision.
 *
 * Markdown code fences are stripped and the outermost {...} span is parsed as
 * JSON. A recognised route is returned with only the arguments it needs.
 * Anything else (unparseable reply, unknown route, missing required argument)
 * falls back to a knowledge search on the user's own text.
 *
 * @param {string|null|undefined} content - Raw model output.
 * @param {*} userText - The user utterance, used as the fallback query.
 * @returns {{route: string, args: object}} The route decision.
 */
function parseDirectRouteDecision(content, userText) {
  const raw = (content || '').trim();
  const unfenced = raw
    .replace(/^```json\s*/i, '')
    .replace(/^```\s*/i, '')
    .replace(/```$/i, '')
    .trim();

  const openAt = unfenced.indexOf('{');
  const closeAt = unfenced.lastIndexOf('}');
  let candidate = unfenced;
  if (openAt >= 0 && closeAt > openAt) {
    candidate = unfenced.slice(openAt, closeAt + 1);
  }

  try {
    const parsed = JSON.parse(candidate);
    const route = parsed.route;
    const args = parsed.args && typeof parsed.args === 'object' ? parsed.args : {};
    switch (route) {
      case 'chat':
        return { route: 'chat', args: {} };
      case 'search_knowledge':
        return { route: 'search_knowledge', args: { query: args.query || userText } };
      case 'query_weather':
        if (args.city) return { route: 'query_weather', args: { city: args.city } };
        break;
      case 'query_order':
        if (args.order_id) return { route: 'query_order', args: { order_id: args.order_id } };
        break;
      case 'get_current_time':
        return { route: 'get_current_time', args: {} };
      case 'calculate':
        if (args.expression) return { route: 'calculate', args: { expression: args.expression } };
        break;
      default:
        break;
    }
  } catch (error) {
    console.warn('[NativeVoice] route JSON parse failed:', error.message, 'raw=', raw);
  }
  return { route: 'search_knowledge', args: { query: userText } };
}
/**
 * Pick a route for the utterance using keyword rules only (no model call).
 *
 * Rules are checked in this order; the first match wins:
 *   1. time/date words            -> get_current_time
 *   2. weather words              -> query_weather (city from a fixed list,
 *                                    defaulting to 北京 when none is named)
 *   3. order/logistics words      -> query_order (whole utterance as id)
 *   4. arithmetic-looking input   -> calculate
 *   5. legitimacy/compliance words -> search_knowledge
 *   6. everything else, including greetings and empty input -> chat
 *
 * @param {string|null|undefined} userText - The user utterance.
 * @returns {{route: string, args: object}} The route decision.
 */
function getRuleBasedDirectRouteDecision(userText) {
  const text = (userText || '').trim();
  if (!text) return { route: 'chat', args: {} };

  if (/(几点|几号|日期|星期|周几|现在时间|当前时间)/.test(text)) {
    return { route: 'get_current_time', args: {} };
  }

  if (/(天气|气温|下雨|晴天|阴天|温度)/.test(text)) {
    // Extract just the city name. Deriving it with String.replace leaves the
    // rest of the sentence attached (or the whole sentence when no city is
    // named), so the default city would never apply.
    const cityMatch = text.match(/(北京|上海|广州|深圳|杭州|成都|重庆|武汉|西安|南京|苏州|天津|长沙|郑州|青岛|宁波|无锡)/);
    return { route: 'query_weather', args: { city: cityMatch ? cityMatch[1] : '北京' } };
  }

  if (/(订单|物流|快递|单号)/.test(text)) {
    // NOTE(review): the whole utterance is passed as order_id; presumably the
    // tool extracts the actual id — confirm against the tool executor.
    return { route: 'query_order', args: { order_id: text } };
  }

  if (/^[\d\s+\-*/().=%]+$/.test(text) || /(等于多少|帮我算|计算一下|算一下)/.test(text)) {
    const expression = text.replace(/(帮我算|计算一下|算一下|等于多少)/g, '').trim() || text;
    return { route: 'calculate', args: { expression } };
  }

  if (/(传销|骗局|骗子|正规吗|合法吗|正不正规|合不合法|是不是传销|直销还是传销|层级分销|非法集资|拉人头|下线|发展下线|报单|人头费)/.test(text)) {
    return { route: 'search_knowledge', args: { query: text } };
  }

  // Greetings, acknowledgements and anything unmatched all go to chat.
  return { route: 'chat', args: {} };
}
/**
 * Turn a tool result into a sentence that can be spoken or shown.
 *
 * For `search_knowledge`, known error types map to fixed messages; otherwise
 * result items are joined line by line (each item's `content`, or its JSON
 * when `content` is falsy). The other tools format their fields into a
 * sentence when the result carries no `error`. A truthy `error` is returned
 * as-is; any remaining result is returned directly if it is a string, or
 * JSON-serialised otherwise.
 *
 * @param {string} toolName - Name of the tool that produced the result.
 * @param {*} toolResult - The tool's result (object or string).
 * @returns {*} Usually a string; '' when `toolResult` is falsy.
 */
function extractToolResultText(toolName, toolResult) {
  if (!toolResult) return '';

  const isPlainText = typeof toolResult === 'string';

  if (toolName === 'search_knowledge') {
    const knowledgeErrorMessages = new Map([
      ['timeout', '知识库查询超时了,请稍后重试,或换一种更具体的问法再试。'],
      ['not_configured', '知识库当前未配置完成,请先检查知识库配置。'],
      ['endpoint_not_configured', '知识库已配置但方舟LLM端点未就绪暂时无法检索请稍后再试。'],
    ]);
    const knownMessage = knowledgeErrorMessages.get(toolResult.errorType);
    if (knownMessage !== undefined) return knownMessage;

    if (toolResult.results && Array.isArray(toolResult.results)) {
      const lines = [];
      for (const item of toolResult.results) {
        lines.push(item.content || JSON.stringify(item));
      }
      return lines.join('\n');
    }
    if (isPlainText) return toolResult;
  }

  // An explicit error wins over any tool-specific formatting.
  if (toolResult.error) return toolResult.error;

  switch (toolName) {
    case 'query_weather':
      return `${toolResult.city}今天${toolResult.weather},气温${toolResult.temp},湿度${toolResult.humidity}${toolResult.wind}${toolResult.tips || ''}`.trim();
    case 'query_order':
      return `订单${toolResult.order_id}当前状态是${toolResult.status},预计送达时间${toolResult.estimated_delivery},快递单号${toolResult.tracking_number}`;
    case 'get_current_time':
      return `现在是${toolResult.datetime}${toolResult.weekday}`;
    case 'calculate':
      return `${toolResult.expression} 的计算结果是 ${toolResult.formatted}`;
    default:
      return isPlainText ? toolResult : JSON.stringify(toolResult);
  }
}
/**
 * Decide how a user utterance should be answered and, for tool routes, run
 * the tool and package its result.
 *
 * Flow:
 *   1. Load up to 20 recent messages (an empty list is used if the lookup
 *      rejects), keep user/assistant turns, and prepend the hand-off summary
 *      when applicable.
 *   2. Pick a route with the rule-based router; a `chat` decision is upgraded
 *      to `search_knowledge` when shouldForceKnowledgeRoute says so.
 *   3. `chat` returns immediately with delivery `upstream_chat`.
 *   4. Otherwise the tool is executed and the result is returned as
 *      `external_rag` (with ragItems), `upstream_chat` (knowledge miss), or
 *      `local_tts` (everything else).
 *
 * Side effect: `session.handoffSummaryUsed` is set to true on every return
 * path.
 *
 * @param {*} sessionId - Passed to db.getRecentMessages.
 * @param {object} session - Session state; read for hand-off fields and mutated.
 * @param {string} text - The user utterance (must support `.trim()`).
 * @returns {Promise<object>} `{ delivery, speechText, ragItems, source,
 *   toolName, routeDecision, responseMeta }`.
 */
async function resolveReply(sessionId, session, text) {
// Best-effort history lookup: a rejected promise yields an empty history.
const recentMessages = await db.getRecentMessages(sessionId, 20).catch(() => []);
// Once the hand-off summary has been used, drop messages whose `source`
// starts with "chat_" (case-insensitive).
const scopedMessages = session?.handoffSummaryUsed
? recentMessages.filter((item) => !/^chat_/i.test(String(item?.source || '')))
: recentMessages;
const baseContext = scopedMessages
.filter((item) => item && (item.role === 'user' || item.role === 'assistant'))
.map((item) => ({ role: item.role, content: item.content }));
const context = withHandoffSummary(session, baseContext);
const originalText = text.trim();
let routeDecision = getRuleBasedDirectRouteDecision(text.trim());
// Rule-based "chat" can be overridden when the utterance (or a follow-up in
// context) points at knowledge-base content.
if (routeDecision.route === 'chat' && shouldForceKnowledgeRoute(text.trim(), context)) {
routeDecision = { route: 'search_knowledge', args: { query: text.trim() } };
}
let replyText = '';
let source = 'voice_bot';
let toolName = null;
let responseMeta = {
route: routeDecision.route,
original_text: originalText,
};
if (routeDecision.route === 'chat') {
// Plain chat: nothing to execute here; the caller handles it upstream.
session.handoffSummaryUsed = true;
return {
delivery: 'upstream_chat',
speechText: '',
ragItems: [],
source,
toolName,
routeDecision,
responseMeta,
};
} else {
toolName = routeDecision.route;
source = 'voice_tool';
// Knowledge searches are always requested in "answer" response mode.
const toolArgs = toolName === 'search_knowledge'
? { ...(routeDecision.args || {}), response_mode: 'answer' }
: routeDecision.args;
const toolResult = await ToolExecutor.execute(routeDecision.route, toolArgs, context);
replyText = extractToolResultText(toolName, toolResult);
// Copy diagnostic fields from the tool result; absent/falsy values are
// recorded as null.
responseMeta = {
...responseMeta,
tool_name: toolName,
tool_args: toolArgs || {},
source: toolResult?.source || null,
original_query: toolResult?.original_query || routeDecision.args?.query || originalText,
rewritten_query: toolResult?.rewritten_query || null,
selected_dataset_ids: toolResult?.selected_dataset_ids || null,
selected_kb_routes: toolResult?.selected_kb_routes || null,
hit: typeof toolResult?.hit === 'boolean' ? toolResult.hit : null,
reason: toolResult?.reason || null,
error_type: toolResult?.errorType || null,
latency_ms: toolResult?.latency_ms || null,
};
// Knowledge search: one item per result with content, only on a hit.
// Other tools: a single item wrapping the reply text, only when the result
// has no error and the text is non-empty.
const ragItems = toolName === 'search_knowledge'
? (toolResult?.hit && Array.isArray(toolResult?.results)
? toolResult.results
.filter((item) => item && item.content)
.map((item) => ({
title: item.title || '知识库结果',
content: item.content,
}))
: [])
: (!toolResult?.error && replyText
? [{ title: `${toolName}结果`, content: replyText }]
: []);
if (ragItems.length > 0) {
let speechText = normalizeTextForSpeech(replyText);
session.handoffSummaryUsed = true;
if (toolName === 'search_knowledge' && speechText) {
// Strip a leading "根据知识库信息…" / "根据…," preamble and return the
// whole answer as a single item instead of the per-result items.
const cleanedText = speechText.replace(/^(根据知识库信息[,:\s]*|根据.*?[,]\s*)/i, '');
return {
delivery: 'external_rag',
speechText: '',
ragItems: [{ title: '知识库结果', content: cleanedText || speechText }],
source,
toolName,
routeDecision,
responseMeta,
};
}
return {
delivery: 'external_rag',
speechText: '',
ragItems,
source,
toolName,
routeDecision,
responseMeta,
};
}
if (toolName === 'search_knowledge' && !toolResult?.hit) {
session.handoffSummaryUsed = true;
// Sensitive questions (pyramid-scheme / legitimacy): when the knowledge
// base misses, do not hand over to S2S for a free-form answer; return a
// fixed safe reply directly.
if (/(传销|骗局|骗子|正规吗|合法吗|正不正规|合不合法|是不是传销|直销还是传销|层级分销|非法集资|拉人头|下线|发展下线|报单|人头费)/.test(originalText)) {
const safeReply = '德国PM是一家1993年成立于德国的合法直销公司获得邓白氏AAA+认证业务覆盖100多个国家和地区。它不是传销是正规的直销企业哦。如果你想了解更多可以问我关于PM公司或产品的详细介绍。';
return {
delivery: 'external_rag',
speechText: '',
ragItems: [{ title: '品牌保护', content: safeReply }],
source: 'voice_tool',
toolName: 'search_knowledge',
routeDecision,
responseMeta: { ...responseMeta, hit: true, reason: 'brand_protection' },
};
}
// Ordinary knowledge miss: fall back to upstream chat.
return {
delivery: 'upstream_chat',
speechText: '',
ragItems: [],
source: 'voice_bot',
toolName: null,
routeDecision,
responseMeta,
};
}
}
// Reached when a tool ran but produced no ragItems and the knowledge-miss
// branch above did not apply: speak the reply text locally.
const speechText = normalizeTextForSpeech(replyText);
session.handoffSummaryUsed = true;
if (!speechText) {
return { delivery: 'local_tts', speechText: '', ragItems: [], source, toolName, routeDecision, responseMeta };
}
return { delivery: 'local_tts', speechText, ragItems: [], source, toolName, routeDecision, responseMeta };
}
// Public API of this module. Functions defined in this file but not listed
// here are not exported.
module.exports = {
getRuleBasedDirectRouteDecision,
normalizeKnowledgeAlias,
normalizeTextForSpeech,
splitTextForSpeech,
estimateSpeechDurationMs,
shouldForceKnowledgeRoute,
resolveReply,
};