feat: 添加realtime_dialog和realtime_dialog_external_rag_test项目,更新test2项目

This commit is contained in:
User
2026-03-13 13:06:46 +08:00
parent 9dab61345c
commit 5521b673f5
215 changed files with 7626 additions and 1876 deletions

View File

@@ -1,6 +1,60 @@
const axios = require('axios');
const arkChatService = require('./arkChatService');
class ToolExecutor {
static hasCanonicalKnowledgeTerm(query) {
return /(一成系统|PM-FitLine|PM细胞营养素|NTC营养保送系统|Activize Oxyplus|小红产品|Basics|大白产品|Restorate|小白产品|儿童倍适|火炉原理|阿育吠陀)/i.test(String(query || ''));
}
static normalizeKnowledgeQueryAlias(query) {
return String(query || '')
.replace(/^[啊哦嗯呢呀哎诶额,。!?、\s]+/g, '')
.replace(/[啊哦嗯呢呀哎诶额,。!?、\s]+$/g, '')
.replace(/X{2}系统/gi, '一成系统')
.replace(/一城系统|逸城系统|一程系统|易成系统|一诚系统|亦成系统/g, '一成系统')
.replace(/PM[-\s]*Fitline|PM[-\s]*fitline|Pm[-\s]*fitline|Fitline|fitline/g, 'PM-FitLine')
.replace(/PM细胞营养|PM营养素|德国PM营养素/g, 'PM细胞营养素')
.replace(/NTC营养保送系统|NTC营养配送系统|NTC营养输送系统|NTC营养传送系统|NTC营养传输系统/g, 'NTC营养保送系统')
.replace(/Nutrient Transport Concept/gi, 'NTC营养保送系统')
.replace(/Activize Oxyplus|Activize/gi, 'Activize Oxyplus')
.replace(/Restorate/gi, 'Restorate')
.replace(/Basics/gi, 'Basics')
.replace(/基础三合一|基础套装?|三合一基础套|大白小红小白/g, 'Basics')
.replace(/小红产品|小红/g, '小红产品 Activize Oxyplus')
.replace(/大白产品|大白/g, '大白产品 Basics')
.replace(/小白产品|小白/g, '小白产品 Restorate')
.replace(/儿童倍适|儿童产品/g, '儿童倍适')
.replace(/火炉原理/g, '火炉原理')
.replace(/阿育吠陀|Ayurveda/gi, '阿育吠陀')
.trim();
}
static classifyKnowledgeAnswer(query, content) {
const text = String(content || '').trim();
if (!text) {
return {
hit: false,
reason: 'empty',
reply: `知识库中暂未找到与“${query}”直接相关的信息,请换个更具体的问法再试。`,
};
}
const noHitPattern = /未检索到|没有检索到|没有相关内容|暂无相关内容|未找到相关信息|没有找到相关信息|知识库中没有相关内容|暂未找到与.*直接相关的信息|无法基于知识库/;
if (noHitPattern.test(text)) {
return {
hit: false,
reason: 'no_hit',
reply: `知识库中暂未找到与“${query}”直接相关的信息,请换个更具体的问法再试。`,
};
}
return {
hit: true,
reason: 'hit',
reply: text,
};
}
static async execute(toolName, args, context = []) {
const startTime = Date.now();
console.log(`[ToolExecutor] Executing: ${toolName}`, args);
@@ -79,36 +133,108 @@ class ToolExecutor {
};
}
static async searchKnowledge({ query } = {}, context = []) {
static async searchKnowledge({ query, response_mode } = {}, context = []) {
const startTime = Date.now();
query = query || '';
const responseMode = response_mode === 'snippet' ? 'snippet' : 'answer';
console.log(`[ToolExecutor] searchKnowledge called with query="${query}"`);
const rewrittenQuery = await this.rewriteKnowledgeQuery(query, context);
if (rewrittenQuery && rewrittenQuery !== query) {
console.log(`[ToolExecutor] searchKnowledge rewritten query="${rewrittenQuery}"`);
}
const kbIds = process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS;
if (kbIds && kbIds !== 'your_knowledge_base_dataset_id') {
try {
console.log('[ToolExecutor] Trying Ark Knowledge Search...');
const result = await this.searchArkKnowledge(query, context);
console.log(`[ToolExecutor] Ark KB search succeeded in ${Date.now() - startTime}ms`);
return result;
const result = await this.searchArkKnowledge(rewrittenQuery || query, context, responseMode);
const latencyMs = Date.now() - startTime;
console.log(`[ToolExecutor] Ark KB search succeeded in ${latencyMs}ms`);
return {
...result,
original_query: query,
rewritten_query: rewrittenQuery || query,
latency_ms: latencyMs,
};
} catch (error) {
const latencyMs = Date.now() - startTime;
console.warn('[ToolExecutor] Ark Knowledge Search failed:', error.message);
console.log('[ToolExecutor] Falling back to local Knowledge Base');
return {
query,
original_query: query,
rewritten_query: rewrittenQuery || query,
latency_ms: latencyMs,
errorType: error.code === 'ECONNABORTED' || /timeout/i.test(error.message) ? 'timeout' : 'request_failed',
error: `知识库查询失败: ${error.message}`,
source: 'ark_knowledge',
hit: false,
reason: 'error',
};
}
}
console.log('[ToolExecutor] Using local Knowledge Base (voice fast path)');
const result = this.searchLocalKnowledge(query);
console.log(`[ToolExecutor] Local KB search completed in ${Date.now() - startTime}ms`);
return result;
const latencyMs = Date.now() - startTime;
console.warn('[ToolExecutor] Ark knowledge base is not configured');
return {
query,
original_query: query,
rewritten_query: rewrittenQuery || query,
latency_ms: latencyMs,
errorType: 'not_configured',
error: '知识库未配置,请检查 VOLC_ARK_KNOWLEDGE_BASE_IDS',
source: 'ark_knowledge',
hit: false,
reason: 'not_configured',
};
}
static async rewriteKnowledgeQuery(query, context = []) {
const originalQuery = String(query || '').trim();
if (!originalQuery) {
return '';
}
const normalizedQuery = this.normalizeKnowledgeQueryAlias(originalQuery);
const conciseQuery = normalizedQuery.replace(/[,。!?、,.!?\s]+/g, '');
const recentContext = (Array.isArray(context) ? context : [])
.filter((item) => item && (item.role === 'user' || item.role === 'assistant') && String(item.content || '').trim())
.slice(-6)
.map((item) => `${item.role === 'user' ? '用户' : '助手'}${String(item.content || '').trim()}`)
.join('\n');
const isPronounFollowUp = /^(这个|那个|它|该系统|这个系统|那个系统|详细|继续|怎么|为什么|适合谁|什么意思)/.test(normalizedQuery);
if (this.hasCanonicalKnowledgeTerm(normalizedQuery) && conciseQuery.length <= 36 && !isPronounFollowUp) {
return normalizedQuery;
}
if (!process.env.VOLC_ARK_ENDPOINT_ID || process.env.VOLC_ARK_ENDPOINT_ID === 'your_ark_endpoint_id') {
return normalizedQuery;
}
try {
const result = await arkChatService.chat([
{
role: 'system',
content: '你是知识库检索词改写助手。你的任务是把用户当前问题改写成适合企业知识库检索的完整查询语句。必须处理三类问题1补全多轮对话中的省略主语2纠正语音识别错误、口语噪声和同音误写3把别名统一成知识库里的规范说法。规则不要改变用户真实意图不要回答问题只输出一行最终检索词优先保留真正的产品名、系统名、技术名。当前知识库高频规范术语包括一成系统、PM-FitLine、PM细胞营养素、NTC营养保送系统、Activize Oxyplus、小红产品、Basics、大白产品、Restorate、小白产品、儿童倍适、火炉原理、阿育吠陀。示例XX系统、一城系统、逸城系统、一程系统等都统一理解为一成系统NTC营养配送系统、NTC营养输送系统统一为NTC营养保送系统Fitline、PM fitline 统一为 PM-FitLine小红统一为小红产品 Activize Oxyplus。',
},
{
role: 'user',
content: `最近上下文:\n${recentContext || '无'}\n\n当前原始问题:${normalizedQuery}\n\n请输出最终检索词:`,
},
], []);
const rewritten = this.normalizeKnowledgeQueryAlias(String(result.content || '').replace(/^["'“”]+|["'“”]+$/g, '').trim());
return rewritten || normalizedQuery;
} catch (error) {
console.warn('[ToolExecutor] rewriteKnowledgeQuery failed:', error.message);
return normalizedQuery;
}
}
/**
* 通过方舟 Chat Completions API + knowledge_base metadata 进行知识检索
* 使用独立的 LLM 调用,专门用于知识库检索场景(如语音通话的工具回调)
*/
static async searchArkKnowledge(query, context = []) {
static async searchArkKnowledge(query, context = [], responseMode = 'answer') {
const endpointId = process.env.VOLC_ARK_ENDPOINT_ID;
const authKey = process.env.VOLC_ARK_API_KEY || process.env.VOLC_ACCESS_KEY_ID;
const kbIds = process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS;
@@ -126,12 +252,14 @@ class ToolExecutor {
// 提取最近 3 轮对话作为上下文(最多 6 条 user/assistant 消息)
const recentContext = context
.filter(m => m.role === 'user' || m.role === 'assistant')
.slice(-6);
.slice(responseMode === 'snippet' ? -4 : -6);
const messages = [
{
role: 'system',
content: '你是一个知识库检索助手。请根据知识库中的内容回答用户问题。如果知识库中没有相关内容,请如实说明。回答时请引用知识库来源。',
content: responseMode === 'snippet'
? '你是知识库片段提取助手。请基于知识库提取与用户问题最相关的2到4条简洁知识片段供语音系统继续组织回复。规则只输出直接相关的中文事实片段每条尽量简短不要寒暄不要解释你的任务不要写“根据知识库”如果没有相关内容请明确说未找到相关内容。'
: '你是一个知识库检索助手。请根据知识库中的内容回答用户问题。如果知识库中没有相关内容,请如实说明。回答时请引用知识库来源。',
},
...recentContext,
{
@@ -165,21 +293,24 @@ class ToolExecutor {
'Content-Type': 'application/json',
'Authorization': `Bearer ${authKey}`,
},
timeout: 15000, // 方舟知识库超时 15s减少等待防止 LLM 重试风暴)
timeout: 30000,
}
);
const choice = response.data.choices?.[0];
const content = choice?.message?.content || '未找到相关信息';
const classified = this.classifyKnowledgeAnswer(query, content);
return {
query,
results: [{
title: '方舟知识库检索结果',
content: content,
content: classified.reply,
}],
total: 1,
source: 'ark_knowledge',
hit: classified.hit,
reason: classified.reason,
};
}
@@ -209,7 +340,7 @@ class ToolExecutor {
],
stream: true,
auto_save_history: false,
}, { headers, timeout: 15000 });
}, { headers, timeout: 30000 });
const chatData = chatRes.data?.data;
if (!chatData?.id || !chatData?.conversation_id) {