feat(server): KB prompt优化、字幕修复、S2S重连、助手配置API
- assistantProfileConfig: KB answer prompt改为分层策略(严格产品信息+灵活常识补充) - nativeVoiceGateway: S2S upstream自动重连(最多50次)、event 351字幕debounce(800ms取最长文本) - toolExecutor: 确定性query改写增强、KB查询传递session上下文 - contextKeywordTracker: 支持KB话题记忆优先enrichment - contentSafeGuard: 新增品牌安全内容过滤服务 - assistantProfileService: 新增助手配置CRUD服务 - routes/assistantProfile: 新增助手配置API路由 - knowledgeKeywords: 扩展KB关键词词典 - fastAsrCorrector: ASR纠错规则更新 - tests/: KB prompt测试、保护窗口测试、Viking性能测试 - docs/: 助手配置API文档、系统提示词目录
This commit is contained in:
@@ -30,7 +30,7 @@ class ArkChatService {
|
||||
if (datasetIds.length === 0) return null;
|
||||
|
||||
const topK = parseInt(process.env.VOLC_ARK_KNOWLEDGE_TOP_K) || 3;
|
||||
const threshold = parseFloat(process.env.VOLC_ARK_KNOWLEDGE_THRESHOLD) || 0.5;
|
||||
const threshold = parseFloat(process.env.VOLC_ARK_KNOWLEDGE_THRESHOLD) || 0.4;
|
||||
|
||||
return {
|
||||
dataset_ids: datasetIds,
|
||||
|
||||
@@ -73,7 +73,7 @@ function buildKnowledgeAnswerPrompt(profileOverrides = null) {
|
||||
const personalInfoBlock = personalInfoLines.length > 0
|
||||
? ` 对于${profile.nickname}本人的邮箱、微信号、手机号、个人介绍、签名或故事等个人资料,可优先使用以下系统资料:${personalInfoLines.join(' ')}`
|
||||
: '';
|
||||
return `你是${profile.nickname}的智能助手${documentsClause}。你的回答必须严格依据知识库内容,不得补充知识库未提及的信息,不得猜测,不得编造。若知识库中没有明确答案,就直接说明知识库未提及或暂未找到相关信息。回答保持口语化、简洁、专业,200字内。${personalInfoBlock}`;
|
||||
return `你是${profile.nickname}的智能助手${documentsClause}。知识库涵盖近50款PM-FitLine产品的完整资料(成分、用法、剂量、价格、规格、搭配方案、好转反应等)及117个常见问答。回答规则:产品相关具体信息必须严格依据知识库,不得猜测或自行补充;公司背景、健康常识可适当补充。产品常有别名(小红=艾特维、大白=倍适、小白=维适多等),请注意识别。不得编造产品名或数据。PM是营养品非药物,涉及疾病建议咨询医生。若知识库无相关内容,坦诚说明并建议咨询推荐人。回答口语化、简洁,1-3句给结论,150字内。${personalInfoBlock}`;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
178
test2/server/services/assistantProfileService.js
Normal file
178
test2/server/services/assistantProfileService.js
Normal file
@@ -0,0 +1,178 @@
|
||||
const axios = require('axios');
|
||||
const {
|
||||
DEFAULT_VOICE_ASSISTANT_PROFILE,
|
||||
resolveAssistantProfile,
|
||||
} = require('./assistantProfileConfig');
|
||||
|
||||
const assistantProfileCache = new Map();
|
||||
|
||||
/**
 * Reads the remote profile endpoint from the ASSISTANT_PROFILE_API_URL env var.
 *
 * @returns {string} The trimmed URL, or '' when the variable is unset, blank,
 *   or still starts with the `your_` placeholder prefix.
 */
function getAssistantProfileApiUrl() {
  const configuredUrl = String(process.env.ASSISTANT_PROFILE_API_URL || '').trim();
  if (!configuredUrl || configuredUrl.startsWith('your_')) {
    return '';
  }
  return configuredUrl;
}
|
||||
|
||||
/**
 * Reads the HTTP verb for the profile API from ASSISTANT_PROFILE_API_METHOD.
 *
 * @returns {'GET'|'POST'} 'POST' only when the (trimmed, case-insensitive)
 *   value is exactly POST; every other value, including unset, yields 'GET'.
 */
function getAssistantProfileApiMethod() {
  const configuredMethod = String(process.env.ASSISTANT_PROFILE_API_METHOD || 'GET')
    .trim()
    .toUpperCase();
  if (configuredMethod === 'POST') {
    return 'POST';
  }
  return 'GET';
}
|
||||
|
||||
/**
 * Reads the request timeout from ASSISTANT_PROFILE_API_TIMEOUT_MS.
 *
 * @returns {number} The configured value when it is a finite number greater
 *   than zero; otherwise 5000.
 */
function getAssistantProfileTimeoutMs() {
  const fallbackMs = 5000;
  const parsedMs = Number(process.env.ASSISTANT_PROFILE_API_TIMEOUT_MS || fallbackMs);
  if (!Number.isFinite(parsedMs) || parsedMs <= 0) {
    return fallbackMs;
  }
  return parsedMs;
}
|
||||
|
||||
/**
 * Reads the profile cache lifetime from ASSISTANT_PROFILE_CACHE_TTL_MS.
 *
 * @returns {number} The configured value when it is a finite number that is
 *   zero or positive; otherwise 60000.
 */
function getAssistantProfileCacheTtlMs() {
  const fallbackMs = 60000;
  const parsedMs = Number(process.env.ASSISTANT_PROFILE_CACHE_TTL_MS || fallbackMs);
  if (!Number.isFinite(parsedMs) || parsedMs < 0) {
    return fallbackMs;
  }
  return parsedMs;
}
|
||||
|
||||
/**
 * Derives the cache key for a user's profile entry.
 *
 * @param {*} [userId=null] - User identifier; any falsy value maps to the shared key.
 * @returns {string} The trimmed string form of `userId`, or 'global' when
 *   `userId` is falsy or trims down to an empty string.
 */
function getAssistantProfileCacheKey(userId = null) {
  const sharedKey = 'global';
  const candidateKey = String(userId || sharedKey).trim();
  return candidateKey === '' ? sharedKey : candidateKey;
}
|
||||
|
||||
/**
 * Parses extra request headers from ASSISTANT_PROFILE_API_HEADERS, which is
 * expected to hold a JSON object.
 *
 * Names and values are stringified and trimmed; pairs whose name or value ends
 * up empty (including falsy values such as 0 or false) are dropped.
 *
 * @returns {Object<string, string>} The cleaned header map, or {} when the
 *   variable is unset, is not a JSON object, or fails to parse (a warning is
 *   logged in the parse-failure case).
 */
function parseAssistantProfileHeaders() {
  const rawHeaders = String(process.env.ASSISTANT_PROFILE_API_HEADERS || '').trim();
  if (rawHeaders === '') {
    return {};
  }

  let decoded;
  try {
    decoded = JSON.parse(rawHeaders);
  } catch (error) {
    console.warn('[AssistantProfile] parse headers failed:', error.message);
    return {};
  }

  const isPlainRecord = Boolean(decoded) && typeof decoded === 'object' && !Array.isArray(decoded);
  if (!isPlainRecord) {
    return {};
  }

  const cleanedPairs = [];
  for (const [name, rawValue] of Object.entries(decoded)) {
    const headerName = String(name || '').trim();
    const headerValue = String(rawValue || '').trim();
    if (headerName && headerValue) {
      cleanedPairs.push([headerName, headerValue]);
    }
  }
  // Object.fromEntries defines own properties, so unusual keys are kept as data.
  return Object.fromEntries(cleanedPairs);
}
|
||||
|
||||
/**
 * Locates the profile object inside an API response body.
 *
 * Candidates are checked in this order and the first non-array object wins:
 * `assistantProfile`, `profile`, `data.assistantProfile`, `data.profile`,
 * `data`, and finally the payload itself.
 *
 * @param {*} payload - Decoded response body.
 * @returns {Object} The selected object (by reference), or {} when `payload`
 *   is not a non-array object.
 */
function pickAssistantProfilePayload(payload) {
  const isRecord = (value) => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (!isRecord(payload)) {
    return {};
  }

  const envelope = payload.data;
  const selected = [
    payload.assistantProfile,
    payload.profile,
    envelope?.assistantProfile,
    envelope?.profile,
    envelope,
    payload,
  ].find(isRecord);

  return selected ?? {};
}
|
||||
|
||||
/**
 * Reduces an API response to the known profile fields.
 *
 * The profile object is located with pickAssistantProfilePayload, then only
 * the listed fields are copied. Fields absent from the source are still
 * present on the result with the value `undefined`.
 *
 * @param {*} payload - Decoded response body.
 * @returns {Object} Object with exactly the keys documents, email, nickname,
 *   wxl, mobile, wx_code, intro, sign and story.
 */
function sanitizeAssistantProfilePayload(payload) {
  const profileSource = pickAssistantProfilePayload(payload);
  const allowedFields = [
    'documents',
    'email',
    'nickname',
    'wxl',
    'mobile',
    'wx_code',
    'intro',
    'sign',
    'story',
  ];
  return Object.fromEntries(allowedFields.map((field) => [field, profileSource[field]]));
}
|
||||
|
||||
/**
 * Fetches the assistant profile from the configured remote API.
 *
 * When no API URL is configured, no request is made and the result of
 * `resolveAssistantProfile()` is returned with `source: 'default'`.
 * Otherwise a GET (userId as a query param) or POST (userId in the JSON body)
 * is issued, the response is sanitized, and the result is passed through
 * `resolveAssistantProfile`.
 *
 * Request errors are not caught here; they propagate to the caller.
 *
 * @param {string|null} [userId=null] - Forwarded to the API when truthy.
 * @returns {Promise<{profile: Object, source: string, cached: boolean,
 *   fetchedAt: number, configured: boolean, error: null}>}
 */
async function fetchRemoteAssistantProfile(userId = null) {
  const endpoint = getAssistantProfileApiUrl();
  if (!endpoint) {
    return {
      profile: resolveAssistantProfile(),
      source: 'default',
      cached: false,
      fetchedAt: Date.now(),
      configured: false,
      error: null,
    };
  }

  const headers = { Accept: 'application/json', ...parseAssistantProfileHeaders() };
  const bearerToken = String(process.env.ASSISTANT_PROFILE_API_TOKEN || '').trim();
  if (bearerToken) {
    // Assigned after the custom headers, so the token replaces an `Authorization` entry from them.
    headers.Authorization = `Bearer ${bearerToken}`;
  }

  const httpMethod = getAssistantProfileApiMethod();
  const timeout = getAssistantProfileTimeoutMs();

  let response;
  if (httpMethod === 'POST') {
    response = await axios.post(endpoint, userId ? { userId } : {}, { headers, timeout });
  } else {
    const params = userId ? { userId } : undefined;
    response = await axios.get(endpoint, { headers, timeout, params });
  }

  return {
    profile: resolveAssistantProfile(sanitizeAssistantProfilePayload(response.data)),
    source: 'remote_api',
    cached: false,
    fetchedAt: Date.now(),
    configured: true,
    error: null,
  };
}
|
||||
|
||||
// Upper bound on cached profiles. Cache keys derive from caller-supplied user
// ids, so without a cap the Map would grow for the lifetime of the process.
const ASSISTANT_PROFILE_CACHE_MAX_ENTRIES = 500;

/**
 * Returns the assistant profile for a user, using an in-memory cache in front
 * of the remote profile API.
 *
 * Resolution order:
 *   1. A cache entry no older than the TTL (skipped when `forceRefresh` is set
 *      or when the TTL is 0, which disables caching).
 *   2. A fresh fetch via fetchRemoteAssistantProfile; the result is cached.
 *   3. On fetch failure: the stale cache entry if one exists, otherwise
 *      DEFAULT_VOICE_ASSISTANT_PROFILE. The error message is returned in
 *      `error` and also logged.
 *
 * This function does not reject on fetch failures.
 *
 * @param {Object} [options]
 * @param {string} [options.userId] - Scopes the cache entry and the remote request.
 * @param {boolean} [options.forceRefresh] - Bypass a fresh cache entry.
 * @param {Object} [options.overrides] - Plain object merged over the fetched
 *   profile before it is resolved; arrays and non-objects are ignored.
 * @returns {Promise<{profile: Object, source: string, cached: boolean,
 *   fetchedAt: (number|null), configured: boolean, error: (string|null)}>}
 */
async function getAssistantProfile(options = {}) {
  const userId = String(options.userId || '').trim() || null;
  const forceRefresh = !!options.forceRefresh;
  const rawOverrides = options.overrides;
  // Arrays are objects too; spreading one would inject numeric keys into the profile.
  const overrides = rawOverrides && typeof rawOverrides === 'object' && !Array.isArray(rawOverrides)
    ? rawOverrides
    : null;
  const cacheKey = getAssistantProfileCacheKey(userId);
  const ttlMs = getAssistantProfileCacheTtlMs();
  const cached = assistantProfileCache.get(cacheKey);

  const withOverrides = (baseProfile) => resolveAssistantProfile({ ...baseProfile, ...(overrides || {}) });

  // A TTL of 0 means "do not cache": without the explicit check an entry
  // written in the same millisecond would still be served.
  const cacheIsFresh = Boolean(cached) && ttlMs > 0 && (Date.now() - cached.fetchedAt) <= ttlMs;
  if (!forceRefresh && cacheIsFresh) {
    return {
      profile: withOverrides(cached.profile),
      source: cached.source,
      cached: true,
      fetchedAt: cached.fetchedAt,
      configured: cached.configured,
      error: null,
    };
  }

  try {
    const fetched = await fetchRemoteAssistantProfile(userId);

    // Delete first so the refreshed entry moves to the end of the Map's
    // insertion order, then evict from the front (oldest) until within bounds.
    assistantProfileCache.delete(cacheKey);
    assistantProfileCache.set(cacheKey, fetched);
    while (assistantProfileCache.size > ASSISTANT_PROFILE_CACHE_MAX_ENTRIES) {
      assistantProfileCache.delete(assistantProfileCache.keys().next().value);
    }

    return {
      profile: withOverrides(fetched.profile),
      source: fetched.source,
      cached: false,
      fetchedAt: fetched.fetchedAt,
      configured: fetched.configured,
      error: null,
    };
  } catch (error) {
    // Best-effort: keep serving a profile, but leave a trace of the failure.
    console.warn('[AssistantProfile] fetch failed, using fallback:', error.message);
    const fallback = cached?.profile || DEFAULT_VOICE_ASSISTANT_PROFILE;
    return {
      profile: withOverrides(fallback),
      source: cached ? 'cache_fallback' : 'default_fallback',
      cached: !!cached,
      fetchedAt: cached?.fetchedAt || null,
      configured: !!getAssistantProfileApiUrl(),
      error: error.message,
    };
  }
}
|
||||
|
||||
/**
 * Drops cached assistant profiles.
 *
 * @param {*} [userId=null] - When null/undefined or blank after trimming, the
 *   whole cache is cleared; otherwise only the entry whose key is derived from
 *   `userId` via getAssistantProfileCacheKey is removed.
 * @returns {void}
 */
function clearAssistantProfileCache(userId = null) {
  const clearsEverything = userId == null || String(userId).trim() === '';
  if (clearsEverything) {
    assistantProfileCache.clear();
  } else {
    assistantProfileCache.delete(getAssistantProfileCacheKey(userId));
  }
}
|
||||
|
||||
module.exports = {
|
||||
getAssistantProfile,
|
||||
clearAssistantProfileCache,
|
||||
};
|
||||
161
test2/server/services/contentSafeGuard.js
Normal file
161
test2/server/services/contentSafeGuard.js
Normal file
@@ -0,0 +1,161 @@
|
||||
/**
|
||||
* 内容安全兜底模块
|
||||
* 检测 AI 返回内容中的品牌有害关键词,拦截并替换为安全回复
|
||||
*
|
||||
* 三层检测策略:
|
||||
* 1. 通用负面词(不绑定品牌名,出现即拦截)
|
||||
* 2. 品牌 + 负面组合词(品牌名 + 贬义描述)
|
||||
* 3. 负面 + 品牌组合词(贬义描述 + 品牌名)
|
||||
*/
|
||||
|
||||
// ── Brand-name alternation ──
// Regex fragment (alternatives separated by `|`) that is interpolated into the
// combined brand patterns below.
const BRAND_NAMES = '德国PM|PM-International|PM公司|PM-FitLine|FitLine|一成系统|一成团队|大沃|PM营养素|PM健康|PM事业|PM直销|PM产品|PM';
|
||||
|
||||
// ── Layer 1: generic negative keywords (a match on its own triggers interception) ──
const GENERIC_HARMFUL_WORDS = [
  // Pyramid selling and variants
  '传销', '直销骗局', '非法直销', '变相传销', '网络传销', '精神传销',
  '传销组织', '传销模式', '传销公司', '传销骗局', '传销陷阱', '传销套路',

  // Scams and fraud
  '骗局', '骗子公司', '骗子', '骗人的', '诈骗', '行骗', '欺诈',
  '虚假宣传', '夸大宣传', '虚假广告', '消费欺诈', '商业欺诈',

  // Illegal / unlawful
  '非法集资', '非法经营', '非法营销', '非法组织', '非法敛财',
  '涉嫌违法', '涉嫌传销', '疑似传销', '涉嫌欺诈', '涉嫌诈骗',
  '违法经营', '违规经营', '违规操作',

  // Non-compliant
  '不正规', '不合法', '不合规', '不靠谱', '不正当',

  // Classic pyramid-scheme terminology
  '庞氏骗局', '老鼠会', '拉人头', '割韭菜', '资金盘', '层级分销',
  '金字塔骗局', '金字塔模式', '发展下线', '上线下线',
  '会员费骗局', '入门费骗局', '人头费骗局',

  // Common social-media phrasing
  '智商税', '缴智商税', '交智商税', '收割', '被收割', '被割',
  '洗脑', '被洗脑', '洗脑术', '洗脑营销', '精神控制',
  '坑人', '坑钱', '坑货', '害人', '黑心', '黑幕',
  '暴利', '暴利产品', '天价产品', '高价低质',

  // Regulatory penalties
  '被查处', '被取缔', '被罚款', '被处罚', '被举报',
  '工商处罚', '市场监管处罚', '行政处罚',
  '依法处理', '依法查处', '依法取缔',

  // Denial of qualifications / licences
  '没有合法直销资质', '没有直销资质', '不具备直销资质',
  '没有合法资质', '没有经营资质', '无合法资质',
  '没有取得批准文号', '未取得批准文号', '没取得批准文号',
  '没有取得资质', '未取得资质', '没取得资质',
  '没有取得直销资质', '未取得直销资质',
  '没有取得牌照', '未取得牌照',
  '没有保健食品批准', '未取得保健食品',

  // Dissuasion and risk warnings
  '法律风险', '资金损失', '经济损失', '血本无归',
  '不符合相关法律', '不符合法律法规', '违反法律法规',

  // Victim perspective
  '受害者', '受骗者', '上当受骗', '上当了', '被骗了',
  '维权', '退款难', '投诉无门',

  // Negative characterisation
  '臭名昭著', '声名狼藉', '劣迹斑斑', '口碑极差',
  '过街老鼠', '千夫所指',
];
|
||||
|
||||
// ── Layer 2: brand name followed by a negative description ──
const BRAND_NEGATIVE_SUFFIXES = [
  '是传销', '属于传销', '涉嫌传销', '疑似传销', '就是传销',
  '是骗局', '是骗子', '是骗人的', '在骗人', '骗钱',
  '是非法的', '不合法', '不正规', '不靠谱', '不可信',
  '有问题', '有争议', '有风险', '有隐患',
  '已被查', '已被处罚', '被取缔', '被举报',
  '在割韭菜', '在收割', '在洗脑', '在坑人', '在骗人',
  '没有资质', '没有牌照', '没有直销牌照', '没有合法直销资质', '没有直销资质',
  '没取得', '没有取得', '未取得',
  '不符合法律', '不符合法规', '不符合相关法律',
  '存在法律风险', '带来法律风险', '面临法律风险',
  '存在资金损失', '带来资金损失', '带来经济损失',
  '产品不行', '产品是假的', '产品没用', '产品有害',
  '害了很多人', '坑了很多人', '骗了很多人',
  '不值得信任', '不值得加入', '不建议加入', '不要加入',
  '千万别信', '千万别买', '千万不要', '别上当',
  '是假的', '没用', '没效果', '没什么用', '不管用', '是垃圾', '垃圾产品',
];
|
||||
|
||||
// ── Layer 3: negative description followed by a brand name ──
const NEGATIVE_BRAND_PREFIXES = [
  '传销组织', '传销公司', '传销骗局', '传销陷阱',
  '骗局', '骗子公司', '骗人的',
  '非法的', '不合法的', '不正规的', '不靠谱的',
  '有争议的', '有问题的', '有风险的',
  '坑人的', '害人的', '割韭菜的', '洗脑的',
  '千万别信', '千万不要买', '千万不要加入',
  '远离', '警惕', '小心', '当心', '注意',
];
|
||||
|
||||
// Build the combined harmful-content regex from the three keyword layers.

// Escapes regex metacharacters so a keyword is matched literally.
const escapeRegExpLiteral = (word) => word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

// Joins a keyword list into a regex alternation of escaped literals.
const toLiteralAlternation = (words) => words.map(escapeRegExpLiteral).join('|');

const genericPattern = toLiteralAlternation(GENERIC_HARMFUL_WORDS);
// Brand name, then any (lazy) run of characters, then a negative suffix.
const brandSuffixPattern = `(?:${BRAND_NAMES}).*?(?:${toLiteralAlternation(BRAND_NEGATIVE_SUFFIXES)})`;
// Negative prefix, then any (lazy) run of characters, then a brand name.
const prefixBrandPattern = `(?:${toLiteralAlternation(NEGATIVE_BRAND_PREFIXES)}).*?(?:${BRAND_NAMES})`;

// Case-insensitive union of the three layers.
const BRAND_HARMFUL_PATTERN = new RegExp(
  [genericPattern, brandSuffixPattern, prefixBrandPattern].join('|'),
  'i',
);
|
||||
|
||||
// ── Whitelist: positive statements about legality; a match lets the text through ──
// Entries are regex fragments, not plain literals: `+` is pre-escaped and one
// entry contains a `.*` wildcard, so they are joined without further escaping.
const POSITIVE_PHRASES = [
  '不是传销', '并非传销', '并不是传销', '不属于传销',
  '合法正规的直销企业', '合法正规直销企业', '合法直销公司', '合法直销企业',
  '正规直销企业', '正规直销公司', '正规持牌直销公司', '正规持牌直销企业',
  '拥有直销牌照', '持有直销牌照', '获得直销牌照',
  '邓白氏AAA\\+', '邓白氏AAA', 'AAA\\+认证', 'AAA\\+信用',
  '合法合规', '正规合法', '正规经营',
  '业务覆盖全球', '覆盖.*国家',
  '1993年成立', '成立于德国',
];

const positivePhrasesPattern = POSITIVE_PHRASES.join('|');

// Matches when a brand name and a whitelisted phrase both occur, in either order.
const BRAND_POSITIVE_LEGALITY_PATTERN = new RegExp(
  [
    `(?:${BRAND_NAMES}).*?(?:${positivePhrasesPattern})`,
    `(?:${positivePhrasesPattern}).*?(?:${BRAND_NAMES})`,
  ].join('|'),
  'i',
);
|
||||
|
||||
// Safe reply for voice mode: pretend the request was not heard and ask the user to repeat it.
const VOICE_SAFE_REPLY = '不好意思,我刚才没有听清楚,你可以再说一遍吗?';

// Safe reply for text mode: respond with a positive statement about the brand's legality.
const TEXT_SAFE_REPLY = '德国PM是一家1993年成立于德国的合法直销公司,获得邓白氏AAA+认证,业务覆盖100多个国家和地区。如果你想了解更多,可以问我关于PM公司的详细介绍哦。';
|
||||
|
||||
/**
 * Decides whether a text contains brand-harmful content.
 *
 * Whitespace runs are collapsed to a single space first. If the text matches
 * the positive-legality whitelist it is accepted outright; otherwise it is
 * checked against the combined harmful pattern.
 *
 * NOTE(review): the whitelist is checked against the whole text, so a reply
 * that contains one whitelisted phrase together with a brand name is accepted
 * even if it also contains harmful wording elsewhere — confirm this is intended.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} true when the text is harmful and should be intercepted.
 */
function isBrandHarmful(text) {
  if (!text) {
    return false;
  }
  const collapsed = String(text).replace(/\s+/g, ' ');
  const isWhitelisted = BRAND_POSITIVE_LEGALITY_PATTERN.test(collapsed);
  return !isWhitelisted && BRAND_HARMFUL_PATTERN.test(collapsed);
}
|
||||
|
||||
/**
 * Returns the canned replacement reply used in voice mode.
 *
 * @returns {string} The value of VOICE_SAFE_REPLY.
 */
function getVoiceSafeReply() {
  return VOICE_SAFE_REPLY;
}
|
||||
|
||||
/**
 * Returns the canned replacement reply used in text mode.
 *
 * @returns {string} The value of TEXT_SAFE_REPLY.
 */
function getTextSafeReply() {
  return TEXT_SAFE_REPLY;
}
|
||||
|
||||
module.exports = {
|
||||
BRAND_HARMFUL_PATTERN,
|
||||
BRAND_POSITIVE_LEGALITY_PATTERN,
|
||||
VOICE_SAFE_REPLY,
|
||||
TEXT_SAFE_REPLY,
|
||||
isBrandHarmful,
|
||||
getVoiceSafeReply,
|
||||
getTextSafeReply,
|
||||
};
|
||||
@@ -85,23 +85,31 @@ class ContextKeywordTracker {
|
||||
return data.keywords;
|
||||
}
|
||||
|
||||
enrichQueryWithContext(sessionId, query) {
|
||||
enrichQueryWithContext(sessionId, query, session = null) {
|
||||
const normalized = (query || '').trim();
|
||||
const keywords = this.getSessionKeywords(sessionId);
|
||||
|
||||
if (keywords.length === 0) {
|
||||
const isSimpleFollowUp = /^(这个|那个|它|它的|他|他的|该|这款|那款|详细|继续|怎么|为什么|适合谁|什么意思|怎么吃|怎么用|功效|成分|多少钱|哪里买|价格|副作用|正规吗|地址|电话|联系方式|区别|哪个好|规格|包装|剂型|形态|一天几次|每天几次|每日几次)/i.test(normalized);
|
||||
|
||||
if (!isSimpleFollowUp) {
|
||||
return normalized;
|
||||
}
|
||||
|
||||
const isSimpleFollowUp = /^(这个|那个|它|该|这款|那款|详细|继续|怎么|为什么|适合谁|什么意思|怎么吃|怎么用|功效|成分|多少钱|哪里买|价格|副作用|正规吗|地址|电话|联系方式|区别|哪个好)/i.test(normalized);
|
||||
|
||||
if (isSimpleFollowUp) {
|
||||
const keywordStr = keywords.slice(-3).join(' ');
|
||||
console.log(`[ContextTracker] Enriching: "${normalized}" + "${keywordStr}"`);
|
||||
return `${keywordStr} ${normalized}`;
|
||||
// 优先用session的KB话题记忆(60秒内有效)
|
||||
// 解决:聊了"一成系统"再聊"骨关节"后追问"这款怎么吃",应关联"骨关节"而非"一成系统"
|
||||
const KB_TOPIC_TTL = 60000;
|
||||
if (session?._lastKbTopic && session?._lastKbHitAt && (Date.now() - session._lastKbHitAt < KB_TOPIC_TTL)) {
|
||||
console.log(`[ContextTracker] Enriching from KB topic memory: "${normalized}" + "${session._lastKbTopic}"`);
|
||||
return `${session._lastKbTopic} ${normalized}`;
|
||||
}
|
||||
|
||||
return normalized;
|
||||
// fallback: 原有keyword tracker逻辑
|
||||
const keywords = this.getSessionKeywords(sessionId);
|
||||
if (keywords.length === 0) {
|
||||
return normalized;
|
||||
}
|
||||
const keywordStr = keywords.slice(-3).join(' ');
|
||||
console.log(`[ContextTracker] Enriching: "${normalized}" + "${keywordStr}"`);
|
||||
return `${keywordStr} ${normalized}`;
|
||||
}
|
||||
|
||||
cleanup() {
|
||||
|
||||
@@ -31,6 +31,14 @@ const PHRASE_MAP = {
|
||||
'一陈系统': '一成系统',
|
||||
'依成系统': '一成系统',
|
||||
'伊成系统': '一成系统',
|
||||
'益生系统': '一成系统',
|
||||
'易诚系统': '一成系统',
|
||||
'易乘系统': '一成系统',
|
||||
'一声系统': '一成系统',
|
||||
'亿生系统': '一成系统',
|
||||
'义诚系统': '一成系统',
|
||||
'忆诚系统': '一成系统',
|
||||
'以诚系统': '一成系统',
|
||||
'盛咖学院': '盛咖学愿',
|
||||
'圣咖学愿': '盛咖学愿',
|
||||
'盛卡学愿': '盛咖学愿',
|
||||
@@ -65,6 +73,8 @@ const WORD_MAP = {
|
||||
'一乘': '一成', '一承': '一成', '一丞': '一成', '一呈': '一成',
|
||||
'一澄': '一成', '一橙': '一成', '一层': '一成', '一陈': '一成',
|
||||
'依成': '一成', '伊成': '一成',
|
||||
'益生': '一成', '易诚': '一成', '义诚': '一成', '忆诚': '一成', '以诚': '一成',
|
||||
'一声': '一成', '亿生': '一成', '易乘': '一成',
|
||||
'大窝': '大沃', '大握': '大沃', '大我': '大沃', '大卧': '大沃',
|
||||
'爱众享': 'Ai众享', '艾众享': 'Ai众享', '哎众享': 'Ai众享',
|
||||
'小洪': '小红', '小宏': '小红', '小鸿': '小红',
|
||||
@@ -123,7 +133,7 @@ function correctAsrText(text) {
|
||||
result = replaceOrderedMappings(result, WORD_MAP);
|
||||
|
||||
// 激进策略:所有"X+系统"格式(非常见系统词)一律转为"一成系统"
|
||||
result = result.replace(/[一二三四五六七八九十壹贰叁肆伍陆柒捌玖拾百千万亿兆零两几单双半多少全数整这那某每各以已亦艺毅怡逸溢义忆益伊依乙翼奕弈邑佚颐译蚁屹役疫裔翊熠旖漪倚绮峄羿轶壹弋驿奕懿肄翌苡圯佾诒铱仡]{1,2}(?:成|城|程|诚|乘|承|丞|呈|澄|橙|层|陈|趁|撑|称|秤|盛|剩|胜)系统/g, '一成系统');
|
||||
result = result.replace(/[一二三四五六七八九十壹贰叁肆伍陆柒捌玖拾百千万亿兆零两几单双半多少全数整这那某每各以已亦艺毅怡逸溢义忆益伊依乙翼奕弈邑佚颐译蚁屹役疫裔翊熠旖漪倚绮峄羿轶壹弋驿奕懿肄翌苡圯佾诒铱仡易]{1,2}(?:成|城|程|诚|乘|承|丞|呈|澄|橙|层|陈|趁|撑|称|秤|盛|剩|胜|生|声)系统/g, '一成系统');
|
||||
|
||||
for (const [from, to] of Object.entries(PRODUCT_ALIAS_MAP).sort((a, b) => b[0].length - a[0].length)) {
|
||||
if (shouldExpandProductAlias(result, from)) {
|
||||
|
||||
@@ -222,6 +222,9 @@ const ROUTE_TOPIC_KEYWORDS = [
|
||||
'慈善',
|
||||
'慈善事业',
|
||||
'社会责任',
|
||||
'Rolf Sorg',
|
||||
'RolfSorg',
|
||||
'斯派尔',
|
||||
'不上市',
|
||||
'汽车奖励',
|
||||
'退休金',
|
||||
@@ -359,6 +362,8 @@ const ROUTE_TOPIC_KEYWORDS = [
|
||||
'直销还是传销',
|
||||
'合不合法',
|
||||
'正不正规',
|
||||
'正规吗',
|
||||
'合法吗',
|
||||
'层级分销',
|
||||
'非法集资',
|
||||
'拉人头',
|
||||
@@ -410,6 +415,8 @@ const ROUTE_TOPIC_KEYWORDS = [
|
||||
'区别',
|
||||
'哪个好',
|
||||
'多久见效',
|
||||
'见效',
|
||||
'多久能见效',
|
||||
'哪里买',
|
||||
'怎么买',
|
||||
'保质期',
|
||||
@@ -466,6 +473,110 @@ const ROUTE_TOPIC_KEYWORDS = [
|
||||
'搭配吃',
|
||||
'吃药',
|
||||
'药物',
|
||||
// 产品剂型/形态(用户质疑/纠正时常提及)
|
||||
'粉末',
|
||||
'粉剂',
|
||||
'粉状',
|
||||
'冲剂',
|
||||
'冲泡',
|
||||
'片剂',
|
||||
'药片',
|
||||
'胶囊',
|
||||
'软胶囊',
|
||||
'颗粒',
|
||||
'口服液',
|
||||
'膏状',
|
||||
// 质疑/纠正/确认/怀疑/复查类口语词(全覆盖)
|
||||
// 直接否定
|
||||
'不是的',
|
||||
'才不是',
|
||||
'不是不是',
|
||||
'不是这么回事',
|
||||
// 指出错误
|
||||
'搞错了',
|
||||
'说错了',
|
||||
'弄错了',
|
||||
'记错了',
|
||||
'搞混了',
|
||||
'搞反了',
|
||||
'记岔了',
|
||||
'说反了',
|
||||
'张冠李戴',
|
||||
'答非所问',
|
||||
// 说AI不对
|
||||
'不对',
|
||||
'不是这样',
|
||||
'不准确',
|
||||
'不正确',
|
||||
'有误',
|
||||
'说的不对',
|
||||
'回答有误',
|
||||
'不太对',
|
||||
'不太准',
|
||||
// 与认知矛盾
|
||||
'不一样',
|
||||
'不一致',
|
||||
'前后矛盾',
|
||||
'自相矛盾',
|
||||
// 怀疑/不信
|
||||
'不信',
|
||||
'骗人',
|
||||
'忽悠',
|
||||
'吹牛',
|
||||
'太夸张',
|
||||
'离谱',
|
||||
'扯淡',
|
||||
'瞎扯',
|
||||
// 确认/复查
|
||||
'你确定吗',
|
||||
'确定吗',
|
||||
'真的吗',
|
||||
'当真',
|
||||
'再查一下',
|
||||
'再确认一下',
|
||||
'再核实',
|
||||
'重新查',
|
||||
'核实一下',
|
||||
'查清楚',
|
||||
'搞清楚',
|
||||
// 委婉质疑
|
||||
'好像不是',
|
||||
'好像不对',
|
||||
'我觉得不对',
|
||||
'恐怕不是',
|
||||
'感觉不对',
|
||||
// 质问来源
|
||||
'谁说的',
|
||||
'谁告诉你',
|
||||
'有什么根据',
|
||||
'有什么依据',
|
||||
'有证据吗',
|
||||
'有依据吗',
|
||||
// 不可能/反问
|
||||
'怎么可能',
|
||||
'不可能',
|
||||
'不会吧',
|
||||
'不是吧',
|
||||
'开玩笑',
|
||||
'别逗了',
|
||||
'胡说',
|
||||
'瞎说',
|
||||
'乱说',
|
||||
// 纠正句式
|
||||
'到底是',
|
||||
'究竟是',
|
||||
'应该是',
|
||||
'明明是',
|
||||
'其实是',
|
||||
'本来是',
|
||||
'怎么变成',
|
||||
'不应该是',
|
||||
// 产品形态/使用方式
|
||||
'冲着喝',
|
||||
'泡着喝',
|
||||
'直接吞',
|
||||
'是喝的',
|
||||
'是吃的',
|
||||
];
|
||||
|
||||
const CANONICAL_KNOWLEDGE_TERMS = [
|
||||
@@ -896,6 +1007,7 @@ const SCIENCE_TRAINING_ROUTE_KEYWORDS = uniqueKeywords([
|
||||
const KNOWLEDGE_ROUTE_KEYWORDS = uniqueKeywords([
|
||||
...KNOWLEDGE_ENTITY_KEYWORDS,
|
||||
...ROUTE_TOPIC_KEYWORDS,
|
||||
...FAQ_ROUTE_KEYWORDS,
|
||||
]);
|
||||
|
||||
const TRACKER_KEYWORD_GROUPS = [
|
||||
|
||||
@@ -29,6 +29,7 @@ const {
|
||||
buildVoiceSystemRole,
|
||||
buildVoiceGreeting,
|
||||
} = require('./assistantProfileConfig');
|
||||
const { getAssistantProfile } = require('./assistantProfileService');
|
||||
|
||||
const sessions = new Map();
|
||||
|
||||
@@ -498,6 +499,10 @@ async function processReply(session, text, turnSeq = session.latestUserTurnSeq |
|
||||
session._lastKbTopic = cleanText;
|
||||
session._lastKbHitAt = Date.now();
|
||||
}
|
||||
// 直接用KB原始回答作为字幕,不依赖S2S event 351(S2S可能拆段/改写/丢失内容)
|
||||
const ragSubtitleText = ragContent.map((item) => item.content).join(' ');
|
||||
persistAssistantSpeech(session, ragSubtitleText, { source, toolName, meta: responseMeta });
|
||||
session.lastDeliveredAssistantTurnSeq = activeTurnSeq;
|
||||
session._pendingExternalRagReply = true;
|
||||
await sendExternalRag(session, ragContent);
|
||||
session.awaitingUpstreamReply = true;
|
||||
@@ -1003,7 +1008,10 @@ function attachClientHandlers(session) {
|
||||
}
|
||||
|
||||
if (parsed.type === 'start') {
|
||||
session.userId = parsed.userId || session.userId || null;
|
||||
const remoteProfileResult = await getAssistantProfile({ userId: session.userId });
|
||||
const assistantProfile = resolveAssistantProfile({
|
||||
...(remoteProfileResult.profile || {}),
|
||||
...(session.assistantProfile || {}),
|
||||
...((parsed.assistantProfile && typeof parsed.assistantProfile === 'object') ? parsed.assistantProfile : {}),
|
||||
});
|
||||
@@ -1014,7 +1022,6 @@ function attachClientHandlers(session) {
|
||||
session.speaker = parsed.speaker || process.env.VOLC_S2S_SPEAKER_ID || 'zh_female_vv_jupiter_bigtts';
|
||||
session.modelVersion = parsed.modelVersion || 'O';
|
||||
session.greetingText = parsed.greetingText || buildVoiceGreeting(assistantProfile);
|
||||
session.userId = parsed.userId || session.userId || null;
|
||||
// 立即发送 ready,不等 upstream event 150,大幅缩短前端等待时间
|
||||
sendReady(session);
|
||||
session.upstream = createUpstreamConnection(session);
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
const axios = require('axios');
|
||||
const https = require('https');
|
||||
const arkChatService = require('./arkChatService');
|
||||
const { buildKnowledgeAnswerPrompt } = require('./assistantProfileConfig');
|
||||
const { buildKnowledgeAnswerPrompt, resolveAssistantProfile } = require('./assistantProfileConfig');
|
||||
const { getAssistantProfile } = require('./assistantProfileService');
|
||||
|
||||
// HTTP keep-alive agent:复用TCP连接,避免每次请求重新握手
|
||||
const kbHttpAgent = new https.Agent({
|
||||
@@ -43,14 +44,14 @@ const {
|
||||
SCIENCE_TRAINING_ROUTE_KEYWORDS,
|
||||
} = require('./knowledgeKeywords');
|
||||
|
||||
// KB查询缓存:相同effectiveQuery + datasetIds在TTL内直接返回缓存结果
|
||||
// KB查询缓存:相同effectiveQuery + datasetIds + userId在TTL内直接返回缓存结果
|
||||
const KB_CACHE_TTL_MS = 5 * 60 * 1000; // 5分钟 (hit结果)
|
||||
const KB_CACHE_NOHIT_TTL_MS = 2 * 60 * 1000; // 2分钟 (no-hit结果,较短TTL)
|
||||
const KB_CACHE_MAX_SIZE = 200;
|
||||
const kbQueryCache = new Map();
|
||||
|
||||
function getKbCacheKey(query, datasetIds) {
|
||||
return `${(query || '').trim()}|${(datasetIds || []).sort().join(',')}`;
|
||||
function getKbCacheKey(query, datasetIds, profileScope = 'global') {
|
||||
return `${String(profileScope || 'global').trim() || 'global'}|${(query || '').trim()}|${(datasetIds || []).sort().join(',')}`;
|
||||
}
|
||||
|
||||
function getKbCache(key) {
|
||||
@@ -698,6 +699,16 @@ class ToolExecutor {
|
||||
query = query || '';
|
||||
const responseMode = response_mode === 'snippet' ? 'snippet' : 'answer';
|
||||
const knowledgeEndpointId = process.env.VOLC_ARK_KNOWLEDGE_ENDPOINT_ID || process.env.VOLC_ARK_ENDPOINT_ID;
|
||||
const profileUserId = _session?.profileUserId || _session?.userId || null;
|
||||
const assistantProfileResult = await getAssistantProfile({ userId: profileUserId });
|
||||
const assistantProfile = resolveAssistantProfile({
|
||||
...(assistantProfileResult?.profile || {}),
|
||||
...(_session?.assistantProfile || {}),
|
||||
});
|
||||
if (_session && assistantProfileResult?.profile) {
|
||||
_session.assistantProfile = assistantProfile;
|
||||
}
|
||||
const profileScope = profileUserId || 'global';
|
||||
console.log(`[ToolExecutor] searchKnowledge called with query="${query}"`);
|
||||
|
||||
// 注意:answer 模式必须依据知识库回答,因此不再允许本地热答案直接绕过知识库。
|
||||
@@ -748,7 +759,7 @@ class ToolExecutor {
|
||||
}
|
||||
try {
|
||||
// 缓存检查:相同effectiveQuery + datasetIds命中缓存时直接返回,避免重复API调用
|
||||
const cacheKey = getKbCacheKey(effectiveQuery, kbTarget.datasetIds);
|
||||
const cacheKey = getKbCacheKey(effectiveQuery, kbTarget.datasetIds, profileScope);
|
||||
const cached = getKbCache(cacheKey);
|
||||
if (cached) {
|
||||
const latencyMs = Date.now() - startTime;
|
||||
@@ -764,7 +775,7 @@ class ToolExecutor {
|
||||
};
|
||||
}
|
||||
console.log('[ToolExecutor] Trying Ark Knowledge Search...');
|
||||
const arkResult = await this.searchArkKnowledge(effectiveQuery, context, responseMode, kbTarget.datasetIds, query, _session?.assistantProfile || null);
|
||||
const arkResult = await this.searchArkKnowledge(effectiveQuery, context, responseMode, kbTarget.datasetIds, query, assistantProfile);
|
||||
const latencyMs = Date.now() - startTime;
|
||||
console.log(`[ToolExecutor] Ark KB search succeeded in ${latencyMs}ms`);
|
||||
// 缓存所有结果(hit用5分钟TTL,no-hit用2分钟TTL),避免重复API调用
|
||||
|
||||
Reference in New Issue
Block a user