feat(server): KB prompt优化、字幕修复、S2S重连、助手配置API

- assistantProfileConfig: KB answer prompt改为分层策略(严格产品信息+灵活常识补充)
- nativeVoiceGateway: S2S upstream自动重连(最多50次)、event 351字幕debounce(800ms取最长文本)
- toolExecutor: 确定性query改写增强、KB查询传递session上下文
- contextKeywordTracker: 支持KB话题记忆优先enrichment
- contentSafeGuard: 新增品牌安全内容过滤服务
- assistantProfileService: 新增助手配置CRUD服务
- routes/assistantProfile: 新增助手配置API路由
- knowledgeKeywords: 扩展KB关键词词典
- fastAsrCorrector: ASR纠错规则更新
- tests/: KB prompt测试、保护窗口测试、Viking性能测试
- docs/: 助手配置API文档、系统提示词目录
This commit is contained in:
User
2026-03-24 17:19:36 +08:00
parent 57a03677a9
commit 9567eb7358
34 changed files with 7076 additions and 46 deletions

View File

@@ -0,0 +1,576 @@
/**
* KB保护窗口 + 质疑检测 + query去噪 + 话题记忆 深度扩展测试
* 与 test_kb_protection.js 互补,覆盖更多边界、组合、时序场景
*
* 运行方式: node --test tests/test_kb_protection_extended.js
*/
const { describe, it } = require('node:test');
const assert = require('node:assert/strict');
const { shouldForceKnowledgeRoute, normalizeKnowledgeAlias } = require('../services/realtimeDialogRouting');
const { hasKnowledgeRouteKeyword } = require('../services/knowledgeKeywords');
const contextKeywordTracker = require('../services/contextKeywordTracker');
// Optional dependency: stays null when the module cannot be loaded, which the
// sanitizeRewrittenQuery suite uses to decide whether to skip its cases.
let ToolExecutor = null;
try {
  ToolExecutor = require('../services/toolExecutor');
} catch {
  // Deliberate best-effort load: ToolExecutor keeps its null initial value.
}

// Shared two-turn history (user question + assistant answer) passed as the
// context argument to shouldForceKnowledgeRoute in the follow-up suites.
const kbCtx = [
  { role: 'user', content: '基础三合一怎么吃' },
  { role: 'assistant', content: '大白早上空腹1平勺温水冲服小红中午1平勺小白睡前1平勺...' },
];
// ================================================================
// 1. shouldForceKnowledgeRoute — challenge phrasing combined with a product name
// ================================================================
describe('组合质疑+产品名 —— 质疑词嵌入具体产品场景', () => {
  // Utterances that mention a product/topic and dispute the previous answer.
  const challengeUtterances = [
    '大白不是这样吃的', '小红功效你搞错了吧', 'CC套装明明是乳霜', '基础三合一不是冲剂',
    'Q10你说的不对', 'D-Drink不是这么用的', '一成系统跟我了解的不一样', '火炉原理好像不是这么说的',
    'IB5不可能是这个功效吧', '小白Restorate我记得不是这样', '儿童倍适应该是胶囊不是粉末', 'Hair+你再查查',
    'NTC你确定是这个原理吗', '邓白氏谁说的AAA+', '关节套装真的有这个功效吗', 'TopShape太夸张了吧',
    'ProShape氨基酸骗人的吧', '叶黄素我不信有这个作用', '乳清蛋白说的有问题', '运动饮料不是这个成分',
  ];

  // One test per utterance, evaluated against the shared KB history.
  challengeUtterances.forEach((utterance) => {
    it(`"${utterance}" → 应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.strictEqual(routed, true, `"${utterance}" should route KB`);
    });
  });
});
// ================================================================
// 2. shouldForceKnowledgeRoute — challenge variants with punctuation / modal particles
// ================================================================
describe('带标点/语气词的质疑变体', () => {
  // Bare challenge phrases decorated with repeated punctuation or sentence-final particles.
  const decoratedChallenges = [
    '不对吧?', '不对不对不对!', '你搞错了吧!!', '说错了,,,',
    '我不信!真的假的?', '骗人的吧……', '太夸张了~', '离谱啊!',
    '扯淡吧??', '怎么可能???', '不可能!不是吧!', '好像不对哦~',
    '你再查查?', '核实一下嘛。', '真的吗?真的吗?', '谁说的啊?',
    '有什么根据呢?', '到底是什么啊!', '应该是胶囊呀~', '明明是粉末嘛!',
  ];

  decoratedChallenges.forEach((utterance) => {
    it(`"${utterance}" → 应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.strictEqual(routed, true, `"${utterance}" should route KB`);
    });
  });
});
// ================================================================
// 3. shouldForceKnowledgeRoute — challenges / follow-ups behind a leading prefix
// ================================================================
describe('前缀剥离 —— 带"那/那你/你再/再"前缀的质疑', () => {
  // Follow-ups that start with a conversational lead-in before the actual request.
  const prefixedUtterances = [
    '你再看看这个对不对', '帮我再确认一下', '你再看看吧', '再来说说',
    '麻烦你核实一下', '帮我确认一下', '那你确定吗', '那再确认一下',
    '那不对吧', '那你搞错了', '那我记得不是这样', '再帮我查查',
    '那再给我介绍一下', '那详细说说', '你再展开说说', '那怎么吃',
    '再讲讲功效是什么',
  ];

  prefixedUtterances.forEach((utterance) => {
    it(`"${utterance}" → 应识别为KB追问`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.strictEqual(routed, true, `"${utterance}" should route KB`);
    });
  });
});
// ================================================================
// 4. shouldForceKnowledgeRoute — challenge embedded inside a longer sentence
// ================================================================
describe('长句中嵌入质疑 —— 质疑词不在句首', () => {
  // The disputing phrase appears mid-sentence or at the end rather than at the start.
  const embeddedChallenges = [
    '我刚才听你说的跟我了解的不一样', '你之前的回答好像有误吧', '按照我之前看到的资料应该是胶囊',
    '怎么跟我之前在网上搜的不一致', '别人告诉我是粉末的来着', '但是我觉得你说的不太对',
    '我看了很多资料你确定吗', '感觉你说的和我了解的有出入', '以前有人跟我说是冲着喝的',
    '但是网上说法跟你不一样', '我一直以为不是这样的', '到底是什么意思啊',
    '这些信息可靠吗有根据吗', '我朋友说你讲的不对', '这跟官方说的不一致吧',
  ];

  embeddedChallenges.forEach((sentence) => {
    it(`"${sentence}" → 应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(sentence, kbCtx);
      assert.strictEqual(routed, true, `"${sentence}" should route KB`);
    });
  });
});
// ================================================================
// 5. shouldForceKnowledgeRoute — pure follow-up pattern (subject + action)
// ================================================================
describe('subject+action追问模式 —— 带上下文', () => {
  // Pronoun-style subject ("this/that/it") followed by a product-related action or attribute.
  const pronounFollowUps = [
    '这个怎么吃', '那个功效是什么', '它适合谁', '这个产品多少钱',
    '那个产品哪里买', '这个怎么用', '那个怎么操作', '这个系统怎么配置',
    '这个产品成分是什么', '那个产品有什么功效', '它怎么服用', '这个有什么好处',
    '那个配方', '这个原理是什么', '这个产品适合什么人', '那个产品怎么买',
  ];

  // All cases are evaluated with the shared KB history as context.
  pronounFollowUps.forEach((question) => {
    it(`"${question}" → 有上下文应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(question, kbCtx);
      assert.strictEqual(routed, true, `"${question}" should route KB with context`);
    });
  });
});
// ================================================================
// 6. shouldForceKnowledgeRoute — extended negative cases
// ================================================================
describe('否定用例扩展 —— 不应走KB的各类闲聊', () => {
  // Greetings, acknowledgements and small talk that must not be routed to the KB.
  const smallTalk = [
    '你好', '嗨', '谢谢', '再见', '好的', '嗯嗯',
    '哈哈哈', '拜拜', '没事了', '不用了', '可以了', '行',
    '知道了', '明白了', '了解了', '好吧', '算了', '今天天气好',
    '你是谁', '你叫什么名字', '你是机器人吗', '讲个笑话', '唱首歌', '几点了',
    '我饿了', '晚安', '早上好', '下午好', '辛苦了', '厉害',
  ];

  // Called without a context argument on purpose: no prior KB exchange is supplied.
  smallTalk.forEach((utterance) => {
    it(`"${utterance}" → 不应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance);
      assert.strictEqual(routed, false, `"${utterance}" should NOT route KB`);
    });
  });
});
// ================================================================
// 7. hasKnowledgeRouteKeyword — systematic coverage of every keyword category
// ================================================================
describe('hasKnowledgeRouteKeyword — 产品名关键词系统覆盖', () => {
  // Product names and aliases, each checked on its own as the whole input.
  const productNames = [
    '大白', '小红', '小白', '基础三合一', 'Basics',
    'Activize', 'Restorate', '儿童倍适', 'CC套装', 'CC-Cell',
    'Q10', 'IB5', 'D-Drink', 'Hair+', 'ProShape氨基酸',
    'Herbal Tea', 'TopShape', 'Men Face', 'MEN+', '乐活',
    '草本茶', '叶黄素', '葡萄籽', '益生菌', '胶原蛋白',
    '关节套装', '乳清蛋白', '运动饮料', '苹果细胞抗氧素',
  ];

  productNames.forEach((name) => {
    it(`产品"${name}" → 应命中`, () => {
      const matched = hasKnowledgeRouteKeyword(name);
      assert.strictEqual(matched, true, `"${name}" should match`);
    });
  });
});
// hasKnowledgeRouteKeyword — FAQ / science keywords.
describe('hasKnowledgeRouteKeyword — FAQ/科学关键词系统覆盖', () => {
  // Each entry must be unique: a repeated keyword registers two tests with the
  // same title, which makes a failure report ambiguous and adds no coverage.
  const faqKeywords = [
    '怎么吃', '功效', '成分', '多少钱', '价格', '适合谁',
    '副作用', '多久见效', '见效', '好转反应', '是不是传销',
    '传销', '保质期', '哪里买', '怎么买',
    'NTC', '火炉原理', '阿育吠陀', '细胞营养素',
    '正规吗', '合法吗', '贵不贵', '不舒服',
  ];

  for (const kw of faqKeywords) {
    it(`FAQ"${kw}" → 应命中`, () => {
      assert.equal(hasKnowledgeRouteKeyword(kw), true, `"${kw}" should match`);
    });
  }
});
// hasKnowledgeRouteKeyword — business-opportunity / training keywords.
describe('hasKnowledgeRouteKeyword — 事业/培训关键词系统覆盖', () => {
  const businessTerms = [
    '招商', '代理', '加盟', '事业机会',
    '创业', '起步三关', '精品会议', '成长上总裁',
    '做PM', '加入PM', 'PM事业',
  ];

  businessTerms.forEach((term) => {
    it(`事业"${term}" → 应命中`, () => {
      const matched = hasKnowledgeRouteKeyword(term);
      assert.strictEqual(matched, true, `"${term}" should match`);
    });
  });
});
// hasKnowledgeRouteKeyword — challenge / doubt keywords (the list also carries dosage-form words).
describe('hasKnowledgeRouteKeyword — 质疑类关键词系统覆盖', () => {
  const doubtTerms = [
    '搞错了', '说错了', '弄错了', '不对', '不准确',
    '有误', '确定吗', '真的吗', '不可能', '胡说',
    '骗人', '离谱', '核实一下', '再查查', '粉末',
    '胶囊', '片剂', '冲剂', '口服液', '软胶囊',
    '颗粒', '膏状', '到底是', '应该是', '明明是',
    '不信', '吹牛', '扯淡', '有依据吗', '谁说的',
  ];

  doubtTerms.forEach((term) => {
    it(`质疑"${term}" → 应命中`, () => {
      const matched = hasKnowledgeRouteKeyword(term);
      assert.strictEqual(matched, true, `"${term}" should match`);
    });
  });
});
// hasKnowledgeRouteKeyword — everyday words that must not be treated as KB keywords.
describe('hasKnowledgeRouteKeyword — 不应命中的普通词汇', () => {
  const everydayWords = [
    '你好', '天气', '笑话', '唱歌', '吃饭', '睡觉', '电影', '音乐', '游戏',
    '旅游', '工作', '学习', '开心', '难过', '累', '饿', '渴', '无聊',
  ];

  everydayWords.forEach((word) => {
    it(`闲聊"${word}" → 不应命中`, () => {
      const matched = hasKnowledgeRouteKeyword(word);
      assert.strictEqual(matched, false, `"${word}" should NOT match`);
    });
  });
});
// ================================================================
// 8. sanitizeRewrittenQuery — in-depth denoising tests
// ================================================================
describe('sanitizeRewrittenQuery — 深度去噪截断', () => {
  // Every case is skipped when toolExecutor failed to load or does not expose
  // sanitizeRewrittenQuery.
  const skip = !ToolExecutor || !ToolExecutor.sanitizeRewrittenQuery;

  // [input, substring that must survive, label used in the test title]
  const fillerCases = [
    ['骨关节啊嗯呢产品', '骨关节', '去除嗯啊呢'],
    ['那个就是说这个呢功效是什么', '功效', '去除口语填充'],
    ['骨关节哦嗯额功效', '骨关节', '去除多个语气词'],
    ['基础三合一呀怎么吃呀', '基础三合一', '去除呀'],
    ['嗯嗯那个小红功效', '小红', '去除嗯嗯那个'],
  ];
  for (const [input, expectContain, label] of fillerCases) {
    it(`${label}: "${input}" → 含"${expectContain}"`, { skip }, () => {
      const result = ToolExecutor.sanitizeRewrittenQuery(input);
      assert.ok(result.includes(expectContain), `Got "${result}"`);
    });
  }

  it('多次重复去重: "小红 小红 小红 功效"', { skip }, () => {
    const result = ToolExecutor.sanitizeRewrittenQuery('小红 小红 小红 功效');
    const count = (result.match(/小红/g) || []).length;
    // Three occurrences go in; at most two may come out.
    assert.ok(count <= 2, `Should dedupe, got "${result}" (${count} occurrences)`);
  });

  it('去除连续空格', { skip }, () => {
    // Build the runs of spaces explicitly so the input really contains
    // consecutive spaces, and assert on runs of two or more. The previous form
    // (single-spaced input checked against / /) required every space to be
    // removed, which is not what the test title describes.
    const gap = ' '.repeat(3);
    const input = ['骨关节', '产品', '功效'].join(gap);
    const result = ToolExecutor.sanitizeRewrittenQuery(input);
    assert.ok(!/ {2,}/.test(result), `Should remove multi-spaces, got "${result}"`);
  });

  // Inputs of 100 and 99 UTF-16 code units respectively; the output must be at most 80.
  const truncCases = [
    ''.padEnd(100, '德国PM细胞营养素基础套装大白小红小白'),
    '这是一段超长的查询' + '关于产品的详细信息'.repeat(10),
  ];
  for (let i = 0; i < truncCases.length; i++) {
    it(`超长截断 case ${i + 1}`, { skip }, () => {
      const result = ToolExecutor.sanitizeRewrittenQuery(truncCases[i]);
      assert.ok(result.length <= 80, `Should truncate, got len=${result.length}`);
    });
  }

  it('特殊字符不崩溃', { skip }, () => {
    // Only checks that control characters do not throw and a string comes back.
    const specials = ['骨关节\n产品', '基础三合一\t怎么吃', '小红\r\n功效'];
    for (const s of specials) {
      const result = ToolExecutor.sanitizeRewrittenQuery(s);
      assert.equal(typeof result, 'string');
    }
  });

  it('已干净的query不被破坏', { skip }, () => {
    const clean = '德国PM基础三合一 大白 小红 小白 怎么吃';
    const result = ToolExecutor.sanitizeRewrittenQuery(clean);
    assert.ok(result.includes('基础三合一'), `Core preserved: got "${result}"`);
    assert.ok(result.includes('怎么吃'), `Action preserved: got "${result}"`);
  });

  it('全标点输入', { skip }, () => {
    // Punctuation-only input: the contract checked here is just "returns a string".
    const result = ToolExecutor.sanitizeRewrittenQuery(',,,。。。!!!');
    assert.equal(typeof result, 'string');
  });
});
// ================================================================
// 9. enrichQueryWithContext — multi-scenario tests
// ================================================================
describe('enrichQueryWithContext — 多场景深度', () => {
  // Each test builds its own session id from a distinct prefix plus the current
  // timestamp, so tracker state written by one test is not read by another.
  // (The previously declared suite-level `sid` was never used and has been removed.)

  it('新session空关键词 → 返回原始query', () => {
    const result = contextKeywordTracker.enrichQueryWithContext(`empty_sid_${Date.now()}`, '怎么吃', null);
    assert.ok(result.includes('怎么吃'), `Should return original: got "${result}"`);
  });

  it('有关键词+追问 → 关键词注入', () => {
    const s = `enrich_inject_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效详细介绍');
    const result = contextKeywordTracker.enrichQueryWithContext(s, '怎么吃', null);
    // NOTE(review): only verifies the original query survives; it does not
    // assert that a tracked keyword was actually injected.
    assert.ok(result.includes('怎么吃'), `Should include query: got "${result}"`);
  });

  it('非追问query → 不注入关键词', () => {
    const s = `enrich_noinject_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效');
    const result = contextKeywordTracker.enrichQueryWithContext(s, '德国PM公司在哪里', null);
    assert.equal(result, '德国PM公司在哪里', `Non-follow-up should not inject: got "${result}"`);
  });

  it('KB话题记忆优先于keyword tracker', () => {
    const s = `enrich_priority_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '一成系统三大平台');
    // Fresh KB topic on the session object, hit "now".
    const session = { _lastKbTopic: 'CC套装功效', _lastKbHitAt: Date.now() };
    const result = contextKeywordTracker.enrichQueryWithContext(s, '怎么吃', session);
    assert.ok(result.includes('CC'), `Should use KB topic: got "${result}"`);
  });

  it('KB话题过期(>60s) → 降级到keyword tracker', () => {
    const s = `enrich_expired_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '一成系统详细');
    // KB topic recorded 90 s ago, which this test expects to be treated as expired.
    const session = { _lastKbTopic: 'CC套装功效', _lastKbHitAt: Date.now() - 90000 };
    const result = contextKeywordTracker.enrichQueryWithContext(s, '怎么吃', session);
    assert.ok(!result.includes('CC'), `Should NOT use expired KB topic: got "${result}"`);
  });

  it('多轮更新后取最近关键词', () => {
    const s = `enrich_multi_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效');
    contextKeywordTracker.updateSession(s, '小红Activize怎么吃');
    contextKeywordTracker.updateSession(s, 'Q10辅酵素作用');
    const result = contextKeywordTracker.enrichQueryWithContext(s, '多少钱', null);
    // NOTE(review): does not check which of the three updates supplied the keyword.
    assert.ok(result.includes('多少钱'), `Should include query: got "${result}"`);
  });

  it('各种追问前缀都能触发enrichment', () => {
    const s = `enrich_prefixes_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效');
    const followUps = [
      '怎么吃', '功效是什么', '多少钱', '适合谁', '成分是什么', '哪里买', '副作用', '什么意思',
      '怎么用', '他的规格是什么', '它的包装是什么', '这款是什么剂型', '那个是什么形态',
      '一天几次', '每天几次', '每日几次',
    ];
    for (const fup of followUps) {
      const result = contextKeywordTracker.enrichQueryWithContext(s, fup, null);
      assert.ok(result.includes(fup), `"${fup}" should be in result: got "${result}"`);
    }
  });
});
// ================================================================
// 10. KB protection window — fine-grained timing and boundary tests
// ================================================================
describe('KB保护窗口 — 精细时序与边界', () => {
  // Length of the protection window in milliseconds.
  const WINDOW_MS = 60000;
  // Utterances that are nothing but a greeting/acknowledgement plus optional punctuation.
  const PURE_CHITCHAT = /^(喂|你好|嗨|谢谢|再见|拜拜|好的|嗯|哦|行|没事了|不用了|可以了)[,。!?\s]*$/;
  // Fixed reference time so boundary cases (59999 ms / 60000 ms) cannot be
  // shifted by wall-clock time passing between building the session and
  // evaluating the rule.
  const NOW = 1700000000000;

  /**
   * Local model of the protection-window rule exercised by this suite.
   * NOTE(review): this re-implements the rule inside the test file; it does not
   * call the production gateway code, so the two must be kept in sync by hand.
   *
   * @param {string} text - user utterance
   * @param {{_lastKbHitAt?: number|null}} session - object holding the last KB hit timestamp (ms)
   * @param {number} [now=Date.now()] - current time in ms; injectable for deterministic tests
   * @returns {boolean} whether the utterance is treated as a KB query
   */
  function simulateProtection(text, session, now = Date.now()) {
    const routed = shouldForceKnowledgeRoute(text);
    if (routed) return routed;
    const lastHit = session._lastKbHitAt;
    // A missing, null or 0 timestamp means "no KB history": never elevate.
    const insideWindow = Boolean(lastHit) && now - lastHit < WINDOW_MS;
    if (insideWindow && !PURE_CHITCHAT.test(text)) return true;
    return routed;
  }

  describe('窗口内(5s-59.999s)非闲聊提升', () => {
    const timings = [5000, 10000, 20000, 30000, 45000, 55000, 59000, 59999];
    for (const t of timings) {
      it(`${t}ms前KB hit + "哦这样啊" → 应走KB`, () => {
        const session = { _lastKbHitAt: NOW - t };
        assert.equal(simulateProtection('哦这样啊', session, NOW), true);
      });
    }
  });

  describe('窗口外(≥60s)不提升', () => {
    // 60000 is the exact boundary: the window check is a strict "<".
    const timings = [60000, 60001, 65000, 120000, 300000];
    for (const t of timings) {
      it(`${t}ms前KB hit + "哦这样啊" → 不走KB`, () => {
        const session = { _lastKbHitAt: NOW - t };
        assert.equal(simulateProtection('哦这样啊', session, NOW), false);
      });
    }
  });

  describe('窗口内各类纯闲聊不提升', () => {
    const chitchat = ['你好', '嗨', '谢谢', '再见', '拜拜', '好的', '嗯', '哦', '行', '没事了', '不用了', '可以了', '喂'];
    for (const c of chitchat) {
      it(`窗口内"${c}" → 不走KB`, () => {
        const session = { _lastKbHitAt: NOW - 5000 };
        assert.equal(simulateProtection(c, session, NOW), false);
      });
    }
  });

  describe('窗口内各类非闲聊提升', () => {
    const nonChat = [
      '然后呢', '还有吗', '继续', '还有什么', '那怎么办',
      '这样可以吗', '有什么注意事项', '跟别的有什么区别',
      '会不会有副作用', '我能吃吗', '孕妇可以吗', '小孩能吃吗',
      '老人适合吗', '饭前还是饭后', '要吃多久', '一天几次',
    ];
    for (const text of nonChat) {
      it(`窗口内"${text}" → 应走KB`, () => {
        const session = { _lastKbHitAt: NOW - 10000 };
        assert.equal(simulateProtection(text, session, NOW), true, `"${text}" should be elevated`);
      });
    }
  });

  it('无KB历史(_lastKbHitAt=0) → 不提升', () => {
    assert.equal(simulateProtection('然后呢', { _lastKbHitAt: 0 }, NOW), false);
  });

  it('无KB历史(undefined) → 不提升', () => {
    assert.equal(simulateProtection('然后呢', {}, NOW), false);
  });

  it('_lastKbHitAt=null → 不提升', () => {
    assert.equal(simulateProtection('然后呢', { _lastKbHitAt: null }, NOW), false);
  });
});
// ================================================================
// 11. End-to-end multi-turn simulation — additional scenario variants
// ================================================================
describe('端到端多轮模拟 — 更多变体', () => {
  // Builds a two-message history: one user turn followed by one assistant turn.
  const history = (userText, assistantText) => [
    { role: 'user', content: userText },
    { role: 'assistant', content: assistantText },
  ];
  // Arguments are forwarded untouched so a call without context stays a one-argument call.
  const expectKb = (...args) => assert.strictEqual(shouldForceKnowledgeRoute(...args), true);
  const expectNotKb = (...args) => assert.strictEqual(shouldForceKnowledgeRoute(...args), false);

  it('3轮产品→追问→质疑价格', () => {
    expectKb('Q10辅酵素功效');
    const priorTurns = history('Q10辅酵素功效', 'Q10...');
    expectKb('多少钱', priorTurns);
    expectKb('太贵了吧,你确定吗');
  });

  it('3轮公司→认证→怀疑合法性', () => {
    expectKb('德国PM公司介绍');
    expectKb('邓白氏AAA+认证');
    const priorTurns = history('邓白氏', '邓白氏是...');
    expectKb('我不信,网上说是传销', priorTurns);
  });

  it('4轮系统→功能→质疑→再查', () => {
    expectKb('一成系统介绍');
    const firstTurns = history('一成系统', '一成系统...');
    expectKb('行动圈是什么', firstTurns);
    expectKb('跟我了解的不一样');
    expectKb('你再查查一成系统');
  });

  it('5轮产品A→产品B→对比→质疑→纠正', () => {
    expectKb('大白怎么吃');
    expectKb('小红怎么吃');
    expectKb('大白和小红有什么区别');
    const priorTurns = history('区别', '大白是基础...');
    expectKb('你搞混了吧', priorTurns);
    expectKb('应该是小红提供能量大白补充矿物质');
  });

  it('连续4次质疑不同方式', () => {
    expectKb('小白功效');
    const priorTurns = history('小白功效', '小白...');
    ['不对吧', '你再查查', '我不信', '有什么根据'].forEach((challenge) => {
      expectKb(challenge, priorTurns);
    });
  });

  it('KB话题→闲聊打断→再回到KB话题', () => {
    expectKb('CC套装怎么用');
    expectNotKb('谢谢');
    expectKb('CC套装适合谁');
  });
});
// ================================================================
// 12. normalizeKnowledgeAlias — additional normalization scenarios
// ================================================================
describe('normalizeKnowledgeAlias — 更多归一化场景', () => {
  // Built explicitly so the "multi-space" case really contains several
  // consecutive spaces (a single literal space would not match its label).
  const multiSpace = ' '.repeat(3);

  // [input, substring expected in the normalized output, label used in the test title]
  const cases = [
    ['一成,,系统', '一成系统', '多标点分隔'],
    ['一成、系统', '一成系统', '顿号分隔'],
    [`一成${multiSpace}系统`, '一成系统', '多空格分隔'],
    ['大我产品', '大沃', '大我→大沃'],
    ['大卧介绍', '大沃', '大卧→大沃'],
    ['哎众享怎么用', 'Ai众享', '哎众享→Ai众享'],
    ['艾众享是什么', 'Ai众享', '艾众享→Ai众享'],
    ['盛卡学愿介绍', '盛咖学愿', '盛卡→盛咖'],
    ['圣咖学院怎么用', '盛咖学愿', '圣咖学院→盛咖学愿'],
  ];

  for (const [input, expectContain, label] of cases) {
    it(`${label}: "${input}" → 含"${expectContain}"`, () => {
      const result = normalizeKnowledgeAlias(input);
      assert.ok(result.includes(expectContain), `Got "${result}"`);
    });
  }
});
// Printed once this file has been evaluated to the end, i.e. after every suite
// above has been declared. It does not indicate that any test has passed.
console.log('\n=== KB保护扩展测试加载完成 ===\n');