Files
bigwo/test2/server/tests/test_kb_protection_extended.js
User 9567eb7358 feat(server): KB prompt优化、字幕修复、S2S重连、助手配置API
- assistantProfileConfig: KB answer prompt改为分层策略(严格产品信息+灵活常识补充)
- nativeVoiceGateway: S2S upstream自动重连(最多50次)、event 351字幕debounce(800ms取最长文本)
- toolExecutor: 确定性query改写增强、KB查询传递session上下文
- contextKeywordTracker: 支持KB话题记忆优先enrichment
- contentSafeGuard: 新增品牌安全内容过滤服务
- assistantProfileService: 新增助手配置CRUD服务
- routes/assistantProfile: 新增助手配置API路由
- knowledgeKeywords: 扩展KB关键词词典
- fastAsrCorrector: ASR纠错规则更新
- tests/: KB prompt测试、保护窗口测试、Viking性能测试
- docs/: 助手配置API文档、系统提示词目录
2026-03-24 17:19:36 +08:00

577 lines
23 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* KB保护窗口 + 质疑检测 + query去噪 + 话题记忆 深度扩展测试
* 与 test_kb_protection.js 互补,覆盖更多边界、组合、时序场景
*
* 运行方式: node --test tests/test_kb_protection_extended.js
*/
const { describe, it } = require('node:test');
const assert = require('node:assert/strict');
const { shouldForceKnowledgeRoute, normalizeKnowledgeAlias } = require('../services/realtimeDialogRouting');
const { hasKnowledgeRouteKeyword } = require('../services/knowledgeKeywords');
const contextKeywordTracker = require('../services/contextKeywordTracker');
// toolExecutor is loaded on a best-effort basis: if it cannot be required,
// ToolExecutor stays null and the sanitizeRewrittenQuery suite below is skipped.
let ToolExecutor = null;
try {
  ToolExecutor = require('../services/toolExecutor');
} catch (err) {
  // Keep the fallback, but report why the module failed to load so a broken
  // toolExecutor does not silently turn into skipped tests.
  const reason = err && err.message ? err.message : String(err);
  console.warn(`[test_kb_protection_extended] toolExecutor unavailable, related tests will be skipped: ${reason}`);
}

// Shared two-message history (one user turn, one assistant turn) passed as the
// second argument to shouldForceKnowledgeRoute by the context-dependent suites.
const kbCtx = [
  { role: 'user', content: '基础三合一怎么吃' },
  { role: 'assistant', content: '大白早上空腹1平勺温水冲服小红中午1平勺小白睡前1平勺...' },
];
// ================================================================
// 1. shouldForceKnowledgeRoute — challenge phrase combined with a product name
// ================================================================
describe('组合质疑+产品名 —— 质疑词嵌入具体产品场景', () => {
  // Each utterance mixes a product/topic name with a doubting phrase; every
  // one is expected to be routed to the knowledge base given kbCtx.
  const utterances = [
    '大白不是这样吃的',
    '小红功效你搞错了吧',
    'CC套装明明是乳霜',
    '基础三合一不是冲剂',
    'Q10你说的不对',
    'D-Drink不是这么用的',
    '一成系统跟我了解的不一样',
    '火炉原理好像不是这么说的',
    'IB5不可能是这个功效吧',
    '小白Restorate我记得不是这样',
    '儿童倍适应该是胶囊不是粉末',
    'Hair+你再查查',
    'NTC你确定是这个原理吗',
    '邓白氏谁说的AAA+',
    '关节套装真的有这个功效吗',
    'TopShape太夸张了吧',
    'ProShape氨基酸骗人的吧',
    '叶黄素我不信有这个作用',
    '乳清蛋白说的有问题',
    '运动饮料不是这个成分',
  ];

  utterances.forEach((utterance) => {
    it(`"${utterance}" → 应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.equal(routed, true, `"${utterance}" should route KB`);
    });
  });
});
// ================================================================
// 2. shouldForceKnowledgeRoute — challenge variants with punctuation / modal particles
// ================================================================
describe('带标点/语气词的质疑变体', () => {
  // Doubting phrases decorated with repeated punctuation or sentence-final
  // particles; all are expected to route to the knowledge base given kbCtx.
  const decorated = [
    '不对吧?',
    '不对不对不对!',
    '你搞错了吧!!',
    '说错了,,,',
    '我不信!真的假的?',
    '骗人的吧……',
    '太夸张了~',
    '离谱啊!',
    '扯淡吧??',
    '怎么可能???',
    '不可能!不是吧!',
    '好像不对哦~',
    '你再查查?',
    '核实一下嘛。',
    '真的吗?真的吗?',
    '谁说的啊?',
    '有什么根据呢?',
    '到底是什么啊!',
    '应该是胶囊呀~',
    '明明是粉末嘛!',
  ];

  decorated.forEach((utterance) => {
    it(`"${utterance}" → 应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.equal(routed, true, `"${utterance}" should route KB`);
    });
  });
});
// ================================================================
// 3. shouldForceKnowledgeRoute — challenges / follow-ups behind a leading prefix
// ================================================================
describe('前缀剥离 —— 带"那/那你/你再/再"前缀的质疑', () => {
  // Utterances that start with a conversational lead-in before the actual
  // challenge or follow-up; each must still be recognised as a KB follow-up.
  const withLeadIn = [
    '你再看看这个对不对',
    '帮我再确认一下',
    '你再看看吧',
    '再来说说',
    '麻烦你核实一下',
    '帮我确认一下',
    '那你确定吗',
    '那再确认一下',
    '那不对吧',
    '那你搞错了',
    '那我记得不是这样',
    '再帮我查查',
    '那再给我介绍一下',
    '那详细说说',
    '你再展开说说',
    '那怎么吃',
    '再讲讲功效是什么',
  ];

  withLeadIn.forEach((utterance) => {
    it(`"${utterance}" → 应识别为KB追问`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.equal(routed, true, `"${utterance}" should route KB`);
    });
  });
});
// ================================================================
// 4. shouldForceKnowledgeRoute — challenge embedded inside a longer sentence
// ================================================================
describe('长句中嵌入质疑 —— 质疑词不在句首', () => {
  // The doubting phrase is not at the start of the sentence; routing to the
  // knowledge base is still expected for every entry given kbCtx.
  const sentences = [
    '我刚才听你说的跟我了解的不一样',
    '你之前的回答好像有误吧',
    '按照我之前看到的资料应该是胶囊',
    '怎么跟我之前在网上搜的不一致',
    '别人告诉我是粉末的来着',
    '但是我觉得你说的不太对',
    '我看了很多资料你确定吗',
    '感觉你说的和我了解的有出入',
    '以前有人跟我说是冲着喝的',
    '但是网上说法跟你不一样',
    '我一直以为不是这样的',
    '到底是什么意思啊',
    '这些信息可靠吗有根据吗',
    '我朋友说你讲的不对',
    '这跟官方说的不一致吧',
  ];

  sentences.forEach((sentence) => {
    it(`"${sentence}" → 应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(sentence, kbCtx);
      assert.equal(routed, true, `"${sentence}" should route KB`);
    });
  });
});
// ================================================================
// 5. shouldForceKnowledgeRoute — pure follow-up pattern (subject + action)
// ================================================================
describe('subject+action追问模式 —— 带上下文', () => {
  // Follow-ups built from a pronoun-like subject plus an action/attribute;
  // with kbCtx supplied, each is expected to route to the knowledge base.
  const followUps = [
    '这个怎么吃',
    '那个功效是什么',
    '它适合谁',
    '这个产品多少钱',
    '那个产品哪里买',
    '这个怎么用',
    '那个怎么操作',
    '这个系统怎么配置',
    '这个产品成分是什么',
    '那个产品有什么功效',
    '它怎么服用',
    '这个有什么好处',
    '那个配方',
    '这个原理是什么',
    '这个产品适合什么人',
    '那个产品怎么买',
  ];

  followUps.forEach((question) => {
    it(`"${question}" → 有上下文应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(question, kbCtx);
      assert.equal(routed, true, `"${question}" should route KB with context`);
    });
  });
});
// ================================================================
// 6. shouldForceKnowledgeRoute — extended negative cases
// ================================================================
describe('否定用例扩展 —— 不应走KB的各类闲聊', () => {
  // Small talk that must NOT be routed to the knowledge base. The router is
  // called without any conversation context here.
  const smallTalk = [
    '你好',
    '嗨',
    '谢谢',
    '再见',
    '好的',
    '嗯嗯',
    '哈哈哈',
    '拜拜',
    '没事了',
    '不用了',
    '可以了',
    '行',
    '知道了',
    '明白了',
    '了解了',
    '好吧',
    '算了',
    '今天天气好',
    '你是谁',
    '你叫什么名字',
    '你是机器人吗',
    '讲个笑话',
    '唱首歌',
    '几点了',
    '我饿了',
    '晚安',
    '早上好',
    '下午好',
    '辛苦了',
    '厉害',
  ];

  smallTalk.forEach((utterance) => {
    it(`"${utterance}" → 不应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance);
      assert.equal(routed, false, `"${utterance}" should NOT route KB`);
    });
  });
});
// ================================================================
// 7. hasKnowledgeRouteKeyword — systematic coverage of every keyword category
// ================================================================
describe('hasKnowledgeRouteKeyword — 产品名关键词系统覆盖', () => {
  // Product names that the keyword matcher is expected to recognise.
  const productNames = [
    '大白', '小红', '小白', '基础三合一', 'Basics', 'Activize', 'Restorate',
    '儿童倍适', 'CC套装', 'CC-Cell', 'Q10', 'IB5', 'D-Drink',
    'Hair+', 'ProShape氨基酸', 'Herbal Tea', 'TopShape', 'Men Face',
    'MEN+', '乐活', '草本茶', '叶黄素', '葡萄籽', '益生菌',
    '胶原蛋白', '关节套装', '乳清蛋白', '运动饮料', '苹果细胞抗氧素',
  ];

  productNames.forEach((name) => {
    it(`产品"${name}" → 应命中`, () => {
      const matched = hasKnowledgeRouteKeyword(name);
      assert.equal(matched, true, `"${name}" should match`);
    });
  });
});
describe('hasKnowledgeRouteKeyword — FAQ/科学关键词系统覆盖', () => {
  // FAQ and science-related keywords that the matcher is expected to recognise.
  // Each keyword appears exactly once so that no two tests share a title.
  const faqKeywords = [
    '怎么吃', '功效', '成分', '多少钱', '价格', '适合谁',
    '副作用', '多久见效', '见效', '好转反应', '是不是传销',
    '传销', '保质期', '哪里买', '怎么买',
    'NTC', '火炉原理', '阿育吠陀', '细胞营养素',
    '正规吗', '合法吗', '贵不贵', '不舒服',
  ];

  for (const kw of faqKeywords) {
    it(`FAQ"${kw}" → 应命中`, () => {
      assert.equal(hasKnowledgeRouteKeyword(kw), true, `"${kw}" should match`);
    });
  }
});
describe('hasKnowledgeRouteKeyword — 事业/培训关键词系统覆盖', () => {
  // Business-opportunity and training keywords that must be recognised.
  const businessTerms = [
    '招商', '代理', '加盟', '事业机会', '创业', '起步三关',
    '精品会议', '成长上总裁', '做PM', '加入PM', 'PM事业',
  ];

  businessTerms.forEach((term) => {
    it(`事业"${term}" → 应命中`, () => {
      const matched = hasKnowledgeRouteKeyword(term);
      assert.equal(matched, true, `"${term}" should match`);
    });
  });
});
describe('hasKnowledgeRouteKeyword — 质疑类关键词系统覆盖', () => {
  // Doubt/challenge phrases (plus dosage-form words such as powder/capsule)
  // that must be recognised as knowledge-route keywords.
  const doubtTerms = [
    '搞错了', '说错了', '弄错了', '不对', '不准确', '有误',
    '确定吗', '真的吗', '不可能', '胡说', '骗人', '离谱',
    '核实一下', '再查查', '粉末', '胶囊', '片剂', '冲剂',
    '口服液', '软胶囊', '颗粒', '膏状', '到底是', '应该是',
    '明明是', '不信', '吹牛', '扯淡', '有依据吗', '谁说的',
  ];

  doubtTerms.forEach((term) => {
    it(`质疑"${term}" → 应命中`, () => {
      const matched = hasKnowledgeRouteKeyword(term);
      assert.equal(matched, true, `"${term}" should match`);
    });
  });
});
describe('hasKnowledgeRouteKeyword — 不应命中的普通词汇', () => {
  // Everyday words that must NOT be treated as knowledge-route keywords.
  const everydayWords = [
    '你好', '天气', '笑话', '唱歌', '吃饭', '睡觉',
    '电影', '音乐', '游戏', '旅游', '工作', '学习',
    '开心', '难过', '累', '饿', '渴', '无聊',
  ];

  everydayWords.forEach((word) => {
    it(`闲聊"${word}" → 不应命中`, () => {
      const matched = hasKnowledgeRouteKeyword(word);
      assert.equal(matched, false, `"${word}" should NOT match`);
    });
  });
});
// ================================================================
// 8. sanitizeRewrittenQuery — in-depth de-noising tests
// ================================================================
describe('sanitizeRewrittenQuery — 深度去噪截断', () => {
  // Skip the whole suite when toolExecutor could not be loaded or does not
  // expose sanitizeRewrittenQuery.
  const skip = !ToolExecutor || !ToolExecutor.sanitizeRewrittenQuery;

  // [input, substring that must survive sanitising, label used in the title]
  const fillerCases = [
    ['骨关节啊嗯呢产品', '骨关节', '去除嗯啊呢'],
    ['那个就是说这个呢功效是什么', '功效', '去除口语填充'],
    ['骨关节哦嗯额功效', '骨关节', '去除多个语气词'],
    ['基础三合一呀怎么吃呀', '基础三合一', '去除呀'],
    ['嗯嗯那个小红功效', '小红', '去除嗯嗯那个'],
  ];
  for (const [input, expectContain, label] of fillerCases) {
    it(`${label}: "${input}" → 含"${expectContain}"`, { skip }, () => {
      const result = ToolExecutor.sanitizeRewrittenQuery(input);
      assert.ok(result.includes(expectContain), `Got "${result}"`);
    });
  }

  it('多次重复去重: "小红 小红 小红 功效"', { skip }, () => {
    const result = ToolExecutor.sanitizeRewrittenQuery('小红 小红 小红 功效');
    const count = (result.match(/小红/g) || []).length;
    assert.ok(count <= 2, `Should dedupe, got "${result}" (${count} occurrences)`);
  });

  it('去除连续空格', { skip }, () => {
    // The input must actually contain runs of spaces, and the check must look
    // for a run of two or more: a single separating space is not a failure.
    const result = ToolExecutor.sanitizeRewrittenQuery('骨关节   产品    功效');
    assert.ok(!/ {2,}/.test(result), `Should remove multi-spaces, got "${result}"`);
  });

  // Inputs longer than the 80-character limit asserted below.
  const truncCases = [
    ''.padEnd(100, '德国PM细胞营养素基础套装大白小红小白'),
    '这是一段超长的查询' + '关于产品的详细信息'.repeat(10),
  ];
  for (let i = 0; i < truncCases.length; i++) {
    it(`超长截断 case ${i + 1}`, { skip }, () => {
      const result = ToolExecutor.sanitizeRewrittenQuery(truncCases[i]);
      assert.ok(result.length <= 80, `Should truncate, got len=${result.length}`);
    });
  }

  it('特殊字符不崩溃', { skip }, () => {
    // Only checks that control characters do not throw and a string comes back.
    const specials = ['骨关节\n产品', '基础三合一\t怎么吃', '小红\r\n功效'];
    for (const s of specials) {
      const result = ToolExecutor.sanitizeRewrittenQuery(s);
      assert.equal(typeof result, 'string');
    }
  });

  it('已干净的query不被破坏', { skip }, () => {
    const clean = '德国PM基础三合一 大白 小红 小白 怎么吃';
    const result = ToolExecutor.sanitizeRewrittenQuery(clean);
    assert.ok(result.includes('基础三合一'), `Core preserved: got "${result}"`);
    assert.ok(result.includes('怎么吃'), `Action preserved: got "${result}"`);
  });

  it('全标点输入', { skip }, () => {
    // Punctuation-only input: only the return type is asserted.
    const result = ToolExecutor.sanitizeRewrittenQuery(',,,。。。!!!');
    assert.equal(typeof result, 'string');
  });
});
// ================================================================
// 9. enrichQueryWithContext — multi-scenario tests
// ================================================================
describe('enrichQueryWithContext — 多场景深度', () => {
  // Every test builds its own session id (prefix + timestamp) so state stored
  // through contextKeywordTracker.updateSession is not shared between tests.

  it('新session空关键词 → 返回原始query', () => {
    const result = contextKeywordTracker.enrichQueryWithContext(`empty_sid_${Date.now()}`, '怎么吃', null);
    assert.ok(result.includes('怎么吃'), `Should return original: got "${result}"`);
  });

  it('有关键词+追问 → 关键词注入', () => {
    const s = `enrich_inject_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效详细介绍');
    const result = contextKeywordTracker.enrichQueryWithContext(s, '怎么吃', null);
    // NOTE(review): this only checks that the query text is retained; it does
    // not verify that any tracked keyword was actually injected.
    assert.ok(result.includes('怎么吃'), `Should include query: got "${result}"`);
  });

  it('非追问query → 不注入关键词', () => {
    const s = `enrich_noinject_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效');
    const result = contextKeywordTracker.enrichQueryWithContext(s, '德国PM公司在哪里', null);
    assert.equal(result, '德国PM公司在哪里', `Non-follow-up should not inject: got "${result}"`);
  });

  it('KB话题记忆优先于keyword tracker', () => {
    const s = `enrich_priority_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '一成系统三大平台');
    // A fresh KB hit on the session object is expected to win over the tracker.
    const session = { _lastKbTopic: 'CC套装功效', _lastKbHitAt: Date.now() };
    const result = contextKeywordTracker.enrichQueryWithContext(s, '怎么吃', session);
    assert.ok(result.includes('CC'), `Should use KB topic: got "${result}"`);
  });

  it('KB话题过期(>60s) → 降级到keyword tracker', () => {
    const s = `enrich_expired_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '一成系统详细');
    // The KB hit is stamped 90 s in the past, beyond the 60 s named in the title.
    const session = { _lastKbTopic: 'CC套装功效', _lastKbHitAt: Date.now() - 90000 };
    const result = contextKeywordTracker.enrichQueryWithContext(s, '怎么吃', session);
    assert.ok(!result.includes('CC'), `Should NOT use expired KB topic: got "${result}"`);
  });

  it('多轮更新后取最近关键词', () => {
    const s = `enrich_multi_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效');
    contextKeywordTracker.updateSession(s, '小红Activize怎么吃');
    contextKeywordTracker.updateSession(s, 'Q10辅酵素作用');
    const result = contextKeywordTracker.enrichQueryWithContext(s, '多少钱', null);
    // NOTE(review): only the presence of the query is asserted, not which of
    // the three updates supplied the keywords.
    assert.ok(result.includes('多少钱'), `Should include query: got "${result}"`);
  });

  it('各种追问前缀都能触发enrichment', () => {
    const s = `enrich_prefixes_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效');
    const followUps = ['怎么吃', '功效是什么', '多少钱', '适合谁', '成分是什么', '哪里买', '副作用', '什么意思', '怎么用', '他的规格是什么', '它的包装是什么', '这款是什么剂型', '那个是什么形态', '一天几次', '每天几次', '每日几次'];
    for (const fup of followUps) {
      const result = contextKeywordTracker.enrichQueryWithContext(s, fup, null);
      assert.ok(result.includes(fup), `"${fup}" should be in result: got "${result}"`);
    }
  });
});
// ================================================================
// 10. KB protection window — fine-grained timing and boundary tests
// ================================================================
describe('KB保护窗口 — 精细时序与边界', () => {
  // Length of the protection window, in milliseconds.
  const WINDOW_MS = 60000;
  // Utterances that count as pure chit-chat (optionally followed by
  // punctuation/whitespace) and are therefore never elevated.
  const PURE_CHITCHAT_RE = /^(喂|你好|嗨|谢谢|再见|拜拜|好的|嗯|哦|行|没事了|不用了|可以了)[,。!?\s]*$/;

  /**
   * Local model of the protection-window rule exercised by this suite.
   * NOTE(review): this is a simulation defined in the test file, not the
   * production implementation — confirm it mirrors the real gateway logic.
   *
   * @param {string} text - user utterance.
   * @param {{_lastKbHitAt?: number|null}} session - session carrying the last KB hit timestamp.
   * @param {number} [now=Date.now()] - current time in ms; injectable so that
   *   boundary cases (e.g. 59999 ms) do not depend on wall-clock drift between
   *   building the session and evaluating the rule.
   * @returns {*} the router's own result when it is truthy; otherwise true when
   *   the utterance is elevated by the window, else the router's falsy result.
   */
  function simulateProtection(text, session, now = Date.now()) {
    const routed = shouldForceKnowledgeRoute(text);
    if (routed) return routed;
    const lastHitAt = session._lastKbHitAt;
    // A missing, null or zero timestamp means there is no KB history to protect.
    const insideWindow = Boolean(lastHitAt) && now - lastHitAt < WINDOW_MS;
    if (insideWindow && !PURE_CHITCHAT_RE.test(text)) return true;
    return routed;
  }

  describe('窗口内5s-55s非闲聊提升', () => {
    const timings = [5000, 10000, 20000, 30000, 45000, 55000, 59000, 59999];
    for (const t of timings) {
      it(`${t}ms前KB hit + "哦这样啊" → 应走KB`, () => {
        // Capture the clock once so the elapsed time is exactly t.
        const now = Date.now();
        const session = { _lastKbHitAt: now - t };
        assert.equal(simulateProtection('哦这样啊', session, now), true);
      });
    }
  });

  describe('窗口外(61s+)不提升', () => {
    const timings = [60001, 65000, 120000, 300000];
    for (const t of timings) {
      it(`${t}ms前KB hit + "哦这样啊" → 不走KB`, () => {
        const now = Date.now();
        const session = { _lastKbHitAt: now - t };
        assert.equal(simulateProtection('哦这样啊', session, now), false);
      });
    }
  });

  describe('窗口内各类纯闲聊不提升', () => {
    const chitchat = ['你好', '嗨', '谢谢', '再见', '拜拜', '好的', '嗯', '哦', '行', '没事了', '不用了', '可以了', '喂'];
    for (const c of chitchat) {
      it(`窗口内"${c}" → 不走KB`, () => {
        const now = Date.now();
        const session = { _lastKbHitAt: now - 5000 };
        assert.equal(simulateProtection(c, session, now), false);
      });
    }
  });

  describe('窗口内各类非闲聊提升', () => {
    const nonChat = [
      '然后呢', '还有吗', '继续', '还有什么', '那怎么办',
      '这样可以吗', '有什么注意事项', '跟别的有什么区别',
      '会不会有副作用', '我能吃吗', '孕妇可以吗', '小孩能吃吗',
      '老人适合吗', '饭前还是饭后', '要吃多久', '一天几次',
    ];
    for (const text of nonChat) {
      it(`窗口内"${text}" → 应走KB`, () => {
        const now = Date.now();
        const session = { _lastKbHitAt: now - 10000 };
        assert.equal(simulateProtection(text, session, now), true, `"${text}" should be elevated`);
      });
    }
  });

  it('无KB历史(_lastKbHitAt=0) → 不提升', () => {
    assert.equal(simulateProtection('然后呢', { _lastKbHitAt: 0 }), false);
  });

  it('无KB历史(undefined) → 不提升', () => {
    assert.equal(simulateProtection('然后呢', {}), false);
  });

  it('_lastKbHitAt=null → 不提升', () => {
    assert.equal(simulateProtection('然后呢', { _lastKbHitAt: null }), false);
  });
});
// ================================================================
// 11. End-to-end multi-turn simulation — more variant scenarios
// ================================================================
describe('端到端多轮模拟 — 更多变体', () => {
  // Builds the two-message history (user turn, then assistant turn) that is
  // handed to the router as conversation context.
  const history = (userText, assistantText) => [
    { role: 'user', content: userText },
    { role: 'assistant', content: assistantText },
  ];

  it('3轮产品→追问→质疑价格', () => {
    assert.equal(shouldForceKnowledgeRoute('Q10辅酵素功效'), true);
    const priorTurns = history('Q10辅酵素功效', 'Q10...');
    assert.equal(shouldForceKnowledgeRoute('多少钱', priorTurns), true);
    assert.equal(shouldForceKnowledgeRoute('太贵了吧,你确定吗'), true);
  });

  it('3轮公司→认证→怀疑合法性', () => {
    assert.equal(shouldForceKnowledgeRoute('德国PM公司介绍'), true);
    assert.equal(shouldForceKnowledgeRoute('邓白氏AAA+认证'), true);
    const priorTurns = history('邓白氏', '邓白氏是...');
    assert.equal(shouldForceKnowledgeRoute('我不信,网上说是传销', priorTurns), true);
  });

  it('4轮系统→功能→质疑→再查', () => {
    assert.equal(shouldForceKnowledgeRoute('一成系统介绍'), true);
    const priorTurns = history('一成系统', '一成系统...');
    assert.equal(shouldForceKnowledgeRoute('行动圈是什么', priorTurns), true);
    assert.equal(shouldForceKnowledgeRoute('跟我了解的不一样'), true);
    assert.equal(shouldForceKnowledgeRoute('你再查查一成系统'), true);
  });

  it('5轮产品A→产品B→对比→质疑→纠正', () => {
    assert.equal(shouldForceKnowledgeRoute('大白怎么吃'), true);
    assert.equal(shouldForceKnowledgeRoute('小红怎么吃'), true);
    assert.equal(shouldForceKnowledgeRoute('大白和小红有什么区别'), true);
    const priorTurns = history('区别', '大白是基础...');
    assert.equal(shouldForceKnowledgeRoute('你搞混了吧', priorTurns), true);
    assert.equal(shouldForceKnowledgeRoute('应该是小红提供能量大白补充矿物质'), true);
  });

  it('连续4次质疑不同方式', () => {
    assert.equal(shouldForceKnowledgeRoute('小白功效'), true);
    const priorTurns = history('小白功效', '小白...');
    // Four differently worded challenges against the same history.
    assert.equal(shouldForceKnowledgeRoute('不对吧', priorTurns), true);
    assert.equal(shouldForceKnowledgeRoute('你再查查', priorTurns), true);
    assert.equal(shouldForceKnowledgeRoute('我不信', priorTurns), true);
    assert.equal(shouldForceKnowledgeRoute('有什么根据', priorTurns), true);
  });

  it('KB话题→闲聊打断→再回到KB话题', () => {
    assert.equal(shouldForceKnowledgeRoute('CC套装怎么用'), true);
    assert.equal(shouldForceKnowledgeRoute('谢谢'), false);
    assert.equal(shouldForceKnowledgeRoute('CC套装适合谁'), true);
  });
});
// ================================================================
// 12. normalizeKnowledgeAlias — more normalisation scenarios
// ================================================================
describe('normalizeKnowledgeAlias — 更多归一化场景', () => {
  // Each row: [raw input, substring expected in the normalised output, label].
  const aliasTable = [
    ['一成,,系统', '一成系统', '多标点分隔'],
    ['一成、系统', '一成系统', '顿号分隔'],
    ['一成 系统', '一成系统', '多空格分隔'],
    ['大我产品', '大沃', '大我→大沃'],
    ['大卧介绍', '大沃', '大卧→大沃'],
    ['哎众享怎么用', 'Ai众享', '哎众享→Ai众享'],
    ['艾众享是什么', 'Ai众享', '艾众享→Ai众享'],
    ['盛卡学愿介绍', '盛咖学愿', '盛卡→盛咖'],
    ['圣咖学院怎么用', '盛咖学愿', '圣咖学院→盛咖学愿'],
  ];

  aliasTable.forEach(([rawText, expectedPart, caseLabel]) => {
    it(`${caseLabel}: "${rawText}" → 含"${expectedPart}"`, () => {
      const normalised = normalizeKnowledgeAlias(rawText);
      const found = normalised.includes(expectedPart);
      assert.ok(found, `Got "${normalised}"`);
    });
  });
});
// Printed once this file's top-level code has executed; the message text says
// that the extended KB protection tests have finished loading.
console.log('\n=== KB保护扩展测试加载完成 ===\n');