/**
 * Extended deep-dive tests for the KB protection window, challenge detection,
 * query de-noising and topic memory.
 * Complements test_kb_protection.js with additional boundary, combination and
 * timing scenarios.
 *
 * Run with: node --test tests/test_kb_protection_extended.js
 */
const { describe, it } = require('node:test');
const assert = require('node:assert/strict');
const { shouldForceKnowledgeRoute, normalizeKnowledgeAlias } = require('../services/realtimeDialogRouting');
const { hasKnowledgeRouteKeyword } = require('../services/knowledgeKeywords');
const contextKeywordTracker = require('../services/contextKeywordTracker');

// toolExecutor is loaded on a best-effort basis: if requiring it throws,
// ToolExecutor stays null so that dependent tests can detect its absence.
let ToolExecutor = null;
try {
  ToolExecutor = require('../services/toolExecutor');
} catch (loadError) {
  ToolExecutor = null;
}

// A prior user/assistant exchange handed to the router as conversation context.
const kbCtx = [
  { role: 'user', content: '基础三合一怎么吃' },
  { role: 'assistant', content: '大白早上空腹1平勺温水冲服,小红中午1平勺,小白睡前1平勺...' },
];

// ================================================================
// 1. shouldForceKnowledgeRoute — challenge phrase combined with a product name
// ================================================================
describe('组合质疑+产品名 —— 质疑词嵌入具体产品场景', () => {
  // Each utterance embeds a challenge phrase in a sentence about a concrete product/topic.
  const combos = [
    '大白不是这样吃的',
    '小红功效你搞错了吧',
    'CC套装明明是乳霜',
    '基础三合一不是冲剂',
    'Q10你说的不对',
    'D-Drink不是这么用的',
    '一成系统跟我了解的不一样',
    '火炉原理好像不是这么说的',
    'IB5不可能是这个功效吧',
    '小白Restorate我记得不是这样',
    '儿童倍适应该是胶囊不是粉末',
    'Hair+你再查查',
    'NTC你确定是这个原理吗',
    '邓白氏谁说的AAA+',
    '关节套装真的有这个功效吗',
    'TopShape太夸张了吧',
    'ProShape氨基酸骗人的吧',
    '叶黄素我不信有这个作用',
    '乳清蛋白说的有问题',
    '运动饮料不是这个成分',
  ];

  combos.forEach((utterance) => {
    it(`"${utterance}" → 应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.equal(routed, true, `"${utterance}" should route KB`);
    });
  });
});

// ================================================================
// 2.
// shouldForceKnowledgeRoute — challenge variants with punctuation / modal particles
// ================================================================
describe('带标点/语气词的质疑变体', () => {
  // Challenge phrases decorated with repeated punctuation and sentence-final particles.
  const variants = [
    '不对吧?',
    '不对不对不对!',
    '你搞错了吧!!',
    '说错了,,,',
    '我不信!真的假的?',
    '骗人的吧……',
    '太夸张了~',
    '离谱啊!',
    '扯淡吧??',
    '怎么可能???',
    '不可能!不是吧!',
    '好像不对哦~',
    '你再查查?',
    '核实一下嘛。',
    '真的吗?真的吗?',
    '谁说的啊?',
    '有什么根据呢?',
    '到底是什么啊!',
    '应该是胶囊呀~',
    '明明是粉末嘛!',
  ];

  variants.forEach((utterance) => {
    it(`"${utterance}" → 应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.equal(routed, true, `"${utterance}" should route KB`);
    });
  });
});

// ================================================================
// 3. shouldForceKnowledgeRoute — challenges after prefix stripping
// ================================================================
describe('前缀剥离 —— 带"那/那你/你再/再"前缀的质疑', () => {
  // Follow-ups and challenges that start with a leading filler prefix.
  const prefixed = [
    '你再看看这个对不对',
    '帮我再确认一下',
    '你再看看吧',
    '再来说说',
    '麻烦你核实一下',
    '帮我确认一下',
    '那你确定吗',
    '那再确认一下',
    '那不对吧',
    '那你搞错了',
    '那我记得不是这样',
    '再帮我查查',
    '那再给我介绍一下',
    '那详细说说',
    '你再展开说说',
    '那怎么吃',
    '再讲讲功效是什么',
  ];

  prefixed.forEach((utterance) => {
    it(`"${utterance}" → 应识别为KB追问`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.equal(routed, true, `"${utterance}" should route KB`);
    });
  });
});

// ================================================================
// 4. shouldForceKnowledgeRoute — challenge embedded in a long sentence
// ================================================================
describe('长句中嵌入质疑 —— 质疑词不在句首', () => {
  // The challenge phrase appears somewhere other than the start of the sentence.
  const longSentences = [
    '我刚才听你说的跟我了解的不一样',
    '你之前的回答好像有误吧',
    '按照我之前看到的资料应该是胶囊',
    '怎么跟我之前在网上搜的不一致',
    '别人告诉我是粉末的来着',
    '但是我觉得你说的不太对',
    '我看了很多资料你确定吗',
    '感觉你说的和我了解的有出入',
    '以前有人跟我说是冲着喝的',
    '但是网上说法跟你不一样',
    '我一直以为不是这样的',
    '到底是什么意思啊',
    '这些信息可靠吗有根据吗',
    '我朋友说你讲的不对',
    '这跟官方说的不一致吧',
  ];

  longSentences.forEach((utterance) => {
    it(`"${utterance}" → 应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.equal(routed, true, `"${utterance}" should route KB`);
    });
  });
});

// ================================================================
// 5.
// shouldForceKnowledgeRoute — pure follow-up pattern (subject + action)
// ================================================================
describe('subject+action追问模式 —— 带上下文', () => {
  // Pronoun-style subject followed by an action; evaluated together with kbCtx.
  const subjectActions = [
    '这个怎么吃',
    '那个功效是什么',
    '它适合谁',
    '这个产品多少钱',
    '那个产品哪里买',
    '这个怎么用',
    '那个怎么操作',
    '这个系统怎么配置',
    '这个产品成分是什么',
    '那个产品有什么功效',
    '它怎么服用',
    '这个有什么好处',
    '那个配方',
    '这个原理是什么',
    '这个产品适合什么人',
    '那个产品怎么买',
  ];

  subjectActions.forEach((utterance) => {
    it(`"${utterance}" → 有上下文应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance, kbCtx);
      assert.equal(routed, true, `"${utterance}" should route KB with context`);
    });
  });
});

// ================================================================
// 6. shouldForceKnowledgeRoute — extended negative cases
// ================================================================
describe('否定用例扩展 —— 不应走KB的各类闲聊', () => {
  // Small talk; the router is called WITHOUT any conversation context here.
  const chitchat = [
    '你好',
    '嗨',
    '谢谢',
    '再见',
    '好的',
    '嗯嗯',
    '哈哈哈',
    '拜拜',
    '没事了',
    '不用了',
    '可以了',
    '行',
    '知道了',
    '明白了',
    '了解了',
    '好吧',
    '算了',
    '今天天气好',
    '你是谁',
    '你叫什么名字',
    '你是机器人吗',
    '讲个笑话',
    '唱首歌',
    '几点了',
    '我饿了',
    '晚安',
    '早上好',
    '下午好',
    '辛苦了',
    '厉害',
  ];

  chitchat.forEach((utterance) => {
    it(`"${utterance}" → 不应走KB`, () => {
      const routed = shouldForceKnowledgeRoute(utterance);
      assert.equal(routed, false, `"${utterance}" should NOT route KB`);
    });
  });
});

// ================================================================
// 7.
// hasKnowledgeRouteKeyword — systematic coverage of every keyword category
// ================================================================
describe('hasKnowledgeRouteKeyword — 产品名关键词系统覆盖', () => {
  // Product names that must each be recognised as a knowledge-route keyword.
  const productKeywords = [
    '大白', '小红', '小白', '基础三合一', 'Basics', 'Activize', 'Restorate',
    '儿童倍适', 'CC套装', 'CC-Cell', 'Q10', 'IB5', 'D-Drink', 'Hair+',
    'ProShape氨基酸', 'Herbal Tea', 'TopShape', 'Men Face', 'MEN+',
    '乐活', '草本茶', '叶黄素', '葡萄籽', '益生菌', '胶原蛋白',
    '关节套装', '乳清蛋白', '运动饮料', '苹果细胞抗氧素',
  ];
  for (const kw of productKeywords) {
    it(`产品"${kw}" → 应命中`, () => {
      assert.equal(hasKnowledgeRouteKeyword(kw), true, `"${kw}" should match`);
    });
  }
});

describe('hasKnowledgeRouteKeyword — FAQ/科学关键词系统覆盖', () => {
  // FAQ and science terms. Each entry appears once so no test case is registered twice.
  const faqKeywords = [
    '怎么吃', '功效', '成分', '多少钱', '价格', '适合谁', '副作用',
    '多久见效', '见效', '好转反应', '是不是传销', '传销',
    '保质期', '哪里买', '怎么买', 'NTC', '火炉原理', '阿育吠陀',
    '细胞营养素', '正规吗', '合法吗', '贵不贵', '不舒服',
  ];
  for (const kw of faqKeywords) {
    it(`FAQ"${kw}" → 应命中`, () => {
      assert.equal(hasKnowledgeRouteKeyword(kw), true, `"${kw}" should match`);
    });
  }
});

describe('hasKnowledgeRouteKeyword — 事业/培训关键词系统覆盖', () => {
  // Business-opportunity and training terms.
  const bizKeywords = [
    '招商', '代理', '加盟', '事业机会', '创业', '起步三关',
    '精品会议', '成长上总裁', '做PM', '加入PM', 'PM事业',
  ];
  for (const kw of bizKeywords) {
    it(`事业"${kw}" → 应命中`, () => {
      assert.equal(hasKnowledgeRouteKeyword(kw), true, `"${kw}" should match`);
    });
  }
});

describe('hasKnowledgeRouteKeyword — 质疑类关键词系统覆盖', () => {
  // Challenge / doubt phrases plus dosage-form words used when correcting an answer.
  const challengeKeywords = [
    '搞错了', '说错了', '弄错了', '不对', '不准确', '有误',
    '确定吗', '真的吗', '不可能', '胡说', '骗人', '离谱',
    '核实一下', '再查查', '粉末', '胶囊', '片剂', '冲剂',
    '口服液', '软胶囊', '颗粒', '膏状', '到底是', '应该是',
    '明明是', '不信', '吹牛', '扯淡', '有依据吗', '谁说的',
  ];
  for (const kw of challengeKeywords) {
    it(`质疑"${kw}" → 应命中`, () => {
      assert.equal(hasKnowledgeRouteKeyword(kw), true, `"${kw}" should match`);
    });
  }
});

describe('hasKnowledgeRouteKeyword — 不应命中的普通词汇', () => {
  // Everyday words that must NOT be treated as knowledge-route keywords.
  const noMatch = [
    '你好', '天气', '笑话', '唱歌', '吃饭', '睡觉', '电影', '音乐', '游戏',
    '旅游', '工作', '学习', '开心', '难过', '累', '饿', '渴', '无聊',
  ];
  for (const kw of noMatch) {
    it(`闲聊"${kw}" → 不应命中`, () => {
      assert.equal(hasKnowledgeRouteKeyword(kw), false, `"${kw}" should NOT match`);
    });
  }
});

// ================================================================
// 8. sanitizeRewrittenQuery — in-depth de-noising tests
// ================================================================
describe('sanitizeRewrittenQuery — 深度去噪截断', () => {
  // Skip the whole suite when toolExecutor (or the function under test) is unavailable.
  const skip = !ToolExecutor || !ToolExecutor.sanitizeRewrittenQuery;

  // [input, substring that must survive, test label]
  const fillerCases = [
    ['骨关节啊嗯呢产品', '骨关节', '去除嗯啊呢'],
    ['那个就是说这个呢功效是什么', '功效', '去除口语填充'],
    ['骨关节哦嗯额功效', '骨关节', '去除多个语气词'],
    ['基础三合一呀怎么吃呀', '基础三合一', '去除呀'],
    ['嗯嗯那个小红功效', '小红', '去除嗯嗯那个'],
  ];
  for (const [input, expectContain, label] of fillerCases) {
    it(`${label}: "${input}" → 含"${expectContain}"`, { skip }, () => {
      const result = ToolExecutor.sanitizeRewrittenQuery(input);
      assert.ok(result.includes(expectContain), `Got "${result}"`);
    });
  }

  it('多次重复去重: "小红 小红 小红 功效"', { skip }, () => {
    const result = ToolExecutor.sanitizeRewrittenQuery('小红 小红 小红 功效');
    const count = (result.match(/小红/g) || []).length;
    assert.ok(count <= 2, `Should dedupe, got "${result}" (${count} occurrences)`);
  });

  it('去除连续空格', { skip }, () => {
    // The input must contain real runs of spaces, and only runs of two or more
    // are rejected: single separators are legitimate (see the clean-query case below).
    const result = ToolExecutor.sanitizeRewrittenQuery('骨关节   产品  功效');
    assert.ok(!/ {2,}/.test(result), `Should remove multi-spaces, got "${result}"`);
  });

  // Inputs longer than the 80-character limit asserted below.
  const truncCases = [
    ''.padEnd(100, '德国PM细胞营养素基础套装大白小红小白'),
    '这是一段超长的查询' + '关于产品的详细信息'.repeat(10),
  ];
  truncCases.forEach((longInput, index) => {
    it(`超长截断 case ${index + 1}`, { skip }, () => {
      const result = ToolExecutor.sanitizeRewrittenQuery(longInput);
      assert.ok(result.length <= 80, `Should truncate, got len=${result.length}`);
    });
  });

  it('特殊字符不崩溃', { skip }, () => {
    // Control characters must not make the sanitizer throw or return a non-string.
    const specials = ['骨关节\n产品', '基础三合一\t怎么吃', '小红\r\n功效'];
    for (const s of specials) {
      const result = ToolExecutor.sanitizeRewrittenQuery(s);
      assert.equal(typeof result, 'string');
    }
  });

  it('已干净的query不被破坏', { skip }, () => {
    const clean = '德国PM基础三合一 大白 小红 小白 怎么吃';
    const result = ToolExecutor.sanitizeRewrittenQuery(clean);
    assert.ok(result.includes('基础三合一'), `Core preserved: got "${result}"`);
    assert.ok(result.includes('怎么吃'), `Action preserved: got "${result}"`);
  });

  it('全标点输入', { skip }, () => {
    const result = ToolExecutor.sanitizeRewrittenQuery(',,,。。。!!!');
    assert.equal(typeof result, 'string');
  });
});

// ================================================================
// 9. enrichQueryWithContext — multi-scenario tests
// ================================================================
describe('enrichQueryWithContext — 多场景深度', () => {
  // Every test builds its own session id (prefix + timestamp) so tracker state
  // written by one test is not read by another.

  it('新session空关键词 → 返回原始query', () => {
    const result = contextKeywordTracker.enrichQueryWithContext(`empty_sid_${Date.now()}`, '怎么吃', null);
    assert.ok(result.includes('怎么吃'), `Should return original: got "${result}"`);
  });

  it('有关键词+追问 → 关键词注入', () => {
    const s = `enrich_inject_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效详细介绍');
    const result = contextKeywordTracker.enrichQueryWithContext(s, '怎么吃', null);
    assert.ok(result.includes('怎么吃'), `Should include query: got "${result}"`);
  });

  it('非追问query → 不注入关键词', () => {
    const s = `enrich_noinject_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效');
    const result = contextKeywordTracker.enrichQueryWithContext(s, '德国PM公司在哪里', null);
    assert.equal(result, '德国PM公司在哪里', `Non-follow-up should not inject: got "${result}"`);
  });

  it('KB话题记忆优先于keyword tracker', () => {
    const s = `enrich_priority_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '一成系统三大平台');
    // Fresh KB hit: the remembered KB topic is expected to win over tracker keywords.
    const session = { _lastKbTopic: 'CC套装功效', _lastKbHitAt: Date.now() };
    const result = contextKeywordTracker.enrichQueryWithContext(s, '怎么吃', session);
    assert.ok(result.includes('CC'), `Should use KB topic: got "${result}"`);
  });

  it('KB话题过期(>60s) → 降级到keyword tracker', () => {
    const s = `enrich_expired_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '一成系统详细');
    // KB hit 90 s ago: the remembered topic must no longer be used.
    const session = { _lastKbTopic: 'CC套装功效', _lastKbHitAt: Date.now() - 90000 };
    const result = contextKeywordTracker.enrichQueryWithContext(s, '怎么吃', session);
    assert.ok(!result.includes('CC'), `Should NOT use expired KB topic: got "${result}"`);
  });

  it('多轮更新后取最近关键词', () => {
    const s = `enrich_multi_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效');
    contextKeywordTracker.updateSession(s, '小红Activize怎么吃');
    contextKeywordTracker.updateSession(s, 'Q10辅酵素作用');
    const result = contextKeywordTracker.enrichQueryWithContext(s, '多少钱', null);
    assert.ok(result.includes('多少钱'), `Should include query: got "${result}"`);
  });

  it('各种追问前缀都能触发enrichment', () => {
    const s = `enrich_prefixes_${Date.now()}`;
    contextKeywordTracker.updateSession(s, '大白产品功效');
    const followUps = [
      '怎么吃', '功效是什么', '多少钱', '适合谁', '成分是什么', '哪里买',
      '副作用', '什么意思', '怎么用', '他的规格是什么', '它的包装是什么',
      '这款是什么剂型', '那个是什么形态', '一天几次', '每天几次', '每日几次',
    ];
    for (const fup of followUps) {
      const result = contextKeywordTracker.enrichQueryWithContext(s, fup, null);
      assert.ok(result.includes(fup), `"${fup}" should be in result: got "${result}"`);
    }
  });
});

// ================================================================
// 10.
// KB protection window — fine-grained timing tests
// ================================================================
describe('KB保护窗口 — 精细时序与边界', () => {
  // Length of the protection window after a KB hit, in milliseconds.
  const WINDOW = 60000;
  // Utterances made up solely of a greeting/acknowledgement plus optional trailing
  // punctuation or whitespace; these are never elevated to the KB route.
  const PURE_CHITCHAT = /^(喂|你好|嗨|谢谢|再见|拜拜|好的|嗯|哦|行|没事了|不用了|可以了)[,,。!?\s]*$/;
  // One reference time shared by every case so window arithmetic is exact and
  // does not depend on how long the router call takes.
  const NOW = Date.now();

  /**
   * Local stand-in for the protection-window rule exercised by this suite
   * (presumably mirrors the production router — confirm against the service).
   *
   * @param {string} text - The user utterance.
   * @param {{_lastKbHitAt?: number|null}} session - Session holding the last KB hit timestamp.
   * @param {number} [now=Date.now()] - Current time in ms; injectable so boundary cases are deterministic.
   * @returns {boolean} Whether the utterance should be routed to the KB.
   */
  function simulateProtection(text, session, now = Date.now()) {
    const routed = shouldForceKnowledgeRoute(text);
    if (routed) return routed;
    const lastHit = session._lastKbHitAt;
    // A missing/zero/null timestamp means there was no KB hit to protect.
    const insideWindow = Boolean(lastHit) && now - lastHit < WINDOW;
    if (insideWindow && !PURE_CHITCHAT.test(text)) return true;
    return routed;
  }

  describe('窗口内(5s-55s)非闲聊提升', () => {
    const timings = [5000, 10000, 20000, 30000, 45000, 55000, 59000, 59999];
    for (const t of timings) {
      it(`${t}ms前KB hit + "哦这样啊" → 应走KB`, () => {
        const session = { _lastKbHitAt: NOW - t };
        assert.equal(simulateProtection('哦这样啊', session, NOW), true);
      });
    }
  });

  describe('窗口外(61s+)不提升', () => {
    // 60000 is the exact boundary: the window check is strict (<), so it is already outside.
    const timings = [60000, 60001, 65000, 120000, 300000];
    for (const t of timings) {
      it(`${t}ms前KB hit + "哦这样啊" → 不走KB`, () => {
        const session = { _lastKbHitAt: NOW - t };
        assert.equal(simulateProtection('哦这样啊', session, NOW), false);
      });
    }
  });

  describe('窗口内各类纯闲聊不提升', () => {
    const chitchat = ['你好', '嗨', '谢谢', '再见', '拜拜', '好的', '嗯', '哦', '行', '没事了', '不用了', '可以了', '喂'];
    for (const c of chitchat) {
      it(`窗口内"${c}" → 不走KB`, () => {
        const session = { _lastKbHitAt: NOW - 5000 };
        assert.equal(simulateProtection(c, session, NOW), false);
      });
    }
  });

  describe('窗口内各类非闲聊提升', () => {
    const nonChat = [
      '然后呢', '还有吗', '继续', '还有什么', '那怎么办', '这样可以吗',
      '有什么注意事项', '跟别的有什么区别', '会不会有副作用', '我能吃吗',
      '孕妇可以吗', '小孩能吃吗', '老人适合吗', '饭前还是饭后', '要吃多久', '一天几次',
    ];
    for (const text of nonChat) {
      it(`窗口内"${text}" → 应走KB`, () => {
        const session = { _lastKbHitAt: NOW - 10000 };
        assert.equal(simulateProtection(text, session, NOW), true, `"${text}" should be elevated`);
      });
    }
  });

  it('无KB历史(_lastKbHitAt=0) → 不提升', () => {
    assert.equal(simulateProtection('然后呢', { _lastKbHitAt: 0 }, NOW), false);
  });

  it('无KB历史(undefined) → 不提升', () => {
    assert.equal(simulateProtection('然后呢', {}, NOW), false);
  });

  it('_lastKbHitAt=null → 不提升', () => {
    assert.equal(simulateProtection('然后呢', { _lastKbHitAt: null }, NOW), false);
  });
});

// ================================================================
// 11. End-to-end multi-turn simulation — more variants
// ================================================================
describe('端到端多轮模拟 — 更多变体', () => {
  it('3轮:产品→追问→质疑价格', () => {
    assert.equal(shouldForceKnowledgeRoute('Q10辅酵素功效'), true);
    const ctx = [
      { role: 'user', content: 'Q10辅酵素功效' },
      { role: 'assistant', content: 'Q10...' },
    ];
    assert.equal(shouldForceKnowledgeRoute('多少钱', ctx), true);
    assert.equal(shouldForceKnowledgeRoute('太贵了吧,你确定吗'), true);
  });

  it('3轮:公司→认证→怀疑合法性', () => {
    assert.equal(shouldForceKnowledgeRoute('德国PM公司介绍'), true);
    assert.equal(shouldForceKnowledgeRoute('邓白氏AAA+认证'), true);
    const ctx = [
      { role: 'user', content: '邓白氏' },
      { role: 'assistant', content: '邓白氏是...' },
    ];
    assert.equal(shouldForceKnowledgeRoute('我不信,网上说是传销', ctx), true);
  });

  it('4轮:系统→功能→质疑→再查', () => {
    assert.equal(shouldForceKnowledgeRoute('一成系统介绍'), true);
    const ctx1 = [
      { role: 'user', content: '一成系统' },
      { role: 'assistant', content: '一成系统...' },
    ];
    assert.equal(shouldForceKnowledgeRoute('行动圈是什么', ctx1), true);
    assert.equal(shouldForceKnowledgeRoute('跟我了解的不一样'), true);
    assert.equal(shouldForceKnowledgeRoute('你再查查一成系统'), true);
  });

  it('5轮:产品A→产品B→对比→质疑→纠正', () => {
    assert.equal(shouldForceKnowledgeRoute('大白怎么吃'), true);
    assert.equal(shouldForceKnowledgeRoute('小红怎么吃'), true);
    assert.equal(shouldForceKnowledgeRoute('大白和小红有什么区别'), true);
    const ctx = [
      { role: 'user', content: '区别' },
      { role: 'assistant', content: '大白是基础...' },
    ];
    assert.equal(shouldForceKnowledgeRoute('你搞混了吧', ctx), true);
    assert.equal(shouldForceKnowledgeRoute('应该是小红提供能量大白补充矿物质'), true);
  });

  it('连续4次质疑不同方式', () => {
    assert.equal(shouldForceKnowledgeRoute('小白功效'), true);
    const ctx = [
      { role: 'user', content: '小白功效' },
      { role: 'assistant', content: '小白...' },
    ];
    assert.equal(shouldForceKnowledgeRoute('不对吧', ctx), true);
    assert.equal(shouldForceKnowledgeRoute('你再查查', ctx), true);
    assert.equal(shouldForceKnowledgeRoute('我不信', ctx), true);
    assert.equal(shouldForceKnowledgeRoute('有什么根据', ctx), true);
  });

  it('KB话题→闲聊打断→再回到KB话题', () => {
    assert.equal(shouldForceKnowledgeRoute('CC套装怎么用'), true);
    assert.equal(shouldForceKnowledgeRoute('谢谢'), false);
    assert.equal(shouldForceKnowledgeRoute('CC套装适合谁'), true);
  });
});

// ================================================================
// 12. normalizeKnowledgeAlias — more normalisation scenarios
// ================================================================
describe('normalizeKnowledgeAlias — 更多归一化场景', () => {
  // [input, substring expected in the normalised output, test label]
  const cases = [
    ['一成,,系统', '一成系统', '多标点分隔'],
    ['一成、系统', '一成系统', '顿号分隔'],
    ['一成 系统', '一成系统', '多空格分隔'],
    ['大我产品', '大沃', '大我→大沃'],
    ['大卧介绍', '大沃', '大卧→大沃'],
    ['哎众享怎么用', 'Ai众享', '哎众享→Ai众享'],
    ['艾众享是什么', 'Ai众享', '艾众享→Ai众享'],
    ['盛卡学愿介绍', '盛咖学愿', '盛卡→盛咖'],
    ['圣咖学院怎么用', '盛咖学愿', '圣咖学院→盛咖学愿'],
  ];
  for (const [input, expectContain, label] of cases) {
    it(`${label}: "${input}" → 含"${expectContain}"`, () => {
      const result = normalizeKnowledgeAlias(input);
      assert.ok(result.includes(expectContain), `Got "${result}"`);
    });
  }
});

console.log('\n=== KB保护扩展测试加载完成 ===\n');