/**
 * Full-coverage tests for speech-recognition (ASR) text correction.
 * Covers: PHRASE_MAP, WORD_MAP, PRODUCT_ALIAS_MAP, the aggressive regex,
 * normalizeKnowledgeAlias, and the combined pipeline.
 *
 * Run with: node --test tests/test_asr_coverage.js
 */
const { describe, it } = require('node:test');
const assert = require('node:assert/strict');
// NOTE(review): PHRASE_MAP, WORD_MAP, PRODUCT_ALIAS_MAP and hasKnowledgeRouteKeyword
// do not appear to be referenced by the suites in this file — confirm whether they
// are still needed.
const { correctAsrText, PHRASE_MAP, WORD_MAP, PRODUCT_ALIAS_MAP } = require('../services/fastAsrCorrector');
const { normalizeKnowledgeAlias, shouldForceKnowledgeRoute } = require('../services/realtimeDialogRouting');
const { hasKnowledgeRouteKeyword } = require('../services/knowledgeKeywords');

/**
 * Helper: the complete ASR pipeline used by these tests. The raw transcript is
 * first passed through correctAsrText and the outcome through
 * normalizeKnowledgeAlias. (Original author's note: this mimics
 * nativeVoiceGateway.extractUserText + routing.)
 *
 * @param {string} rawText - Raw ASR transcript.
 * @returns {*} Whatever normalizeKnowledgeAlias returns for the corrected text.
 */
function fullAsrPipeline(rawText) {
  return normalizeKnowledgeAlias(correctAsrText(rawText));
}

/**
 * Asserts that a raw transcript, once run through fullAsrPipeline, makes
 * shouldForceKnowledgeRoute return true.
 *
 * @param {string} rawText - Raw ASR transcript.
 * @param {string} [msg] - Optional failure message; a default describing the
 *   raw and processed text is used when this is falsy.
 */
function assertPipelineRouteKb(rawText, msg) {
  const processed = fullAsrPipeline(rawText);
  const routed = shouldForceKnowledgeRoute(processed);
  const failureMessage = msg || `ASR "${rawText}" → processed "${processed}" should route to KB`;
  assert.equal(routed, true, failureMessage);
}

// ================================================================
// 1.
// PHRASE_MAP full coverage: every phrase mapping is verified one by one
// ================================================================
describe('PHRASE_MAP —— 短语级ASR纠错全覆盖', () => {
  // Misrecognised spellings that are all expected to be corrected to "一成系统".
  const yichengVariants = [
    '一城系统', '逸城系统', '一程系统', '易成系统', '一诚系统',
    '亦成系统', '艺成系统', '溢成系统', '义成系统', '毅成系统',
    '怡成系统', '以成系统', '已成系统', '亿成系统', '忆成系统',
    '益成系统', '一乘系统', '一承系统', '一丞系统', '一呈系统',
    '一澄系统', '一橙系统', '一层系统', '一趁系统', '一陈系统',
    '依成系统', '伊成系统', '益生系统', '易诚系统', '易乘系统',
    '一声系统', '亿生系统', '义诚系统', '忆诚系统', '以诚系统',
  ];

  // The count shown in the suite title is computed from the list so that the
  // reported number always equals the number of cases that actually run
  // (a hand-written figure can silently drift from the list).
  describe(`一成系统变体(${yichengVariants.length}条)`, () => {
    for (const variant of yichengVariants) {
      it(`"${variant}" → "一成系统"`, () => {
        const result = correctAsrText(variant);
        assert.ok(
          result.includes('一成系统'),
          `"${variant}" should correct to 一成系统, got "${result}"`,
        );
      });
    }
  });

  describe('其他短语映射', () => {
    // [raw phrase, substring expected in the corrected text]
    const otherPhrases = [
      ['盛咖学院', '盛咖学愿'],
      ['圣咖学愿', '盛咖学愿'],
      ['盛卡学愿', '盛咖学愿'],
      ['营养配送系统', 'NTC营养保送系统'],
      ['营养输送系统', 'NTC营养保送系统'],
      ['营养传送系统', 'NTC营养保送系统'],
      ['营养传输系统', 'NTC营养保送系统'],
      ['暖炉原理', '火炉原理'],
      ['整应反应', '好转反应'],
      ['整健反应', '好转反应'],
      ['排毒反应', '好转反应'],
      ['5加1', '5+1'],
      ['五加一', '5+1'],
      ['起步三观', '起步三关'],
      ['起步三官', '起步三关'],
      ['doublepm', '德国PM'],
      ['double pm', '德国PM'],
      ['DoublePM', '德国PM'],
      ['Double PM', '德国PM'],
      ['DOUBLEPM', '德国PM'],
      ['DOUBLE PM', '德国PM'],
      ['基础三合一', 'PM细胞营养素 基础套装'],
      ['三合一基础套', 'PM细胞营养素 基础套装'],
      ['大白小红小白', 'PM细胞营养素 基础套装'],
    ];

    for (const [input, expected] of otherPhrases) {
      it(`"${input}" → 含"${expected}"`, () => {
        const result = correctAsrText(input);
        assert.ok(
          result.includes(expected),
          `"${input}" should contain "${expected}", got "${result}"`,
        );
      });
    }
  });
});

// ================================================================
// 2.
// WORD_MAP full coverage: word-level ASR correction
// ================================================================
describe('WORD_MAP —— 单词级ASR纠错全覆盖', () => {
  describe('一成/一城等同音变体', () => {
    // Every spelling below is expected to be corrected to the same word.
    const expected = '一成';
    const spokenForms = [
      '一城', '逸城', '一程', '易成', '一诚', '亦成', '艺成', '溢成', '义成',
      '毅成', '怡成', '以成', '已成', '亿成', '忆成', '益成', '一乘', '一承',
      '一丞', '一呈', '一澄', '一橙', '一层', '一陈', '依成', '伊成', '益生',
      '易诚', '义诚', '忆诚', '以诚', '一声', '亿生', '易乘',
    ];

    spokenForms.forEach((input) => {
      it(`"${input}" → "${expected}"`, () => {
        // The word is embedded in a short utterance rather than tested bare.
        const result = correctAsrText(`${input}的介绍`);
        const corrected = result.includes(expected);
        assert.ok(corrected, `"${input}" should correct to "${expected}", got "${result}"`);
      });
    });
  });

  describe('大沃同音变体', () => {
    ['大窝', '大握', '大我', '大卧'].forEach((v) => {
      it(`"${v}" → "大沃"`, () => {
        const result = correctAsrText(v);
        const corrected = result.includes('大沃');
        assert.ok(corrected, `"${v}" should correct to 大沃, got "${result}"`);
      });
    });
  });

  describe('Ai众享同音变体', () => {
    ['爱众享', '艾众享', '哎众享'].forEach((v) => {
      it(`"${v}" → "Ai众享"`, () => {
        const result = correctAsrText(v);
        const corrected = result.includes('Ai众享');
        assert.ok(corrected, `"${v}" should correct to Ai众享, got "${result}"`);
      });
    });
  });

  describe('产品名同音变体', () => {
    // [misrecognised product name, expected product name]
    const productVariants = [
      ['小洪', '小红'],
      ['小宏', '小红'],
      ['小鸿', '小红'],
      ['大百', '大白'],
      ['大柏', '大白'],
      ['小百', '小白'],
      ['小柏', '小白'],
      ['维适多', '小白'],
    ];

    productVariants.forEach(([input, expected]) => {
      it(`"${input}" → "${expected}"`, () => {
        const result = correctAsrText(`${input}产品功效`);
        const corrected = result.includes(expected);
        assert.ok(corrected, `"${input}" should correct to "${expected}", got "${result}"`);
      });
    });
  });

  describe('其他同音变体', () => {
    const others = [
      ['营养配送', '营养保送'],
      ['营养输送', '营养保送'],
      ['阿玉吠陀', '阿育吠陀'],
      ['阿育费陀', '阿育吠陀'],
    ];

    others.forEach(([input, expected]) => {
      it(`"${input}" → "${expected}"`, () => {
        const result = correctAsrText(input);
        const corrected = result.includes(expected);
        assert.ok(corrected, `"${input}" should correct to "${expected}", got "${result}"`);
      });
    });
  });
});

// ================================================================
// 3. PRODUCT_ALIAS_MAP: product alias expansion
// ================================================================
describe('PRODUCT_ALIAS_MAP —— 产品别名扩展', () => {
  // [utterance, substring expected in the expanded text]
  // NOTE(review): for 'Basics成分' and 'Restorate功效' the expected substring is
  // already part of the input, so those two cases would also pass if the
  // corrector returned the text unchanged — confirm the intended expectation.
  const aliasCases = [
    ['小红怎么吃', 'Activize'],
    ['小红功效', 'Activize'],
    ['Activize是什么', 'Activize Oxyplus'],
    ['大白怎么吃', 'Basics'],
    ['大白功效', 'Basics'],
    ['Basics成分', 'Basics'],
    ['小白怎么吃', 'Restorate'],
    ['Restorate功效', 'Restorate'],
    ['FitLine是什么', 'PM-FitLine'],
    ['PM FitLine的功效', 'PM-FitLine'],
    ['PM细胞营养', 'PM细胞营养素'],
    ['PM营养素功效', 'PM细胞营养素'],
    ['德国PM营养素', 'PM细胞营养素'],
  ];

  aliasCases.forEach(([input, expectContain]) => {
    it(`"${input}" → 扩展含"${expectContain}"`, () => {
      const result = correctAsrText(input);
      const expanded = result.includes(expectContain);
      assert.ok(expanded, `"${input}" should expand to contain "${expectContain}", got "${result}"`);
    });
  });

  it('非追问位置不应触发产品扩展(如"小红帽")', () => {
    // Original author's note: 小红帽 doesn't match the expansion pattern because
    // 帽 is not in the suffix list.
    const result = correctAsrText('小红帽故事');
    const expanded = result.includes('Activize');
    assert.ok(!expanded, `"小红帽故事" should NOT expand 小红, got "${result}"`);
  });
});

// ================================================================
// 4. Aggressive regex: X + 成/城/程... + 系统 is uniformly corrected
// ================================================================
describe('激进正则 —— 未在字典中的"X成系统"变体', () => {
  const aggressiveCases = [
    '翼成系统', '奕成系统', '弈成系统', '颐成系统', '译成系统',
    '蚁成系统', '壹成系统', '一盛系统', '一胜系统', '一生系统',
    '一称系统', '一撑系统', '双成系统', '半成系统',
  ];

  aggressiveCases.forEach((variant) => {
    it(`"${variant}" → "一成系统"(激进正则兜底)`, () => {
      const result = correctAsrText(variant);
      const corrected = result.includes('一成系统');
      assert.ok(corrected, `"${variant}" should correct to 一成系统 via aggressive regex, got "${result}"`);
    });
  });
});

// ================================================================
// 5.
// normalizeKnowledgeAlias: extra normalization at the routing layer
// ================================================================
describe('normalizeKnowledgeAlias —— 路由层归一化', () => {
  // One row per test: title, text handed to normalizeKnowledgeAlias, and the
  // substring that must appear in what it returns.
  const normalizationCases = [
    { title: '一成,系统(带标点间隔)→ 一成系统', input: '一成,系统', expected: '一成系统' },
    { title: '一成 系统(带空格间隔)→ 一成系统', input: '一成 系统', expected: '一成系统' },
    { title: 'XX系统 → 一成系统', input: 'XX系统', expected: '一成系统' },
    { title: '大窝 → 大沃', input: '大窝', expected: '大沃' },
    { title: '暖炉原理 → 火炉原理', input: '暖炉原理', expected: '火炉原理' },
    { title: 'AI众享(大写)→ Ai众享', input: 'AI众享怎么用', expected: 'Ai众享' },
    { title: '圣咖学院 → 盛咖学愿', input: '圣咖学院', expected: '盛咖学愿' },
  ];

  normalizationCases.forEach(({ title, input, expected }) => {
    it(title, () => {
      const result = normalizeKnowledgeAlias(input);
      assert.ok(result.includes(expected), `Got "${result}"`);
    });
  });
});

// ================================================================
// 6.
// Complete ASR pipeline: correctAsrText + normalizeKnowledgeAlias combined
// ================================================================
describe('完整ASR流水线 —— 纠错+归一化组合', () => {
  describe('一成系统变体经流水线后应路由到KB', () => {
    const pipelineCases = [
      '一城系统是什么',
      '逸城系统怎么用',
      '易成系统介绍',
      '益生系统怎么样',
      '义诚系统核心优势',
      '壹成系统三大平台',
      '一声系统有什么用',
      '翼成系统赋能团队',
    ];

    pipelineCases.forEach((raw) => {
      it(`"${raw}" → 应路由到KB`, () => {
        assertPipelineRouteKb(raw);
      });
    });
  });

  describe('产品名ASR错误经流水线后应路由到KB', () => {
    // [raw utterance, label used only in the test title]
    const productAsrCases = [
      ['小洪产品功效', '小红ASR错误'],
      ['小宏怎么吃', '小红ASR错误'],
      ['大百功效是什么', '大白ASR错误'],
      ['大柏怎么吃', '大白ASR错误'],
      ['小百怎么服用', '小白ASR错误'],
      ['小柏功效', '小白ASR错误'],
    ];

    productAsrCases.forEach(([raw, label]) => {
      it(`${label}: "${raw}" → 应路由到KB`, () => {
        assertPipelineRouteKb(raw);
      });
    });
  });

  describe('其他ASR错误经流水线后应路由到KB', () => {
    // [raw utterance, label used only in the test title]
    const otherAsrCases = [
      ['暖炉原理是什么意思', '暖炉→火炉'],
      ['营养配送系统原理', '配送→保送'],
      ['整应反应是什么', '整应→好转'],
      ['排毒反应正常吗', '排毒反应→好转反应'],
      ['盛咖学院怎么用', '学院→学愿'],
      ['起步三观是什么', '三观→三关'],
      ['double pm介绍', 'double pm→德国PM'],
      ['阿玉吠陀是什么', '阿玉→阿育'],
    ];

    otherAsrCases.forEach(([raw, label]) => {
      it(`${label}: "${raw}" → 应路由到KB`, () => {
        assertPipelineRouteKb(raw);
      });
    });
  });
});

// ================================================================
// 7.
// ASR recognition failure / garbled text / noise scenarios
// ================================================================
describe('ASR识别失败/异常场景', () => {
  it('空字符串 → 不崩溃,返回空', () => {
    // Empty, null and undefined input are all expected to yield ''.
    assert.equal(correctAsrText(''), '');
    assert.equal(correctAsrText(null), '');
    assert.equal(correctAsrText(undefined), '');
  });

  it('纯噪声标点 → 不崩溃', () => {
    const result = correctAsrText(',,,。。。!!');
    assert.equal(typeof result, 'string');
  });

  it('语气词噪声 → 不应路由到KB', () => {
    const noises = ['嗯嗯嗯', '啊啊啊', '哦哦哦', '额额额'];
    for (const noise of noises) {
      const processed = fullAsrPipeline(noise);
      const result = shouldForceKnowledgeRoute(processed);
      assert.equal(result, false, `Noise "${noise}" should NOT route to KB`);
    }
  });

  it('极短识别(单字/双字)→ 不应路由到KB', () => {
    // NOTE(review): the title also mentions two-character input, but every
    // sample here is a single character — confirm whether more samples are wanted.
    const shorts = ['嗯', '好', '啊', '是', '对', '哦'];
    for (const s of shorts) {
      const processed = fullAsrPipeline(s);
      const result = shouldForceKnowledgeRoute(processed);
      assert.equal(result, false, `Short "${s}" should NOT route to KB`);
    }
  });

  it('混合中英文乱码 → 不崩溃,不误触发KB', () => {
    const garbled = ['abc123你好', 'test test', '!!!???', '😊😊😊'];
    for (const g of garbled) {
      const processed = fullAsrPipeline(g);
      assert.equal(typeof processed, 'string', 'Should return string');
      // The title promises that garbled input does not trigger the knowledge
      // base, so the routing decision is asserted as well as the return type.
      assert.equal(
        shouldForceKnowledgeRoute(processed),
        false,
        `Garbled "${g}" should NOT route to KB`,
      );
    }
  });

  it('超长ASR文本 → 不崩溃', () => {
    const longText = '我想问一下关于'.repeat(50) + '基础三合一怎么吃';
    const result = correctAsrText(longText);
    assert.equal(typeof result, 'string');
    assert.ok(result.length > 0);
  });
});

// ================================================================
// 8.
// Partial ASR recognition: the speech was cut off
// ================================================================
describe('ASR部分识别 —— 语音截断/不完整', () => {
  it('"一成系" → 不完整但不崩溃', () => {
    assert.equal(typeof correctAsrText('一成系'), 'string');
  });

  it('"基础三合" → 不完整,不应匹配PHRASE_MAP', () => {
    const result = correctAsrText('基础三合');
    const mapped = result.includes('基础套装');
    assert.ok(!mapped, `Incomplete "基础三合" should not trigger full phrase mapping, got "${result}"`);
  });

  it('"小红怎" → 不完整但产品名应被扩展', () => {
    // Original author's note: 小红 is followed by 怎; the suffix list has 怎么
    // but not a lone 怎.
    // NOTE(review): the title says the product name should be expanded, yet only
    // the return type is asserted — confirm which behaviour is intended.
    assert.equal(typeof correctAsrText('小红怎'), 'string');
  });

  it('"德国P" → 不完整,不应触发', () => {
    const result = correctAsrText('德国P');
    const overExpanded = result.includes('德国PM公司');
    assert.ok(!overExpanded, `Incomplete should not over-expand, got "${result}"`);
  });
});

// ================================================================
// 9. Compound ASR errors: several ASR errors within one sentence
// ================================================================
describe('复合ASR错误 —— 一句话中包含多个识别错误', () => {
  // Each row: the raw utterance and the two corrections that must both show up.
  // `want` is the substring looked for; `label` is used in the failure message.
  const compoundCases = [
    {
      input: '一城系统的小洪产品功效',
      checks: [
        { want: '一成系统', label: '一城→一成' },
        { want: '小红', label: '小洪→小红' },
      ],
    },
    {
      input: '大百和小柏的区别',
      checks: [
        { want: '大白', label: '大百→大白' },
        { want: '小白', label: '小柏→小白' },
      ],
    },
    {
      input: '爱众享和盛咖学院',
      checks: [
        { want: 'Ai众享', label: '爱众享→Ai众享' },
        { want: '盛咖学愿', label: '盛咖学院→盛咖学愿' },
      ],
    },
    {
      input: '大窝的暖炉原理',
      checks: [
        { want: '大沃', label: '大窝→大沃' },
        { want: '火炉原理', label: '暖炉→火炉' },
      ],
    },
  ];

  compoundCases.forEach(({ input, checks }) => {
    const [first, second] = checks;
    it(`"${input}" → ${first.want} + ${second.want}`, () => {
      const result = correctAsrText(input);
      checks.forEach(({ want, label }) => {
        assert.ok(result.includes(want), `Should correct ${label}, got "${result}"`);
      });
    });
  });

  it('复合错误经完整流水线后应路由到KB', () => {
    const utterances = [
      '一城系统的小洪怎么吃',
      '大百和小柏的区别是什么',
      '爱众享和盛咖学院介绍',
    ];
    utterances.forEach((utterance) => {
      assertPipelineRouteKb(utterance);
    });
  });
});

// ================================================================
// 10. Realistic voice scenarios: ASR output that mimics how users actually speak
// ================================================================
describe('真实语音场景 —— 模拟实际用户说话的ASR识别结果', () => {
  const spokenUtterances = [
    '那个一城系统是干嘛的呀',
    '小洪和大百一起吃吗',
    '我想问一下那个暖炉原理是什么意思',
    '你们那个double pm是什么公司',
    '吃了以后有整应反应怎么办',
    '盛咖学院里面的课程怎么看',
    '那个营养配送系统是怎么回事',
    '新人起步三观是什么',
    '维适多怎么服用',
    '大窝能帮我介绍一下大百吗',
    '五加一活动是什么',
    '阿育费陀跟PM产品有什么关系',
  ];

  spokenUtterances.forEach((utterance) => {
    it(`"${utterance}" → 应路由到KB`, () => {
      assertPipelineRouteKb(utterance);
    });
  });
});

// ================================================================
// 11.
// Negative cases: ordinary text must not be altered by ASR correction
// ================================================================
describe('负面用例 —— 正常文本不应被误纠', () => {
  it('"一成不变" → 不应被改为"一成系统不变"', () => {
    const result = correctAsrText('一成不变');
    const rewritten = result.includes('一成系统');
    assert.ok(!rewritten, `"一成不变" should NOT be corrected, got "${result}"`);
  });

  // Everyday sentences that are expected to come back exactly as given.
  ['今天天气好', '你好', '谢谢你帮忙'].forEach((text) => {
    it(`"${text}" → 保持不变`, () => {
      assert.equal(correctAsrText(text), text);
    });
  });

  // Everyday words that merely start with a product nickname.
  // Original author's note: "大白" followed by "天" is not in the suffix list,
  // so product expansion should not be triggered.
  const lookalikes = [
    { text: '大白天出去', forbidden: 'Basics' },
    { text: '小白兔', forbidden: 'Restorate' },
  ];

  lookalikes.forEach(({ text, forbidden }) => {
    it(`"${text}" → 不应被纠正为PM产品`, () => {
      const result = correctAsrText(text);
      const expanded = result.includes(forbidden);
      assert.ok(!expanded, `"${text}" should NOT trigger product alias, got "${result}"`);
    });
  });
});

console.log('\n=== ASR覆盖测试加载完成 ===\n');