Files
bigwo/test2/server/tests/test_asr_coverage.js
User 9567eb7358 feat(server): KB prompt优化、字幕修复、S2S重连、助手配置API
- assistantProfileConfig: KB answer prompt改为分层策略(严格产品信息+灵活常识补充)
- nativeVoiceGateway: S2S upstream自动重连(最多50次)、event 351字幕debounce(800ms取最长文本)
- toolExecutor: 确定性query改写增强、KB查询传递session上下文
- contextKeywordTracker: 支持KB话题记忆优先enrichment
- contentSafeGuard: 新增品牌安全内容过滤服务
- assistantProfileService: 新增助手配置CRUD服务
- routes/assistantProfile: 新增助手配置API路由
- knowledgeKeywords: 扩展KB关键词词典
- fastAsrCorrector: ASR纠错规则更新
- tests/: KB prompt测试、保护窗口测试、Viking性能测试
- docs/: 助手配置API文档、系统提示词目录
2026-03-24 17:19:36 +08:00

521 lines
21 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* 语音识别(ASR)纠错全覆盖测试
* 覆盖PHRASE_MAP、WORD_MAP、PRODUCT_ALIAS_MAP、激进正则、normalizeKnowledgeAlias、组合流水线
*
* 运行方式: node --test tests/test_asr_coverage.js
*/
const { describe, it } = require('node:test');
const assert = require('node:assert/strict');
const { correctAsrText, PHRASE_MAP, WORD_MAP, PRODUCT_ALIAS_MAP } = require('../services/fastAsrCorrector');
const { normalizeKnowledgeAlias, shouldForceKnowledgeRoute } = require('../services/realtimeDialogRouting');
const { hasKnowledgeRouteKeyword } = require('../services/knowledgeKeywords');
/**
 * Helper: pushes raw recognized text through both text-processing stages
 * exercised by this file (ASR correction first, then alias normalization).
 * The original note describes this as simulating
 * nativeVoiceGateway.extractUserText + routing.
 *
 * @param {string} rawText - raw ASR output.
 * @returns {*} the value returned by normalizeKnowledgeAlias for the corrected text.
 */
function fullAsrPipeline(rawText) {
  return normalizeKnowledgeAlias(correctAsrText(rawText));
}
/**
 * Asserts that raw ASR text, once run through fullAsrPipeline, is reported by
 * shouldForceKnowledgeRoute as a knowledge-base route (strictly `true`).
 *
 * @param {string} rawText - raw ASR output.
 * @param {string} [msg] - failure message; when falsy a default message that
 *   includes both the raw and the processed text is used instead.
 */
function assertPipelineRouteKb(rawText, msg) {
  const pipelineOutput = fullAsrPipeline(rawText);
  const routedToKb = shouldForceKnowledgeRoute(pipelineOutput);
  const failureMessage = msg || `ASR "${rawText}" → processed "${pipelineOutput}" should route to KB`;
  assert.equal(routedToKb, true, failureMessage);
}
// ================================================================
// 1. PHRASE_MAP full coverage — every phrase mapping is checked one by one
// ================================================================
describe('PHRASE_MAP —— 短语级ASR纠错全覆盖', () => {
  // NOTE(review): the suite title mentions 36 entries, but the list below holds 35.
  describe('一成系统变体36条', () => {
    const misheardSystemNames = [
      '一城系统', '逸城系统', '一程系统', '易成系统', '一诚系统',
      '亦成系统', '艺成系统', '溢成系统', '义成系统', '毅成系统',
      '怡成系统', '以成系统', '已成系统', '亿成系统', '忆成系统',
      '益成系统', '一乘系统', '一承系统', '一丞系统', '一呈系统',
      '一澄系统', '一橙系统', '一层系统', '一趁系统', '一陈系统',
      '依成系统', '伊成系统', '益生系统', '易诚系统', '易乘系统',
      '一声系统', '亿生系统', '义诚系统', '忆诚系统', '以诚系统',
    ];

    // One test per variant: the corrected text must contain the canonical name.
    misheardSystemNames.forEach((heard) => {
      it(`"${heard}" → "一成系统"`, () => {
        const corrected = correctAsrText(heard);
        const isFixed = corrected.includes('一成系统');
        assert.ok(isFixed, `"${heard}" should correct to 一成系统, got "${corrected}"`);
      });
    });
  });

  describe('其他短语映射', () => {
    // Each row is [recognized phrase, text the corrected output must contain].
    const phraseRows = [
      ['盛咖学院', '盛咖学愿'],
      ['圣咖学愿', '盛咖学愿'],
      ['盛卡学愿', '盛咖学愿'],
      ['营养配送系统', 'NTC营养保送系统'],
      ['营养输送系统', 'NTC营养保送系统'],
      ['营养传送系统', 'NTC营养保送系统'],
      ['营养传输系统', 'NTC营养保送系统'],
      ['暖炉原理', '火炉原理'],
      ['整应反应', '好转反应'],
      ['整健反应', '好转反应'],
      ['排毒反应', '好转反应'],
      ['5加1', '5+1'],
      ['五加一', '5+1'],
      ['起步三观', '起步三关'],
      ['起步三官', '起步三关'],
      ['doublepm', '德国PM'],
      ['double pm', '德国PM'],
      ['DoublePM', '德国PM'],
      ['Double PM', '德国PM'],
      ['DOUBLEPM', '德国PM'],
      ['DOUBLE PM', '德国PM'],
      ['基础三合一', 'PM细胞营养素 基础套装'],
      ['三合一基础套', 'PM细胞营养素 基础套装'],
      ['大白小红小白', 'PM细胞营养素 基础套装'],
    ];

    phraseRows.forEach(([heard, wanted]) => {
      it(`"${heard}" → 含"${wanted}"`, () => {
        const corrected = correctAsrText(heard);
        const hasWanted = corrected.includes(wanted);
        assert.ok(hasWanted, `"${heard}" should contain "${wanted}", got "${corrected}"`);
      });
    });
  });
});
// ================================================================
// 2. WORD_MAP full coverage — word-level ASR correction
// ================================================================
describe('WORD_MAP —— 单词级ASR纠错全覆盖', () => {
  /**
   * Registers one test per [heard, wanted] pair. `toUtterance` turns the
   * misheard word into the text actually passed to correctAsrText.
   */
  const registerPairCases = (pairs, toUtterance) => {
    pairs.forEach(([heard, wanted]) => {
      it(`"${heard}" → "${wanted}"`, () => {
        const corrected = correctAsrText(toUtterance(heard));
        assert.ok(corrected.includes(wanted), `"${heard}" should correct to "${wanted}", got "${corrected}"`);
      });
    });
  };

  /**
   * Registers one test per misheard variant that must all be corrected to the
   * same canonical word; the variant itself is the whole input.
   */
  const registerVariantCases = (variants, canonical) => {
    variants.forEach((heard) => {
      it(`"${heard}" → "${canonical}"`, () => {
        const corrected = correctAsrText(heard);
        assert.ok(corrected.includes(canonical), `"${heard}" should correct to ${canonical}, got "${corrected}"`);
      });
    });
  };

  describe('一成/一城等同音变体', () => {
    const homophonePairs = [
      ['一城', '一成'], ['逸城', '一成'], ['一程', '一成'], ['易成', '一成'],
      ['一诚', '一成'], ['亦成', '一成'], ['艺成', '一成'], ['溢成', '一成'],
      ['义成', '一成'], ['毅成', '一成'], ['怡成', '一成'], ['以成', '一成'],
      ['已成', '一成'], ['亿成', '一成'], ['忆成', '一成'], ['益成', '一成'],
      ['一乘', '一成'], ['一承', '一成'], ['一丞', '一成'], ['一呈', '一成'],
      ['一澄', '一成'], ['一橙', '一成'], ['一层', '一成'], ['一陈', '一成'],
      ['依成', '一成'], ['伊成', '一成'],
      ['益生', '一成'], ['易诚', '一成'], ['义诚', '一成'], ['忆诚', '一成'], ['以诚', '一成'],
      ['一声', '一成'], ['亿生', '一成'], ['易乘', '一成'],
    ];
    // The word is embedded in a short phrase before correction.
    registerPairCases(homophonePairs, (heard) => `${heard}的介绍`);
  });

  describe('大沃同音变体', () => {
    registerVariantCases(['大窝', '大握', '大我', '大卧'], '大沃');
  });

  describe('Ai众享同音变体', () => {
    registerVariantCases(['爱众享', '艾众享', '哎众享'], 'Ai众享');
  });

  describe('产品名同音变体', () => {
    const productPairs = [
      ['小洪', '小红'], ['小宏', '小红'], ['小鸿', '小红'],
      ['大百', '大白'], ['大柏', '大白'],
      ['小百', '小白'], ['小柏', '小白'], ['维适多', '小白'],
    ];
    // Product names are followed by a product-related suffix in the input.
    registerPairCases(productPairs, (heard) => `${heard}产品功效`);
  });

  describe('其他同音变体', () => {
    const miscPairs = [
      ['营养配送', '营养保送'],
      ['营养输送', '营养保送'],
      ['阿玉吠陀', '阿育吠陀'],
      ['阿育费陀', '阿育吠陀'],
    ];
    // These inputs are passed to the corrector unchanged.
    registerPairCases(miscPairs, (heard) => heard);
  });
});
// ================================================================
// 3. PRODUCT_ALIAS_MAP — product alias expansion
// ================================================================
describe('PRODUCT_ALIAS_MAP —— 产品别名扩展', () => {
  // Each row: the utterance to correct and a substring the result must contain.
  const expansionRows = [
    { utterance: '小红怎么吃', mustContain: 'Activize' },
    { utterance: '小红功效', mustContain: 'Activize' },
    { utterance: 'Activize是什么', mustContain: 'Activize Oxyplus' },
    { utterance: '大白怎么吃', mustContain: 'Basics' },
    { utterance: '大白功效', mustContain: 'Basics' },
    { utterance: 'Basics成分', mustContain: 'Basics' },
    { utterance: '小白怎么吃', mustContain: 'Restorate' },
    { utterance: 'Restorate功效', mustContain: 'Restorate' },
    { utterance: 'FitLine是什么', mustContain: 'PM-FitLine' },
    { utterance: 'PM FitLine的功效', mustContain: 'PM-FitLine' },
    { utterance: 'PM细胞营养', mustContain: 'PM细胞营养素' },
    { utterance: 'PM营养素功效', mustContain: 'PM细胞营养素' },
    { utterance: '德国PM营养素', mustContain: 'PM细胞营养素' },
  ];

  expansionRows.forEach(({ utterance, mustContain }) => {
    it(`"${utterance}" → 扩展含"${mustContain}"`, () => {
      const expanded = correctAsrText(utterance);
      assert.ok(
        expanded.includes(mustContain),
        `"${utterance}" should expand to contain "${mustContain}", got "${expanded}"`,
      );
    });
  });

  it('非追问位置不应触发产品扩展(如"小红帽"', () => {
    // Per the original author's note, the trailing character here is not one of
    // the suffixes that trigger expansion (defined in fastAsrCorrector — verify there).
    const untouched = correctAsrText('小红帽故事');
    const wasExpanded = untouched.includes('Activize');
    assert.ok(!wasExpanded, `"小红帽故事" should NOT expand 小红, got "${untouched}"`);
  });
});
// ================================================================
// 4. Aggressive regex — "X + 成/城/程... + 系统" forms are corrected uniformly
// ================================================================
describe('激进正则 —— 未在字典中的"X成系统"变体', () => {
  // Variants the suite title describes as absent from the dictionaries; each
  // must still come back containing the canonical system name.
  const fallbackVariants = [
    '翼成系统', '奕成系统', '弈成系统', '颐成系统',
    '译成系统', '蚁成系统', '壹成系统',
    '一盛系统', '一胜系统', '一生系统',
    '一称系统', '一撑系统',
    '双成系统', '半成系统',
  ];

  fallbackVariants.forEach((heard) => {
    it(`"${heard}" → "一成系统"(激进正则兜底)`, () => {
      const corrected = correctAsrText(heard);
      const isFixed = corrected.includes('一成系统');
      assert.ok(isFixed, `"${heard}" should correct to 一成系统 via aggressive regex, got "${corrected}"`);
    });
  });
});
// ================================================================
// 5. normalizeKnowledgeAlias — extra normalization at the routing layer
// ================================================================
describe('normalizeKnowledgeAlias —— 路由层归一化', () => {
  // Each row: [test title, input text, substring the normalized text must contain].
  const normalizationRows = [
    ['一成,系统(带标点间隔)→ 一成系统', '一成,系统', '一成系统'],
    ['一成 系统(带空格间隔)→ 一成系统', '一成 系统', '一成系统'],
    ['XX系统 → 一成系统', 'XX系统', '一成系统'],
    ['大窝 → 大沃', '大窝', '大沃'],
    ['暖炉原理 → 火炉原理', '暖炉原理', '火炉原理'],
    ['AI众享大写→ Ai众享', 'AI众享怎么用', 'Ai众享'],
    ['圣咖学院 → 盛咖学愿', '圣咖学院', '盛咖学愿'],
  ];

  normalizationRows.forEach(([title, input, wanted]) => {
    it(title, () => {
      const normalized = normalizeKnowledgeAlias(input);
      assert.ok(normalized.includes(wanted), `Got "${normalized}"`);
    });
  });
});
// ================================================================
// 6. Full ASR pipeline — correctAsrText + normalizeKnowledgeAlias combined
// ================================================================
describe('完整ASR流水线 —— 纠错+归一化组合', () => {
  /**
   * Registers one KB-routing test per [raw utterance, label] row; the label
   * only appears in the test title.
   */
  const registerLabelledRouteCases = (rows) => {
    rows.forEach(([utterance, label]) => {
      it(`${label}: "${utterance}" → 应路由到KB`, () => {
        assertPipelineRouteKb(utterance);
      });
    });
  };

  describe('一成系统变体经流水线后应路由到KB', () => {
    const systemUtterances = [
      '一城系统是什么',
      '逸城系统怎么用',
      '易成系统介绍',
      '益生系统怎么样',
      '义诚系统核心优势',
      '壹成系统三大平台',
      '一声系统有什么用',
      '翼成系统赋能团队',
    ];
    systemUtterances.forEach((utterance) => {
      it(`"${utterance}" → 应路由到KB`, () => {
        assertPipelineRouteKb(utterance);
      });
    });
  });

  describe('产品名ASR错误经流水线后应路由到KB', () => {
    registerLabelledRouteCases([
      ['小洪产品功效', '小红ASR错误'],
      ['小宏怎么吃', '小红ASR错误'],
      ['大百功效是什么', '大白ASR错误'],
      ['大柏怎么吃', '大白ASR错误'],
      ['小百怎么服用', '小白ASR错误'],
      ['小柏功效', '小白ASR错误'],
    ]);
  });

  describe('其他ASR错误经流水线后应路由到KB', () => {
    registerLabelledRouteCases([
      ['暖炉原理是什么意思', '暖炉→火炉'],
      ['营养配送系统原理', '配送→保送'],
      ['整应反应是什么', '整应→好转'],
      ['排毒反应正常吗', '排毒反应→好转反应'],
      ['盛咖学院怎么用', '学院→学愿'],
      ['起步三观是什么', '三观→三关'],
      ['double pm介绍', 'double pm→德国PM'],
      ['阿玉吠陀是什么', '阿玉→阿育'],
    ]);
  });
});
// ================================================================
// 7. ASR recognition failure / garbled text / noise scenarios
// ================================================================
describe('ASR识别失败/异常场景', () => {
  // Shared check: the processed text must NOT be force-routed to the knowledge base.
  const expectNotRouted = (utterance, kind) => {
    const routed = shouldForceKnowledgeRoute(fullAsrPipeline(utterance));
    assert.equal(routed, false, `${kind} "${utterance}" should NOT route to KB`);
  };

  it('空字符串 → 不崩溃,返回空', () => {
    // Empty, null and undefined inputs must all yield the empty string.
    ['', null, undefined].forEach((blank) => {
      assert.equal(correctAsrText(blank), '');
    });
  });

  it('纯噪声标点 → 不崩溃', () => {
    const corrected = correctAsrText(',,,。。。!!');
    assert.equal(typeof corrected, 'string');
  });

  it('语气词噪声 → 不应路由到KB', () => {
    ['嗯嗯嗯', '啊啊啊', '哦哦哦', '额额额'].forEach((filler) => {
      expectNotRouted(filler, 'Noise');
    });
  });

  it('极短识别(单字/双字)→ 不应路由到KB', () => {
    ['嗯', '好', '啊', '是', '对', '哦'].forEach((fragment) => {
      expectNotRouted(fragment, 'Short');
    });
  });

  it('混合中英文乱码 → 不崩溃不误触发KB', () => {
    // Only the return type is checked here; routing is not asserted.
    ['abc123你好', 'test test', '!!!???', '😊😊😊'].forEach((junk) => {
      assert.equal(typeof fullAsrPipeline(junk), 'string', 'Should return string');
    });
  });

  it('超长ASR文本 → 不崩溃', () => {
    const veryLong = `${'我想问一下关于'.repeat(50)}基础三合一怎么吃`;
    const corrected = correctAsrText(veryLong);
    assert.equal(typeof corrected, 'string');
    assert.ok(corrected.length > 0);
  });
});
// ================================================================
// 8. Partial ASR recognition — speech that was cut off
// ================================================================
describe('ASR部分识别 —— 语音截断/不完整', () => {
  it('"一成系" → 不完整但不崩溃', () => {
    assert.equal(typeof correctAsrText('一成系'), 'string');
  });

  it('"基础三合" → 不完整不应匹配PHRASE_MAP', () => {
    const corrected = correctAsrText('基础三合');
    const expandedToBundle = corrected.includes('基础套装');
    assert.ok(
      !expandedToBundle,
      `Incomplete "基础三合" should not trigger full phrase mapping, got "${corrected}"`,
    );
  });

  it('"小红怎" → 不完整但产品名应被扩展', () => {
    // Original author's note: the suffix list contains the two-character
    // question word but not its first character alone (list lives in
    // fastAsrCorrector — verify there). Only the return type is asserted.
    assert.equal(typeof correctAsrText('小红怎'), 'string');
  });

  it('"德国P" → 不完整,不应触发', () => {
    const corrected = correctAsrText('德国P');
    const overExpanded = corrected.includes('德国PM公司');
    assert.ok(!overExpanded, `Incomplete should not over-expand, got "${corrected}"`);
  });
});
// ================================================================
// 9. Compound ASR errors — several recognition errors in one sentence
// ================================================================
describe('复合ASR错误 —— 一句话中包含多个识别错误', () => {
  // Each row: the utterance plus the [expected term, "wrong→right" label] pairs
  // that must all be present after a single correctAsrText call.
  const compoundRows = [
    { utterance: '一城系统的小洪产品功效', fixes: [['一成系统', '一城→一成'], ['小红', '小洪→小红']] },
    { utterance: '大百和小柏的区别', fixes: [['大白', '大百→大白'], ['小白', '小柏→小白']] },
    { utterance: '爱众享和盛咖学院', fixes: [['Ai众享', '爱众享→Ai众享'], ['盛咖学愿', '盛咖学院→盛咖学愿']] },
    { utterance: '大窝的暖炉原理', fixes: [['大沃', '大窝→大沃'], ['火炉原理', '暖炉→火炉']] },
  ];

  compoundRows.forEach(({ utterance, fixes }) => {
    const expectedTerms = fixes.map(([term]) => term).join(' + ');
    it(`"${utterance}" → ${expectedTerms}`, () => {
      const corrected = correctAsrText(utterance);
      fixes.forEach(([term, label]) => {
        assert.ok(corrected.includes(term), `Should correct ${label}, got "${corrected}"`);
      });
    });
  });

  it('复合错误经完整流水线后应路由到KB', () => {
    [
      '一城系统的小洪怎么吃',
      '大百和小柏的区别是什么',
      '爱众享和盛咖学院介绍',
    ].forEach((utterance) => {
      assertPipelineRouteKb(utterance);
    });
  });
});
// ================================================================
// 10. Real voice scenarios — ASR output that mimics how users actually speak
// ================================================================
describe('真实语音场景 —— 模拟实际用户说话的ASR识别结果', () => {
  // Every utterance below gets its own test and must be routed to the KB
  // after the full pipeline.
  const spokenUtterances = [
    '那个一城系统是干嘛的呀',
    '小洪和大百一起吃吗',
    '我想问一下那个暖炉原理是什么意思',
    '你们那个double pm是什么公司',
    '吃了以后有整应反应怎么办',
    '盛咖学院里面的课程怎么看',
    '那个营养配送系统是怎么回事',
    '新人起步三观是什么',
    '维适多怎么服用',
    '大窝能帮我介绍一下大百吗',
    '五加一活动是什么',
    '阿育费陀跟PM产品有什么关系',
  ];

  spokenUtterances.forEach((utterance) => {
    it(`"${utterance}" → 应路由到KB`, () => {
      assertPipelineRouteKb(utterance);
    });
  });
});
// ================================================================
// 11. Negative cases — ordinary text must not be altered by ASR correction
// ================================================================
describe('负面用例 —— 正常文本不应被误纠', () => {
  /**
   * Registers a test asserting that correcting `utterance` does not introduce
   * `forbidden`; `outcome` and `reason` only shape the title and message.
   */
  const registerMustNotContain = (utterance, forbidden, outcome, reason) => {
    it(`"${utterance}" → ${outcome}`, () => {
      const corrected = correctAsrText(utterance);
      assert.ok(!corrected.includes(forbidden), `"${utterance}" should NOT ${reason}, got "${corrected}"`);
    });
  };

  registerMustNotContain('一成不变', '一成系统', '不应被改为"一成系统不变"', 'be corrected');

  // Everyday sentences must come back exactly as they went in.
  ['今天天气好', '你好', '谢谢你帮忙'].forEach((sentence) => {
    it(`"${sentence}" → 保持不变`, () => {
      assert.equal(correctAsrText(sentence), sentence);
    });
  });

  // Original author's note: the character following the product nickname here is
  // not in the expansion suffix list (defined in fastAsrCorrector — verify there).
  registerMustNotContain('大白天出去', 'Basics', '不应被纠正为PM产品', 'trigger product alias');

  registerMustNotContain('小白兔', 'Restorate', '不应被纠正为PM产品', 'trigger product alias');
});
// Printed once when this module's top-level code is evaluated. The message text
// says the ASR coverage tests have finished loading, not that they have finished running.
console.log('\n=== ASR覆盖测试加载完成 ===\n');