332 lines
14 KiB
JavaScript
332 lines
14 KiB
JavaScript
|
|
/**
 * Raw-mode integration tests.
 * Coverage: resolveReply raw/answer mode switching, ragItems format, subtitle filtering, cache dual-write.
 *
 * Run with: node --test tests/test_raw_mode_integration.js
 */
|
|||
|
|
const { describe, it, beforeEach, afterEach } = require('node:test');
|
|||
|
|
const assert = require('node:assert/strict');
|
|||
|
|
const path = require('path');
|
|||
|
|
const fs = require('fs');
|
|||
|
|
|
|||
|
|
// Load variables from the .env file in the parent of this file's directory
// into process.env. This runs before the service modules are required below.
// NOTE(review): this is a minimal KEY=VALUE parser — no `export` prefix, inline
// comments or multi-line values; confirm that matches the project's .env files.
const envPath = path.join(__dirname, '../.env');
if (fs.existsSync(envPath)) {
  // Accept both LF and CRLF line endings.
  for (const rawLine of fs.readFileSync(envPath, 'utf8').split(/\r?\n/)) {
    const line = rawLine.trim();
    // Skip blank lines and full-line comments.
    if (!line || line.startsWith('#')) continue;

    // Only lines with a non-empty key in front of '=' are honoured.
    const eq = line.indexOf('=');
    if (eq <= 0) continue;

    const key = line.slice(0, eq).trim();
    let val = line.slice(eq + 1).trim();

    // Strip one pair of matching surrounding quotes. The length guard keeps a
    // lone quote character from matching itself as both opener and closer.
    const quote = val[0];
    if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote)) {
      val = val.slice(1, -1);
    }

    // Never override a variable that already exists in the real environment,
    // even when it was deliberately set to an empty string.
    if (process.env[key] === undefined) process.env[key] = val;
  }
}
|
|||
|
|
|
|||
|
|
const { normalizeTextForSpeech } = require('../services/realtimeDialogRouting');
|
|||
|
|
|
|||
|
|
// ================================================================
// 1. ragItems format — raw mode vs answer mode
// ================================================================
describe('ragItems 格式 — raw 模式 vs answer 模式', () => {
  // NOTE(review): these cases assert on fixtures and on logic written inline in
  // the test body; apart from normalizeTextForSpeech they do not call
  // production code, so they document expected shapes rather than verify them.

  it('raw 模式 toolResult 应包含 retrieval_mode 标记', () => {
    // Fixture: one conversation-context entry followed by three KB chunks.
    const rawToolResult = {
      query: '小红怎么吃',
      results: [
        { title: '对话上下文', content: '用户: 你好\n助手: 你好!' },
        { title: '产品手册', content: '小红Activize Oxyplus...' },
        { title: 'FAQ', content: '小红每天一包...' },
        { title: '知识库片段3', content: '用40度以下温水冲服...' },
      ],
      total: 4,
      source: 'ark_knowledge',
      hit: true,
      reason: 'reranked_hit',
      retrieval_mode: 'raw',
      top_score: 0.85,
      chunks_count: 3,
    };

    assert.equal(rawToolResult.retrieval_mode, 'raw');
    assert.equal(rawToolResult.results.length, 4, 'Should have context + 3 chunks');
    assert.equal(rawToolResult.results[0].title, '对话上下文', 'First item should be context');
  });

  it('answer 模式 toolResult 不应包含 retrieval_mode 或为 undefined', () => {
    const answerToolResult = {
      query: '小红怎么吃',
      results: [{ title: '方舟知识库检索结果', content: '根据知识库信息,小红Activize每天一包...' }],
      total: 1,
      source: 'ark_knowledge',
      hit: true,
      reason: 'classified_hit',
    };

    // A missing (undefined) field and any non-'raw' value both satisfy this,
    // so no separate falsy check is needed.
    assert.notEqual(answerToolResult.retrieval_mode, 'raw');
  });

  it('raw 模式下 ragItems 应透传多条而非合并为单条', () => {
    const rawResults = [
      { title: '对话上下文', content: '用户: 大白怎么吃\n助手: ...' },
      { title: '产品手册', content: '片段1内容' },
      { title: 'FAQ', content: '片段2内容' },
    ];

    // Inline stand-in for the mode switch: raw mode forwards the items
    // untouched, answer mode collapses them into a single entry.
    const toFinalRagItems = (items, isRawMode) =>
      isRawMode
        ? items
        : [{ title: '知识库结果', content: items.map((r) => r.content).join(' ') }];

    const finalRagItems = toFinalRagItems(rawResults, true);
    assert.equal(finalRagItems.length, 3, 'Raw mode should keep 3 items');
    assert.equal(finalRagItems[0].title, '对话上下文');
    assert.equal(finalRagItems[1].title, '产品手册');

    // Contrast case, so the merging branch is exercised as well.
    assert.equal(toFinalRagItems(rawResults, false).length, 1, 'Answer mode should merge into 1 item');
  });

  it('answer 模式下 ragItems 应合并为单条并清理前缀', () => {
    const replyText = '根据知识库信息,小红Activize每天一包冲服';

    const speechText = normalizeTextForSpeech(replyText);
    // Drop a leading "根据知识库信息" / "根据…," style preamble.
    const cleanedText = speechText.replace(/^(根据知识库信息[,,::\s]*|根据.*?[,,]\s*)/i, '');
    // Fall back to the unstripped text if stripping leaves an empty string.
    const finalRagItems = [{ title: '知识库结果', content: cleanedText || speechText }];

    assert.equal(finalRagItems.length, 1, 'Answer mode should merge into 1 item');
    assert.ok(!finalRagItems[0].content.startsWith('根据知识库信息'), 'Should strip prefix');
    assert.ok(finalRagItems[0].content.includes('小红Activize'), 'Should preserve core content');
  });
});
|
|||
|
|
|
|||
|
|
// ================================================================
// 2. Subtitle filtering — raw mode excludes the "对话上下文" entry
// ================================================================

/**
 * Build the subtitle text for a list of RAG items.
 *
 * Items titled '对话上下文' are dropped; if that leaves nothing, all items are
 * used as a fallback.
 *
 * NOTE(review): this is a test-local copy of the filtering rule; if the
 * production code exposes the same logic, import it here instead.
 *
 * @param {Array<{title: string, content: string}>} ragContent - RAG items.
 * @returns {string} Contents of the selected items joined with single spaces.
 */
function buildSubtitleText(ragContent) {
  const subtitleItems = ragContent.filter((item) => item.title !== '对话上下文');
  const visibleItems = subtitleItems.length > 0 ? subtitleItems : ragContent;
  return visibleItems.map((item) => item.content).join(' ');
}

describe('字幕过滤 — raw 模式排除对话上下文', () => {
  it('raw 模式的 ragContent 过滤后字幕不应包含上下文', () => {
    const ragSubtitleText = buildSubtitleText([
      { title: '对话上下文', content: '用户: 你好\n助手: 你好' },
      { title: '产品手册', content: '小红每天一包' },
      { title: 'FAQ', content: '40度以下温水' },
    ]);

    assert.ok(!ragSubtitleText.includes('用户: 你好'), 'Subtitle should not include context');
    assert.ok(ragSubtitleText.includes('小红每天一包'), 'Subtitle should include KB content');
    assert.ok(ragSubtitleText.includes('40度以下温水'), 'Subtitle should include all KB chunks');
  });

  it('answer 模式无"对话上下文"条目,字幕应正常', () => {
    const ragSubtitleText = buildSubtitleText([
      { title: '知识库结果', content: '小红Activize每天一包冲服' },
    ]);

    assert.equal(ragSubtitleText, '小红Activize每天一包冲服');
  });

  it('全部都是上下文条目时应降级显示全部(兜底)', () => {
    // Only a context entry exists, so the fallback path must return it.
    const ragSubtitleText = buildSubtitleText([
      { title: '对话上下文', content: '用户: 测试' },
    ]);

    assert.equal(ragSubtitleText, '用户: 测试', 'Should fallback to showing context');
  });
});
|
|||
|
|
|
|||
|
|
// ================================================================
// 3. Environment-variable mode switching
// ================================================================
describe('环境变量模式切换', () => {
  // Every variable the cases below mutate; each is snapshotted before and
  // restored after every test.
  const SWITCH_KEYS = ['VOLC_ARK_KB_RETRIEVAL_MODE', 'ENABLE_RERANKER', 'ENABLE_REDIS_CONTEXT'];
  const savedEnv = new Map();

  beforeEach(() => {
    for (const key of SWITCH_KEYS) savedEnv.set(key, process.env[key]);
  });

  afterEach(() => {
    for (const [key, value] of savedEnv) {
      // Assigning undefined to process.env would store the string 'undefined',
      // so a variable that was originally unset has to be deleted.
      if (value === undefined) delete process.env[key];
      else process.env[key] = value;
    }
  });

  // NOTE(review): the cases below re-state the `|| 'answer'` / `!== 'false'`
  // expressions inline instead of calling the code that reads these switches.

  it('VOLC_ARK_KB_RETRIEVAL_MODE=raw 应使用新链路', () => {
    process.env.VOLC_ARK_KB_RETRIEVAL_MODE = 'raw';
    const mode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer';
    assert.equal(mode, 'raw');
  });

  it('VOLC_ARK_KB_RETRIEVAL_MODE=answer 应使用旧链路', () => {
    process.env.VOLC_ARK_KB_RETRIEVAL_MODE = 'answer';
    const mode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer';
    assert.equal(mode, 'answer');
  });

  it('VOLC_ARK_KB_RETRIEVAL_MODE 未设置时默认 answer', () => {
    delete process.env.VOLC_ARK_KB_RETRIEVAL_MODE;
    const mode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer';
    assert.equal(mode, 'answer');
  });

  it('ENABLE_RERANKER=false 应关闭重排', () => {
    process.env.ENABLE_RERANKER = 'false';
    const rerankerEnabled = process.env.ENABLE_RERANKER !== 'false';
    assert.equal(rerankerEnabled, false);
  });

  it('ENABLE_REDIS_CONTEXT=false 应关闭 Redis 上下文', () => {
    process.env.ENABLE_REDIS_CONTEXT = 'false';
    const redisContextEnabled = process.env.ENABLE_REDIS_CONTEXT !== 'false';
    assert.equal(redisContextEnabled, false);
  });

  it('所有降级开关独立,互不影响', () => {
    process.env.VOLC_ARK_KB_RETRIEVAL_MODE = 'raw';
    process.env.ENABLE_RERANKER = 'false';
    process.env.ENABLE_REDIS_CONTEXT = 'true';

    assert.equal(process.env.VOLC_ARK_KB_RETRIEVAL_MODE, 'raw', 'mode should be raw');
    assert.equal(process.env.ENABLE_RERANKER, 'false', 'reranker should be off');
    assert.equal(process.env.ENABLE_REDIS_CONTEXT, 'true', 'redis context should be on');
  });
});
|
|||
|
|
|
|||
|
|
// ================================================================
// 4. normalizeTextForSpeech applied to raw chunk content
// ================================================================
describe('normalizeTextForSpeech — raw 片段处理', () => {
  it('应去除 Markdown 标记', () => {
    // Heading, bold and underscore-italic markers in one sample.
    const input = '## 产品介绍\n**小红Activize** 是一款_细胞营养素_';
    const result = normalizeTextForSpeech(input);

    assert.ok(!result.includes('##'), 'Should remove heading marks');
    assert.ok(!result.includes('**'), 'Should remove bold marks');
    assert.ok(!result.includes('_细胞'), 'Should remove italic marks');
    assert.ok(result.includes('小红Activize'), 'Should preserve content');
  });

  it('纯文本内容不应被破坏', () => {
    const input = '小红Activize Oxyplus每天一包,用40度以下温水冲服,搅拌均匀后饮用。';
    const result = normalizeTextForSpeech(input);

    assert.equal(result, input, 'Plain text should not be modified');
  });

  it('空字符串应返回空', () => {
    // Empty string, null and undefined are all expected to yield ''.
    for (const emptyInput of ['', null, undefined]) {
      assert.equal(normalizeTextForSpeech(emptyInput), '');
    }
  });
});
|
|||
|
|
|
|||
|
|
// ================================================================
// 5. End-to-end data-flow checks
// ================================================================
describe('端到端数据流 — raw 模式完整 ragPayload → S2S', () => {
  it('模拟完整 raw 模式数据流:query → chunks → rerank → payload → ragItems', () => {
    // Step 1: simulated retrieval result.
    const retrievedChunks = [
      { id: 'c1', content: '小红Activize Oxyplus是德国PM的核心产品', score: 0.9, doc_name: '产品手册' },
      { id: 'c2', content: '每天一包,40度以下温水冲服', score: 0.85, doc_name: '使用说明' },
      { id: 'c3', content: '含有瓜拉纳提取物,提供天然能量', score: 0.75, doc_name: '成分表' },
    ];

    // Step 2: simulated rerank (input is already sorted; take the top 3).
    const rerankedChunks = retrievedChunks.slice(0, 3);

    // Step 3: simulated conversation history.
    const history = [
      { role: 'user', content: '你们有什么产品' },
      { role: 'assistant', content: '我们有基础三合一,包括大白小红小白...' },
    ];

    // Step 4: build the payload. The retriever is required inside the test, so
    // the module is only loaded when this case runs.
    const kbRetriever = require('../services/kbRetriever');
    const ragPayload = kbRetriever.buildRagPayload(rerankedChunks, history);

    // Payload structure: one context entry followed by one entry per chunk,
    // each titled with the chunk's doc_name.
    assert.equal(ragPayload.length, 4, 'Should be context(1) + chunks(3)');
    assert.equal(ragPayload[0].title, '对话上下文');
    assert.ok(ragPayload[0].content.includes('你们有什么产品'));
    assert.equal(ragPayload[1].title, '产品手册');
    assert.equal(ragPayload[2].title, '使用说明');
    assert.equal(ragPayload[3].title, '成分表');

    // Step 5: simulated toolResult.
    const toolResult = {
      results: ragPayload.map((item) => ({ title: item.title, content: item.content })),
      hit: true,
      retrieval_mode: 'raw',
    };

    // Step 6: simulated ragItems construction (as done in resolveReply —
    // NOTE(review): inline copy, confirm it still matches the real code).
    const ragItems = toolResult.results
      .filter((entry) => entry && entry.content)
      .map((entry) => ({ title: entry.title || '知识库结果', content: entry.content }));

    const isRawMode = toolResult.retrieval_mode === 'raw';
    const finalRagItems = isRawMode
      ? ragItems
      : [{ title: '知识库结果', content: ragItems.map((entry) => entry.content).join(' ') }];

    assert.equal(finalRagItems.length, 4, 'Raw mode: 4 items pass-through');

    // Step 7: simulated subtitle filtering, including the same
    // "fall back to all items" rule used by the subtitle-filter suite.
    const subtitleItems = finalRagItems.filter((item) => item.title !== '对话上下文');
    const subtitle = (subtitleItems.length > 0 ? subtitleItems : finalRagItems)
      .map((item) => item.content)
      .join(' ');
    assert.ok(!subtitle.includes('你们有什么产品'), 'Subtitle should exclude context');
    assert.ok(subtitle.includes('小红Activize'), 'Subtitle should include chunk content');

    // Step 8: simulated JSON serialisation as done for sendExternalRag.
    const jsonStr = JSON.stringify(finalRagItems);
    const parsed = JSON.parse(jsonStr);
    assert.equal(parsed.length, 4, 'JSON roundtrip should preserve all items');
  });

  it('模拟完整 answer 模式数据流(对比验证)', () => {
    // No retrieval_mode field: this is the answer-mode shape.
    const toolResult = {
      results: [{ title: '方舟知识库检索结果', content: '根据知识库信息,小红Activize每天一包冲服' }],
      hit: true,
    };

    const ragItems = toolResult.results
      .filter((entry) => entry && entry.content)
      .map((entry) => ({ title: entry.title || '知识库结果', content: entry.content }));

    // The comparison already yields a boolean; no extra coercion is needed.
    const isRawMode = toolResult.retrieval_mode === 'raw';
    assert.equal(isRawMode, false, 'Should be answer mode');

    const speechText = normalizeTextForSpeech(ragItems[0].content);
    // Drop a leading "根据知识库信息" / "根据…," style preamble.
    const cleanedText = speechText.replace(/^(根据知识库信息[,,::\s]*|根据.*?[,,]\s*)/i, '');
    // Fall back to the unstripped text if stripping leaves an empty string.
    const finalRagItems = [{ title: '知识库结果', content: cleanedText || speechText }];

    assert.equal(finalRagItems.length, 1, 'Answer mode: single merged item');
    assert.ok(!finalRagItems[0].content.startsWith('根据知识库'), 'Prefix should be stripped');
  });
});
|
|||
|
|
|
|||
|
|
// Emitted once when this file finishes evaluating, i.e. after the suites above
// have been registered. NOTE(review): it does not indicate that the tests passed.
console.log('\n=== raw模式集成测试加载完成 ===\n');
|