feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导

- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异

- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写

- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery

- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优

- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式

- app.js: 健康检查新增 redis/reranker/kbRetrievalMode

- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
This commit is contained in:
User
2026-03-26 14:30:32 +08:00
parent 9567eb7358
commit 56940676f6
15 changed files with 2096 additions and 170 deletions

View File

@@ -0,0 +1,92 @@
const { Client } = require('ssh2');
// SSH connection settings for the deployment target.
// SECURITY: the root password used to be hard-coded on this line, so it is present
// in the repository history and must be rotated. The password is now taken from
// DEPLOY_SSH_PASSWORD; host, port and username keep their previous values as
// defaults and can be overridden through DEPLOY_SSH_HOST / DEPLOY_SSH_PORT / DEPLOY_SSH_USER.
const SSH_CONFIG = {
  host: process.env.DEPLOY_SSH_HOST || '119.45.10.34',
  port: Number.parseInt(process.env.DEPLOY_SSH_PORT, 10) || 22,
  username: process.env.DEPLOY_SSH_USER || 'root',
  password: process.env.DEPLOY_SSH_PASSWORD,
};
/**
 * Runs a command over an established SSH connection and collects its output.
 *
 * stdout and stderr chunks are appended to one string in arrival order.
 *
 * @param {object} conn - Connected client exposing `exec(cmd, callback)`.
 * @param {string} cmd - Shell command to run remotely.
 * @returns {Promise<string>} Combined stdout + stderr, resolved when the stream closes.
 *   Rejects only when `exec` itself reports an error.
 */
function sshExec(conn, cmd) {
  return new Promise((resolve, reject) => {
    const onStream = (err, stream) => {
      if (err) {
        reject(err);
        return;
      }
      let output = '';
      const append = (chunk) => {
        output += chunk;
      };
      stream.on('data', append);
      stream.stderr.on('data', append);
      stream.on('close', () => {
        resolve(output);
      });
    };
    conn.exec(cmd, onStream);
  });
}
const fs = require('fs');
const path = require('path');
/**
 * Uploads one local file to the remote host over SFTP.
 *
 * @param {object} conn - Connected SSH client exposing `sftp(callback)`.
 * @param {string} localPath - Path of the file to read locally.
 * @param {string} remotePath - Destination path on the remote host.
 * @returns {Promise<void>} Resolves when the remote write stream has closed; rejects on
 *   an SFTP session error, a local read error or a remote write error.
 */
function sftpUpload(conn, localPath, remotePath) {
  return new Promise((resolve, reject) => {
    conn.sftp((err, sftp) => {
      if (err) return reject(err);
      const rs = fs.createReadStream(localPath);
      const ws = sftp.createWriteStream(remotePath);
      let settled = false;
      // Settle exactly once and always release the SFTP channel opened for this upload.
      const finish = (error) => {
        if (settled) return;
        settled = true;
        sftp.end();
        if (error) reject(error);
        else resolve();
      };
      // pipe() does not forward source errors: without this handler a missing or
      // unreadable local file raised an unhandled 'error' event instead of rejecting.
      rs.on('error', (e) => {
        finish(e);
        ws.destroy();
      });
      ws.on('error', (e) => {
        finish(e);
        rs.destroy();
      });
      ws.on('close', () => finish());
      rs.pipe(ws);
    });
  });
}
// Server directory on the remote host; upload targets and remote commands are built from it.
const REMOTE_BASE = '/www/wwwroot/demo.tensorgrove.com.cn/server';
// Local root the uploaded files are read from: the parent directory of this script's folder.
const LOCAL_BASE = path.join(__dirname, '..');
/**
 * Deploys the updated service files to the remote host and restarts the server.
 *
 * Steps: open the SSH connection, upload each file over SFTP and syntax-check it
 * remotely with `node -c`, delete the `kb_cache:*` keys in Redis, truncate the
 * server log files, restart `bigwo-server` through PM2, then print the PM2 status,
 * the error log and the health endpoint response.
 *
 * @returns {Promise<void>} Rejects when the connection or any remote step fails.
 */
async function main() {
  const conn = new Client();
  await new Promise((resolve, reject) => {
    conn.on('ready', resolve).on('error', reject).connect(SSH_CONFIG);
  });
  try {
    // 1. Upload files
    const files = [
      { name: 'toolExecutor.js', sub: 'services' },
      { name: 'kbRetriever.js', sub: 'services' },
    ];
    console.log('=== 上传 ===');
    for (const f of files) {
      const localFile = path.join(LOCAL_BASE, f.sub, f.name);
      const remoteFile = `${REMOTE_BASE}/${f.sub}/${f.name}`;
      await sftpUpload(conn, localFile, remoteFile);
      console.log(` \u2705 ${f.name}`);
      const sc = await sshExec(conn, `node -c ${remoteFile} 2>&1`);
      // process.exit skips the finally block below, so close the connection explicitly.
      if (sc.includes('SyntaxError')) { console.log('SYNTAX ERROR!'); conn.end(); process.exit(1); }
    }

    // 2. Flush the Redis KB cache
    // ioredis does not apply `keyPrefix` to KEYS patterns, and it does apply it to
    // DEL arguments, so the earlier keyPrefix + KEYS 'kb_cache:*' combination never
    // matched the prefixed cache keys. The remote script therefore connects without
    // keyPrefix and works on full key names. It runs inside a double-quoted shell
    // argument: keep it free of double quotes, `$`, backticks and backslashes.
    const flushScript = [
      "require('dotenv').config();",
      "const Redis = require('ioredis');",
      "const prefix = process.env.REDIS_KEY_PREFIX || 'bigwo:';",
      "const r = new Redis(process.env.REDIS_URL || 'redis://127.0.0.1:6379', {",
      'password: process.env.REDIS_PASSWORD || undefined,',
      'db: parseInt(process.env.REDIS_DB, 10) || 0,',
      'lazyConnect: true, maxRetriesPerRequest: 2, connectTimeout: 5000,',
      '});',
      'r.connect().then(async () => {',
      "const keys = await r.keys(prefix + 'kb_cache:*');",
      "if (keys.length > 0) { await r.del(...keys); console.log('Deleted ' + keys.length + ' keys'); }",
      "else { console.log('No keys'); }",
      'r.quit(); process.exit(0);',
      "}).catch(e => { console.log('Redis: ' + e.message); process.exit(0); });",
    ].join('\n');
    console.log('\n=== 刷 Redis KB 缓存 ===');
    console.log(await sshExec(conn, `cd ${REMOTE_BASE} && node -e "\n${flushScript}\n" 2>&1`));

    // 3. Restart: truncate the logs first so the error log printed below only
    // contains output from the new process.
    await sshExec(conn, '> /var/log/bigwo/server-error.log && > /var/log/bigwo/server-out.log');
    await sshExec(conn, 'pm2 stop bigwo-server');
    await new Promise(r => setTimeout(r, 1000));
    await sshExec(conn, `cd ${REMOTE_BASE} && pm2 start bigwo-server --update-env`);
    console.log('\n=== PM2 重启等待5s ===');
    await new Promise(r => setTimeout(r, 5000));
    console.log(await sshExec(conn, 'pm2 status bigwo-server'));
    const errLog = await sshExec(conn, 'cat /var/log/bigwo/server-error.log');
    console.log('=== 错误 ===');
    console.log(errLog || '(空 ✅)');
    console.log('\n=== 健康 ===');
    console.log(await sshExec(conn, 'curl -s --max-time 5 http://localhost:3012/api/health 2>&1'));
  } finally {
    // Close the SSH connection even when an upload or remote command throws.
    conn.end();
  }
}
// Entry point: run the deployment; on rejection print the error message and exit with status 1.
main().catch(e => { console.error('FAILED:', e.message); process.exit(1); });

View File

@@ -0,0 +1,43 @@
/**
 * A/B test: compares retrieval results for the raw query vs. the alias-expanded query.
 * Calls VikingDB + the reranker directly and compares topScore and hit status.
 */
require('dotenv').config({ path: require('path').join(__dirname, '..', '.env') });
const kbRetriever = require('../services/kbRetriever');
// Query pairs for the A/B run: `raw` is the original query, `alias` is the
// alias-expanded query, and `label` is the row name printed in the result table.
const TEST_QUERIES = [
// Chinese colloquial name → full Chinese product name (semantically close)
{ raw: '牙膏怎么用', alias: '草本护理牙膏 Med Dental+怎么用', label: '牙膏(俗名→全名)' },
{ raw: '喷雾功效', alias: 'IB5 口腔免疫喷雾功效', label: '喷雾(俗名→全名)' },
{ raw: '乳酪怎么喝', alias: '乳酪煲 乳酪饮品怎么喝', label: '乳酪(俗名→全名)' },
// Chinese nickname → English product name (no semantic overlap)
{ raw: '小红怎么吃', alias: '小红产品 Activize Oxyplus怎么吃', label: '小红(昵称→英文名)' },
{ raw: '大白功效', alias: '大白产品 Basics功效', label: '大白(昵称→英文名)' },
{ raw: '小绿排毒', alias: 'D-Drink 小绿 排毒饮排毒', label: '小绿(昵称→英文名)' },
{ raw: '小黑适合谁', alias: 'MEN+ 倍力健 小黑适合谁', label: '小黑(昵称→英文名)' },
// Generic term → specific product
{ raw: '氨基酸', alias: 'ProShape Amino 氨基酸', label: '氨基酸(通用→产品)' },
{ raw: '胶原蛋白', alias: '胶原蛋白肽', label: '胶原蛋白(通用→产品)' },
{ raw: '细胞抗氧素功效', alias: 'Zellschutz 细胞抗氧素功效', label: '细胞抗氧素(中→英)' },
];
/**
 * Runs every TEST_QUERIES pair through kbRetriever.searchAndRerank and prints a
 * Markdown table comparing the raw query with its alias-expanded form.
 *
 * Each retrieval call is followed by a 1.5 s pause. Scores are printed with three
 * decimals; a missing or zero topScore is shown as 0.000.
 *
 * @returns {Promise<void>}
 */
async function runTest() {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const formatScore = (res) => res.topScore?.toFixed(3) || '0.000';
  const hitLabel = (res) => (res.hit ? 'HIT' : 'MISS');

  console.log('=== A/B 测试:原始查询 vs 别名扩展 ===\n');
  console.log('| 场景 | 原始 topScore | 扩展 topScore | 差值 | 原始hit | 扩展hit |');
  console.log('|------|-------------|-------------|------|---------|---------|');

  for (const { raw, alias, label } of TEST_QUERIES) {
    const rawRes = await kbRetriever.searchAndRerank(raw, {});
    await pause(1500);
    const aliasRes = await kbRetriever.searchAndRerank(alias, {});
    await pause(1500);

    const rawScore = formatScore(rawRes);
    const aliasScore = formatScore(aliasRes);
    const delta = ((aliasRes.topScore || 0) - (rawRes.topScore || 0)).toFixed(3);
    // The sign is decided on the rounded value, so a gain that rounds to 0.000 gets no "+".
    const signedDelta = Number(delta) > 0 ? `+${delta}` : delta;
    console.log(`| ${label} | ${rawScore} | ${aliasScore} | ${signedDelta} | ${hitLabel(rawRes)} | ${hitLabel(aliasRes)} |`);
  }

  console.log('\n阈值: reranker hit >= 0.1');
}
// Entry point: run the comparison; on rejection print the error message and exit with status 1.
runTest().catch(e => { console.error('FAILED:', e.message); process.exit(1); });

View File

@@ -0,0 +1,322 @@
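/**
 * Unit tests for kbRetriever.js.
 * Covers: config reading, rerankChunks fallback, buildRagPayload assembly, hit/no-hit decisions.
 * Intended as local tests that do not depend on external APIs.
 *
 * Run with: node --test tests/test_kb_retriever.js
 */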
const { describe, it, beforeEach, afterEach } = require('node:test');
const assert = require('node:assert/strict');
// Test environment variables are set before the module under test is required.
// ENV_BACKUP maps each variable touched through setEnv to the value it had before
// the first override (undefined when it was not set at all).
const ENV_BACKUP = {};

/**
 * Applies environment overrides and remembers the pre-override value of each key.
 *
 * The original value is captured only the first time a key is overridden, so calling
 * setEnv repeatedly for the same key still lets restoreEnv bring back the real value.
 *
 * @param {Record<string, string>} overrides - Variable name → value to set.
 */
function setEnv(overrides) {
  for (const [key, value] of Object.entries(overrides)) {
    if (!Object.hasOwn(ENV_BACKUP, key)) ENV_BACKUP[key] = process.env[key];
    process.env[key] = value;
  }
}

/**
 * Restores every variable changed through setEnv and empties the backup, so the
 * next setEnv call starts from a fresh snapshot.
 */
function restoreEnv() {
  for (const [key, original] of Object.entries(ENV_BACKUP)) {
    // Assigning undefined would store the string "undefined", so unset keys are deleted.
    if (original === undefined) delete process.env[key];
    else process.env[key] = original;
    delete ENV_BACKUP[key];
  }
}
// Baseline environment applied before the module under test is required, so that it loads without errors.
// NOTE(review): these keys go through setEnv, so the restoreEnv() calls in the afterEach
// hooks below roll them back as well — confirm later tests are not expected to still see this baseline.
setEnv({
VOLC_ARK_API_KEY: 'test_key',
VOLC_ARK_ENDPOINT_ID: 'test_endpoint',
VOLC_ARK_KNOWLEDGE_BASE_IDS: 'ds_test1,ds_test2',
VOLC_ARK_RERANKER_ENDPOINT_ID: 'reranker_test',
VOLC_ARK_RERANKER_TOP_N: '3',
VOLC_ARK_KB_RETRIEVAL_TOP_K: '10',
VOLC_ARK_KNOWLEDGE_THRESHOLD: '0.1',
ENABLE_RERANKER: 'true',
ENABLE_REDIS_CONTEXT: 'false', // do not connect to Redis during tests
});
const kbRetriever = require('../services/kbRetriever');
// ================================================================
// 1. getConfig — configuration reading
// ================================================================
describe('kbRetriever.getConfig — 配置读取', () => {
  // Undo whatever the test changed through setEnv.
  afterEach(() => {
    restoreEnv();
  });

  it('应正确读取所有配置项', () => {
    const cfg = kbRetriever.getConfig();
    const expectedScalars = {
      authKey: 'test_key',
      rerankerTopN: 3,
      retrievalTopK: 10,
      enableReranker: true,
      enableRedisContext: false,
    };
    for (const [field, expected] of Object.entries(expectedScalars)) {
      assert.equal(cfg[field], expected);
    }
    for (const datasetId of ['ds_test1', 'ds_test2']) {
      assert.ok(cfg.kbIds.includes(datasetId));
    }
  });

  it('ENABLE_RERANKER=false 应正确关闭', () => {
    setEnv({ ENABLE_RERANKER: 'false' });
    const { enableReranker } = kbRetriever.getConfig();
    assert.equal(enableReranker, false);
  });

  it('无 RERANKER_MODEL 时应默认为 doubao-seed-rerank', () => {
    setEnv({ VOLC_ARK_RERANKER_MODEL: '', VOLC_ARK_RERANKER_ENDPOINT_ID: '' });
    const { rerankerModel } = kbRetriever.getConfig();
    assert.equal(rerankerModel, 'doubao-seed-rerank');
  });

  it('retrievalMode 默认应为 raw', () => {
    // The mode is set explicitly here; the empty-value case is covered by the next test.
    setEnv({ VOLC_ARK_KB_RETRIEVAL_MODE: 'raw' });
    const { retrievalMode } = kbRetriever.getConfig();
    assert.equal(retrievalMode, 'raw');
  });

  it('retrievalMode 为空时默认 raw', () => {
    setEnv({ VOLC_ARK_KB_RETRIEVAL_MODE: '' });
    const { retrievalMode } = kbRetriever.getConfig();
    // An empty string is falsy; per the original author's note the module reads
    // `process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'raw'`, so it falls back to 'raw'.
    assert.equal(retrievalMode, 'raw');
  });

  it('dataset_ids 分割应正确处理空格和逗号', () => {
    setEnv({ VOLC_ARK_KNOWLEDGE_BASE_IDS: ' ds_a , ds_b , ds_c ' });
    const { kbIds } = kbRetriever.getConfig();
    assert.deepEqual(kbIds, ['ds_a', 'ds_b', 'ds_c']);
  });
});
// ================================================================
// 2. rerankChunks — rerank fallback logic
// ================================================================
describe('kbRetriever.rerankChunks — 降级与边界', () => {
  // Undo whatever the test changed through setEnv.
  afterEach(() => {
    restoreEnv();
  });

  // Builds chunks with ids '1'..'n' from [content, score] pairs.
  const numberedChunks = (pairs) =>
    pairs.map(([content, score], index) => ({ id: `${index + 1}`, content, score }));

  it('空 chunks 应返回空数组', async () => {
    const reranked = await kbRetriever.rerankChunks('测试', [], 3);
    assert.deepEqual(reranked, []);
  });

  it('chunks 数量 <= topN 时应直接返回全部', async () => {
    const input = numberedChunks([['片段1', 0.9], ['片段2', 0.8]]);
    const reranked = await kbRetriever.rerankChunks('测试', input, 3);
    assert.equal(reranked.length, 2, 'Should return all chunks when count <= topN');
    assert.equal(reranked[0].content, '片段1');
  });

  it('ENABLE_RERANKER=false 时应返回前 topN 条(按原序)', async () => {
    setEnv({ ENABLE_RERANKER: 'false' });
    const input = numberedChunks([['A', 0.9], ['B', 0.8], ['C', 0.7], ['D', 0.6], ['E', 0.5]]);
    const reranked = await kbRetriever.rerankChunks('测试', input, 3);
    assert.equal(reranked.length, 3);
    ['A', 'B', 'C'].forEach((expected, position) => {
      assert.equal(reranked[position].content, expected);
    });
  });

  it('无 RERANKER_ENDPOINT_ID 时应降级为按检索排序取 topN', async () => {
    setEnv({ VOLC_ARK_RERANKER_ENDPOINT_ID: '' });
    const input = [];
    for (let i = 0; i < 8; i += 1) {
      input.push({ id: `c${i}`, content: `内容${i}`, score: 1 - i * 0.1 });
    }
    const reranked = await kbRetriever.rerankChunks('测试', input, 3);
    assert.equal(reranked.length, 3);
    assert.equal(reranked[0].content, '内容0', 'First chunk should be highest score');
  });

  it('reranker API 超时/失败时应降级返回前 topN', async () => {
    // Point at an endpoint that does not exist so the API call fails.
    setEnv({ ENABLE_RERANKER: 'true', VOLC_ARK_RERANKER_ENDPOINT_ID: 'invalid_endpoint' });
    const input = numberedChunks([['片段1', 0.9], ['片段2', 0.8], ['片段3', 0.7], ['片段4', 0.6]]);
    const reranked = await kbRetriever.rerankChunks('测试', input, 3);
    assert.equal(reranked.length, 3, 'Should fallback to top 3');
    assert.equal(reranked[0].content, '片段1');
  });
});
// ================================================================
// 3. buildRagPayload — RAG payload assembly
// ================================================================
describe('kbRetriever.buildRagPayload — payload 组装', () => {
  it('无上下文时应只包含 KB 片段', () => {
    const payload = kbRetriever.buildRagPayload(
      [
        { content: '片段A', doc_name: '产品手册' },
        { content: '片段B', doc_name: 'FAQ' },
      ],
      [],
    );
    assert.equal(payload.length, 2, 'Should have 2 items (chunks only)');
    const [first, second] = payload;
    assert.equal(first.title, '产品手册');
    assert.equal(first.content, '片段A');
    assert.equal(second.title, 'FAQ');
  });

  it('有上下文时应在片段前注入上下文条目', () => {
    const history = [
      { role: 'user', content: '小红怎么吃' },
      { role: 'assistant', content: '小红每天一包...' },
    ];
    const payload = kbRetriever.buildRagPayload([{ content: '片段A', doc_name: '' }], history);
    assert.equal(payload.length, 2, 'Should have context + 1 chunk');
    const [context, chunk] = payload;
    assert.equal(context.title, '对话上下文');
    assert.ok(context.content.includes('用户: 小红怎么吃'), 'Context should include user message');
    assert.ok(context.content.includes('助手: 小红每天一包'), 'Context should include assistant message');
    assert.equal(chunk.content, '片段A');
  });

  it('无 doc_name 的片段应使用默认标题"知识库片段N"', () => {
    const untitled = [1, 2, 3].map((n) => ({ content: `内容${n}`, doc_name: '' }));
    const payload = kbRetriever.buildRagPayload(untitled, []);
    for (let n = 1; n <= 3; n += 1) {
      assert.equal(payload[n - 1].title, `知识库片段${n}`);
    }
  });

  it('空 chunks 应返回空数组(无上下文时)', () => {
    assert.equal(kbRetriever.buildRagPayload([], []).length, 0);
  });

  it('空 chunks + 有上下文 应只返回上下文条目', () => {
    const payload = kbRetriever.buildRagPayload([], [{ role: 'user', content: '你好' }]);
    assert.equal(payload.length, 1);
    assert.equal(payload[0].title, '对话上下文');
  });

  it('5轮对话上下文应完整保留', () => {
    const rounds = [1, 2, 3, 4, 5];
    const history = rounds.flatMap((n) => [
      { role: 'user', content: `问题${n}` },
      { role: 'assistant', content: `回答${n}` },
    ]);
    const payload = kbRetriever.buildRagPayload([{ content: '片段', doc_name: '' }], history);
    const contextContent = payload[0].content;
    for (const i of rounds) {
      assert.ok(contextContent.includes(`问题${i}`), `Should include question ${i}`);
      assert.ok(contextContent.includes(`回答${i}`), `Should include answer ${i}`);
    }
  });
});
// ================================================================
// 4. searchAndRerank — main-flow boundary cases
// ================================================================
describe('kbRetriever.searchAndRerank — 主流程边界', () => {
  // Undo whatever the test changed through setEnv.
  afterEach(() => restoreEnv());

  it('endpoint 未配置时应返回 hit=false + error', async () => {
    setEnv({ VOLC_ARK_ENDPOINT_ID: 'your_ark_endpoint_id', VOLC_ARK_KNOWLEDGE_ENDPOINT_ID: '' });
    const result = await kbRetriever.searchAndRerank('测试');
    assert.equal(result.hit, false);
    assert.ok(result.reason, 'Should have reason');
    assert.equal(result.source, 'ark_knowledge');
  });

  it('无 dataset_ids 时应返回 hit=false', async () => {
    setEnv({ VOLC_ARK_KNOWLEDGE_BASE_IDS: '' });
    const result = await kbRetriever.searchAndRerank('测试');
    assert.equal(result.hit, false);
  });

  it('返回结构应包含所有必需字段(或抛出可捕获的异常)', async () => {
    // Fake endpoint: the API call is expected to fail.
    setEnv({
      VOLC_ARK_ENDPOINT_ID: 'ep_test',
      VOLC_ARK_KNOWLEDGE_ENDPOINT_ID: 'ep_test',
      VOLC_ARK_KNOWLEDGE_BASE_IDS: 'ds_test',
    });

    // Only the call itself sits inside the try block. The structural assertions used
    // to live there too, so a failing assertion was caught below, passed the
    // `instanceof Error` check, and the test could never fail.
    let result;
    try {
      result = await kbRetriever.searchAndRerank('测试查询');
    } catch (err) {
      // A thrown error is also acceptable (the original note says the upper-level
      // toolExecutor catches it).
      assert.ok(err instanceof Error, 'Should throw an Error instance');
      console.log(` searchAndRerank threw as expected: ${err.message.slice(0, 80)}`);
      return;
    }

    // A result came back instead of a throw: verify its structure.
    for (const field of ['hit', 'reason', 'source', 'latencyMs']) {
      assert.ok(field in result, `Should have ${field} field`);
    }
    assert.equal(result.source, 'ark_knowledge');
  });
});
// ================================================================
// 5. hit / no-hit decision logic
// ================================================================
describe('hit/no-hit 判定 — 基于 reranker score', () => {
  // Undo whatever the test changed through setEnv.
  afterEach(() => {
    restoreEnv();
  });

  // Threshold rule as written in this test file: 0.3 when the reranker is enabled
  // and a model is configured, otherwise 0.5.
  const thresholdFor = (cfg) => (cfg.enableReranker && cfg.rerankerModel ? 0.3 : 0.5);

  it('buildRagPayload 有片段 + score > 0.3 应判为 hit通过 searchAndRerank 返回值验证)', () => {
    // Checks the decision rule directly on a literal chunk.
    const [topChunk] = [{ content: '有效内容', score: 0.8, doc_name: '' }];
    const payload = kbRetriever.buildRagPayload([topChunk], []);
    assert.ok(payload.length > 0, 'High score chunks should produce payload');
    assert.ok(topChunk.score >= 0.3, 'Score 0.8 >= 0.3 should be hit');
  });

  it('score < 0.3 的片段应判为 no-hit', () => {
    const weakChunk = { content: '弱相关内容', score: 0.1, doc_name: '' };
    assert.ok(weakChunk.score < 0.3, 'Score 0.1 < 0.3 should be no-hit');
  });

  it('无重排器时 hitThreshold 应为 0.5', () => {
    setEnv({ ENABLE_RERANKER: 'false' });
    // Without the reranker the threshold is 0.5, so a score of 0.4 would not be a hit.
    const hitThreshold = thresholdFor(kbRetriever.getConfig());
    assert.equal(hitThreshold, 0.5, 'Without reranker, threshold should be 0.5');
  });

  it('有重排器时 hitThreshold 应为 0.3', () => {
    setEnv({ ENABLE_RERANKER: 'true', VOLC_ARK_RERANKER_MODEL: 'doubao-seed-rerank' });
    const hitThreshold = thresholdFor(kbRetriever.getConfig());
    assert.equal(hitThreshold, 0.3, 'With reranker, threshold should be 0.3');
  });
});
// ================================================================
// 6. retrieveChunks — boundary cases
// ================================================================
describe('retrieveChunks — 边界', () => {
  // Undo whatever the test changed through setEnv.
  afterEach(() => {
    restoreEnv();
  });

  it('endpoint 未配置时应返回 error', async () => {
    setEnv({ VOLC_ARK_ENDPOINT_ID: 'your_ark_endpoint_id', VOLC_ARK_KNOWLEDGE_ENDPOINT_ID: '' });
    const { error, chunks } = await kbRetriever.retrieveChunks('测试', ['ds1'], 5, 0.1);
    assert.equal(error, 'endpoint_not_configured');
    assert.equal(chunks.length, 0);
  });

  it('无 datasetIds 且环境变量也为空时应返回 error', async () => {
    setEnv({
      VOLC_ARK_KNOWLEDGE_BASE_IDS: '',
      VOLC_ARK_ENDPOINT_ID: 'ep_valid',
      VOLC_ARK_KNOWLEDGE_ENDPOINT_ID: 'ep_valid',
    });
    const outcome = await kbRetriever.retrieveChunks('测试', [], 5, 0.1);
    assert.equal(outcome.error, 'no_dataset_ids');
  });
});
// Printed when this file has finished loading, i.e. once the suites above are registered.
console.log('\n=== kbRetriever 测试加载完成 ===\n');

View File

@@ -0,0 +1,331 @@
/**
 * Integration tests for raw mode.
 * Covers: resolveReply raw/answer mode switching, ragItems format, subtitle filtering, cache double-write.
 *
 * Run with: node --test tests/test_raw_mode_integration.js
 */
const { describe, it, beforeEach, afterEach } = require('node:test');
const assert = require('node:assert/strict');
const path = require('path');
const fs = require('fs');
// Load ../.env by hand: KEY=VALUE lines, `#` comments and blank lines skipped,
// one pair of matching surrounding quotes removed, and variables that already
// have a non-empty value in process.env left untouched.
const envPath = path.join(__dirname, '../.env');
if (fs.existsSync(envPath)) {
  for (const rawLine of fs.readFileSync(envPath, 'utf8').split('\n')) {
    const entry = rawLine.trim();
    if (entry === '' || entry.startsWith('#')) continue;
    const eq = entry.indexOf('=');
    // Lines without '=' or with an empty name are ignored.
    if (eq <= 0) continue;
    const name = entry.slice(0, eq).trim();
    const rest = entry.slice(eq + 1).trim();
    const isQuoted = ['"', "'"].some((quote) => rest.startsWith(quote) && rest.endsWith(quote));
    const value = isQuoted ? rest.slice(1, -1) : rest;
    if (!process.env[name]) process.env[name] = value;
  }
}
const { normalizeTextForSpeech } = require('../services/realtimeDialogRouting');
// ================================================================
// 1. ragItems format — raw mode vs answer mode
// ================================================================
describe('ragItems 格式 — raw 模式 vs answer 模式', () => {
  it('raw 模式 toolResult 应包含 retrieval_mode 标记', () => {
    const results = [
      { title: '对话上下文', content: '用户: 你好\n助手: 你好!' },
      { title: '产品手册', content: '小红Activize Oxyplus...' },
      { title: 'FAQ', content: '小红每天一包...' },
      { title: '知识库片段3', content: '用40度以下温水冲服...' },
    ];
    const rawToolResult = {
      query: '小红怎么吃',
      results,
      total: 4,
      source: 'ark_knowledge',
      hit: true,
      reason: 'reranked_hit',
      retrieval_mode: 'raw',
      top_score: 0.85,
      chunks_count: 3,
    };
    assert.equal(rawToolResult.retrieval_mode, 'raw');
    assert.equal(rawToolResult.results.length, 4, 'Should have context + 3 chunks');
    assert.equal(rawToolResult.results[0].title, '对话上下文', 'First item should be context');
  });

  it('answer 模式 toolResult 不应包含 retrieval_mode 或为 undefined', () => {
    const answerToolResult = {
      query: '小红怎么吃',
      results: [{ title: '方舟知识库检索结果', content: '根据知识库信息小红Activize每天一包...' }],
      total: 1,
      source: 'ark_knowledge',
      hit: true,
      reason: 'classified_hit',
    };
    const mode = answerToolResult.retrieval_mode;
    assert.ok(!(mode && mode === 'raw'));
  });

  it('raw 模式下 ragItems 应透传多条而非合并为单条', () => {
    const rawResults = [
      { title: '对话上下文', content: '用户: 大白怎么吃\n助手: ...' },
      { title: '产品手册', content: '片段1内容' },
      { title: 'FAQ', content: '片段2内容' },
    ];
    const isRawMode = true;
    // Raw mode passes the items through; answer mode would merge them into one item.
    const finalRagItems = isRawMode
      ? rawResults
      : [{ title: '知识库结果', content: rawResults.map((r) => r.content).join(' ') }];
    assert.equal(finalRagItems.length, 3, 'Raw mode should keep 3 items');
    const [contextItem, manualItem] = finalRagItems;
    assert.equal(contextItem.title, '对话上下文');
    assert.equal(manualItem.title, '产品手册');
  });

  it('answer 模式下 ragItems 应合并为单条并清理前缀', () => {
    const replyText = '根据知识库信息小红Activize每天一包冲服';
    const isRawMode = false;
    const buildAnswerItems = (text) => {
      const speechText = normalizeTextForSpeech(text);
      const cleanedText = speechText.replace(/^(根据知识库信息[,:\s]*|根据.*?[,]\s*)/i, '');
      return [{ title: '知识库结果', content: cleanedText || speechText }];
    };
    const finalRagItems = isRawMode ? undefined : buildAnswerItems(replyText);
    assert.equal(finalRagItems.length, 1, 'Answer mode should merge into 1 item');
    const [{ content }] = finalRagItems;
    assert.ok(!content.startsWith('根据知识库信息'), 'Should strip prefix');
    assert.ok(content.includes('小红Activize'), 'Should preserve core content');
  });
});
// ================================================================
// 2. Subtitle filtering — raw mode drops the "对话上下文" item
// ================================================================
describe('字幕过滤 — raw 模式排除对话上下文', () => {
  // Joins the content of all non-context items; falls back to every item when
  // nothing but context is present.
  const toSubtitleText = (ragContent) => {
    const withoutContext = ragContent.filter((item) => item.title !== '对话上下文');
    const shown = withoutContext.length > 0 ? withoutContext : ragContent;
    return shown.map((item) => item.content).join(' ');
  };

  it('raw 模式的 ragContent 过滤后字幕不应包含上下文', () => {
    const ragSubtitleText = toSubtitleText([
      { title: '对话上下文', content: '用户: 你好\n助手: 你好' },
      { title: '产品手册', content: '小红每天一包' },
      { title: 'FAQ', content: '40度以下温水' },
    ]);
    assert.ok(!ragSubtitleText.includes('用户: 你好'), 'Subtitle should not include context');
    assert.ok(ragSubtitleText.includes('小红每天一包'), 'Subtitle should include KB content');
    assert.ok(ragSubtitleText.includes('40度以下温水'), 'Subtitle should include all KB chunks');
  });

  it('answer 模式无"对话上下文"条目,字幕应正常', () => {
    const ragSubtitleText = toSubtitleText([
      { title: '知识库结果', content: '小红Activize每天一包冲服' },
    ]);
    assert.equal(ragSubtitleText, '小红Activize每天一包冲服');
  });

  it('全部都是上下文条目时应降级显示全部(兜底)', () => {
    const ragSubtitleText = toSubtitleText([
      { title: '对话上下文', content: '用户: 测试' },
    ]);
    assert.equal(ragSubtitleText, '用户: 测试', 'Should fallback to showing context');
  });
});
// ================================================================
// 3. Mode switching through environment variables
// ================================================================
describe('环境变量模式切换', () => {
  const TRACKED_VARS = ['VOLC_ARK_KB_RETRIEVAL_MODE', 'ENABLE_RERANKER', 'ENABLE_REDIS_CONTEXT'];
  const snapshot = new Map();

  // Remember the three switches before each test and put them back afterwards.
  beforeEach(() => {
    for (const name of TRACKED_VARS) {
      snapshot.set(name, process.env[name]);
    }
  });
  afterEach(() => {
    for (const [name, value] of snapshot) {
      if (value === undefined) delete process.env[name];
      else process.env[name] = value;
    }
  });

  // Same fallback expression the tests check: unset or empty means 'answer'.
  const currentMode = () => process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer';

  it('VOLC_ARK_KB_RETRIEVAL_MODE=raw 应使用新链路', () => {
    process.env.VOLC_ARK_KB_RETRIEVAL_MODE = 'raw';
    assert.equal(currentMode(), 'raw');
  });

  it('VOLC_ARK_KB_RETRIEVAL_MODE=answer 应使用旧链路', () => {
    process.env.VOLC_ARK_KB_RETRIEVAL_MODE = 'answer';
    assert.equal(currentMode(), 'answer');
  });

  it('VOLC_ARK_KB_RETRIEVAL_MODE 未设置时默认 answer', () => {
    delete process.env.VOLC_ARK_KB_RETRIEVAL_MODE;
    assert.equal(currentMode(), 'answer');
  });

  it('ENABLE_RERANKER=false 应关闭重排', () => {
    process.env.ENABLE_RERANKER = 'false';
    const rerankerEnabled = process.env.ENABLE_RERANKER !== 'false';
    assert.equal(rerankerEnabled, false);
  });

  it('ENABLE_REDIS_CONTEXT=false 应关闭 Redis 上下文', () => {
    process.env.ENABLE_REDIS_CONTEXT = 'false';
    const redisContextEnabled = process.env.ENABLE_REDIS_CONTEXT !== 'false';
    assert.equal(redisContextEnabled, false);
  });

  it('所有降级开关独立,互不影响', () => {
    Object.assign(process.env, {
      VOLC_ARK_KB_RETRIEVAL_MODE: 'raw',
      ENABLE_RERANKER: 'false',
      ENABLE_REDIS_CONTEXT: 'true',
    });
    assert.equal(process.env.VOLC_ARK_KB_RETRIEVAL_MODE, 'raw', 'mode should be raw');
    assert.equal(process.env.ENABLE_RERANKER, 'false', 'reranker should be off');
    assert.equal(process.env.ENABLE_REDIS_CONTEXT, 'true', 'redis context should be on');
  });
});
// ================================================================
// 4. normalizeTextForSpeech applied to raw chunk content
// ================================================================
describe('normalizeTextForSpeech — raw 片段处理', () => {
  it('应去除 Markdown 标记', () => {
    const spoken = normalizeTextForSpeech('## 产品介绍\n**小红Activize** 是一款_细胞营养素_');
    const forbidden = [
      ['##', 'Should remove heading marks'],
      ['**', 'Should remove bold marks'],
      ['_细胞', 'Should remove italic marks'],
    ];
    for (const [marker, message] of forbidden) {
      assert.ok(!spoken.includes(marker), message);
    }
    assert.ok(spoken.includes('小红Activize'), 'Should preserve content');
  });

  it('纯文本内容不应被破坏', () => {
    const plain = '小红Activize Oxyplus每天一包用40度以下温水冲服搅拌均匀后饮用。';
    assert.equal(normalizeTextForSpeech(plain), plain, 'Plain text should not be modified');
  });

  it('空字符串应返回空', () => {
    for (const emptyInput of ['', null, undefined]) {
      assert.equal(normalizeTextForSpeech(emptyInput), '');
    }
  });
});
// ================================================================
// 5. End-to-end data flow check
// ================================================================
describe('端到端数据流 — raw 模式完整 ragPayload → S2S', () => {
  // Mirrors the ragItems construction simulated here: drop empty items, default the title.
  const toRagItems = (toolResult) =>
    toolResult.results
      .filter((entry) => entry && entry.content)
      .map((entry) => ({ title: entry.title || '知识库结果', content: entry.content }));

  it('模拟完整 raw 模式数据流query → chunks → rerank → payload → ragItems', () => {
    // Step 1: simulated retrieval result
    const retrievedChunks = [
      { id: 'c1', content: '小红Activize Oxyplus是德国PM的核心产品', score: 0.9, doc_name: '产品手册' },
      { id: 'c2', content: '每天一包40度以下温水冲服', score: 0.85, doc_name: '使用说明' },
      { id: 'c3', content: '含有瓜拉纳提取物,提供天然能量', score: 0.75, doc_name: '成分表' },
    ];
    // Step 2: simulated rerank (already sorted, take the top 3)
    const rerankedChunks = retrievedChunks.slice(0, 3);
    // Step 3: simulated history
    const history = [
      { role: 'user', content: '你们有什么产品' },
      { role: 'assistant', content: '我们有基础三合一,包括大白小红小白...' },
    ];
    // Step 4: build the payload
    const kbRetriever = require('../services/kbRetriever');
    const ragPayload = kbRetriever.buildRagPayload(rerankedChunks, history);

    // Payload structure: one context item followed by the three chunks.
    assert.equal(ragPayload.length, 4, 'Should be context(1) + chunks(3)');
    assert.equal(ragPayload[0].title, '对话上下文');
    assert.ok(ragPayload[0].content.includes('你们有什么产品'));
    ['产品手册', '使用说明', '成分表'].forEach((expectedTitle, offset) => {
      assert.equal(ragPayload[offset + 1].title, expectedTitle);
    });

    // Step 5: simulated toolResult
    const toolResult = {
      results: ragPayload.map(({ title, content }) => ({ title, content })),
      hit: true,
      retrieval_mode: 'raw',
    };
    // Step 6: simulated ragItems construction in resolveReply
    const ragItems = toRagItems(toolResult);
    const isRawMode = toolResult.retrieval_mode === 'raw';
    let finalRagItems;
    if (isRawMode) {
      finalRagItems = ragItems;
    } else {
      finalRagItems = [{ title: '知识库结果', content: ragItems.map((i) => i.content).join(' ') }];
    }
    assert.equal(finalRagItems.length, 4, 'Raw mode: 4 items pass-through');

    // Step 7: simulated subtitle filtering
    const subtitle = finalRagItems
      .filter((item) => item.title !== '对话上下文')
      .map((item) => item.content)
      .join(' ');
    assert.ok(!subtitle.includes('你们有什么产品'), 'Subtitle should exclude context');
    assert.ok(subtitle.includes('小红Activize'), 'Subtitle should include chunk content');

    // Step 8: simulated JSON serialisation done by sendExternalRag
    const parsed = JSON.parse(JSON.stringify(finalRagItems));
    assert.equal(parsed.length, 4, 'JSON roundtrip should preserve all items');
  });

  it('模拟完整 answer 模式数据流(对比验证)', () => {
    const toolResult = {
      results: [{ title: '方舟知识库检索结果', content: '根据知识库信息小红Activize每天一包冲服' }],
      hit: true,
    };
    const ragItems = toRagItems(toolResult);
    const isRawMode = toolResult.retrieval_mode === 'raw';
    assert.equal(isRawMode, false, 'Should be answer mode');

    const speechText = normalizeTextForSpeech(ragItems[0].content);
    const cleanedText = speechText.replace(/^(根据知识库信息[,:\s]*|根据.*?[,]\s*)/i, '');
    const finalRagItems = [{ title: '知识库结果', content: cleanedText || speechText }];
    assert.equal(finalRagItems.length, 1, 'Answer mode: single merged item');
    assert.ok(!finalRagItems[0].content.startsWith('根据知识库'), 'Prefix should be stripped');
  });
});
// Printed when this file has finished loading, i.e. once the suites above are registered.
console.log('\n=== raw模式集成测试加载完成 ===\n');

View File

@@ -0,0 +1,294 @@
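/**
 * Unit tests for redisClient.js.
 * Covers: connection state, conversation-history read/write, KB cache read/write, degraded behaviour, TTL/LTRIM logic.
 *
 * Run with: node --test tests/test_redis_client.js
 * Note: expects a local Redis at redis://127.0.0.1:6379; when it is unavailable, the Redis-dependent tests skip themselves and still pass.
 */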
const { describe, it, beforeEach, afterEach } = require('node:test');
const assert = require('node:assert/strict');
const path = require('path');
const fs = require('fs');
// Load ../.env by hand: KEY=VALUE lines, `#` comments and blank lines skipped,
// one pair of matching surrounding quotes removed, and variables that already
// have a non-empty value in process.env left untouched.
const envPath = path.join(__dirname, '../.env');
if (fs.existsSync(envPath)) {
  const fileLines = fs.readFileSync(envPath, 'utf8').split('\n');
  for (const fileLine of fileLines) {
    const entry = fileLine.trim();
    const isBlankOrComment = !entry || entry.startsWith('#');
    const eq = isBlankOrComment ? -1 : entry.indexOf('=');
    // Lines without '=' or with an empty name are ignored.
    if (eq > 0) {
      const name = entry.slice(0, eq).trim();
      const rest = entry.slice(eq + 1).trim();
      const doubleQuoted = rest.startsWith('"') && rest.endsWith('"');
      const singleQuoted = rest.startsWith("'") && rest.endsWith("'");
      const value = doubleQuoted || singleQuoted ? rest.slice(1, -1) : rest;
      if (!process.env[name]) process.env[name] = value;
    }
  }
}
const { after } = require('node:test');
const redisClient = require('../services/redisClient');
// Identifiers for this run, built from the current timestamp (plus a random suffix for the session id).
const TEST_SESSION_ID = `test_session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
const TEST_KB_CACHE_KEY = `test_kb_cache_${Date.now()}`;
// Disconnect from Redis after all tests so the process does not hang.
after(async () => {
await redisClient.disconnect();
});
// ================================================================
// 1. Connection and availability
// ================================================================
describe('redisClient — 连接与可用性', () => {
  it('createClient 应返回客户端实例', () => {
    assert.ok(redisClient.createClient(), 'createClient should return a client');
  });

  it('getClient 应返回同一个实例(单例模式)', () => {
    const [first, second] = [redisClient.getClient(), redisClient.getClient()];
    assert.strictEqual(first, second, 'getClient should return singleton');
  });

  it('isAvailable 应返回 boolean', () => {
    const availability = redisClient.isAvailable();
    assert.equal(typeof availability, 'boolean', 'isAvailable should return boolean');
  });
});
// ================================================================
// 2. Conversation history — pushMessage + getRecentHistory
// ================================================================
describe('redisClient — 对话历史读写', () => {
  const sessionId = TEST_SESSION_ID;

  // Start and finish every test with an empty session.
  beforeEach(async () => {
    await redisClient.clearSession(sessionId);
  });
  afterEach(async () => {
    await redisClient.clearSession(sessionId);
  });

  // Logs the skip notice and returns true when Redis cannot be used, so the
  // calling test returns early and still passes.
  const skipWithoutRedis = () => {
    if (redisClient.isAvailable()) return false;
    console.log(' ⏭️ Redis不可用跳过');
    return true;
  };

  // Roles alternate user/assistant, starting with user for odd indexes.
  const roleFor = (i) => (i % 2 === 1 ? 'user' : 'assistant');

  it('pushMessage 写入后 getRecentHistory 应能读取', async () => {
    if (skipWithoutRedis()) return;
    const ok = await redisClient.pushMessage(sessionId, { role: 'user', content: '你好' });
    assert.equal(ok, true, 'pushMessage should return true');
    const history = await redisClient.getRecentHistory(sessionId, 5);
    assert.ok(Array.isArray(history), 'getRecentHistory should return array');
    assert.equal(history.length, 1, 'Should have 1 message');
    assert.equal(history[0].role, 'user');
    assert.equal(history[0].content, '你好');
  });

  it('多条消息应保持时间顺序(最旧在前)', async () => {
    if (skipWithoutRedis()) return;
    await redisClient.pushMessage(sessionId, { role: 'user', content: '第1条' });
    await redisClient.pushMessage(sessionId, { role: 'assistant', content: '第2条' });
    await redisClient.pushMessage(sessionId, { role: 'user', content: '第3条' });
    const history = await redisClient.getRecentHistory(sessionId, 5);
    assert.equal(history.length, 3);
    assert.equal(history[0].content, '第1条', '最旧的应在前面');
    assert.equal(history[1].content, '第2条');
    assert.equal(history[2].content, '第3条', '最新的应在后面');
  });

  it('超过10条应自动截断LTRIM只保留最近10条', async () => {
    if (skipWithoutRedis()) return;
    for (let i = 1; i <= 15; i++) {
      // The content has to match the '第N条' form asserted below; it used to be the
      // bare number, so this test always failed whenever Redis was reachable.
      await redisClient.pushMessage(sessionId, { role: roleFor(i), content: `第${i}条` });
    }
    const history = await redisClient.getRecentHistory(sessionId, 10);
    assert.ok(history.length <= 10, `Should have at most 10 messages, got ${history.length}`);
    // The oldest kept message is the 6th: the first five were trimmed.
    assert.equal(history[0].content, '第6条', '最旧的应该是第6条');
    assert.equal(history[history.length - 1].content, '第15条', '最新的应该是第15条');
  });

  it('getRecentHistory maxRounds 参数应限制返回数量', async () => {
    if (skipWithoutRedis()) return;
    for (let i = 1; i <= 8; i++) {
      await redisClient.pushMessage(sessionId, { role: roleFor(i), content: `消息${i}` });
    }
    const history2 = await redisClient.getRecentHistory(sessionId, 2);
    assert.ok(history2.length <= 4, `maxRounds=2 should return at most 4 messages, got ${history2.length}`);
  });

  it('clearSession 后 getRecentHistory 应返回空', async () => {
    if (skipWithoutRedis()) return;
    await redisClient.pushMessage(sessionId, { role: 'user', content: '会被清除' });
    await redisClient.clearSession(sessionId);
    const history = await redisClient.getRecentHistory(sessionId, 5);
    assert.equal(history.length, 0, 'Should be empty after clear');
  });

  it('消息应包含 ts 时间戳', async () => {
    if (skipWithoutRedis()) return;
    // Named startedAt/finishedAt so the imported `after` hook is not shadowed.
    const startedAt = Date.now();
    await redisClient.pushMessage(sessionId, { role: 'user', content: '带时间戳' });
    const finishedAt = Date.now();
    const history = await redisClient.getRecentHistory(sessionId, 1);
    assert.ok(history[0].ts >= startedAt && history[0].ts <= finishedAt, 'ts should be within time range');
  });
});
// ================================================================
// 3. KB缓存读写
// ================================================================
/**
 * Suite 3 — KB cache round trip via redisClient.setKbCache / getKbCache.
 *
 * Both cases need a live Redis and return early (after logging a notice)
 * when redisClient.isAvailable() is false.
 */
describe('redisClient — KB缓存读写', () => {
  const cacheKey = TEST_KB_CACHE_KEY;

  // Prints the skip notice and returns true when Redis cannot be reached.
  const redisMissing = () => {
    if (redisClient.isAvailable()) {
      return false;
    }
    console.log(' ⏭️ Redis不可用跳过');
    return true;
  };

  afterEach(async () => {
    if (!redisClient.isAvailable()) {
      return;
    }
    try {
      // Removes the entry written by the round-trip case; the `kb_cache:`
      // prefix is assumed to match redisClient's key layout — TODO confirm.
      await redisClient.getClient().del(`kb_cache:${cacheKey}`);
    } catch {
      // Best-effort cleanup: a failed delete must not fail the test.
    }
  });

  it('setKbCache + getKbCache 应正确读写', async () => {
    if (redisMissing()) return;

    const payload = { hit: true, query: '测试', results: [{ content: '测试内容' }] };

    const stored = await redisClient.setKbCache(cacheKey, payload);
    assert.equal(stored, true, 'setKbCache should return true');

    const fetched = await redisClient.getKbCache(cacheKey);
    assert.ok(fetched, 'getKbCache should return data');
    assert.equal(fetched.hit, true);
    assert.equal(fetched.query, '测试');
  });

  it('不存在的key应返回null', async () => {
    if (redisMissing()) return;

    // A timestamp suffix keeps the key from colliding with real entries.
    const missingKey = `nonexistent_key_${Date.now()}`;
    const fetched = await redisClient.getKbCache(missingKey);
    assert.equal(fetched, null, 'Should return null for nonexistent key');
  });
});
// ================================================================
// 4. 降级行为（Redis不可用时）
// ================================================================
/**
 * Suite 4 — degradation contract.
 *
 * These cases run whether or not Redis is reachable: they only check that
 * each API resolves to a value of the documented type instead of throwing.
 * Because they do write to Redis when it IS reachable, the fixed keys used
 * here are removed again after every test.
 */
describe('redisClient — 降级行为', () => {
  const probeSessionId = 'fake_session';
  const probeCacheKey = 'test_key';

  afterEach(async () => {
    // Drop the history entry pushMessage may have written.
    await redisClient.clearSession(probeSessionId);

    if (!redisClient.isAvailable()) {
      return;
    }
    try {
      // Same cleanup pattern as the KB cache suite; the `kb_cache:` prefix is
      // assumed to match redisClient's key layout — TODO confirm.
      await redisClient.getClient().del(`kb_cache:${probeCacheKey}`);
    } catch {
      // Best-effort cleanup: a failed delete must not fail the test.
    }
  });

  it('pushMessage 在 Redis 不可用时应返回 false 而非报错', async () => {
    // Even with Redis available this still verifies the return-type contract.
    const result = await redisClient.pushMessage(probeSessionId, { role: 'user', content: 'test' });
    assert.equal(typeof result, 'boolean', 'Should return boolean');
  });

  it('getRecentHistory 在不存在的 session 应返回空数组', async () => {
    const result = await redisClient.getRecentHistory('nonexistent_session_' + Date.now(), 5);
    // null is accepted as the degraded (Redis unavailable) answer; anything
    // else must be an empty array.
    if (result !== null) {
      assert.ok(Array.isArray(result), 'Should return array');
      assert.equal(result.length, 0, 'Should be empty for nonexistent session');
    }
  });

  it('clearSession 对不存在的 session 不应报错', async () => {
    const result = await redisClient.clearSession('nonexistent_session_' + Date.now());
    assert.equal(typeof result, 'boolean', 'Should return boolean');
  });

  it('getKbCache 在 Redis 不可用时应返回 null', async () => {
    const result = await redisClient.getKbCache(probeCacheKey);
    // Must not throw regardless of Redis availability.
    assert.ok(result === null || typeof result === 'object', 'Should return null or object');
  });

  it('setKbCache 在 Redis 不可用时应返回 false', async () => {
    const result = await redisClient.setKbCache(probeCacheKey, { hit: false });
    assert.equal(typeof result, 'boolean', 'Should return boolean');
  });
});
// ================================================================
// 5. 数据完整性
// ================================================================
/**
 * Suite 5 — data integrity of stored messages (special characters, empty
 * content, extra `source` field).
 *
 * Every case asserts on history[0], so the session is cleared both before
 * and after each test: a stale message left by an interrupted earlier run
 * would otherwise be read back instead of the one just pushed.
 */
describe('redisClient — 数据完整性', () => {
  const sessionId = TEST_SESSION_ID + '_integrity';

  // Prints the skip notice and returns true when Redis cannot be reached.
  const skipWithoutRedis = () => {
    if (redisClient.isAvailable()) {
      return false;
    }
    console.log(' ⏭️ Redis不可用跳过');
    return true;
  };

  beforeEach(async () => {
    await redisClient.clearSession(sessionId);
  });

  afterEach(async () => {
    await redisClient.clearSession(sessionId);
  });

  it('特殊字符消息应正确存取', async () => {
    if (skipWithoutRedis()) return;

    // Mixes currency sign, both quote styles, control characters and an emoji.
    const specialContent = '产品价格¥299.00 "双引号" \'单引号\' \n换行 \t制表符 emoji🎉';
    await redisClient.pushMessage(sessionId, { role: 'user', content: specialContent });

    const history = await redisClient.getRecentHistory(sessionId, 1);
    assert.equal(history[0].content, specialContent, 'Should preserve special characters');
  });

  it('空内容消息应正确存取', async () => {
    if (skipWithoutRedis()) return;

    await redisClient.pushMessage(sessionId, { role: 'user', content: '' });

    const history = await redisClient.getRecentHistory(sessionId, 1);
    assert.equal(history[0].content, '', 'Should handle empty content');
  });

  it('source 字段应正确保存', async () => {
    if (skipWithoutRedis()) return;

    await redisClient.pushMessage(sessionId, { role: 'assistant', content: '回答', source: 'voice_tool' });

    const history = await redisClient.getRecentHistory(sessionId, 1);
    assert.equal(history[0].source, 'voice_tool', 'Should preserve source field');
  });
});
// Top-level statement: it runs while this file is being evaluated, so the
// banner marks that the test file finished loading — presumably before the
// async test bodies above have completed.
console.log('\n=== redisClient 测试加载完成 ===\n');