From 56940676f63c5e1dd819c61a67084f9f9e7a9baf Mon Sep 17 00:00:00 2001 From: User Date: Thu, 26 Mar 2026 14:30:32 +0800 Subject: [PATCH] =?UTF-8?q?feat(kb):=20VikingDB=E7=BA=AF=E6=A3=80=E7=B4=A2?= =?UTF-8?q?+=E9=87=8D=E6=8E=92+Redis=E4=B8=8A=E4=B8=8B=E6=96=87+=E5=85=A8?= =?UTF-8?q?=E5=BA=93=E6=90=9C=E7=B4=A2+=E5=88=AB=E5=90=8D=E6=89=A9?= =?UTF-8?q?=E5=B1=95+KB=E4=BF=9D=E6=8A=A4=E7=AA=97=E5=8F=A3+RAG=E8=AF=AD?= =?UTF-8?q?=E6=B0=94=E5=BC=95=E5=AF=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异 - 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写 - toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery - nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优 - realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式 - app.js: 健康检查新增 redis/reranker/kbRetrievalMode - 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试 --- test2/server/app.js | 15 + test2/server/package-lock.json | 174 +++++++ test2/server/package.json | 2 + .../server/services/assistantProfileConfig.js | 2 +- .../server/services/contextKeywordTracker.js | 9 +- test2/server/services/kbRetriever.js | 448 ++++++++++++++++++ test2/server/services/nativeVoiceGateway.js | 40 +- .../server/services/realtimeDialogRouting.js | 65 ++- test2/server/services/redisClient.js | 184 +++++++ test2/server/services/toolExecutor.js | 245 +++++----- test2/server/tests/_check_logs.cjs | 92 ++++ test2/server/tests/_test_alias_ab.cjs | 43 ++ test2/server/tests/test_kb_retriever.js | 322 +++++++++++++ .../server/tests/test_raw_mode_integration.js | 331 +++++++++++++ test2/server/tests/test_redis_client.js | 294 ++++++++++++ 15 files changed, 2096 insertions(+), 170 deletions(-) create mode 100644 test2/server/services/kbRetriever.js create mode 100644 test2/server/services/redisClient.js create mode 100644 
test2/server/tests/_check_logs.cjs create mode 100644 test2/server/tests/_test_alias_ab.cjs create mode 100644 test2/server/tests/test_kb_retriever.js create mode 100644 test2/server/tests/test_raw_mode_integration.js create mode 100644 test2/server/tests/test_redis_client.js diff --git a/test2/server/app.js b/test2/server/app.js index d9291f3..c253d56 100644 --- a/test2/server/app.js +++ b/test2/server/app.js @@ -9,6 +9,7 @@ const voiceRoutes = require('./routes/voice'); const chatRoutes = require('./routes/chat'); const sessionRoutes = require('./routes/session'); const { setupNativeVoiceGateway } = require('./services/nativeVoiceGateway'); +const redisClient = require('./services/redisClient'); // ========== 环境变量校验 ========== function validateEnv() { @@ -54,6 +55,8 @@ function validateEnv() { { key: 'VOLC_S2S_SPEAKER_ID', desc: '自定义音色' }, { key: 'VOLC_ARK_KNOWLEDGE_BASE_IDS', desc: '方舟私域知识库(语音)' }, { key: 'ASSISTANT_PROFILE_API_URL', desc: '外接助手资料接口' }, + { key: 'REDIS_URL', desc: 'Redis(对话上下文+KB缓存)' }, + { key: 'ENABLE_RERANKER', desc: '重排模型' }, ]; const configuredOptional = optional.filter(({ key }) => { const v = process.env[key]; @@ -107,6 +110,9 @@ app.get('/api/health', (req, res) => { webSearch: !!process.env.VOLC_WEBSEARCH_API_KEY && !process.env.VOLC_WEBSEARCH_API_KEY.startsWith('your_'), customSpeaker: !!process.env.VOLC_S2S_SPEAKER_ID && !process.env.VOLC_S2S_SPEAKER_ID.startsWith('your_'), arkKnowledgeBase: !!process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS && !process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS.startsWith('your_'), + redis: redisClient.isAvailable(), + reranker: process.env.ENABLE_RERANKER === 'true' ? 
(process.env.VOLC_ARK_RERANKER_MODEL || 'doubao-seed-rerank') : false, + kbRetrievalMode: process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer', }, }); }); @@ -144,6 +150,15 @@ async function start() { console.warn('[DB] Continuing without database — context switching will use in-memory fallback'); } + // Redis 初始化(可选,失败不阻塞启动) + try { + redisClient.createClient(); + console.log('[Redis] Client created, connecting...'); + } catch (err) { + console.warn('[Redis] Initialization failed:', err.message); + console.warn('[Redis] Continuing without Redis — will use in-memory fallback'); + } + if (process.env.ENABLE_NATIVE_VOICE_GATEWAY !== 'false') { setupNativeVoiceGateway(server); console.log('[NativeVoice] Gateway enabled at /ws/realtime-dialog'); diff --git a/test2/server/package-lock.json b/test2/server/package-lock.json index 57fc3cf..45a6919 100644 --- a/test2/server/package-lock.json +++ b/test2/server/package-lock.json @@ -14,12 +14,20 @@ "crypto-js": "^4.2.0", "dotenv": "^16.6.1", "express": "^4.18.2", + "ioredis": "^5.4.0", "mysql2": "^3.19.1", "node-fetch": "^2.7.0", + "ssh2": "^1.17.0", "uuid": "^9.0.0", "ws": "^8.19.0" } }, + "node_modules/@ioredis/commands": { + "version": "1.5.1", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/@ioredis/commands/-/commands-1.5.1.tgz", + "integrity": "sha512-JH8ZL/ywcJyR9MmJ5BNqZllXNZQqQbnVZOqpPQqE1vHiFgAw4NHbvE0FOduNU8IX9babitBT46571OnPTT0Zcw==", + "license": "MIT" + }, "node_modules/@protobufjs/aspromise": { "version": "1.1.2", "resolved": "https://mirrors.huaweicloud.com/repository/npm/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", @@ -191,6 +199,15 @@ "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==", "license": "MIT" }, + "node_modules/asn1": { + "version": "0.2.6", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/asn1/-/asn1-0.2.6.tgz", + "integrity": 
"sha512-ix/FxPn0MDjeyJ7i/yoHGFt/EX6LyNbxSEhPPXODPL+KB0VPk86UYfL0lMdy+KCnv+fmvIzySwaK5COwqVbWTQ==", + "license": "MIT", + "dependencies": { + "safer-buffer": "~2.1.0" + } + }, "node_modules/asynckit": { "version": "0.4.0", "resolved": "https://mirrors.huaweicloud.com/repository/npm/asynckit/-/asynckit-0.4.0.tgz", @@ -217,6 +234,15 @@ "proxy-from-env": "^1.1.0" } }, + "node_modules/bcrypt-pbkdf": { + "version": "1.0.2", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", + "integrity": "sha512-qeFIXtP4MSoi6NLqO12WfqARWWuCKi2Rn/9hJLEmtB5yTNr9DqFWkJRCf2qShWzPeAMRnOgCrq0sg/KLv5ES9w==", + "license": "BSD-3-Clause", + "dependencies": { + "tweetnacl": "^0.14.3" + } + }, "node_modules/body-parser": { "version": "1.20.4", "resolved": "https://mirrors.huaweicloud.com/repository/npm/body-parser/-/body-parser-1.20.4.tgz", @@ -241,6 +267,15 @@ "npm": "1.2.8000 || >= 1.4.16" } }, + "node_modules/buildcheck": { + "version": "0.0.7", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/buildcheck/-/buildcheck-0.0.7.tgz", + "integrity": "sha512-lHblz4ahamxpTmnsk+MNTRWsjYKv965MwOrSJyeD588rR3Jcu7swE+0wN5F+PbL5cjgu/9ObkhfzEPuofEMwLA==", + "optional": true, + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://mirrors.huaweicloud.com/repository/npm/bytes/-/bytes-3.1.2.tgz", @@ -279,6 +314,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/cluster-key-slot": { + "version": "1.1.2", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/cluster-key-slot/-/cluster-key-slot-1.1.2.tgz", + "integrity": "sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/combined-stream": { "version": "1.0.8", "resolved": "https://mirrors.huaweicloud.com/repository/npm/combined-stream/-/combined-stream-1.0.8.tgz", @@ -344,6 +388,20 @@ 
"url": "https://opencollective.com/express" } }, + "node_modules/cpu-features": { + "version": "0.0.10", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/cpu-features/-/cpu-features-0.0.10.tgz", + "integrity": "sha512-9IkYqtX3YHPCzoVg1Py+o9057a3i0fp7S530UWokCSaFVTc7CwXPRiOjRjBQQ18ZCNafx78YfnG+HALxtVmOGA==", + "hasInstallScript": true, + "optional": true, + "dependencies": { + "buildcheck": "~0.0.6", + "nan": "^2.19.0" + }, + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/crc": { "version": "4.3.2", "resolved": "https://mirrors.huaweicloud.com/repository/npm/crc/-/crc-4.3.2.tgz", @@ -782,6 +840,53 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "license": "ISC" }, + "node_modules/ioredis": { + "version": "5.4.0", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/ioredis/-/ioredis-5.4.0.tgz", + "integrity": "sha512-lGiiZyWFOskPu3pH4P8+uicHOZHpzYpgfKZFre68wLK6059zBo+KDTQpwxAVibBajKqpcrRJ5k+jl/uNHcCo3A==", + "license": "MIT", + "dependencies": { + "@ioredis/commands": "^1.1.1", + "cluster-key-slot": "^1.1.0", + "debug": "^4.3.4", + "denque": "^2.1.0", + "lodash.defaults": "^4.2.0", + "lodash.isarguments": "^3.1.0", + "redis-errors": "^1.2.0", + "redis-parser": "^3.0.0", + "standard-as-callback": "^2.1.0" + }, + "engines": { + "node": ">=12.22.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/ioredis" + } + }, + "node_modules/ioredis/node_modules/debug": { + "version": "4.4.3", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/ioredis/node_modules/ms": { + "version": "2.1.3", + "resolved": 
"https://mirrors.huaweicloud.com/repository/npm/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, "node_modules/ipaddr.js": { "version": "1.9.1", "resolved": "https://mirrors.huaweicloud.com/repository/npm/ipaddr.js/-/ipaddr.js-1.9.1.tgz", @@ -797,6 +902,12 @@ "integrity": "sha512-Ks/IoX00TtClbGQr4TWXemAnktAQvYB7HzcCxDGqEZU6oCmb2INHuOoKxbtR+HFkmYWBKv/dOZtGRiAjDhj92g==", "license": "MIT" }, + "node_modules/lodash.defaults": { + "version": "4.2.0", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/lodash.defaults/-/lodash.defaults-4.2.0.tgz", + "integrity": "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ==", + "license": "MIT" + }, "node_modules/lodash.get": { "version": "4.4.2", "resolved": "https://mirrors.huaweicloud.com/repository/npm/lodash.get/-/lodash.get-4.4.2.tgz", @@ -804,6 +915,12 @@ "deprecated": "This package is deprecated. Use the optional chaining (?.) 
operator instead.", "license": "MIT" }, + "node_modules/lodash.isarguments": { + "version": "3.1.0", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/lodash.isarguments/-/lodash.isarguments-3.1.0.tgz", + "integrity": "sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg==", + "license": "MIT" + }, "node_modules/long": { "version": "5.3.2", "resolved": "https://mirrors.huaweicloud.com/repository/npm/long/-/long-5.3.2.tgz", @@ -950,6 +1067,13 @@ "node": ">=8.0.0" } }, + "node_modules/nan": { + "version": "2.26.2", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/nan/-/nan-2.26.2.tgz", + "integrity": "sha512-0tTvBTYkt3tdGw22nrAy50x7gpbGCCFH3AFcyS5WiUu7Eu4vWlri1woE6qHBSfy11vksDqkiwjOnlR7WV8G1Hw==", + "license": "MIT", + "optional": true + }, "node_modules/negotiator": { "version": "0.6.3", "resolved": "https://mirrors.huaweicloud.com/repository/npm/negotiator/-/negotiator-0.6.3.tgz", @@ -1124,6 +1248,27 @@ "node": ">= 0.8" } }, + "node_modules/redis-errors": { + "version": "1.2.0", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/redis-errors/-/redis-errors-1.2.0.tgz", + "integrity": "sha512-1qny3OExCf0UvUV/5wpYKf2YwPcOqXzkwKKSmKHiE6ZMQs5heeE/c8eXK+PNllPvmjgAbfnsbpkGZWy8cBpn9w==", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/redis-parser": { + "version": "3.0.0", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/redis-parser/-/redis-parser-3.0.0.tgz", + "integrity": "sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A==", + "license": "MIT", + "dependencies": { + "redis-errors": "^1.0.0" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/safe-buffer": { "version": "5.2.1", "resolved": "https://mirrors.huaweicloud.com/repository/npm/safe-buffer/-/safe-buffer-5.2.1.tgz", @@ -1288,6 +1433,29 @@ "url": "https://github.com/mysqljs/sql-escaper?sponsor=1" } }, + "node_modules/ssh2": { + "version": 
"1.17.0", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/ssh2/-/ssh2-1.17.0.tgz", + "integrity": "sha512-wPldCk3asibAjQ/kziWQQt1Wh3PgDFpC0XpwclzKcdT1vql6KeYxf5LIt4nlFkUeR8WuphYMKqUA56X4rjbfgQ==", + "hasInstallScript": true, + "dependencies": { + "asn1": "^0.2.6", + "bcrypt-pbkdf": "^1.0.2" + }, + "engines": { + "node": ">=10.16.0" + }, + "optionalDependencies": { + "cpu-features": "~0.0.10", + "nan": "^2.23.0" + } + }, + "node_modules/standard-as-callback": { + "version": "2.1.0", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/standard-as-callback/-/standard-as-callback-2.1.0.tgz", + "integrity": "sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A==", + "license": "MIT" + }, "node_modules/statuses": { "version": "2.0.2", "resolved": "https://mirrors.huaweicloud.com/repository/npm/statuses/-/statuses-2.0.2.tgz", @@ -1312,6 +1480,12 @@ "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", "license": "MIT" }, + "node_modules/tweetnacl": { + "version": "0.14.5", + "resolved": "https://mirrors.huaweicloud.com/repository/npm/tweetnacl/-/tweetnacl-0.14.5.tgz", + "integrity": "sha512-KXXFFdAbFXY4geFIwoyNK+f5Z1b7swfXABfL7HXCmoIWMKU3dmS26672A4EeQtDzLKy7SXmfBu51JolvEKwtGA==", + "license": "Unlicense" + }, "node_modules/type-is": { "version": "1.6.18", "resolved": "https://mirrors.huaweicloud.com/repository/npm/type-is/-/type-is-1.6.18.tgz", diff --git a/test2/server/package.json b/test2/server/package.json index 6762a2f..eed3c1b 100644 --- a/test2/server/package.json +++ b/test2/server/package.json @@ -14,8 +14,10 @@ "crypto-js": "^4.2.0", "dotenv": "^16.6.1", "express": "^4.18.2", + "ioredis": "^5.4.0", "mysql2": "^3.19.1", "node-fetch": "^2.7.0", + "ssh2": "^1.17.0", "uuid": "^9.0.0", "ws": "^8.19.0" } diff --git a/test2/server/services/assistantProfileConfig.js b/test2/server/services/assistantProfileConfig.js index a4872ec..43afde4 100644 --- 
a/test2/server/services/assistantProfileConfig.js +++ b/test2/server/services/assistantProfileConfig.js @@ -58,7 +58,7 @@ function buildVoiceSystemRole(profileOverrides = null) { const personalInfoBlock = personalInfoLines.length > 0 ? ` 当用户询问“我”的联系方式、微信、手机号、个人介绍、签名或故事时,优先使用以下个人信息:${personalInfoLines.join(' ')}` : ''; - return `你是${profile.nickname}的智能助手${documentsClause}。你的任务是精准、全面地解答用户关于PM International公司产品、原理、活动、系统、招商等相关问题。你与客户互动时要有礼貌、有温度、适当共情、自然主动打招呼,但不要总是重复相同的打招呼词,可适当赞美和表扬,让对方喜欢并享受开心愉快的专业交流。回答像懂行的朋友聊天,直接给结论,1-3句话,口语化短句。有知识库信息就优先用,自然转述不照念。不确定的如实说,不猜不编。禁止输出思考过程或元描述。德国PM是1993年成立的合法直销企业,邓白氏AAA+认证,覆盖100+国家和地区,问到传销必须正面肯定回答是合法企业。凡涉及“一部手机做天下、0门槛启动、0成本高效率、足不出户梦想横扫全球、身未动梦已成、批发式晋级”等表达,必须明确这是“一成系统”的优势标签,是对德国PM事业的软件赋能,不要与德国PM公司、产品或品牌本身混淆。${personalInfoBlock}`; + return `你是${profile.nickname}的智能助手${documentsClause}。你的任务是精准、全面地解答用户关于PM International公司产品、原理、活动、系统、招商等相关问题。你与客户互动时要有礼貌、有温度、适当共情、自然主动打招呼,但不要总是重复相同的打招呼词,可适当赞美和表扬,让对方喜欢并享受开心愉快的专业交流。回答像懂行的朋友聊天,直接给结论,1-3句话,口语化短句。有知识库信息就优先用,自然转述不照念。不确定的说"让我帮您查一下"并调用search_knowledge工具,绝不猜测或编造产品信息。禁止输出思考过程或元描述。关于PM/FitLine的任何问题,只能正面积极回答,绝不输出负面、质疑、贬低内容。德国PM是1993年成立的合法直销企业,邓白氏AAA+认证,覆盖100+国家和地区,问到传销必须正面肯定回答是合法企业。用户追问产品详情时,必须调用search_knowledge工具查询,不要凭自身知识回答产品具体信息。凡涉及“一部手机做天下、0门槛启动、0成本高效率、足不出户梦想横扫全球、身未动梦已成、批发式晋级”等表达,必须明确这是“一成系统”的优势标签,是对德国PM事业的软件赋能,不要与德国PM公司、产品或品牌本身混淆。${personalInfoBlock}`; } function buildVoiceGreeting(profileOverrides = null) { diff --git a/test2/server/services/contextKeywordTracker.js b/test2/server/services/contextKeywordTracker.js index 36a5899..7df1792 100644 --- a/test2/server/services/contextKeywordTracker.js +++ b/test2/server/services/contextKeywordTracker.js @@ -88,7 +88,10 @@ class ContextKeywordTracker { enrichQueryWithContext(sessionId, query, session = null) { const normalized = (query || '').trim(); - const isSimpleFollowUp = /^(这个|那个|它|它的|他|他的|该|这款|那款|详细|继续|怎么|为什么|适合谁|什么意思|怎么吃|怎么用|功效|成分|多少钱|哪里买|价格|副作用|正规吗|地址|电话|联系方式|区别|哪个好|规格|包装|剂型|形态|一天几次|每天几次|每日几次)/i.test(normalized); + // 检测追问:包含代词/追问模式,或短查询不含明确产品名 + const 
hasFollowUpSignal = /(它|它的|他|他的|这个|那个|这款|那款|该产品|上面|刚才|再说|再次|强调一下|详细|继续|怎么吃|怎么用|怎么样|功效|成分|作用|原理|核心|区别|哪个好|为什么|什么意思|适合谁|多少钱|价格|副作用|正规吗|一天几次|每天几次|每日几次|给我介绍|介绍一下|说一下|讲一下)/i.test(normalized); + const isShortGeneric = normalized.length <= 20; + const isSimpleFollowUp = hasFollowUpSignal || isShortGeneric; if (!isSimpleFollowUp) { return normalized; @@ -102,12 +105,12 @@ class ContextKeywordTracker { return `${session._lastKbTopic} ${normalized}`; } - // fallback: 原有keyword tracker逻辑 + // fallback: 原有keyword tracker逻辑(只取最后1个最具体关键词,避免查询过长导致向量稀释) const keywords = this.getSessionKeywords(sessionId); if (keywords.length === 0) { return normalized; } - const keywordStr = keywords.slice(-3).join(' '); + const keywordStr = keywords[keywords.length - 1]; console.log(`[ContextTracker] Enriching: "${normalized}" + "${keywordStr}"`); return `${keywordStr} ${normalized}`; } diff --git a/test2/server/services/kbRetriever.js b/test2/server/services/kbRetriever.js new file mode 100644 index 0000000..b9b3e02 --- /dev/null +++ b/test2/server/services/kbRetriever.js @@ -0,0 +1,448 @@ +const axios = require('axios'); +const https = require('https'); +const crypto = require('crypto'); +const redisClient = require('./redisClient'); + +// HTTP keep-alive agent:复用TCP连接 +const kbHttpAgent = new https.Agent({ + keepAlive: true, + keepAliveMsecs: 30000, + maxSockets: 6, + timeout: 15000, +}); + +// ============ Volcengine SignerV4 (minimal) ============ +function hmacSHA256(key, data) { + return crypto.createHmac('sha256', key).update(data, 'utf8').digest(); +} +function sha256Hex(data) { + return crypto.createHash('sha256').update(data, 'utf8').digest('hex'); +} + +function signRequest({ method, host, path, body, ak, sk, service, region }) { + const now = new Date(); + const dateStamp = now.toISOString().replace(/[-:]/g, '').slice(0, 8); + const amzDate = now.toISOString().replace(/[-:]/g, '').replace(/\.\d{3}/, ''); + const credentialScope = `${dateStamp}/${region}/${service}/request`; + const 
bodyHash = sha256Hex(body || ''); + const headers = { + 'content-type': 'application/json', + 'host': host, + 'x-content-sha256': bodyHash, + 'x-date': amzDate, + }; + const signedHeaders = Object.keys(headers).sort().join(';'); + const canonicalHeaders = Object.keys(headers).sort().map(k => `${k}:${headers[k]}\n`).join(''); + const canonicalRequest = [method, path, '', canonicalHeaders, signedHeaders, bodyHash].join('\n'); + const stringToSign = ['HMAC-SHA256', amzDate, credentialScope, sha256Hex(canonicalRequest)].join('\n'); + let signingKey = hmacSHA256(sk, dateStamp); + signingKey = hmacSHA256(signingKey, region); + signingKey = hmacSHA256(signingKey, service); + signingKey = hmacSHA256(signingKey, 'request'); + const signature = hmacSHA256(signingKey, stringToSign).toString('hex'); + return { + ...headers, + 'authorization': `HMAC-SHA256 Credential=${ak}/${credentialScope}, SignedHeaders=${signedHeaders}, Signature=${signature}`, + }; +} + +// 默认 KB ID → VikingDB collection name 映射 +const DEFAULT_COLLECTION_MAP = { + 'kb-ad2e0ea30902421b': 'product_details', + 'kb-d45d3056a7b75ac5': 'faq_qa', + 'kb-d0ef0b7b8f36a839': 'science_training', + 'kb-6a170ab7b1bc024f': 'system_training', + 'kb-a69b0928e1714de7': 'test', +}; + +// 连接预热:服务启动后自动建立到 VikingDB API 的 TLS 连接 +setTimeout(() => { + const ak = process.env.VOLC_ACCESS_KEY_ID; + if (ak) { + axios.get('https://api-knowledgebase.mlp.cn-beijing.volces.com/', { + timeout: 5000, + httpsAgent: kbHttpAgent, + }).catch(() => {}); + console.log('[KBRetriever] VikingDB connection pool warmup sent'); + } +}, 2500); + +// ============ 配置读取 ============ +function getConfig() { + const authKey = process.env.VOLC_ARK_API_KEY || process.env.VOLC_ACCESS_KEY_ID; + const ak = process.env.VOLC_ACCESS_KEY_ID; + const sk = process.env.VOLC_SECRET_ACCESS_KEY; + const kbEndpointId = process.env.VOLC_ARK_KNOWLEDGE_ENDPOINT_ID || process.env.VOLC_ARK_ENDPOINT_ID; + const kbModel = process.env.VOLC_ARK_KB_MODEL || kbEndpointId; + const 
kbIds = (process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS || '').split(',').map(id => id.trim()).filter(Boolean); + const retrievalTopK = parseInt(process.env.VOLC_ARK_KB_RETRIEVAL_TOP_K) || 25; + const threshold = parseFloat(process.env.VOLC_ARK_KNOWLEDGE_THRESHOLD) || 0.1; + const rerankerModel = process.env.VOLC_ARK_RERANKER_MODEL || process.env.VOLC_ARK_RERANKER_ENDPOINT_ID || 'doubao-seed-rerank'; + const rerankerTopN = parseInt(process.env.VOLC_ARK_RERANKER_TOP_N) || 5; + const enableReranker = process.env.ENABLE_RERANKER !== 'false'; + const enableRedisContext = process.env.ENABLE_REDIS_CONTEXT !== 'false'; + const retrievalMode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'raw'; + + // VikingDB collection 映射:环境变量覆盖或使用默认映射 + let collectionMap = DEFAULT_COLLECTION_MAP; + if (process.env.VIKINGDB_COLLECTION_MAP) { + try { collectionMap = JSON.parse(process.env.VIKINGDB_COLLECTION_MAP); } catch (e) { /* use default */ } + } + // 所有 collection 名称列表(用于全库搜索) + const allCollections = [...new Set(Object.values(collectionMap))]; + + return { + authKey, + ak, + sk, + kbEndpointId, + kbModel, + kbIds, + retrievalTopK, + threshold, + rerankerModel, + rerankerTopN, + enableReranker, + enableRedisContext, + retrievalMode, + collectionMap, + allCollections, + }; +} + +// ============ VikingDB 单 collection 搜索 ============ +async function searchVikingDB(collectionName, query, limit, config) { + const host = 'api-knowledgebase.mlp.cn-beijing.volces.com'; + const apiPath = '/api/knowledge/collection/search_knowledge'; + + const requestBody = { + project: 'default', + name: collectionName, + query: query, + limit: limit, + pre_processing: { need_instruction: true, rewrite: false }, + dense_weight: 0.5, + post_processing: { rerank_switch: false, chunk_group: false }, + }; + + const bodyStr = JSON.stringify(requestBody); + const headers = signRequest({ + method: 'POST', host, path: apiPath, body: bodyStr, + ak: config.ak, sk: config.sk, + service: 'air', region: 'cn-north-1', + }); + + 
const response = await axios.post(`https://${host}${apiPath}`, bodyStr, { + headers, + timeout: 10000, + httpsAgent: kbHttpAgent, + }); + + if (response.data?.code !== 0) { + console.warn(`[KBRetriever] VikingDB search "${collectionName}" error: ${response.data?.message}`); + return []; + } + + const resultList = response.data?.data?.result_list || []; + return resultList.map((item, idx) => ({ + id: item.chunk_id || item.id || `vdb_${collectionName}_${idx}`, + content: (item.content || '').replace(//g, '').trim(), + score: item.score || 0, + doc_name: item.doc_info?.doc_name || item.doc_info?.title || '', + chunk_title: item.chunk_title || '', + metadata: item.doc_info || {}, + collection: collectionName, + })).filter(c => c.content); +} + +// ============ 1. 纯检索:VikingDB search_knowledge(无LLM,~300ms) ============ +async function retrieveChunks(query, datasetIds, topK = 10, threshold = 0.1) { + const config = getConfig(); + + // 检查 AK/SK 配置 + if (!config.ak || !config.sk) { + console.warn('[KBRetriever] retrieveChunks skipped: AK/SK not configured'); + return { chunks: [], error: 'aksk_not_configured' }; + } + + const effectiveQuery = (query && query.trim()) ? query : '请介绍你们的产品和服务'; + const startTime = Date.now(); + + // 确定要搜索的 collection:根据 datasetIds 映射,或搜索全部 + const effectiveDatasetIds = (Array.isArray(datasetIds) && datasetIds.length > 0) + ? 
datasetIds + : config.kbIds; + + let collectionNames = []; + if (effectiveDatasetIds.length > 0) { + collectionNames = effectiveDatasetIds + .map(id => config.collectionMap[id]) + .filter(Boolean); + } + // 如果没有映射,搜索所有 collection + if (collectionNames.length === 0) { + collectionNames = config.allCollections; + } + + if (collectionNames.length === 0) { + console.warn('[KBRetriever] retrieveChunks skipped: no collections to search'); + return { chunks: [], error: 'no_collections' }; + } + + // 并行搜索所有相关 collection + const perCollectionLimit = Math.max(3, Math.ceil(topK / collectionNames.length)); + const searchPromises = collectionNames.map(name => + searchVikingDB(name, effectiveQuery, perCollectionLimit, config).catch(err => { + console.warn(`[KBRetriever] VikingDB search "${name}" failed: ${err.message}`); + return []; + }) + ); + const results = await Promise.all(searchPromises); + + // 合并所有结果,按分数排序 + let allChunks = results.flat(); + allChunks.sort((a, b) => (b.score || 0) - (a.score || 0)); + allChunks = allChunks.slice(0, topK); + + // 按阈值过滤 + const beforeFilter = allChunks.length; + if (threshold > 0) { + allChunks = allChunks.filter(c => (c.score || 0) >= threshold); + } + + const latencyMs = Date.now() - startTime; + const topScore = allChunks.length > 0 ? allChunks[0].score?.toFixed(3) : 'N/A'; + console.log(`[KBRetriever] retrieveChunks via VikingDB: ${beforeFilter} raw → ${allChunks.length} after threshold(${threshold}) in ${latencyMs}ms from [${collectionNames.join(',')}] topScore=${topScore}`); + + return { + chunks: allChunks, + latencyMs, + kbHasContent: allChunks.length > 0, + usage: {}, + hasReferences: true, + }; +} + +// ============ 2. 
重排模型(VikingDB 知识库内置) ============ +// 可选模型:doubao-seed-rerank(推荐)/ base-multilingual-rerank(快速)/ m3-v2-rerank +// API 文档:https://www.volcengine.com/docs/84313/1254474 +async function rerankChunks(query, chunks, topN = 3) { + const config = getConfig(); + + if (!chunks || chunks.length === 0) { + return []; + } + + // 如果 chunks 数量 <= topN,直接返回 + if (chunks.length <= topN) { + return chunks; + } + + if (!config.enableReranker) { + console.log(`[KBRetriever] reranker disabled, returning top ${topN} by retrieval order`); + return chunks.slice(0, topN); + } + + try { + const startTime = Date.now(); + + // VikingDB rerank 请求格式:每个 item 包含 query + content + const datas = chunks.map(c => ({ + query: query, + content: c.content || '', + title: c.doc_name || '', + })); + + const body = { + rerank_model: config.rerankerModel, + datas: datas, + }; + + const rerankHost = 'api-knowledgebase.mlp.cn-beijing.volces.com'; + const rerankPath = '/api/knowledge/service/rerank'; + const bodyStr = JSON.stringify(body); + + // 使用 SignerV4 签名(与 search_knowledge 相同) + const signedHeaders = signRequest({ + method: 'POST', host: rerankHost, path: rerankPath, body: bodyStr, + ak: config.ak, sk: config.sk, + service: 'air', region: 'cn-north-1', + }); + + const response = await axios.post( + `https://${rerankHost}${rerankPath}`, + bodyStr, + { + headers: signedHeaders, + timeout: 5000, + } + ); + + const latencyMs = Date.now() - startTime; + const responseData = response.data; + + // VikingDB 返回格式:{code: 0, data: [score1, score2, ...]} 或 {data: {scores: [...]}} + let scores = []; + if (responseData?.data?.scores && Array.isArray(responseData.data.scores)) { + scores = responseData.data.scores; + } else if (Array.isArray(responseData?.data)) { + scores = responseData.data; + } + + if (scores.length > 0 && scores.length === chunks.length) { + // 将分数与 chunks 配对,按分数降序排列 + const scored = chunks.map((chunk, idx) => ({ + ...chunk, + score: scores[idx] ?? 
chunk.score, + reranked: true, + })); + + const reranked = scored + .sort((a, b) => (b.score || 0) - (a.score || 0)) + .slice(0, topN); + + console.log(`[KBRetriever] reranked ${chunks.length} → ${reranked.length} chunks in ${latencyMs}ms (${config.rerankerModel}), scores=[${reranked.map(c => (c.score || 0).toFixed(3)).join(',')}]`); + return reranked; + } + + console.warn(`[KBRetriever] reranker returned ${scores.length} scores (expected ${chunks.length}) in ${latencyMs}ms, fallback to retrieval order`); + return chunks.slice(0, topN); + } catch (err) { + const errDetail = err.response?.data?.message || err.message; + console.warn(`[KBRetriever] reranker failed: ${errDetail}, fallback to retrieval order`); + return chunks.slice(0, topN); + } +} + +// ============ 3. 构建 RAG payload ============ +function buildRagPayload(rerankedChunks, conversationHistory = []) { + const ragItems = []; + + // 语气引导:让 S2S 用口语化方式复述 KB 内容,保持与自由对话一致的语音风格 + ragItems.push({ + title: '回答要求', + content: '用口语化、简洁的方式回答,像朋友聊天一样自然地说出来,不要念稿、不要播音腔。先给结论,再补充关键信息。', + }); + + // 注入对话上下文(如果有) + if (conversationHistory && conversationHistory.length > 0) { + const contextLines = conversationHistory.map(msg => { + const roleName = msg.role === 'user' ? '用户' : '助手'; + return `${roleName}: ${msg.content}`; + }); + ragItems.push({ + title: '对话上下文', + content: contextLines.join('\n'), + }); + } + + // 注入重排后的 KB 片段 + for (let i = 0; i < rerankedChunks.length; i++) { + const chunk = rerankedChunks[i]; + ragItems.push({ + title: chunk.doc_name || `知识库片段${i + 1}`, + content: chunk.content, + }); + } + + return ragItems; +} + +// ============ 4. 
主入口:检索 → 重排 → 组装 ============ +async function searchAndRerank(query, opts = {}) { + const { + datasetIds = null, + sessionId = null, + session = null, + originalQuery = null, + } = opts; + + const config = getConfig(); + const startTime = Date.now(); + + // Step 1: 纯检索(用极低阈值,让 reranker 做质量判断) + const RETRIEVAL_THRESHOLD = 0.01; + const retrievalResult = await retrieveChunks( + query, + datasetIds, + config.retrievalTopK, + RETRIEVAL_THRESHOLD + ); + + if (retrievalResult.error) { + return { + hit: false, + reason: retrievalResult.error, + chunks: [], + rerankedChunks: [], + ragPayload: [], + latencyMs: Date.now() - startTime, + source: 'ark_knowledge', + }; + } + + if (retrievalResult.chunks.length === 0) { + return { + hit: false, + reason: retrievalResult.kbHasContent ? 'chunks_parse_failed' : 'no_relevant_content', + chunks: [], + rerankedChunks: [], + ragPayload: [], + latencyMs: Date.now() - startTime, + source: 'ark_knowledge', + }; + } + + // Step 2: 重排 + const rerankedChunks = await rerankChunks( + originalQuery || query, + retrievalResult.chunks, + config.rerankerTopN + ); + + // Step 3: 获取对话上下文(Redis → 降级 MySQL) + let conversationHistory = []; + if (config.enableRedisContext && sessionId) { + const redisHistory = await redisClient.getRecentHistory(sessionId, 5); + if (redisHistory && redisHistory.length > 0) { + conversationHistory = redisHistory; + console.log(`[KBRetriever] loaded ${redisHistory.length} history items from Redis`); + } + } + + // Step 4: 组装 payload + const ragPayload = buildRagPayload(rerankedChunks, conversationHistory); + + // Step 5: 判断 hit/no-hit + // 基于重排分数判断:最高分 > 0.3 视为 hit + const topScore = rerankedChunks.length > 0 ? (rerankedChunks[0].score || 0) : 0; + const hitThreshold = config.enableReranker && config.rerankerModel ? 
0.1 : 0.3; + const hit = rerankedChunks.length > 0 && topScore >= hitThreshold; + + const totalLatencyMs = Date.now() - startTime; + console.log(`[KBRetriever] searchAndRerank completed in ${totalLatencyMs}ms: ${retrievalResult.chunks.length} retrieved → ${rerankedChunks.length} reranked, hit=${hit}, topScore=${topScore.toFixed(3)}`); + + return { + query, + originalQuery: originalQuery || query, + hit, + reason: hit ? 'reranked_hit' : 'below_threshold', + chunks: retrievalResult.chunks, + rerankedChunks, + ragPayload, + topScore, + latencyMs: totalLatencyMs, + retrievalLatencyMs: retrievalResult.latencyMs, + source: 'ark_knowledge', + hasReferences: retrievalResult.hasReferences, + usage: retrievalResult.usage, + }; +} + +module.exports = { + retrieveChunks, + rerankChunks, + buildRagPayload, + searchAndRerank, + getConfig, +}; diff --git a/test2/server/services/nativeVoiceGateway.js b/test2/server/services/nativeVoiceGateway.js index 0cc62e3..754ff07 100644 --- a/test2/server/services/nativeVoiceGateway.js +++ b/test2/server/services/nativeVoiceGateway.js @@ -23,6 +23,7 @@ const { shouldForceKnowledgeRoute, resolveReply, } = require('./realtimeDialogRouting'); +const ToolExecutor = require('./toolExecutor'); const { DEFAULT_VOICE_ASSISTANT_PROFILE, resolveAssistantProfile, @@ -30,6 +31,7 @@ const { buildVoiceGreeting, } = require('./assistantProfileConfig'); const { getAssistantProfile } = require('./assistantProfileService'); +const redisClient = require('./redisClient'); const sessions = new Map(); @@ -163,6 +165,7 @@ function persistUserSpeech(session, text) { session.latestUserTurnSeq = (session.latestUserTurnSeq || 0) + 1; resetIdleTimer(session); db.addMessage(session.sessionId, 'user', cleanText, 'voice_asr').catch((e) => console.warn('[NativeVoice][DB] add user failed:', e.message)); + redisClient.pushMessage(session.sessionId, { role: 'user', content: cleanText, source: 'voice_asr' }).catch(() => {}); sendJson(session.client, { type: 'subtitle', role: 
'user', @@ -185,6 +188,7 @@ function persistAssistantSpeech(session, text, { source = 'voice_bot', toolName resetIdleTimer(session); if (persistToDb) { db.addMessage(session.sessionId, 'assistant', cleanText, source, toolName, meta).catch((e) => console.warn('[NativeVoice][DB] add assistant failed:', e.message)); + redisClient.pushMessage(session.sessionId, { role: 'assistant', content: cleanText, source }).catch(() => {}); } sendJson(session.client, { type: 'subtitle', @@ -418,7 +422,7 @@ async function processReply(session, text, turnSeq = session.latestUserTurnSeq | // 防止用户质疑/纠正产品信息时S2S自由编造(如"粉末来的呀你搞错了吧") const KB_PROTECTION_WINDOW_MS = 60000; if (!isKnowledgeCandidate && session._lastKbHitAt && (Date.now() - session._lastKbHitAt < KB_PROTECTION_WINDOW_MS)) { - const isPureChitchat = /^(喂|你好|嗨|谢谢|再见|拜拜|好的|嗯|哦|行|没事了|不用了|可以了)[,,。!?\s]*$/.test(cleanText); + const isPureChitchat = /^(喂|你好|嗨|hi|hello|谢谢|谢谢你|谢谢啦|多谢|感谢|再见|拜拜|拜|好的|嗯|哦|行|没事了|不用了|可以了|好的谢谢|没问题|知道了|明白了|了解了|好嘞|好吧|行吧|ok|okay)[,,。!?~\s]*$/i.test(cleanText); if (!isPureChitchat) { isKnowledgeCandidate = true; console.log(`[NativeVoice] KB protection window active, promoting to kbCandidate session=${session.sessionId} lastKbHit=${Math.round((Date.now() - session._lastKbHitAt) / 1000)}s ago`); @@ -450,8 +454,14 @@ async function processReply(session, text, turnSeq = session.latestUserTurnSeq | isSimilar = overlap / shorter.length >= 0.45; } if (isSimilar) { - console.log(`[NativeVoice] using KB prequery cache session=${session.sessionId} preText=${JSON.stringify(session._kbPrequeryText.slice(0, 60))}`); - resolveResult = await session.pendingKbPrequery; + const prequeryResult = await session.pendingKbPrequery; + // 只复用 hit 结果;no-hit 可能因 partial 文本路由不完整,用完整文本 re-search + if (prequeryResult && prequeryResult.delivery !== 'upstream_chat') { + console.log(`[NativeVoice] using KB prequery cache (hit) session=${session.sessionId} preText=${JSON.stringify(session._kbPrequeryText.slice(0, 60))}`); + resolveResult = 
prequeryResult; + } else { + console.log(`[NativeVoice] prequery no-hit, re-searching with full text session=${session.sessionId} preText=${JSON.stringify((session._kbPrequeryText || '').slice(0, 40))} finalText=${JSON.stringify(cleanText.slice(0, 40))}`); + } } else { console.log(`[NativeVoice] KB prequery text mismatch, re-querying session=${session.sessionId} pre=${JSON.stringify(preText.slice(0, 40))} final=${JSON.stringify(finalText.slice(0, 40))}`); } @@ -469,13 +479,12 @@ async function processReply(session, text, turnSeq = session.latestUserTurnSeq | return; } if (delivery === 'upstream_chat') { + // kbCandidate 但 S2S 未调工具 → 放开 S2S 自然回复 + // 依赖:1) system prompt 品牌保护指令引导 S2S 调工具 2) isBrandHarmful 流式拦截兜底 if (isKnowledgeCandidate) { - console.log(`[NativeVoice] processReply kb-nohit retrigger session=${session.sessionId}`); - session.discardNextAssistantResponse = true; - await sendExternalRag(session, [{ title: '用户问题', content: cleanText }]); - } else { - session.blockUpstreamAudio = false; + console.log(`[NativeVoice] processReply kbCandidate+upstream_chat, unblock S2S session=${session.sessionId}`); } + session.blockUpstreamAudio = false; session._lastPartialAt = 0; session.awaitingUpstreamReply = true; session.pendingAssistantSource = 'voice_bot'; @@ -499,10 +508,8 @@ async function processReply(session, text, turnSeq = session.latestUserTurnSeq | session._lastKbTopic = cleanText; session._lastKbHitAt = Date.now(); } - // 直接用KB原始回答作为字幕,不依赖S2S event 351(S2S可能拆段/改写/丢失内容) - const ragSubtitleText = ragContent.map((item) => item.content).join(' '); - persistAssistantSpeech(session, ragSubtitleText, { source, toolName, meta: responseMeta }); - session.lastDeliveredAssistantTurnSeq = activeTurnSeq; + // 不提前发KB原文作字幕:等S2S event 351返回实际语音文本后再更新字幕 + // 这样字幕和语音保持一致(S2S会基于RAG内容生成自然口语化的回答) session._pendingExternalRagReply = true; await sendExternalRag(session, ragContent); session.awaitingUpstreamReply = true; @@ -891,9 +898,10 @@ function handleUpstreamMessage(session, 
data) { }); } } - // 用户开口说话时立即打断所有 AI 播放(包括 S2S 默认 TTS) - if (isDirectSpeaking || isChatTTSSpeaking) { - console.log(`[NativeVoice] user barge-in (partial) session=${session.sessionId} direct=${isDirectSpeaking} chatTTS=${isChatTTSSpeaking}`); + // 用户开口说话时立即打断所有 AI 播放(包括 S2S external_rag 音频) + const isS2SAudioPlaying = !session.blockUpstreamAudio && session.currentTtsType === 'external_rag'; + if (isDirectSpeaking || isChatTTSSpeaking || isS2SAudioPlaying) { + console.log(`[NativeVoice] user barge-in (partial) session=${session.sessionId} direct=${isDirectSpeaking} chatTTS=${isChatTTSSpeaking} s2sRag=${isS2SAudioPlaying}`); session.directSpeakUntil = 0; session.isSendingChatTTSText = false; session.chatTTSUntil = 0; @@ -902,6 +910,8 @@ function handleUpstreamMessage(session, data) { if (session.suppressReplyTimer || session.suppressUpstreamUntil) { clearUpstreamSuppression(session); } + // 阻断 S2S 音频转发,防止用户打断后仍听到残留音频 + session.blockUpstreamAudio = true; } // 无论当前是否在播放,都发送 tts_reset 确保客户端停止所有音频播放 if (!session._lastBargeInResetAt || now - session._lastBargeInResetAt > 500) { diff --git a/test2/server/services/realtimeDialogRouting.js b/test2/server/services/realtimeDialogRouting.js index ca7c3d7..a772f1f 100644 --- a/test2/server/services/realtimeDialogRouting.js +++ b/test2/server/services/realtimeDialogRouting.js @@ -1,5 +1,6 @@ const ToolExecutor = require('./toolExecutor'); const db = require('../db'); +const redisClient = require('./redisClient'); const { hasKnowledgeRouteKeyword } = require('./knowledgeKeywords'); function normalizeTextForSpeech(text) { @@ -270,14 +271,18 @@ async function resolveReply(sessionId, session, text) { const ragItems = fastResult.hit && Array.isArray(fastResult.results) ? fastResult.results.filter(i => i && i.content).map(i => ({ title: i.title || '知识库结果', content: i.content })) : []; - console.log(`[resolveReply] fast-path hit in ${Date.now() - _resolveStart}ms session=${sessionId} source=${fastResult.hot_answer ? 
'hot_answer' : (fastResult.cache_hit ? 'cache' : 'direct')}`); + console.log(`[resolveReply] fast-path hit in ${Date.now() - _resolveStart}ms session=${sessionId} source=${fastResult.hot_answer ? 'hot_answer' : (fastResult.cache_hit ? 'cache' : 'direct')} mode=${fastResult.retrieval_mode || 'answer'}`); if (ragItems.length > 0) { - const cleanedText = normalizeTextForSpeech(replyText).replace(/^(根据知识库信息[,,::\s]*|根据.*?[,,]\s*)/i, ''); session.handoffSummaryUsed = true; + // raw 模式:ragItems 已包含上下文 + 多个 KB 片段,直接透传 + const isRawMode = fastResult.retrieval_mode === 'raw'; + const finalRagItems = isRawMode + ? ragItems + : [{ title: '知识库结果', content: normalizeTextForSpeech(replyText).replace(/^(根据知识库信息[,,::\s]*|根据.*?[,,]\s*)/i, '') || replyText }]; return { delivery: 'external_rag', speechText: '', - ragItems: [{ title: '知识库结果', content: cleanedText || replyText }], + ragItems: finalRagItems, source: 'voice_tool', toolName: 'search_knowledge', routeDecision: { route: 'search_knowledge', args: { query: originalText } }, @@ -295,10 +300,22 @@ async function resolveReply(sessionId, session, text) { } } + // 上下文加载:优先 Redis(~5ms),降级 MySQL(~100ms) const _dbStart = Date.now(); - const recentMessages = await db.getRecentMessages(sessionId, 10).catch(() => []); - const _dbMs = Date.now() - _dbStart; - if (_dbMs > 50) console.log(`[resolveReply] DB getRecentMessages took ${_dbMs}ms session=${sessionId}`); + let recentMessages = null; + if (process.env.ENABLE_REDIS_CONTEXT !== 'false') { + const redisHistory = await redisClient.getRecentHistory(sessionId, 5).catch(() => null); + if (redisHistory && redisHistory.length > 0) { + recentMessages = redisHistory; + const _dbMs = Date.now() - _dbStart; + if (_dbMs > 5) console.log(`[resolveReply] Redis getRecentHistory took ${_dbMs}ms session=${sessionId} items=${redisHistory.length}`); + } + } + if (!recentMessages) { + recentMessages = await db.getRecentMessages(sessionId, 10).catch(() => []); + const _dbMs = Date.now() - _dbStart; + if 
(_dbMs > 50) console.log(`[resolveReply] DB getRecentMessages took ${_dbMs}ms session=${sessionId}`); + } const scopedMessages = session?.handoffSummaryUsed ? recentMessages.filter((item) => !/^chat_/i.test(String(item?.source || ''))) : recentMessages; @@ -310,6 +327,16 @@ async function resolveReply(sessionId, session, text) { if (routeDecision.route === 'chat' && shouldForceKnowledgeRoute(originalText, context)) { routeDecision = { route: 'search_knowledge', args: { query: originalText } }; } + // KB保护窗口:60秒内有KB命中,当前非纯闲聊,强制走KB搜索 + // 防止追问(如"它需要漱口吗")绕过KB走S2S自由编造 + const KB_PROTECTION_WINDOW_MS = 60000; + if (routeDecision.route === 'chat' && session?._lastKbHitAt && (Date.now() - session._lastKbHitAt < KB_PROTECTION_WINDOW_MS)) { + const isPureChitchat = /^(喂|你好|嗨|hi|hello|谢谢|谢谢你|谢谢啦|多谢|感谢|再见|拜拜|拜|好的|嗯|哦|行|没事了|不用了|可以了|好的谢谢|没问题|知道了|明白了|了解了|好嘞|好吧|行吧|ok|okay)[,,。!?~\s]*$/i.test(originalText); + if (!isPureChitchat) { + routeDecision = { route: 'search_knowledge', args: { query: originalText } }; + console.log(`[resolveReply] KB protection window active, forcing KB route session=${sessionId} lastKbHit=${Math.round((Date.now() - session._lastKbHitAt) / 1000)}s ago`); + } + } let replyText = ''; let source = 'voice_bot'; let toolName = null; @@ -368,24 +395,24 @@ async function resolveReply(sessionId, session, text) { : []); if (ragItems.length > 0) { - let speechText = normalizeTextForSpeech(replyText); session.handoffSummaryUsed = true; - if (toolName === 'search_knowledge' && speechText) { - const cleanedText = speechText.replace(/^(根据知识库信息[,,::\s]*|根据.*?[,,]\s*)/i, ''); - return { - delivery: 'external_rag', - speechText: '', - ragItems: [{ title: '知识库结果', content: cleanedText || speechText }], - source, - toolName, - routeDecision, - responseMeta, - }; + const isRawMode = toolResult?.retrieval_mode === 'raw'; + let finalRagItems = ragItems; + + if (toolName === 'search_knowledge' && !isRawMode) { + // 旧模式:LLM 加工过的文本,清理后合并为单条 + const speechText = 
normalizeTextForSpeech(replyText); + if (speechText) { + const cleanedText = speechText.replace(/^(根据知识库信息[,,::\s]*|根据.*?[,,]\s*)/i, ''); + finalRagItems = [{ title: '知识库结果', content: cleanedText || speechText }]; + } } + // raw 模式:ragItems 已包含上下文 + 多个 KB 片段,直接透传给 S2S + return { delivery: 'external_rag', speechText: '', - ragItems, + ragItems: finalRagItems, source, toolName, routeDecision, diff --git a/test2/server/services/redisClient.js b/test2/server/services/redisClient.js new file mode 100644 index 0000000..7b611c6 --- /dev/null +++ b/test2/server/services/redisClient.js @@ -0,0 +1,184 @@ +const Redis = require('ioredis'); + +// ============ 配置 ============ +const REDIS_URL = process.env.REDIS_URL || 'redis://127.0.0.1:6379'; +const REDIS_PASSWORD = process.env.REDIS_PASSWORD || ''; +const REDIS_DB = parseInt(process.env.REDIS_DB) || 0; +const KEY_PREFIX = process.env.REDIS_KEY_PREFIX || 'bigwo:'; + +const HISTORY_MAX_LEN = 10; // 5轮 × 2条/轮 +const HISTORY_TTL_S = 1800; // 30分钟 +const KB_CACHE_HIT_TTL_S = 300; // 5分钟 +const KB_CACHE_NOHIT_TTL_S = 120; // 2分钟 + +// ============ 连接管理 ============ +let client = null; +let isReady = false; + +function createClient() { + if (client) return client; + + const opts = { + db: REDIS_DB, + keyPrefix: KEY_PREFIX, + retryStrategy(times) { + if (times > 10) { + console.error('[Redis] max retries reached, giving up'); + return null; + } + return Math.min(times * 200, 3000); + }, + reconnectOnError(err) { + const targetErrors = ['READONLY', 'ECONNRESET']; + return targetErrors.some(e => err.message.includes(e)); + }, + lazyConnect: true, + maxRetriesPerRequest: 2, + connectTimeout: 5000, + commandTimeout: 3000, + }; + + if (REDIS_PASSWORD) { + opts.password = REDIS_PASSWORD; + } + + client = new Redis(REDIS_URL, opts); + + client.on('ready', () => { + isReady = true; + console.log('[Redis] connected and ready'); + }); + client.on('error', (err) => { + console.warn('[Redis] error:', err.message); + }); + client.on('close', () 
=> { + isReady = false; + console.warn('[Redis] connection closed'); + }); + client.on('reconnecting', () => { + console.log('[Redis] reconnecting...'); + }); + + client.connect().catch((err) => { + console.warn('[Redis] initial connect failed:', err.message); + }); + + return client; +} + +function getClient() { + if (!client) createClient(); + return client; +} + +function isAvailable() { + return isReady && client && client.status === 'ready'; +} + +// ============ 对话历史 ============ +const historyKey = (sessionId) => `voice:history:${sessionId}`; + +async function pushMessage(sessionId, msg) { + if (!isAvailable()) return false; + try { + const key = historyKey(sessionId); + const value = JSON.stringify({ + role: msg.role, + content: msg.content, + source: msg.source || '', + ts: Date.now(), + }); + await client.lpush(key, value); + await client.ltrim(key, 0, HISTORY_MAX_LEN - 1); + await client.expire(key, HISTORY_TTL_S); + return true; + } catch (err) { + console.warn('[Redis] pushMessage failed:', err.message); + return false; + } +} + +async function getRecentHistory(sessionId, maxRounds = 5) { + if (!isAvailable()) return null; + try { + const key = historyKey(sessionId); + const items = await client.lrange(key, 0, maxRounds * 2 - 1); + if (!items || items.length === 0) return []; + // lpush 是倒序插入,lrange 取出的也是最新在前,需要 reverse 恢复时间顺序 + return items + .map((item) => { + try { return JSON.parse(item); } catch { return null; } + }) + .filter(Boolean) + .reverse(); + } catch (err) { + console.warn('[Redis] getRecentHistory failed:', err.message); + return null; + } +} + +async function clearSession(sessionId) { + if (!isAvailable()) return false; + try { + await client.del(historyKey(sessionId)); + return true; + } catch (err) { + console.warn('[Redis] clearSession failed:', err.message); + return false; + } +} + +// ============ KB 缓存 ============ +const kbCacheKey = (cacheKey) => `kb_cache:${cacheKey}`; + +async function setKbCache(cacheKey, result) { + // 只缓存 
hit 结果;no-hit 不写入 Redis,避免阻止后续 retry + if (!isAvailable() || !result.hit) return false; + try { + const key = kbCacheKey(cacheKey); + await client.set(key, JSON.stringify(result), 'EX', KB_CACHE_HIT_TTL_S); + return true; + } catch (err) { + console.warn('[Redis] setKbCache failed:', err.message); + return false; + } +} + +async function getKbCache(cacheKey) { + if (!isAvailable()) return null; + try { + const key = kbCacheKey(cacheKey); + const data = await client.get(key); + if (!data) return null; + return JSON.parse(data); + } catch (err) { + console.warn('[Redis] getKbCache failed:', err.message); + return null; + } +} + +// ============ 优雅关闭 ============ +async function disconnect() { + if (client) { + try { + await client.quit(); + } catch { + client.disconnect(); + } + client = null; + isReady = false; + console.log('[Redis] disconnected'); + } +} + +module.exports = { + createClient, + getClient, + isAvailable, + pushMessage, + getRecentHistory, + clearSession, + setKbCache, + getKbCache, + disconnect, +}; diff --git a/test2/server/services/toolExecutor.js b/test2/server/services/toolExecutor.js index 80d47ed..ac4cf4e 100644 --- a/test2/server/services/toolExecutor.js +++ b/test2/server/services/toolExecutor.js @@ -3,6 +3,8 @@ const https = require('https'); const arkChatService = require('./arkChatService'); const { buildKnowledgeAnswerPrompt, resolveAssistantProfile } = require('./assistantProfileConfig'); const { getAssistantProfile } = require('./assistantProfileService'); +const kbRetriever = require('./kbRetriever'); +const redisClient = require('./redisClient'); // HTTP keep-alive agent:复用TCP连接,避免每次请求重新握手 const kbHttpAgent = new https.Agent({ @@ -51,13 +53,15 @@ const KB_CACHE_MAX_SIZE = 200; const kbQueryCache = new Map(); function getKbCacheKey(query, datasetIds, profileScope = 'global') { - return `${String(profileScope || 'global').trim() || 'global'}|${(query || '').trim()}|${(datasetIds || []).sort().join(',')}`; + const mode = 
process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer'; + return `vdb2|${mode}|${String(profileScope || 'global').trim() || 'global'}|${(query || '').trim()}|${(datasetIds || []).sort().join(',')}`; } function getKbCache(key) { const entry = kbQueryCache.get(key); if (!entry) return null; - const ttl = entry.hit ? KB_CACHE_TTL_MS : KB_CACHE_NOHIT_TTL_MS; + // hit: 5min TTL; no-hit: 10s 短 TTL(仅防同一轮次重复查 VikingDB) + const ttl = entry.hit ? KB_CACHE_TTL_MS : 10000; if (Date.now() - entry.timestamp > ttl) { kbQueryCache.delete(key); return null; @@ -70,6 +74,7 @@ function setKbCache(key, result) { const oldest = kbQueryCache.keys().next().value; kbQueryCache.delete(oldest); } + // hit: 正常缓存; no-hit: 内存 10s 去重(防止同一轮次重复查 VikingDB,不写 Redis) kbQueryCache.set(key, { result, timestamp: Date.now(), hit: !!result.hit }); } @@ -354,10 +359,20 @@ class ToolExecutor { // 确定路由:多意图可并行,只排除真正冲突的组合 const priorityRouteNames = []; if (hasSystemIntent) priorityRouteNames.push('system'); - if (hasProductIntent) priorityRouteNames.push('product'); - if (hasCompanyIntent) priorityRouteNames.push('company'); - if (hasFaqIntent && !hasProductIntent) priorityRouteNames.push('faq'); - if (hasScienceIntent && !hasProductIntent && !hasFaqIntent) priorityRouteNames.push('science'); + if (hasProductIntent) { + priorityRouteNames.push('product'); + // 产品问题同时搜FAQ和科普,获取更全面的回答(好转反应、科普误区等补充信息) + if (!hasFaqIntent) priorityRouteNames.push('faq'); + if (!hasScienceIntent) priorityRouteNames.push('science'); + } + if (hasCompanyIntent) { + priorityRouteNames.push('company'); + // 公司问题同时搜产品和系统培训,test collection 内容有限 + if (!hasProductIntent) priorityRouteNames.push('product'); + if (!hasSystemIntent) priorityRouteNames.push('system'); + } + if (hasFaqIntent) priorityRouteNames.push('faq'); + if (hasScienceIntent) priorityRouteNames.push('science'); if (priorityRouteNames.length > 0) { const routingRules = this.getKnowledgeBaseRoutingRules(); @@ -392,16 +407,14 @@ class ToolExecutor { static 
buildDeterministicKnowledgeQuery(query, context = []) { const text = String(query || '').trim(); - const recentContextText = (Array.isArray(context) ? context : []) - .slice(-6) - .map((item) => String(item?.content || '').trim()) - .filter(Boolean) - .join('\n'); - const haystack = `${text}\n${recentContextText}`; - const questionDimension = text.match(/(功效|作用|成分|配方|原料|怎么吃|怎么用|怎么服用|服用方法|吃法|用法|用量|一天几次|每天几次|每日几次|副作用|好转反应|价格|多少钱|适合谁|适用人群|区别|不同|搭配|原理|规格|包装|剂型|形态|粉末|胶囊|片剂|颗粒|喷雾|乳霜|口服液)/); - // 第一层:当前查询文本中有明确产品/系统/主题关键词 → 直接改写(不依赖上下文) - if (/(基础三合一|三合一基础套|基础套装|大白小红小白)/i.test(text)) return '德国PM细胞营养素 基础套装 大白 小红 小白'; + // ==================================================================== + // 精简版:只保留 VikingDB 语义检索已知会失败的场景 + // 产品/公司/认证等查询全部交给 VikingDB + reranker 处理原始语义 + // 追问/代词由 enrichQueryWithContext + KB保护窗口 处理 + // ==================================================================== + + // === 一成系统子话题分流(内部术语,向量检索难区分子话题) === if (/(一成系统|Ai众享|数字化工作室|盛咖学愿|三大平台|四大Ai生态|四大生态|智能生产力)/i.test(text)) { if (/(核心竞争力|竞争力|核心优势|优势)/i.test(text)) return '一成系统 核心竞争力 三大平台 四大Ai生态 零成本高效率'; if (/(发展|怎么做|怎么用|如何用|如何做|关键点|关键|方法|步骤)/i.test(text)) return '一成系统 发展PM事业 三大平台 四大Ai生态 零成本高效率 全球市场'; @@ -421,115 +434,24 @@ class ToolExecutor { if (/(身未动,?梦已成|批发式晋级)/i.test(text)) return '一成系统 身未动梦已成 批发式晋级 三大平台 四大Ai生态'; if (/行动圈/i.test(text)) return '一成系统 行动圈 数字化工作室 团队管理 目标考核'; if (/盟主社区/i.test(text)) return '一成系统 盟主社区 AI众享 社区盟主 引流 转化'; - if (/(宣明会|世界宣明会)/i.test(text)) return '德国PM 宣明会 世界宣明会 慈善合作'; - if (/BFH/i.test(text)) return '德国PM BFH AAA+ 合作伙伴收益'; - if (/DSN/i.test(text)) return '德国PM DSN 全球100强 欧洲第1'; - if (/(邓白氏|AAA\+)/i.test(text)) return '德国PM 邓白氏 AAA+ 99分'; - if (/(ELAB|科隆名单|Halal|GMP)/i.test(text)) return '德国PM ELAB 科隆名单 Halal GMP 安全认证'; - if (/(Rolf Sorg|斯派尔|Speyer|卢森堡)/i.test(text)) return '德国PM Rolf Sorg 斯派尔 卢森堡 总部 公司介绍'; - if (/(培安|烟台)/i.test(text)) return '德国PM 培安 烟台 中国市场投资'; - if (/(PM公司|德国PM|公司地址|联系方式|电话|公司实力|公司背景|总部|分公司)/i.test(text)) { - if 
(/(产品|细胞营养素|基础套装|基础三合一|小红|大白|小白|activize|basics|restorate|fitline|儿童倍适)/i.test(text)) { - return '德国PM FitLine 细胞营养素产品 大白Basics 小红Activize 小白Restorate 儿童倍适'; - } - if (/(地址|电话|联系方式)/i.test(text)) return '德国PM 日本 美国 加拿大 香港 地址 电话'; - if (/(实力|背景)/i.test(text)) return '德国PM 公司实力介绍 邓白氏 99分 AAA+'; - return '德国PM 1993年 创立 100多个国家 FitLine 公司介绍'; - } - if (/(德国PM介绍|介绍德国PM|德国PM公司介绍|PM公司介绍|PM介绍)/i.test(text)) return '德国PM 1993年 创立 100多个国家 FitLine 公司介绍 邓白氏 99分 AAA+'; - if (/(NTC.*(核心优势|核心竞争力|优势|原理|厉害)|核心优势.*NTC|核心竞争力.*NTC)/i.test(text)) return 'NTC营养保送系统 核心优势 吸收利用 原理'; - if (/(PM基础三合一介绍|基础三合一介绍|PM基础套装介绍|基础套装介绍)/i.test(text)) return '德国PM细胞营养素 基础套装 大白 小红 小白 介绍'; - if (/儿童倍适/i.test(text)) return questionDimension ? `儿童倍适 ${questionDimension[0]}` : '儿童倍适'; - if (/(小红产品|小红|Activize Oxyplus|Activize)/i.test(text)) return questionDimension ? `Fitline小红产品 Activize ${questionDimension[0]}` : 'Fitline小红产品提升能量原理'; - if (/(大白产品|大白|倍适|Basics)/i.test(text)) return questionDimension ? `德国PM细胞营养素 大白 Basics ${questionDimension[0]}` : '德国PM细胞营养素 大白 Basics'; - if (/(小白产品|小白|维适多|Restorate)/i.test(text)) return questionDimension ? 
`德国PM细胞营养素 小白 Restorate ${questionDimension[0]}` : '德国PM细胞营养素 小白'; - if (/(NTC营养保送系统|Nutrient Transport Concept)/i.test(text)) return 'NTC营养保送系统'; - if (/火炉原理/i.test(text)) return '火炉原理'; - if (/(阿育吠陀|Ayurveda)/i.test(text)) return '阿育吠陀医学原理'; - if (/(PM-FitLine|PM细胞营养素)/i.test(text)) return '德国PM细胞营养素 基础套装 大白 小红 小白'; - if (/(我们公司.*产品|公司.*产品|产品.*推荐|推荐.*产品|产品有哪些|产品介绍|产品列表)/i.test(text)) return '德国PM FitLine 细胞营养素产品 大白Basics 小红Activize 小白Restorate 儿童倍适'; - if (/(治病吗|能治病吗|产品治病|治疗疾病|替代药|是不是药)/i.test(text)) return 'PM产品 不是药 不能替代药物 保健食品 营养补充'; - if (/(多久见效|多久有效|多久能见效|多长时间见效|几天见效|什么时候见效)/i.test(text)) return 'PM产品 多久见效 吸收利用 周期 个体差异'; - if (/(为什么.*(全套|搭配|三合一)|为什么要.*(全套|搭配|三合一)|为何.*(全套|搭配|三合一)|产品需要全套)/i.test(text)) return '德国PM细胞营养素 全套搭配 NTC营养保送系统 协同作用'; - if (/(与其它保健品区别|与其他保健品区别|和其它保健品区别|和其他保健品区别|保健品区别)/i.test(text)) return 'PM产品 与其他保健品区别 NTC营养保送系统 吸收利用'; - if (/(新人起步三关|起步三关)/i.test(text)) return '培训新人起步三关'; - if (/(精品会议|会议组织)/i.test(text)) return '培训打造精品会议具体如下'; - if (/成长上总裁/i.test(text)) return '培训成长上总裁'; - if (/(招商|代理|加盟|合作|事业机会|招商稿|代理政策)/i.test(text)) return '一成系统 PM事业 招商与代理 软件赋能 0成本高效率'; - if (/(如何发展PM事业|怎么发展PM事业|PM事业发展逻辑|介绍PM事业|两分钟介绍PM事业|分享.*故事.*自我介绍|自我介绍|商机|PM价值)/i.test(text)) return '一成系统 PM事业 发展逻辑 商机 价值 软件赋能 三大平台 四大Ai生态 0成本高效率'; + + // === 一成系统相关业务话题 === + if (/(招商|代理|加盟|事业机会|招商稿|代理政策)/i.test(text)) return '一成系统 PM事业 招商与代理 软件赋能 0成本高效率'; + if (/(如何发展PM事业|怎么发展PM事业|PM事业发展逻辑|介绍PM事业|两分钟介绍PM事业)/i.test(text)) return '一成系统 PM事业 发展逻辑 商机 价值 软件赋能 三大平台 四大Ai生态 0成本高效率'; if (/(为什么选择德国PM|为何选择德国PM|为什么选德国PM|为什么选PM|为何选PM)/i.test(text)) return '一成系统 德国PM 选择理由 公司实力 产品优势 软件赋能 0成本高效率'; if (/(陌生客户|陌生人).*(沟通|开口|邀约|交流|切入).*(PM事业|德国PM|PM)/i.test(text)) return '一成系统 PM事业 陌生客户 沟通 邀约 话术 软件赋能'; if (/(线上拓客|线上成交|线上开发客户|线上获客|线上成交率)/i.test(text)) return '一成系统 PM事业 线上拓客 成交 获客'; - if (/(团队.*AI智能生产力|AI智能生产力.*团队|团队.*AI生产力|AI生产力.*团队)/i.test(text)) return '一成系统 AI智能生产力 赋能团队'; - if (/(三大平台|四大Ai生态|四大生态)/i.test(text)) return '一成系统 三大平台 四大Ai生态'; - if (/(请分享.*故事.*自我介绍|故事.*自我介绍|个人故事.*自我介绍)/i.test(text)) return 
'一成系统 PM事业 故事分享 自我介绍 软件赋能'; if (/(一成AI|AI落地|ai落地|转观念|落地对比)/i.test(text)) return '2026一成Ai落地对比与转观念'; - if (/(传销|骗局|骗子|正规吗|合法吗|正不正规|合不合法|是不是传销|直销还是传销|层级分销|非法集资|拉人头|下线|发展下线|报单|人头费)/i.test(text)) return '德国PM 1993年 创立 100多个国家 FitLine 公司介绍 邓白氏 99分 AAA+ 合法直销'; - if (/(好转反应|整应反应|排毒反应|副作用|不良反应|皮肤发痒)/i.test(text)) return 'PM产品整应反应好转反应解析'; - if (/(促销活动|促销|优惠|打折|活动分数|5\+1)/i.test(text)) return '促销活动 5+1活动分数'; - if (/暖炉原理/i.test(text)) return '火炉原理'; - if (/(CC套装|CC胶囊)/i.test(text)) return questionDimension ? `CC套装 CC胶囊 ${questionDimension[0]}` : 'CC套装 CC胶囊'; - if (/(IB5|口腔免疫喷雾)/i.test(text)) return questionDimension ? `IB5 口腔免疫喷雾 ${questionDimension[0]}` : 'IB5 口腔免疫喷雾'; - if (/(Q10|辅酵素|氧修护)/i.test(text)) return questionDimension ? `Q10 辅酵素 氧修护 ${questionDimension[0]}` : 'Q10 辅酵素 氧修护'; - if (/(Med Dental\+|Dental\+|草本护理牙膏)/i.test(text)) return questionDimension ? `Med Dental+ 草本护理牙膏 ${questionDimension[0]}` : 'Med Dental+ 草本护理牙膏'; - if (/(Men Face|全效男士护肤抗衰乳霜)/i.test(text)) return questionDimension ? `Men Face 全效男士护肤抗衰乳霜 ${questionDimension[0]}` : 'Men Face 全效男士护肤抗衰乳霜'; - if (/(CC-Cell|CC Cell|CC乳霜)/i.test(text)) return questionDimension ? `CC-Cell 胶囊 乳霜 ${questionDimension[0]}` : 'CC-Cell 胶囊 乳霜'; - if (/(D-Drink|小绿排毒饮|14天排毒D饮料Plus)/i.test(text)) return questionDimension ? `D-Drink 小绿排毒饮 14天排毒D饮料Plus ${questionDimension[0]}` : 'D-Drink 小绿排毒饮 14天排毒D饮料Plus'; - if (/(ProShape|ProShape® Amino|氨基酸|支链氨基酸|BCAA)/i.test(text)) return questionDimension ? `ProShape Amino 氨基酸 BCAA ${questionDimension[0]}` : 'ProShape Amino 氨基酸 BCAA'; - if (/(Herbal Tea|草本茶)/i.test(text)) return questionDimension ? `Herbal Tea 草本茶 ${questionDimension[0]}` : 'Herbal Tea 草本茶'; - if (/(Hair\+|med Hair\+|口服发宝|外用发健)/i.test(text)) return questionDimension ? `Hair+ med Hair+ 口服发宝 外用发健 ${questionDimension[0]}` : 'Hair+ med Hair+ 口服发宝 外用发健'; - if (/(Fitness-Drink|运动饮料健康饮品|运动饮料)/i.test(text)) return questionDimension ? 
`Fitness-Drink 运动饮料健康饮品 ${questionDimension[0]}` : 'Fitness-Drink 运动饮料健康饮品'; - if (/(TopShape|孅萃TopShape纤萃减肥|纤萃减肥)/i.test(text)) return questionDimension ? `TopShape 孅萃TopShape纤萃减肥 ${questionDimension[0]}` : 'TopShape 孅萃TopShape纤萃减肥'; - if (/(Generation 50\+|乐活50\+)/i.test(text)) return questionDimension ? `乐活50+ Generation 50+ ${questionDimension[0]}` : '乐活50+ Generation 50+'; - if (/(Apple Antioxy|苹果细胞抗氧素|Antioxy|Zellschutz|细胞抗氧素)/i.test(text)) return questionDimension ? `Apple Antioxy Zellschutz 细胞抗氧素 ${questionDimension[0]}` : 'Apple Antioxy Zellschutz 细胞抗氧素'; - if (/Women\+/i.test(text)) return questionDimension ? `Women+ ${questionDimension[0]}` : 'Women+'; - if (/乐活奶昔|乐活/i.test(text)) return questionDimension ? `乐活奶昔 ${questionDimension[0]}` : '乐活奶昔'; - if (/(乳清蛋白|蛋白粉)/i.test(text)) return questionDimension ? `乳清蛋白粉 ${questionDimension[0]}` : '乳清蛋白粉'; - if (/(乳酪煲|乳酪饮品|乳酪)/i.test(text)) return questionDimension ? `乳酪煲 乳酪饮品 ${questionDimension[0]}` : '乳酪煲 乳酪饮品'; - if (/(基础二合一|二合一)/i.test(text)) return questionDimension ? `基础二合一 ${questionDimension[0]}` : '基础二合一'; - if (/倍力健/i.test(text)) return questionDimension ? `倍力健 ${questionDimension[0]}` : '倍力健'; - if (/(关节套装|关节舒缓)/i.test(text)) return questionDimension ? `关节套装 关节舒缓膏 ${questionDimension[0]}` : '关节套装 关节舒缓膏'; - if (/(男士乳霜|男士护肤)/i.test(text)) return questionDimension ? `全效男士乳霜 ${questionDimension[0]}` : '全效男士乳霜'; - if (/(去角质|面膜)/i.test(text)) return questionDimension ? `去角质面膜 ${questionDimension[0]}` : '去角质面膜'; - if (/发宝/i.test(text)) return questionDimension ? `发宝 ${questionDimension[0]}` : '发宝'; - if (/叶黄素/i.test(text)) return questionDimension ? `叶黄素 ${questionDimension[0]}` : '叶黄素'; - if (/(奶昔)/i.test(text)) return questionDimension ? `奶昔 ${questionDimension[0]}` : '奶昔'; - if (/(健康饮品)/i.test(text)) return questionDimension ? 
`健康饮品 ${questionDimension[0]}` : '健康饮品'; - // 第二层:当前文本是追问/代词,才通过上下文推断主题 - const isFollowUp = /^(这个|那个|它|它的|他|他的|该|这款|那款|详细|继续|怎么|为什么|适合谁|什么意思|怎么用|怎么吃|功效|成分|好处|原理|规格|包装|剂型|形态|一天几次|每天几次|每日几次)/.test(text); - if (isFollowUp) { - if (/(基础三合一|三合一基础套|基础套装|大白小红小白)/i.test(recentContextText)) return questionDimension ? `德国PM细胞营养素 基础套装 大白 小红 小白 ${questionDimension[0]}` : '德国PM细胞营养素 基础套装 大白 小红 小白'; - if (/(身未动,?梦已成|批发式晋级)/i.test(recentContextText)) return '一成系统 身未动梦已成 批发式晋级 三大平台 四大Ai生态'; - if (/行动圈/i.test(recentContextText)) return '一成系统 行动圈 数字化工作室 团队管理 目标考核'; - if (/盟主社区/i.test(recentContextText)) return '一成系统 盟主社区 AI众享 社区盟主 引流 转化'; - if (/(一成系统|Ai众享|数字化工作室|盛咖学愿)/i.test(recentContextText)) return '一成系统 德国PM事业发展的强大赋能工具 三大平台 四大Ai生态'; - if (/DSN/i.test(recentContextText)) return '德国PM DSN 全球100强 欧洲第1'; - if (/(ELAB|科隆名单|Halal|GMP)/i.test(recentContextText)) return '德国PM ELAB 科隆名单 Halal GMP 安全认证'; - if (/(邓白氏|AAA\+)/i.test(recentContextText)) return '德国PM 邓白氏 AAA+ 99分'; - if (/(宣明会|世界宣明会)/i.test(recentContextText)) return '德国PM 宣明会 世界宣明会 慈善合作'; - if (/(Rolf Sorg|斯派尔|Speyer|卢森堡)/i.test(recentContextText)) return '德国PM Rolf Sorg 斯派尔 卢森堡 总部 公司介绍'; - if (/(培安|烟台)/i.test(recentContextText)) return '德国PM 培安 烟台 中国市场投资'; - if (/(小红产品|小红|Activize)/i.test(recentContextText)) return questionDimension ? `Fitline小红产品 Activize ${questionDimension[0]}` : 'Fitline小红产品提升能量原理'; - if (/(大白产品|大白|Basics)/i.test(recentContextText)) return questionDimension ? `德国PM细胞营养素 大白 Basics ${questionDimension[0]}` : '德国PM细胞营养素 大白 Basics'; - if (/(小白产品|小白|Restorate)/i.test(recentContextText)) return questionDimension ? `德国PM细胞营养素 小白 Restorate ${questionDimension[0]}` : '德国PM细胞营养素 小白'; - if (/儿童倍适/i.test(recentContextText)) return questionDimension ? 
`儿童倍适 ${questionDimension[0]}` : '儿童倍适'; - if (/火炉原理/i.test(recentContextText)) return '火炉原理'; - if (/(阿育吠陀|Ayurveda)/i.test(recentContextText)) return '阿育吠陀医学原理'; - if (/(NTC营养保送系统)/i.test(recentContextText)) return 'NTC营养保送系统'; - if (/(Med Dental\+|草本护理牙膏)/i.test(recentContextText)) return questionDimension ? `Med Dental+ 草本护理牙膏 ${questionDimension[0]}` : 'Med Dental+ 草本护理牙膏'; - if (/(Men Face|全效男士护肤抗衰乳霜)/i.test(recentContextText)) return questionDimension ? `Men Face 全效男士护肤抗衰乳霜 ${questionDimension[0]}` : 'Men Face 全效男士护肤抗衰乳霜'; - if (/(CC-Cell|CC胶囊|CC乳霜)/i.test(recentContextText)) return questionDimension ? `CC-Cell 胶囊 乳霜 ${questionDimension[0]}` : 'CC-Cell 胶囊 乳霜'; - if (/(D-Drink|小绿排毒饮|14天排毒D饮料Plus)/i.test(recentContextText)) return questionDimension ? `D-Drink 小绿排毒饮 14天排毒D饮料Plus ${questionDimension[0]}` : 'D-Drink 小绿排毒饮 14天排毒D饮料Plus'; - if (/(ProShape|氨基酸|BCAA)/i.test(recentContextText)) return questionDimension ? `ProShape Amino 氨基酸 BCAA ${questionDimension[0]}` : 'ProShape Amino 氨基酸 BCAA'; - if (/(Herbal Tea|草本茶)/i.test(recentContextText)) return questionDimension ? `Herbal Tea 草本茶 ${questionDimension[0]}` : 'Herbal Tea 草本茶'; - if (/(Hair\+|med Hair\+|口服发宝|外用发健)/i.test(recentContextText)) return questionDimension ? `Hair+ med Hair+ 口服发宝 外用发健 ${questionDimension[0]}` : 'Hair+ med Hair+ 口服发宝 外用发健'; - if (/(Fitness-Drink|运动饮料健康饮品|运动饮料)/i.test(recentContextText)) return questionDimension ? `Fitness-Drink 运动饮料健康饮品 ${questionDimension[0]}` : 'Fitness-Drink 运动饮料健康饮品'; - if (/(TopShape|孅萃TopShape纤萃减肥|纤萃减肥)/i.test(recentContextText)) return questionDimension ? `TopShape 孅萃TopShape纤萃减肥 ${questionDimension[0]}` : 'TopShape 孅萃TopShape纤萃减肥'; - if (/(Generation 50\+|乐活50\+)/i.test(recentContextText)) return questionDimension ? `乐活50+ Generation 50+ ${questionDimension[0]}` : '乐活50+ Generation 50+'; - if (/(Apple Antioxy|苹果细胞抗氧素|Antioxy|Zellschutz|细胞抗氧素)/i.test(recentContextText)) return questionDimension ? 
`Apple Antioxy Zellschutz 细胞抗氧素 ${questionDimension[0]}` : 'Apple Antioxy Zellschutz 细胞抗氧素'; - } - return ''; + // === 敏感话题兜底(必须精确控制回复内容) === + if (/(传销|骗局|骗子|正规吗|合法吗|正不正规|合不合法|是不是传销|直销还是传销|层级分销|非法集资|拉人头|下线|发展下线|报单|人头费)/i.test(text)) return '德国PM 1993年 创立 100多个国家 FitLine 公司介绍 邓白氏 99分 AAA+ 合法直销'; + + // === 别名纠正(向量检索不认的别名) === + if (/暖炉原理/i.test(text)) return '火炉原理'; + + // 所有其它查询(产品/公司/认证/培训等):不做确定性改写 + // 依赖 normalizeKnowledgeQueryAlias(别名归一化)+ enrichQueryWithContext(上下文补充)+ VikingDB + reranker + return null; } static applyKnowledgeQueryAnchor(query) { @@ -560,15 +482,39 @@ class ToolExecutor { .replace(/Basics/gi, 'Basics') .replace(/基础三合一|三合一基础套|大白小红小白|基础套装?/g, 'PM细胞营养素 基础套装') .replace(/儿童倍适|儿童产品/g, '儿童倍适') + .replace(/小红精华液/g, 'Activize Serum 小红精华液') .replace(/小红产品/g, '小红产品 Activize Oxyplus') .replace(/大白产品/g, '大白产品 Basics') .replace(/小白产品/g, '小白产品 Restorate') - .replace(/(? id.trim()).filter(Boolean); + const kbTarget = { datasetIds: allDatasetIds, matchedRoutes: ['all'] }; const effectiveQuery = rewrittenQuery || query; if (rewrittenQuery && rewrittenQuery !== query) { console.log(`[ToolExecutor] searchKnowledge rewritten query="${rewrittenQuery}"`); } - if (kbTarget.datasetIds.length > 0) { - console.log(`[ToolExecutor] searchKnowledge selected dataset_ids=${kbTarget.datasetIds.join(',')} routes=${kbTarget.matchedRoutes.join(',')}`); - } + console.log(`[ToolExecutor] searchKnowledge full-scan all ${allDatasetIds.length} collections`); const kbIds = process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS; if (kbIds && kbIds !== 'your_knowledge_base_dataset_id') { @@ -758,12 +705,13 @@ class ToolExecutor { }; } try { - // 缓存检查:相同effectiveQuery + datasetIds命中缓存时直接返回,避免重复API调用 + // 缓存检查:优先 Redis,降级内存 Map(skipCache 时跳过) const cacheKey = getKbCacheKey(effectiveQuery, kbTarget.datasetIds, profileScope); - const cached = getKbCache(cacheKey); + const redisCached = skipCache ? null : await redisClient.getKbCache(cacheKey).catch(() => null); + const cached = skipCache ? 
null : (redisCached || getKbCache(cacheKey)); if (cached) { const latencyMs = Date.now() - startTime; - console.log(`[ToolExecutor] Ark KB cache hit in ${latencyMs}ms key="${cacheKey.slice(0, 60)}"`); + console.log(`[ToolExecutor] Ark KB cache hit in ${latencyMs}ms key="${cacheKey.slice(0, 60)}" source=${redisCached ? 'redis' : 'memory'}`); return { ...cached, original_query: query, @@ -774,12 +722,45 @@ class ToolExecutor { cache_hit: true, }; } - console.log('[ToolExecutor] Trying Ark Knowledge Search...'); - const arkResult = await this.searchArkKnowledge(effectiveQuery, context, responseMode, kbTarget.datasetIds, query, assistantProfile); + + // 根据检索模式选择链路 + const retrievalMode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer'; + let arkResult; + + if (retrievalMode === 'raw') { + // ★ 新链路:纯检索 + 重排,不经 LLM 加工 + console.log('[ToolExecutor] Using RAW retrieval mode (kbRetriever)'); + const rawResult = await kbRetriever.searchAndRerank(effectiveQuery, { + datasetIds: kbTarget.datasetIds, + sessionId: session_id, + session: _session, + originalQuery: query, + }); + // 转换为与旧格式兼容的结构 + arkResult = { + query: rawResult.query, + results: rawResult.ragPayload.length > 0 + ? 
rawResult.ragPayload.map(item => ({ title: item.title, content: item.content })) + : [{ title: '未找到', content: `知识库中暂未找到与"${query}"直接相关的信息,请换个更具体的问法再试。` }], + total: rawResult.ragPayload.length, + source: 'ark_knowledge', + hit: rawResult.hit, + reason: rawResult.reason, + retrieval_mode: 'raw', + top_score: rawResult.topScore, + chunks_count: rawResult.rerankedChunks?.length || 0, + }; + } else { + // 旧链路:LLM 加工模式 + console.log('[ToolExecutor] Using ANSWER retrieval mode (searchArkKnowledge)'); + arkResult = await this.searchArkKnowledge(effectiveQuery, context, responseMode, kbTarget.datasetIds, query, assistantProfile); + } + const latencyMs = Date.now() - startTime; - console.log(`[ToolExecutor] Ark KB search succeeded in ${latencyMs}ms`); - // 缓存所有结果(hit用5分钟TTL,no-hit用2分钟TTL),避免重复API调用 + console.log(`[ToolExecutor] Ark KB search succeeded in ${latencyMs}ms mode=${retrievalMode}`); + // 缓存到 Redis + 内存双写 setKbCache(cacheKey, arkResult); + redisClient.setKbCache(cacheKey, arkResult).catch(() => {}); return { ...arkResult, original_query: query, diff --git a/test2/server/tests/_check_logs.cjs b/test2/server/tests/_check_logs.cjs new file mode 100644 index 0000000..49901c3 --- /dev/null +++ b/test2/server/tests/_check_logs.cjs @@ -0,0 +1,92 @@ +const { Client } = require('ssh2'); +const SSH_CONFIG = { host: '119.45.10.34', port: 22, username: 'root', password: '#xyzh%CS#2512@28' }; + +function sshExec(conn, cmd) { + return new Promise((resolve, reject) => { + conn.exec(cmd, (err, s) => { + if (err) return reject(err); + let o = ''; + s.on('data', d => o += d); + s.stderr.on('data', d => o += d); + s.on('close', () => resolve(o)); + }); + }); +} + +const fs = require('fs'); +const path = require('path'); + +function sftpUpload(conn, localPath, remotePath) { + return new Promise((resolve, reject) => { + conn.sftp((err, sftp) => { + if (err) return reject(err); + const rs = fs.createReadStream(localPath); + const ws = sftp.createWriteStream(remotePath); + 
ws.on('close', () => resolve()); + ws.on('error', reject); + rs.pipe(ws); + }); + }); +} + +const REMOTE_BASE = '/www/wwwroot/demo.tensorgrove.com.cn/server'; +const LOCAL_BASE = path.join(__dirname, '..'); + +async function main() { + const conn = new Client(); + await new Promise((resolve, reject) => { + conn.on('ready', resolve).on('error', reject).connect(SSH_CONFIG); + }); + + // 1. 上传文件 + const files = [ + { name: 'toolExecutor.js', sub: 'services' }, + { name: 'kbRetriever.js', sub: 'services' }, + ]; + console.log('=== 上传 ==='); + for (const f of files) { + const localFile = path.join(LOCAL_BASE, f.sub, f.name); + const remoteFile = `${REMOTE_BASE}/${f.sub}/${f.name}`; + await sftpUpload(conn, localFile, remoteFile); + console.log(` \u2705 ${f.name}`); + const sc = await sshExec(conn, `node -c ${remoteFile} 2>&1`); + if (sc.includes('SyntaxError')) { console.log('SYNTAX ERROR!'); conn.end(); process.exit(1); } + } + + // 2. 刷 Redis KB 缓存 + console.log('\n=== 刷 Redis KB 缓存 ==='); + console.log(await sshExec(conn, `cd ${REMOTE_BASE} && node -e " + require('dotenv').config(); + const Redis = require('ioredis'); + const r = new Redis(process.env.REDIS_URL || 'redis://127.0.0.1:6379', { + password: process.env.REDIS_PASSWORD || undefined, + db: parseInt(process.env.REDIS_DB) || 0, + keyPrefix: process.env.REDIS_KEY_PREFIX || 'bigwo:', + lazyConnect: true, maxRetriesPerRequest: 2, connectTimeout: 5000, + }); + r.connect().then(async () => { + const keys = await r.keys('kb_cache:*'); + if (keys.length > 0) { await r.del(...keys); console.log('Deleted ' + keys.length + ' keys'); } + else { console.log('No keys'); } + r.quit(); process.exit(0); + }).catch(e => { console.log('Redis: ' + e.message); process.exit(0); }); + " 2>&1`)); + + // 3. 
重启 + await sshExec(conn, '> /var/log/bigwo/server-error.log && > /var/log/bigwo/server-out.log'); + await sshExec(conn, 'pm2 stop bigwo-server'); + await new Promise(r => setTimeout(r, 1000)); + await sshExec(conn, `cd ${REMOTE_BASE} && pm2 start bigwo-server --update-env`); + console.log('\n=== PM2 重启,等待5s ==='); + await new Promise(r => setTimeout(r, 5000)); + console.log(await sshExec(conn, 'pm2 status bigwo-server')); + const errLog = await sshExec(conn, 'cat /var/log/bigwo/server-error.log'); + console.log('=== 错误 ==='); + console.log(errLog || '(空 ✅)'); + console.log('\n=== 健康 ==='); + console.log(await sshExec(conn, 'curl -s --max-time 5 http://localhost:3012/api/health 2>&1')); + + conn.end(); +} + +main().catch(e => { console.error('FAILED:', e.message); process.exit(1); }); diff --git a/test2/server/tests/_test_alias_ab.cjs b/test2/server/tests/_test_alias_ab.cjs new file mode 100644 index 0000000..ba80f34 --- /dev/null +++ b/test2/server/tests/_test_alias_ab.cjs @@ -0,0 +1,43 @@ +/** + * A/B 测试:原始查询 vs 别名扩展后查询的检索效果对比 + * 直接调用 VikingDB + reranker,比较 topScore 和 hit 状态 + */ +require('dotenv').config({ path: require('path').join(__dirname, '..', '.env') }); +const kbRetriever = require('../services/kbRetriever'); + +const TEST_QUERIES = [ + // 中文俗名 → 中文全名(语义接近) + { raw: '牙膏怎么用', alias: '草本护理牙膏 Med Dental+怎么用', label: '牙膏(俗名→全名)' }, + { raw: '喷雾功效', alias: 'IB5 口腔免疫喷雾功效', label: '喷雾(俗名→全名)' }, + { raw: '乳酪怎么喝', alias: '乳酪煲 乳酪饮品怎么喝', label: '乳酪(俗名→全名)' }, + // 中文昵称 → 英文产品名(语义无关联) + { raw: '小红怎么吃', alias: '小红产品 Activize Oxyplus怎么吃', label: '小红(昵称→英文名)' }, + { raw: '大白功效', alias: '大白产品 Basics功效', label: '大白(昵称→英文名)' }, + { raw: '小绿排毒', alias: 'D-Drink 小绿 排毒饮排毒', label: '小绿(昵称→英文名)' }, + { raw: '小黑适合谁', alias: 'MEN+ 倍力健 小黑适合谁', label: '小黑(昵称→英文名)' }, + // 通用词 → 特定产品 + { raw: '氨基酸', alias: 'ProShape Amino 氨基酸', label: '氨基酸(通用→产品)' }, + { raw: '胶原蛋白', alias: '胶原蛋白肽', label: '胶原蛋白(通用→产品)' }, + { raw: '细胞抗氧素功效', alias: 'Zellschutz 细胞抗氧素功效', label: '细胞抗氧素(中→英)' }, +]; 
+ +async function runTest() { + console.log('=== A/B 测试:原始查询 vs 别名扩展 ===\n'); + console.log('| 场景 | 原始 topScore | 扩展 topScore | 差值 | 原始hit | 扩展hit |'); + console.log('|------|-------------|-------------|------|---------|---------|'); + + for (const t of TEST_QUERIES) { + const rawRes = await kbRetriever.searchAndRerank(t.raw, {}); + await new Promise(r => setTimeout(r, 1500)); + const aliasRes = await kbRetriever.searchAndRerank(t.alias, {}); + await new Promise(r => setTimeout(r, 1500)); + const rawScore = rawRes.topScore?.toFixed(3) || '0.000'; + const aliasScore = aliasRes.topScore?.toFixed(3) || '0.000'; + const diff = ((aliasRes.topScore || 0) - (rawRes.topScore || 0)).toFixed(3); + const diffStr = diff > 0 ? `+${diff}` : diff; + console.log(`| ${t.label} | ${rawScore} | ${aliasScore} | ${diffStr} | ${rawRes.hit ? 'HIT' : 'MISS'} | ${aliasRes.hit ? 'HIT' : 'MISS'} |`); + } + console.log('\n阈值: reranker hit >= 0.1'); +} + +runTest().catch(e => { console.error('FAILED:', e.message); process.exit(1); }); diff --git a/test2/server/tests/test_kb_retriever.js b/test2/server/tests/test_kb_retriever.js new file mode 100644 index 0000000..9a5495d --- /dev/null +++ b/test2/server/tests/test_kb_retriever.js @@ -0,0 +1,322 @@ +/** + * kbRetriever.js 单元测试 + * 覆盖:配置读取、rerankChunks降级、buildRagPayload组装、hit/no-hit判断 + * 纯本地测试,不依赖外部API + * + * 运行方式: node --test tests/test_kb_retriever.js + */ +const { describe, it, beforeEach, afterEach } = require('node:test'); +const assert = require('node:assert/strict'); + +// 设置测试环境变量(在require之前) +const ENV_BACKUP = {}; +function setEnv(overrides) { + for (const [k, v] of Object.entries(overrides)) { + ENV_BACKUP[k] = process.env[k]; + process.env[k] = v; + } +} +function restoreEnv() { + for (const [k, v] of Object.entries(ENV_BACKUP)) { + if (v === undefined) delete process.env[k]; + else process.env[k] = v; + } +} + +// 设置基本环境变量避免模块加载出错 +setEnv({ + VOLC_ARK_API_KEY: 'test_key', + VOLC_ARK_ENDPOINT_ID: 'test_endpoint', + 
VOLC_ARK_KNOWLEDGE_BASE_IDS: 'ds_test1,ds_test2', + VOLC_ARK_RERANKER_ENDPOINT_ID: 'reranker_test', + VOLC_ARK_RERANKER_TOP_N: '3', + VOLC_ARK_KB_RETRIEVAL_TOP_K: '10', + VOLC_ARK_KNOWLEDGE_THRESHOLD: '0.1', + ENABLE_RERANKER: 'true', + ENABLE_REDIS_CONTEXT: 'false', // 测试中不连Redis +}); + +const kbRetriever = require('../services/kbRetriever'); + +// ================================================================ +// 1. getConfig — 配置读取 +// ================================================================ +describe('kbRetriever.getConfig — 配置读取', () => { + afterEach(() => restoreEnv()); + + it('应正确读取所有配置项', () => { + const config = kbRetriever.getConfig(); + assert.equal(config.authKey, 'test_key'); + assert.equal(config.rerankerTopN, 3); + assert.equal(config.retrievalTopK, 10); + assert.equal(config.enableReranker, true); + assert.equal(config.enableRedisContext, false); + assert.ok(config.kbIds.includes('ds_test1')); + assert.ok(config.kbIds.includes('ds_test2')); + }); + + it('ENABLE_RERANKER=false 应正确关闭', () => { + setEnv({ ENABLE_RERANKER: 'false' }); + const config = kbRetriever.getConfig(); + assert.equal(config.enableReranker, false); + }); + + it('无 RERANKER_MODEL 时应默认为 doubao-seed-rerank', () => { + setEnv({ VOLC_ARK_RERANKER_MODEL: '', VOLC_ARK_RERANKER_ENDPOINT_ID: '' }); + const config = kbRetriever.getConfig(); + assert.equal(config.rerankerModel, 'doubao-seed-rerank'); + }); + + it('retrievalMode 默认应为 raw', () => { + setEnv({ VOLC_ARK_KB_RETRIEVAL_MODE: 'raw' }); + const config = kbRetriever.getConfig(); + assert.equal(config.retrievalMode, 'raw'); + }); + + it('retrievalMode 为空时默认 raw', () => { + setEnv({ VOLC_ARK_KB_RETRIEVAL_MODE: '' }); + const config = kbRetriever.getConfig(); + // 空字符串 || 'raw' → 'raw'... 
不对,实际是空字符串是falsy + // 代码: process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'raw' + assert.equal(config.retrievalMode, 'raw'); + }); + + it('dataset_ids 分割应正确处理空格和逗号', () => { + setEnv({ VOLC_ARK_KNOWLEDGE_BASE_IDS: ' ds_a , ds_b , ds_c ' }); + const config = kbRetriever.getConfig(); + assert.deepEqual(config.kbIds, ['ds_a', 'ds_b', 'ds_c']); + }); +}); + +// ================================================================ +// 2. rerankChunks — 重排降级逻辑 +// ================================================================ +describe('kbRetriever.rerankChunks — 降级与边界', () => { + afterEach(() => restoreEnv()); + + it('空 chunks 应返回空数组', async () => { + const result = await kbRetriever.rerankChunks('测试', [], 3); + assert.deepEqual(result, []); + }); + + it('chunks 数量 <= topN 时应直接返回全部', async () => { + const chunks = [ + { id: '1', content: '片段1', score: 0.9 }, + { id: '2', content: '片段2', score: 0.8 }, + ]; + const result = await kbRetriever.rerankChunks('测试', chunks, 3); + assert.equal(result.length, 2, 'Should return all chunks when count <= topN'); + assert.equal(result[0].content, '片段1'); + }); + + it('ENABLE_RERANKER=false 时应返回前 topN 条(按原序)', async () => { + setEnv({ ENABLE_RERANKER: 'false' }); + const chunks = [ + { id: '1', content: 'A', score: 0.9 }, + { id: '2', content: 'B', score: 0.8 }, + { id: '3', content: 'C', score: 0.7 }, + { id: '4', content: 'D', score: 0.6 }, + { id: '5', content: 'E', score: 0.5 }, + ]; + const result = await kbRetriever.rerankChunks('测试', chunks, 3); + assert.equal(result.length, 3); + assert.equal(result[0].content, 'A'); + assert.equal(result[1].content, 'B'); + assert.equal(result[2].content, 'C'); + }); + + it('无 RERANKER_ENDPOINT_ID 时应降级为按检索排序取 topN', async () => { + setEnv({ VOLC_ARK_RERANKER_ENDPOINT_ID: '' }); + const chunks = Array.from({ length: 8 }, (_, i) => ({ + id: `c${i}`, content: `内容${i}`, score: 1 - i * 0.1, + })); + const result = await kbRetriever.rerankChunks('测试', chunks, 3); + assert.equal(result.length, 3); + 
assert.equal(result[0].content, '内容0', 'First chunk should be highest score'); + }); + + it('reranker API 超时/失败时应降级返回前 topN', async () => { + // 设置一个不存在的 endpoint,会导致 API 调用失败 + setEnv({ ENABLE_RERANKER: 'true', VOLC_ARK_RERANKER_ENDPOINT_ID: 'invalid_endpoint' }); + const chunks = [ + { id: '1', content: '片段1', score: 0.9 }, + { id: '2', content: '片段2', score: 0.8 }, + { id: '3', content: '片段3', score: 0.7 }, + { id: '4', content: '片段4', score: 0.6 }, + ]; + const result = await kbRetriever.rerankChunks('测试', chunks, 3); + assert.equal(result.length, 3, 'Should fallback to top 3'); + assert.equal(result[0].content, '片段1'); + }); +}); + +// ================================================================ +// 3. buildRagPayload — RAG payload 组装 +// ================================================================ +describe('kbRetriever.buildRagPayload — payload 组装', () => { + + it('无上下文时应只包含 KB 片段', () => { + const chunks = [ + { content: '片段A', doc_name: '产品手册' }, + { content: '片段B', doc_name: 'FAQ' }, + ]; + const payload = kbRetriever.buildRagPayload(chunks, []); + assert.equal(payload.length, 2, 'Should have 2 items (chunks only)'); + assert.equal(payload[0].title, '产品手册'); + assert.equal(payload[0].content, '片段A'); + assert.equal(payload[1].title, 'FAQ'); + }); + + it('有上下文时应在片段前注入上下文条目', () => { + const chunks = [{ content: '片段A', doc_name: '' }]; + const history = [ + { role: 'user', content: '小红怎么吃' }, + { role: 'assistant', content: '小红每天一包...' 
}, + ]; + const payload = kbRetriever.buildRagPayload(chunks, history); + assert.equal(payload.length, 2, 'Should have context + 1 chunk'); + assert.equal(payload[0].title, '对话上下文'); + assert.ok(payload[0].content.includes('用户: 小红怎么吃'), 'Context should include user message'); + assert.ok(payload[0].content.includes('助手: 小红每天一包'), 'Context should include assistant message'); + assert.equal(payload[1].content, '片段A'); + }); + + it('无 doc_name 的片段应使用默认标题"知识库片段N"', () => { + const chunks = [ + { content: '内容1', doc_name: '' }, + { content: '内容2', doc_name: '' }, + { content: '内容3', doc_name: '' }, + ]; + const payload = kbRetriever.buildRagPayload(chunks, []); + assert.equal(payload[0].title, '知识库片段1'); + assert.equal(payload[1].title, '知识库片段2'); + assert.equal(payload[2].title, '知识库片段3'); + }); + + it('空 chunks 应返回空数组(无上下文时)', () => { + const payload = kbRetriever.buildRagPayload([], []); + assert.equal(payload.length, 0); + }); + + it('空 chunks + 有上下文 应只返回上下文条目', () => { + const history = [{ role: 'user', content: '你好' }]; + const payload = kbRetriever.buildRagPayload([], history); + assert.equal(payload.length, 1); + assert.equal(payload[0].title, '对话上下文'); + }); + + it('5轮对话上下文应完整保留', () => { + const history = []; + for (let i = 1; i <= 5; i++) { + history.push({ role: 'user', content: `问题${i}` }); + history.push({ role: 'assistant', content: `回答${i}` }); + } + const payload = kbRetriever.buildRagPayload([{ content: '片段', doc_name: '' }], history); + const contextContent = payload[0].content; + for (let i = 1; i <= 5; i++) { + assert.ok(contextContent.includes(`问题${i}`), `Should include question ${i}`); + assert.ok(contextContent.includes(`回答${i}`), `Should include answer ${i}`); + } + }); +}); + +// ================================================================ +// 4. 
searchAndRerank — 主流程(无API调用的边界测试) +// ================================================================ +describe('kbRetriever.searchAndRerank — 主流程边界', () => { + afterEach(() => restoreEnv()); + + it('endpoint 未配置时应返回 hit=false + error', async () => { + setEnv({ VOLC_ARK_ENDPOINT_ID: 'your_ark_endpoint_id', VOLC_ARK_KNOWLEDGE_ENDPOINT_ID: '' }); + const result = await kbRetriever.searchAndRerank('测试'); + assert.equal(result.hit, false); + assert.ok(result.reason, 'Should have reason'); + assert.equal(result.source, 'ark_knowledge'); + }); + + it('无 dataset_ids 时应返回 hit=false', async () => { + setEnv({ VOLC_ARK_KNOWLEDGE_BASE_IDS: '' }); + const result = await kbRetriever.searchAndRerank('测试'); + assert.equal(result.hit, false); + }); + + it('返回结构应包含所有必需字段(或抛出可捕获的异常)', async () => { + // 使用假 endpoint,API 调用会失败 + setEnv({ + VOLC_ARK_ENDPOINT_ID: 'ep_test', + VOLC_ARK_KNOWLEDGE_ENDPOINT_ID: 'ep_test', + VOLC_ARK_KNOWLEDGE_BASE_IDS: 'ds_test', + }); + + try { + const result = await kbRetriever.searchAndRerank('测试查询'); + // 如果返回了结果(非抛出),验证结构 + assert.ok('hit' in result, 'Should have hit field'); + assert.ok('reason' in result, 'Should have reason field'); + assert.ok('source' in result, 'Should have source field'); + assert.ok('latencyMs' in result, 'Should have latencyMs field'); + assert.equal(result.source, 'ark_knowledge'); + } catch (err) { + // API 调用失败抛出异常也是合理行为(由上层 toolExecutor catch 处理) + assert.ok(err instanceof Error, 'Should throw an Error instance'); + console.log(` ℹ️ searchAndRerank threw as expected: ${err.message.slice(0, 80)}`); + } + }); +}); + +// ================================================================ +// 5. 
hit/no-hit 判定逻辑 +// ================================================================ +describe('hit/no-hit 判定 — 基于 reranker score', () => { + afterEach(() => restoreEnv()); + + it('buildRagPayload 有片段 + score > 0.3 应判为 hit(通过 searchAndRerank 返回值验证)', () => { + // 直接验证判定逻辑 + const highScoreChunks = [{ content: '有效内容', score: 0.8, doc_name: '' }]; + const payload = kbRetriever.buildRagPayload(highScoreChunks, []); + assert.ok(payload.length > 0, 'High score chunks should produce payload'); + assert.ok(highScoreChunks[0].score >= 0.3, 'Score 0.8 >= 0.3 should be hit'); + }); + + it('score < 0.3 的片段应判为 no-hit', () => { + const lowScoreChunks = [{ content: '弱相关内容', score: 0.1, doc_name: '' }]; + assert.ok(lowScoreChunks[0].score < 0.3, 'Score 0.1 < 0.3 should be no-hit'); + }); + + it('无重排器时 hitThreshold 应为 0.5', () => { + setEnv({ ENABLE_RERANKER: 'false' }); + // 验证逻辑:无重排器时,0.4的分数应该不算hit(阈值0.5) + const config = kbRetriever.getConfig(); + const hitThreshold = config.enableReranker && config.rerankerModel ? 0.3 : 0.5; + assert.equal(hitThreshold, 0.5, 'Without reranker, threshold should be 0.5'); + }); + + it('有重排器时 hitThreshold 应为 0.3', () => { + setEnv({ ENABLE_RERANKER: 'true', VOLC_ARK_RERANKER_MODEL: 'doubao-seed-rerank' }); + const config = kbRetriever.getConfig(); + const hitThreshold = config.enableReranker && config.rerankerModel ? 0.3 : 0.5; + assert.equal(hitThreshold, 0.3, 'With reranker, threshold should be 0.3'); + }); +}); + +// ================================================================ +// 6. 
retrieveChunks — 解析逻辑(模拟response) +// ================================================================ +describe('retrieveChunks — 边界', () => { + afterEach(() => restoreEnv()); + + it('endpoint 未配置时应返回 error', async () => { + setEnv({ VOLC_ARK_ENDPOINT_ID: 'your_ark_endpoint_id', VOLC_ARK_KNOWLEDGE_ENDPOINT_ID: '' }); + const result = await kbRetriever.retrieveChunks('测试', ['ds1'], 5, 0.1); + assert.equal(result.error, 'endpoint_not_configured'); + assert.equal(result.chunks.length, 0); + }); + + it('无 datasetIds 且环境变量也为空时应返回 error', async () => { + setEnv({ VOLC_ARK_KNOWLEDGE_BASE_IDS: '', VOLC_ARK_ENDPOINT_ID: 'ep_valid', VOLC_ARK_KNOWLEDGE_ENDPOINT_ID: 'ep_valid' }); + const result = await kbRetriever.retrieveChunks('测试', [], 5, 0.1); + assert.equal(result.error, 'no_dataset_ids'); + }); +}); + +console.log('\n=== kbRetriever 测试加载完成 ===\n'); diff --git a/test2/server/tests/test_raw_mode_integration.js b/test2/server/tests/test_raw_mode_integration.js new file mode 100644 index 0000000..0d018e2 --- /dev/null +++ b/test2/server/tests/test_raw_mode_integration.js @@ -0,0 +1,331 @@ +/** + * raw 模式集成测试 + * 覆盖:resolveReply raw/answer 模式切换、ragItems 格式、字幕过滤、缓存双写 + * + * 运行方式: node --test tests/test_raw_mode_integration.js + */ +const { describe, it, beforeEach, afterEach } = require('node:test'); +const assert = require('node:assert/strict'); +const path = require('path'); +const fs = require('fs'); + +// 加载 .env +const envPath = path.join(__dirname, '../.env'); +if (fs.existsSync(envPath)) { + fs.readFileSync(envPath, 'utf8').split('\n').forEach(line => { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith('#')) return; + const idx = trimmed.indexOf('='); + if (idx > 0) { + const key = trimmed.slice(0, idx).trim(); + let val = trimmed.slice(idx + 1).trim(); + if ((val.startsWith('"') && val.endsWith('"')) || (val.startsWith("'") && val.endsWith("'"))) { + val = val.slice(1, -1); + } + if (!process.env[key]) process.env[key] = val; + } + }); +} + +const 
{ normalizeTextForSpeech } = require('../services/realtimeDialogRouting'); + +// ================================================================ +// 1. ragItems 格式验证 — raw 模式 vs answer 模式 +// ================================================================ +describe('ragItems 格式 — raw 模式 vs answer 模式', () => { + + it('raw 模式 toolResult 应包含 retrieval_mode 标记', () => { + const rawToolResult = { + query: '小红怎么吃', + results: [ + { title: '对话上下文', content: '用户: 你好\n助手: 你好!' }, + { title: '产品手册', content: '小红Activize Oxyplus...' }, + { title: 'FAQ', content: '小红每天一包...' }, + { title: '知识库片段3', content: '用40度以下温水冲服...' }, + ], + total: 4, + source: 'ark_knowledge', + hit: true, + reason: 'reranked_hit', + retrieval_mode: 'raw', + top_score: 0.85, + chunks_count: 3, + }; + + assert.equal(rawToolResult.retrieval_mode, 'raw'); + assert.equal(rawToolResult.results.length, 4, 'Should have context + 3 chunks'); + assert.equal(rawToolResult.results[0].title, '对话上下文', 'First item should be context'); + }); + + it('answer 模式 toolResult 不应包含 retrieval_mode 或为 undefined', () => { + const answerToolResult = { + query: '小红怎么吃', + results: [{ title: '方舟知识库检索结果', content: '根据知识库信息,小红Activize每天一包...' }], + total: 1, + source: 'ark_knowledge', + hit: true, + reason: 'classified_hit', + }; + + assert.ok(!answerToolResult.retrieval_mode || answerToolResult.retrieval_mode !== 'raw'); + }); + + it('raw 模式下 ragItems 应透传多条而非合并为单条', () => { + const rawResults = [ + { title: '对话上下文', content: '用户: 大白怎么吃\n助手: ...' 
}, + { title: '产品手册', content: '片段1内容' }, + { title: 'FAQ', content: '片段2内容' }, + ]; + + const isRawMode = true; + let finalRagItems; + if (isRawMode) { + finalRagItems = rawResults; + } else { + const replyText = rawResults.map(r => r.content).join(' '); + finalRagItems = [{ title: '知识库结果', content: replyText }]; + } + + assert.equal(finalRagItems.length, 3, 'Raw mode should keep 3 items'); + assert.equal(finalRagItems[0].title, '对话上下文'); + assert.equal(finalRagItems[1].title, '产品手册'); + }); + + it('answer 模式下 ragItems 应合并为单条并清理前缀', () => { + const replyText = '根据知识库信息,小红Activize每天一包冲服'; + const isRawMode = false; + + let finalRagItems; + if (!isRawMode) { + const speechText = normalizeTextForSpeech(replyText); + const cleanedText = speechText.replace(/^(根据知识库信息[,,::\s]*|根据.*?[,,]\s*)/i, ''); + finalRagItems = [{ title: '知识库结果', content: cleanedText || speechText }]; + } + + assert.equal(finalRagItems.length, 1, 'Answer mode should merge into 1 item'); + assert.ok(!finalRagItems[0].content.startsWith('根据知识库信息'), 'Should strip prefix'); + assert.ok(finalRagItems[0].content.includes('小红Activize'), 'Should preserve core content'); + }); +}); + +// ================================================================ +// 2. 字幕过滤 — raw 模式排除"对话上下文" +// ================================================================ +describe('字幕过滤 — raw 模式排除对话上下文', () => { + + it('raw 模式的 ragContent 过滤后字幕不应包含上下文', () => { + const ragContent = [ + { title: '对话上下文', content: '用户: 你好\n助手: 你好' }, + { title: '产品手册', content: '小红每天一包' }, + { title: 'FAQ', content: '40度以下温水' }, + ]; + + const subtitleItems = ragContent.filter((item) => item.title !== '对话上下文'); + const ragSubtitleText = (subtitleItems.length > 0 ? 
subtitleItems : ragContent) + .map((item) => item.content).join(' '); + + assert.ok(!ragSubtitleText.includes('用户: 你好'), 'Subtitle should not include context'); + assert.ok(ragSubtitleText.includes('小红每天一包'), 'Subtitle should include KB content'); + assert.ok(ragSubtitleText.includes('40度以下温水'), 'Subtitle should include all KB chunks'); + }); + + it('answer 模式无"对话上下文"条目,字幕应正常', () => { + const ragContent = [ + { title: '知识库结果', content: '小红Activize每天一包冲服' }, + ]; + + const subtitleItems = ragContent.filter((item) => item.title !== '对话上下文'); + const ragSubtitleText = (subtitleItems.length > 0 ? subtitleItems : ragContent) + .map((item) => item.content).join(' '); + + assert.equal(ragSubtitleText, '小红Activize每天一包冲服'); + }); + + it('全部都是上下文条目时应降级显示全部(兜底)', () => { + const ragContent = [ + { title: '对话上下文', content: '用户: 测试' }, + ]; + + const subtitleItems = ragContent.filter((item) => item.title !== '对话上下文'); + const ragSubtitleText = (subtitleItems.length > 0 ? subtitleItems : ragContent) + .map((item) => item.content).join(' '); + + assert.equal(ragSubtitleText, '用户: 测试', 'Should fallback to showing context'); + }); +}); + +// ================================================================ +// 3. 
环境变量模式切换 +// ================================================================ +describe('环境变量模式切换', () => { + const ENV_ORIG = {}; + + beforeEach(() => { + ENV_ORIG.mode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE; + ENV_ORIG.reranker = process.env.ENABLE_RERANKER; + ENV_ORIG.redis = process.env.ENABLE_REDIS_CONTEXT; + }); + + afterEach(() => { + if (ENV_ORIG.mode !== undefined) process.env.VOLC_ARK_KB_RETRIEVAL_MODE = ENV_ORIG.mode; + else delete process.env.VOLC_ARK_KB_RETRIEVAL_MODE; + if (ENV_ORIG.reranker !== undefined) process.env.ENABLE_RERANKER = ENV_ORIG.reranker; + else delete process.env.ENABLE_RERANKER; + if (ENV_ORIG.redis !== undefined) process.env.ENABLE_REDIS_CONTEXT = ENV_ORIG.redis; + else delete process.env.ENABLE_REDIS_CONTEXT; + }); + + it('VOLC_ARK_KB_RETRIEVAL_MODE=raw 应使用新链路', () => { + process.env.VOLC_ARK_KB_RETRIEVAL_MODE = 'raw'; + const mode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer'; + assert.equal(mode, 'raw'); + }); + + it('VOLC_ARK_KB_RETRIEVAL_MODE=answer 应使用旧链路', () => { + process.env.VOLC_ARK_KB_RETRIEVAL_MODE = 'answer'; + const mode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer'; + assert.equal(mode, 'answer'); + }); + + it('VOLC_ARK_KB_RETRIEVAL_MODE 未设置时默认 answer', () => { + delete process.env.VOLC_ARK_KB_RETRIEVAL_MODE; + const mode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer'; + assert.equal(mode, 'answer'); + }); + + it('ENABLE_RERANKER=false 应关闭重排', () => { + process.env.ENABLE_RERANKER = 'false'; + assert.equal(process.env.ENABLE_RERANKER !== 'false', false); + }); + + it('ENABLE_REDIS_CONTEXT=false 应关闭 Redis 上下文', () => { + process.env.ENABLE_REDIS_CONTEXT = 'false'; + assert.equal(process.env.ENABLE_REDIS_CONTEXT !== 'false', false); + }); + + it('所有降级开关独立,互不影响', () => { + process.env.VOLC_ARK_KB_RETRIEVAL_MODE = 'raw'; + process.env.ENABLE_RERANKER = 'false'; + process.env.ENABLE_REDIS_CONTEXT = 'true'; + + assert.equal(process.env.VOLC_ARK_KB_RETRIEVAL_MODE, 'raw', 'mode should be raw'); + 
assert.equal(process.env.ENABLE_RERANKER, 'false', 'reranker should be off'); + assert.equal(process.env.ENABLE_REDIS_CONTEXT, 'true', 'redis context should be on'); + }); +}); + +// ================================================================ +// 4. normalizeTextForSpeech 对 raw 内容的处理 +// ================================================================ +describe('normalizeTextForSpeech — raw 片段处理', () => { + + it('应去除 Markdown 标记', () => { + const input = '## 产品介绍\n**小红Activize** 是一款_细胞营养素_'; + const result = normalizeTextForSpeech(input); + assert.ok(!result.includes('##'), 'Should remove heading marks'); + assert.ok(!result.includes('**'), 'Should remove bold marks'); + assert.ok(!result.includes('_细胞'), 'Should remove italic marks'); + assert.ok(result.includes('小红Activize'), 'Should preserve content'); + }); + + it('纯文本内容不应被破坏', () => { + const input = '小红Activize Oxyplus每天一包,用40度以下温水冲服,搅拌均匀后饮用。'; + const result = normalizeTextForSpeech(input); + assert.equal(result, input, 'Plain text should not be modified'); + }); + + it('空字符串应返回空', () => { + assert.equal(normalizeTextForSpeech(''), ''); + assert.equal(normalizeTextForSpeech(null), ''); + assert.equal(normalizeTextForSpeech(undefined), ''); + }); +}); + +// ================================================================ +// 5. 
端到端数据流验证 +// ================================================================ +describe('端到端数据流 — raw 模式完整 ragPayload → S2S', () => { + + it('模拟完整 raw 模式数据流:query → chunks → rerank → payload → ragItems', () => { + // Step 1: 模拟检索结果 + const retrievedChunks = [ + { id: 'c1', content: '小红Activize Oxyplus是德国PM的核心产品', score: 0.9, doc_name: '产品手册' }, + { id: 'c2', content: '每天一包,40度以下温水冲服', score: 0.85, doc_name: '使用说明' }, + { id: 'c3', content: '含有瓜拉纳提取物,提供天然能量', score: 0.75, doc_name: '成分表' }, + ]; + + // Step 2: 模拟重排(已排序,直接取 top3) + const rerankedChunks = retrievedChunks.slice(0, 3); + + // Step 3: 模拟历史 + const history = [ + { role: 'user', content: '你们有什么产品' }, + { role: 'assistant', content: '我们有基础三合一,包括大白小红小白...' }, + ]; + + // Step 4: 构建 payload + const kbRetriever = require('../services/kbRetriever'); + const ragPayload = kbRetriever.buildRagPayload(rerankedChunks, history); + + // 验证 payload 结构 + assert.equal(ragPayload.length, 4, 'Should be context(1) + chunks(3)'); + assert.equal(ragPayload[0].title, '对话上下文'); + assert.ok(ragPayload[0].content.includes('你们有什么产品')); + assert.equal(ragPayload[1].title, '产品手册'); + assert.equal(ragPayload[2].title, '使用说明'); + assert.equal(ragPayload[3].title, '成分表'); + + // Step 5: 模拟 toolResult + const toolResult = { + results: ragPayload.map(item => ({ title: item.title, content: item.content })), + hit: true, + retrieval_mode: 'raw', + }; + + // Step 6: 模拟 resolveReply 中的 ragItems 构建 + const ragItems = toolResult.results.filter(i => i && i.content).map(i => ({ + title: i.title || '知识库结果', + content: i.content, + })); + + const isRawMode = toolResult.retrieval_mode === 'raw'; + const finalRagItems = isRawMode ? 
ragItems : [{ title: '知识库结果', content: ragItems.map(i => i.content).join(' ') }]; + + assert.equal(finalRagItems.length, 4, 'Raw mode: 4 items pass-through'); + + // Step 7: 模拟字幕过滤 + const subtitleItems = finalRagItems.filter(item => item.title !== '对话上下文'); + const subtitle = subtitleItems.map(item => item.content).join(' '); + assert.ok(!subtitle.includes('你们有什么产品'), 'Subtitle should exclude context'); + assert.ok(subtitle.includes('小红Activize'), 'Subtitle should include chunk content'); + + // Step 8: 模拟 sendExternalRag 的 JSON 序列化 + const jsonStr = JSON.stringify(finalRagItems); + const parsed = JSON.parse(jsonStr); + assert.equal(parsed.length, 4, 'JSON roundtrip should preserve all items'); + }); + + it('模拟完整 answer 模式数据流(对比验证)', () => { + const toolResult = { + results: [{ title: '方舟知识库检索结果', content: '根据知识库信息,小红Activize每天一包冲服' }], + hit: true, + }; + + const ragItems = toolResult.results.filter(i => i && i.content).map(i => ({ + title: i.title || '知识库结果', + content: i.content, + })); + + const isRawMode = !!(toolResult.retrieval_mode === 'raw'); + assert.equal(isRawMode, false, 'Should be answer mode'); + + const speechText = normalizeTextForSpeech(ragItems[0].content); + const cleanedText = speechText.replace(/^(根据知识库信息[,,::\s]*|根据.*?[,,]\s*)/i, ''); + const finalRagItems = [{ title: '知识库结果', content: cleanedText || speechText }]; + + assert.equal(finalRagItems.length, 1, 'Answer mode: single merged item'); + assert.ok(!finalRagItems[0].content.startsWith('根据知识库'), 'Prefix should be stripped'); + }); +}); + +console.log('\n=== raw模式集成测试加载完成 ===\n'); diff --git a/test2/server/tests/test_redis_client.js b/test2/server/tests/test_redis_client.js new file mode 100644 index 0000000..3a50396 --- /dev/null +++ b/test2/server/tests/test_redis_client.js @@ -0,0 +1,294 @@ +/** + * redisClient.js 单元测试 + * 覆盖:连接状态、对话历史读写、KB缓存读写、降级行为、TTL/LTRIM逻辑 + * + * 运行方式: node --test tests/test_redis_client.js + * 注意: 需要本地Redis可用(redis://127.0.0.1:6379),否则降级测试仍会通过 + */ +const { 
describe, it, beforeEach, afterEach } = require('node:test');
+const assert = require('node:assert/strict');
+const path = require('path');
+const fs = require('fs');
+
+// Load .env: copy KEY=VALUE pairs into process.env without overriding values that are already set
+const envPath = path.join(__dirname, '../.env');
+if (fs.existsSync(envPath)) {
+  const envLines = fs.readFileSync(envPath, 'utf8').split('\n');
+  for (const rawLine of envLines) {
+    const entry = rawLine.trim();
+    // Skip blank lines and comments
+    if (!entry || entry.startsWith('#')) continue;
+    const eqPos = entry.indexOf('=');
+    // Skip lines with no '=' or with an empty name
+    if (eqPos <= 0) continue;
+    const name = entry.slice(0, eqPos).trim();
+    const rawValue = entry.slice(eqPos + 1).trim();
+    // Drop one pair of matching surrounding quotes, single or double
+    const isQuoted = ['"', "'"].some(quote => rawValue.startsWith(quote) && rawValue.endsWith(quote));
+    const value = isQuoted ? rawValue.slice(1, -1) : rawValue;
+    if (!process.env[name]) process.env[name] = value;
+  }
+}
+
+const { after } = require('node:test');
+const redisClient = require('../services/redisClient');
+
+const TEST_SESSION_ID = `test_session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
+const TEST_KB_CACHE_KEY = `test_kb_cache_${Date.now()}`;
+
+// Disconnect from Redis once all tests have finished so the process does not hang
+after(async () => {
+  await redisClient.disconnect();
+});
+
+// ================================================================
+// 1. Connection and availability
+// ================================================================
+describe('redisClient — 连接与可用性', () => {
+  it('createClient 应返回客户端实例', () => {
+    const created = redisClient.createClient();
+    assert.ok(created, 'createClient should return a client');
+  });
+
+  it('getClient 应返回同一个实例(单例模式)', () => {
+    const first = redisClient.getClient();
+    const second = redisClient.getClient();
+    assert.strictEqual(first, second, 'getClient should return singleton');
+  });
+
+  it('isAvailable 应返回 boolean', () => {
+    const availability = redisClient.isAvailable();
+    assert.equal(typeof availability, 'boolean', 'isAvailable should return boolean');
+  });
+});
+
+// ================================================================
+// 2. 
对话历史 — pushMessage + getRecentHistory
+// ================================================================
+describe('redisClient — 对话历史读写', () => {
+  const sessionId = TEST_SESSION_ID;
+
+  // Every test starts from, and leaves behind, an empty session
+  const resetSession = async () => {
+    await redisClient.clearSession(sessionId);
+  };
+  beforeEach(resetSession);
+  afterEach(resetSession);
+
+  // Prints the skip notice and returns true when Redis is not available
+  const redisMissing = () => {
+    if (redisClient.isAvailable()) return false;
+    console.log(' ⏭️ Redis不可用,跳过');
+    return true;
+  };
+
+  it('pushMessage 写入后 getRecentHistory 应能读取', async () => {
+    if (redisMissing()) return;
+
+    const written = await redisClient.pushMessage(sessionId, { role: 'user', content: '你好' });
+    assert.equal(written, true, 'pushMessage should return true');
+
+    const messages = await redisClient.getRecentHistory(sessionId, 5);
+    assert.ok(Array.isArray(messages), 'getRecentHistory should return array');
+    assert.equal(messages.length, 1, 'Should have 1 message');
+    const [only] = messages;
+    assert.equal(only.role, 'user');
+    assert.equal(only.content, '你好');
+  });
+
+  it('多条消息应保持时间顺序(最旧在前)', async () => {
+    if (redisMissing()) return;
+
+    const turns = [
+      { role: 'user', content: '第1条' },
+      { role: 'assistant', content: '第2条' },
+      { role: 'user', content: '第3条' },
+    ];
+    // Push one at a time so the insertion order is deterministic
+    for (const turn of turns) {
+      await redisClient.pushMessage(sessionId, turn);
+    }
+
+    const messages = await redisClient.getRecentHistory(sessionId, 5);
+    assert.equal(messages.length, 3);
+    assert.equal(messages[0].content, '第1条', '最旧的应在前面');
+    assert.equal(messages[1].content, '第2条');
+    assert.equal(messages[2].content, '第3条', '最新的应在后面');
+  });
+
+  it('超过10条应自动截断(LTRIM),只保留最近10条', async () => {
+    if (redisMissing()) return;
+
+    for (let n = 1; n <= 15; n++) {
+      const role = n % 2 === 1 ? 'user' : 'assistant';
+      await redisClient.pushMessage(sessionId, { role, content: `第${n}条` });
+    }
+
+    const messages = await redisClient.getRecentHistory(sessionId, 10);
+    assert.ok(messages.length <= 10, `Should have at most 10 messages, got ${messages.length}`);
+    // The oldest remaining entry should be #6 (the first five were trimmed away)
+    assert.equal(messages[0].content, '第6条', '最旧的应该是第6条');
+    assert.equal(messages[messages.length - 1].content, '第15条', '最新的应该是第15条');
+  });
+
+  it('getRecentHistory maxRounds 参数应限制返回数量', async () => {
+    if (redisMissing()) return;
+
+    for (let n = 1; n <= 8; n++) {
+      const role = n % 2 === 1 ? 'user' : 'assistant';
+      await redisClient.pushMessage(sessionId, { role, content: `消息${n}` });
+    }
+
+    const lastTwoRounds = await redisClient.getRecentHistory(sessionId, 2);
+    assert.ok(lastTwoRounds.length <= 4, `maxRounds=2 should return at most 4 messages, got ${lastTwoRounds.length}`);
+  });
+
+  it('clearSession 后 getRecentHistory 应返回空', async () => {
+    if (redisMissing()) return;
+
+    await redisClient.pushMessage(sessionId, { role: 'user', content: '会被清除' });
+    await redisClient.clearSession(sessionId);
+    const messages = await redisClient.getRecentHistory(sessionId, 5);
+    assert.equal(messages.length, 0, 'Should be empty after clear');
+  });
+
+  it('消息应包含 ts 时间戳', async () => {
+    if (redisMissing()) return;
+
+    // Bracket the write with wall-clock readings; the stored ts must fall inside them
+    const startedAt = Date.now();
+    await redisClient.pushMessage(sessionId, { role: 'user', content: '带时间戳' });
+    const finishedAt = Date.now();
+
+    const messages = await redisClient.getRecentHistory(sessionId, 1);
+    const { ts } = messages[0];
+    assert.ok(ts >= startedAt && ts <= finishedAt, 'ts should be within time range');
+  });
+});
+
+// ================================================================
+// 3. 
KB缓存读写
+// ================================================================
+describe('redisClient — KB缓存读写', () => {
+  const cacheKey = TEST_KB_CACHE_KEY;
+
+  // Remove the entry written by the read/write test. Cleanup stays best-effort,
+  // but a failure is now logged instead of being swallowed silently.
+  afterEach(async () => {
+    if (!redisClient.isAvailable()) return;
+    try {
+      const client = redisClient.getClient();
+      await client.del(`kb_cache:${cacheKey}`);
+    } catch (err) {
+      console.warn(`[test cleanup] failed to delete kb_cache:${cacheKey}: ${err?.message ?? err}`);
+    }
+  });
+
+  it('setKbCache + getKbCache 应正确读写', async () => {
+    if (!redisClient.isAvailable()) {
+      console.log(' ⏭️ Redis不可用,跳过');
+      return;
+    }
+
+    const result = { hit: true, query: '测试', results: [{ content: '测试内容' }] };
+    const ok = await redisClient.setKbCache(cacheKey, result);
+    assert.equal(ok, true, 'setKbCache should return true');
+
+    const cached = await redisClient.getKbCache(cacheKey);
+    assert.ok(cached, 'getKbCache should return data');
+    assert.equal(cached.hit, true);
+    assert.equal(cached.query, '测试');
+  });
+
+  it('不存在的key应返回null', async () => {
+    if (!redisClient.isAvailable()) {
+      console.log(' ⏭️ Redis不可用,跳过');
+      return;
+    }
+
+    // The timestamp suffix makes a collision with a real cache entry very unlikely
+    const cached = await redisClient.getKbCache(`nonexistent_key_${Date.now()}`);
+    assert.equal(cached, null, 'Should return null for nonexistent key');
+  });
+});
+
+// ================================================================
+// 4. 
降级行为(Redis不可用时)
+// ================================================================
+describe('redisClient — 降级行为', () => {
+  // These tests never skip, so with a reachable Redis they write under the fixed
+  // names 'fake_session' and 'test_key'. Remove both afterwards so nothing is left
+  // behind between runs. The `kb_cache:` prefix is the one the KB cache suite
+  // deletes after setKbCache.
+  afterEach(async () => {
+    await redisClient.clearSession('fake_session');
+    if (!redisClient.isAvailable()) return;
+    try {
+      await redisClient.getClient().del('kb_cache:test_key');
+    } catch (err) {
+      console.warn(`[test cleanup] failed to delete kb_cache:test_key: ${err?.message ?? err}`);
+    }
+  });
+
+  it('pushMessage 在 Redis 不可用时应返回 false 而非报错', async () => {
+    // Even when Redis is available this still checks the interface contract (boolean result, no throw)
+    const result = await redisClient.pushMessage('fake_session', { role: 'user', content: 'test' });
+    assert.equal(typeof result, 'boolean', 'Should return boolean');
+  });
+
+  it('getRecentHistory 在不存在的 session 应返回空数组', async () => {
+    const result = await redisClient.getRecentHistory(`nonexistent_session_${Date.now()}`, 5);
+    // null is the degraded result when Redis is unavailable; nothing further to check
+    if (result === null) return;
+    assert.ok(Array.isArray(result), 'Should return array');
+    assert.equal(result.length, 0, 'Should be empty for nonexistent session');
+  });
+
+  it('clearSession 对不存在的 session 不应报错', async () => {
+    const result = await redisClient.clearSession(`nonexistent_session_${Date.now()}`);
+    assert.equal(typeof result, 'boolean', 'Should return boolean');
+  });
+
+  it('getKbCache 在 Redis 不可用时应返回 null', async () => {
+    const result = await redisClient.getKbCache('test_key');
+    // Must not throw whether or not Redis is available
+    assert.ok(result === null || typeof result === 'object', 'Should return null or object');
+  });
+
+  it('setKbCache 在 Redis 不可用时应返回 false', async () => {
+    const result = await redisClient.setKbCache('test_key', { hit: false });
+    assert.equal(typeof result, 'boolean', 'Should return boolean');
+  });
+});
+
+// ================================================================
+// 5. 
数据完整性
+// ================================================================
+describe('redisClient — 数据完整性', () => {
+  const sessionId = TEST_SESSION_ID + '_integrity';
+
+  afterEach(async () => {
+    await redisClient.clearSession(sessionId);
+  });
+
+  // Prints the skip notice and returns true when Redis is not available
+  const redisMissing = () => {
+    if (redisClient.isAvailable()) return false;
+    console.log(' ⏭️ Redis不可用,跳过');
+    return true;
+  };
+
+  // Reads back the most recent round of history for the integrity session
+  const latest = () => redisClient.getRecentHistory(sessionId, 1);
+
+  it('特殊字符消息应正确存取', async () => {
+    if (redisMissing()) return;
+
+    // Currency sign, both quote styles, control characters and an emoji must round-trip unchanged
+    const payload = '产品价格:¥299.00 "双引号" \'单引号\' \n换行 \t制表符 emoji🎉';
+    await redisClient.pushMessage(sessionId, { role: 'user', content: payload });
+
+    const [stored] = await latest();
+    assert.equal(stored.content, payload, 'Should preserve special characters');
+  });
+
+  it('空内容消息应正确存取', async () => {
+    if (redisMissing()) return;
+
+    await redisClient.pushMessage(sessionId, { role: 'user', content: '' });
+    const [stored] = await latest();
+    assert.equal(stored.content, '', 'Should handle empty content');
+  });
+
+  it('source 字段应正确保存', async () => {
+    if (redisMissing()) return;
+
+    await redisClient.pushMessage(sessionId, { role: 'assistant', content: '回答', source: 'voice_tool' });
+    const [stored] = await latest();
+    assert.equal(stored.source, 'voice_tool', 'Should preserve source field');
+  });
+});
+
+console.log('\n=== redisClient 测试加载完成 ===\n');