2026-03-18 17:43:13 +08:00
|
|
|
|
/**
 * Zero-LLM context keyword tracker.
 * Remembers the most recent product/topic keywords so that follow-up
 * questions can be understood.
 */
|
|
|
|
|
|
|
2026-03-20 10:56:29 +08:00
|
|
|
|
const { TRACKER_KEYWORD_GROUPS, buildKeywordRegex } = require('./knowledgeKeywords');
|
|
|
|
|
|
|
2026-03-18 17:43:13 +08:00
|
|
|
|
/**
 * Tracks, per session, the most recently mentioned keywords so that a
 * follow-up query ("how do I take this one?") can be prefixed with the topic
 * it refers to. No LLM call is involved: keywords are found with the regexes
 * built from TRACKER_KEYWORD_GROUPS.
 */
class ContextKeywordTracker {
  /**
   * Creates the tracker and starts a periodic sweep of expired sessions.
   */
  constructor() {
    // sessionId -> { keywords: string[], lastUpdate: number (Date.now() ms) }
    this.sessionKeywords = new Map();
    // A session's keywords expire 30 minutes after their last update.
    this.TTL = 30 * 60 * 1000;
    // Upper bound on remembered keywords per session (most recent are kept).
    this.MAX_KEYWORDS = 8;
    // One regex per keyword group; 'gi' is passed as the flags argument
    // (presumably yielding global, case-insensitive patterns — see buildKeywordRegex).
    this.keywordPatterns = TRACKER_KEYWORD_GROUPS.map((group) => buildKeywordRegex(group, 'gi'));

    this.cleanupTimer = setInterval(() => this.cleanup(), this.TTL);
    // Do not let the sweep timer keep the process alive on its own.
    if (typeof this.cleanupTimer.unref === 'function') {
      this.cleanupTimer.unref();
    }
  }

  /**
   * Finds every tracked keyword occurring in `text`.
   *
   * @param {*} text - Input text; falsy values are treated as an empty string.
   * @returns {string[]} Matched keywords in pattern order, de-duplicated
   *   case-insensitively (the first spelling encountered is kept).
   */
  extractKeywords(text) {
    const normalized = String(text || '').trim();
    if (!normalized) {
      return [];
    }

    // Lowercased keys already emitted; gives O(1) duplicate checks.
    const seen = new Set();
    const deduped = [];
    for (const pattern of this.keywordPatterns) {
      const matches = normalized.match(pattern) || [];
      for (const match of matches) {
        const keyword = String(match || '').trim();
        if (!keyword) {
          continue;
        }
        const key = keyword.toLowerCase();
        if (seen.has(key)) {
          continue;
        }
        seen.add(key);
        deduped.push(keyword);
      }
    }
    return deduped;
  }

  /**
   * Merges newly seen keywords into an existing list, most recent last.
   * A keyword that is already present (case-insensitive) is moved to the end.
   *
   * @param {string[]} existing - Previously stored keywords (non-arrays are treated as empty).
   * @param {string[]} incoming - Newly extracted keywords (non-arrays are treated as empty).
   * @returns {string[]} A new array holding at most MAX_KEYWORDS of the most recent keywords.
   */
  mergeKeywords(existing, incoming) {
    const merged = Array.isArray(existing) ? [...existing] : [];
    for (const keyword of Array.isArray(incoming) ? incoming : []) {
      const normalizedKeyword = String(keyword || '').trim();
      if (!normalizedKeyword) {
        continue;
      }
      const lowered = normalizedKeyword.toLowerCase();
      const existingIndex = merged.findIndex((item) => String(item || '').toLowerCase() === lowered);
      if (existingIndex >= 0) {
        merged.splice(existingIndex, 1);
      }
      merged.push(normalizedKeyword);
    }
    return merged.slice(-this.MAX_KEYWORDS);
  }

  /**
   * Records the keywords found in `text` for the given session.
   * Nothing is stored (and the timestamp is not refreshed) when `sessionId`
   * is falsy or `text` contains no tracked keyword.
   *
   * @param {string} sessionId - Session identifier.
   * @param {*} text - Text to scan for keywords.
   * @returns {void}
   */
  updateSession(sessionId, text) {
    if (!sessionId) {
      return;
    }
    const keywords = this.extractKeywords(text);
    if (keywords.length === 0) {
      return;
    }
    const existing = this.sessionKeywords.get(sessionId)?.keywords || [];
    this.sessionKeywords.set(sessionId, {
      keywords: this.mergeKeywords(existing, keywords),
      lastUpdate: Date.now(),
    });
  }

  /**
   * Returns the keywords remembered for a session.
   * An entry older than TTL is deleted and reported as empty.
   *
   * @param {string} sessionId - Session identifier.
   * @returns {string[]} A copy of the stored keywords, oldest first; empty if none.
   */
  getSessionKeywords(sessionId) {
    const data = this.sessionKeywords.get(sessionId);
    if (!data) {
      return [];
    }
    if (Date.now() - data.lastUpdate > this.TTL) {
      this.sessionKeywords.delete(sessionId);
      return [];
    }
    // Hand out a copy so callers cannot mutate the tracker's stored state.
    return [...data.keywords];
  }

  /**
   * Prefixes a follow-up query with the topic it most likely refers to.
   *
   * A query counts as a follow-up when it contains a pronoun/follow-up phrase,
   * or when it is short (<= 20 characters) and does not itself name a tracked
   * keyword. Other queries, and empty queries, are returned trimmed and unchanged.
   *
   * @param {string} sessionId - Session identifier used for the keyword fallback.
   * @param {*} query - Raw user query; falsy values are treated as an empty string.
   * @param {?Object} [session=null] - Optional session object; `_lastKbTopic` and
   *   `_lastKbHitAt` are read from it when present.
   * @returns {string} The trimmed query, possibly prefixed with "<topic> ".
   */
  enrichQueryWithContext(sessionId, query, session = null) {
    // Coerce like extractKeywords does, so a non-string query cannot throw on .trim().
    const normalized = String(query || '').trim();
    if (!normalized) {
      // Nothing to enrich; avoid producing a context-only query such as "topic ".
      return normalized;
    }

    // Follow-up detection: pronouns / follow-up phrases, or a short query
    // that does not contain an explicit tracked keyword.
    const hasFollowUpSignal = /(它|它的|他|他的|这个|那个|这款|那款|该产品|上面|刚才|再说|再次|强调一下|详细|继续|怎么吃|怎么用|怎么样|功效|成分|作用|原理|核心|区别|哪个好|为什么|什么意思|适合谁|多少钱|价格|副作用|正规吗|一天几次|每天几次|每日几次|给我介绍|介绍一下|说一下|讲一下)/i.test(normalized);
    const isShortGeneric = normalized.length <= 20 && this.extractKeywords(normalized).length === 0;
    if (!hasFollowUpSignal && !isShortGeneric) {
      return normalized;
    }

    // Prefer the session's KB topic memory while it is fresh (60 seconds).
    // This makes a follow-up attach to the topic that most recently hit the
    // knowledge base rather than to an older keyword.
    const KB_TOPIC_TTL = 60000;
    if (session?._lastKbTopic && session?._lastKbHitAt && (Date.now() - session._lastKbHitAt < KB_TOPIC_TTL)) {
      console.log(`[ContextTracker] Enriching from KB topic memory: "${normalized}" + "${session._lastKbTopic}"`);
      return `${session._lastKbTopic} ${normalized}`;
    }

    // Fallback: keyword tracker. Only the single most recent keyword is used,
    // to keep the query short (a long query dilutes vector retrieval).
    const keywords = this.getSessionKeywords(sessionId);
    if (keywords.length === 0) {
      return normalized;
    }
    const keywordStr = keywords[keywords.length - 1];
    console.log(`[ContextTracker] Enriching: "${normalized}" + "${keywordStr}"`);
    return `${keywordStr} ${normalized}`;
  }

  /**
   * Removes every session whose last update is older than TTL.
   *
   * @returns {void}
   */
  cleanup() {
    const now = Date.now();
    for (const [sessionId, data] of this.sessionKeywords) {
      if (now - data.lastUpdate > this.TTL) {
        this.sessionKeywords.delete(sessionId);
      }
    }
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
// Export one tracker instance, created when this module is first loaded.
const contextKeywordTracker = new ContextKeywordTracker();
module.exports = contextKeywordTracker;
|