Files
bigwo/test2/server/services/contextKeywordTracker.js
User 56940676f6 feat(kb): VikingDB纯检索+重排+Redis上下文+全库搜索+别名扩展+KB保护窗口+RAG语气引导
- 新增 kbRetriever.js: VikingDB search_knowledge 纯检索替代 Ark chat/completions, doubao-seed-rerank 重排, RAG payload 语气引导缓解音色差异

- 新增 redisClient.js: Redis 连接管理 + 5轮对话历史 + KB缓存双写

- toolExecutor.js: 产品别名扩展25条, 全库检索topK=25, 检索阈值0.01, 精简 buildDeterministicKnowledgeQuery

- nativeVoiceGateway.js: isPureChitchat扩展, KB保护窗口60s, prequery参数调优

- realtimeDialogRouting.js: resolveReply感知KB保护窗口, fast-path适配raw模式

- app.js: 健康检查新增 redis/reranker/kbRetrievalMode

- 新增测试: alias A/B测试, KB retriever测试, Redis客户端测试, raw模式集成测试
2026-03-26 14:30:32 +08:00

129 lines
4.4 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* 零 LLM 上下文关键词追踪器
* 记忆最近的产品/主题关键词,用于追问理解
*/
const { TRACKER_KEYWORD_GROUPS, buildKeywordRegex } = require('./knowledgeKeywords');
/**
 * Per-session memory of recently mentioned product/topic keywords, maintained
 * without any LLM call, used to expand short follow-up questions with the topic
 * they most likely refer to.
 *
 * Session state lives in an in-process Map and expires after `TTL` milliseconds
 * without a keyword-bearing update.
 */
class ContextKeywordTracker {
  constructor() {
    // sessionId -> { keywords: string[], lastUpdate: epoch milliseconds }
    this.sessionKeywords = new Map();
    // Idle time after which a session's keywords are considered stale (30 minutes).
    this.TTL = 30 * 60 * 1000;
    // Upper bound on the number of keywords remembered per session.
    this.MAX_KEYWORDS = 8;
    // One regex per keyword group, requested with the global + case-insensitive flags.
    this.keywordPatterns = TRACKER_KEYWORD_GROUPS.map((group) => buildKeywordRegex(group, 'gi'));
    // Periodically purge expired sessions; unref() so the timer alone does not
    // keep the process alive.
    this.cleanupTimer = setInterval(() => this.cleanup(), this.TTL);
    if (typeof this.cleanupTimer.unref === 'function') {
      this.cleanupTimer.unref();
    }
  }

  /**
   * Collect every tracked keyword that occurs in `text`.
   *
   * Results are ordered by keyword group first and by position within the text
   * second, and are de-duplicated case-insensitively (the first spelling wins).
   *
   * @param {*} text - Raw utterance; falsy values are treated as an empty string.
   * @returns {string[]} Distinct, trimmed keywords found in the text.
   */
  extractKeywords(text) {
    const normalized = String(text || '').trim();
    if (!normalized) {
      return [];
    }
    const seen = new Set();
    const deduped = [];
    for (const pattern of this.keywordPatterns) {
      for (const match of normalized.match(pattern) || []) {
        const keyword = String(match || '').trim();
        const key = keyword.toLowerCase();
        if (keyword && !seen.has(key)) {
          seen.add(key);
          deduped.push(keyword);
        }
      }
    }
    return deduped;
  }

  /**
   * Merge newly seen keywords into an existing list, most recent last.
   *
   * A keyword that is already present (case-insensitive) is moved to the end
   * using the incoming spelling. Only the last `MAX_KEYWORDS` entries are kept.
   *
   * @param {string[]} existing - Previously remembered keywords (not mutated).
   * @param {string[]} incoming - Keywords from the latest utterance.
   * @returns {string[]} A new array holding the merged keywords.
   */
  mergeKeywords(existing, incoming) {
    const merged = Array.isArray(existing) ? [...existing] : [];
    for (const keyword of Array.isArray(incoming) ? incoming : []) {
      const normalizedKeyword = String(keyword || '').trim();
      if (!normalizedKeyword) {
        continue;
      }
      const lowered = normalizedKeyword.toLowerCase();
      const existingIndex = merged.findIndex((item) => String(item || '').toLowerCase() === lowered);
      if (existingIndex >= 0) {
        merged.splice(existingIndex, 1);
      }
      merged.push(normalizedKeyword);
    }
    return merged.slice(-this.MAX_KEYWORDS);
  }

  /**
   * Record the keywords found in `text` for the given session.
   *
   * Nothing is stored, and the session's timestamp is not refreshed, when the
   * text contains no tracked keyword or when `sessionId` is falsy.
   *
   * @param {string} sessionId - Session identifier.
   * @param {*} text - Latest utterance for that session.
   * @returns {void}
   */
  updateSession(sessionId, text) {
    if (!sessionId) {
      return;
    }
    const keywords = this.extractKeywords(text);
    if (keywords.length === 0) {
      return;
    }
    const existing = this.sessionKeywords.get(sessionId)?.keywords || [];
    this.sessionKeywords.set(sessionId, {
      keywords: this.mergeKeywords(existing, keywords),
      lastUpdate: Date.now(),
    });
  }

  /**
   * Return the keywords remembered for a session, oldest first.
   *
   * An expired entry is deleted on access. The returned array is a copy, so
   * callers cannot accidentally modify the tracker's internal state.
   *
   * @param {string} sessionId - Session identifier.
   * @returns {string[]} Remembered keywords, or an empty array if none or expired.
   */
  getSessionKeywords(sessionId) {
    const data = this.sessionKeywords.get(sessionId);
    if (!data) {
      return [];
    }
    if (Date.now() - data.lastUpdate > this.TTL) {
      this.sessionKeywords.delete(sessionId);
      return [];
    }
    return [...data.keywords];
  }

  /**
   * Prefix a follow-up style query with the most relevant remembered topic.
   *
   * A query counts as a follow-up when it contains a pronoun / follow-up phrase
   * or is at most 20 characters long. The topic is taken from the session's KB
   * topic memory when that was hit within the last 60 seconds, otherwise from
   * the most recently tracked keyword. The query is returned unchanged when it
   * is empty, is not a follow-up, has no topic available, or already contains
   * the topic.
   *
   * @param {string} sessionId - Session identifier used for the keyword fallback.
   * @param {*} query - User query; falsy values are treated as an empty string.
   * @param {?object} [session=null] - Optional session object; `_lastKbTopic`
   *   and `_lastKbHitAt` (epoch milliseconds) are read from it when present.
   * @returns {string} The trimmed query, possibly prefixed with "<topic> ".
   */
  enrichQueryWithContext(sessionId, query, session = null) {
    // Coerce like extractKeywords does so a non-string query cannot throw on trim().
    const normalized = String(query || '').trim();
    if (!normalized) {
      // An empty query must not turn into "<topic> " with a trailing space.
      return normalized;
    }

    // Follow-up detection: pronouns / follow-up phrases, or simply a short query.
    // NOTE(review): the original comment described the second case as "short query
    // without an explicit product name", but only the length is checked — confirm
    // whether a keyword check was intended here.
    const hasFollowUpSignal = /(它|它的|他|他的|这个|那个|这款|那款|该产品|上面|刚才|再说|再次|强调一下|详细|继续|怎么吃|怎么用|怎么样|功效|成分|作用|原理|核心|区别|哪个好|为什么|什么意思|适合谁|多少钱|价格|副作用|正规吗|一天几次|每天几次|每日几次|给我介绍|介绍一下|说一下|讲一下)/i.test(normalized);
    const isShortGeneric = normalized.length <= 20;
    if (!hasFollowUpSignal && !isShortGeneric) {
      return normalized;
    }

    // Prefer the session's KB topic memory while it is at most 60 seconds old.
    // Example: after discussing "一成系统" and then "骨关节", the follow-up
    // "这款怎么吃" should attach to "骨关节", not "一成系统".
    const KB_TOPIC_TTL = 60000;
    const hasFreshKbTopic = Boolean(
      session?._lastKbTopic
      && session?._lastKbHitAt
      && (Date.now() - session._lastKbHitAt < KB_TOPIC_TTL)
    );

    let topic;
    if (hasFreshKbTopic) {
      topic = session._lastKbTopic;
    } else {
      // Fallback: use only the most recent tracked keyword, so the query stays
      // short and the vector search is not diluted.
      const keywords = this.getSessionKeywords(sessionId);
      if (keywords.length === 0) {
        return normalized;
      }
      topic = keywords[keywords.length - 1];
    }

    // Prepending a topic the query already names only lengthens the query.
    if (normalized.toLowerCase().includes(String(topic).toLowerCase())) {
      return normalized;
    }

    if (hasFreshKbTopic) {
      console.log(`[ContextTracker] Enriching from KB topic memory: "${normalized}" + "${topic}"`);
    } else {
      console.log(`[ContextTracker] Enriching: "${normalized}" + "${topic}"`);
    }
    return `${topic} ${normalized}`;
  }

  /**
   * Drop every session whose last keyword update is older than `TTL`.
   *
   * @returns {void}
   */
  cleanup() {
    const now = Date.now();
    for (const [sessionId, data] of this.sessionKeywords) {
      if (now - data.lastUpdate > this.TTL) {
        this.sessionKeywords.delete(sessionId);
      }
    }
  }
}
// Export a single tracker instance created when this module is loaded.
// Constructing it also starts the tracker's periodic cleanup timer.
module.exports = new ContextKeywordTracker();