const axios = require('axios'); const https = require('https'); const arkChatService = require('./arkChatService'); const { buildKnowledgeAnswerPrompt, resolveAssistantProfile } = require('./assistantProfileConfig'); const { getAssistantProfile } = require('./assistantProfileService'); const kbRetriever = require('./kbRetriever'); const redisClient = require('./redisClient'); // HTTP keep-alive agent:复用TCP连接,避免每次请求重新握手 const kbHttpAgent = new https.Agent({ keepAlive: true, keepAliveMsecs: 30000, maxSockets: 6, timeout: 15000, }); // 连接预热:服务启动后自动建立到方舟API的TLS连接,避免首次查询的握手延迟 setTimeout(() => { const warmupKey = process.env.VOLC_ARK_API_KEY || process.env.VOLC_ACCESS_KEY_ID; if (warmupKey) { axios.post('https://ark.cn-beijing.volces.com/api/v3/chat/completions', { model: process.env.VOLC_ARK_KNOWLEDGE_ENDPOINT_ID || process.env.VOLC_ARK_ENDPOINT_ID || 'warmup', messages: [{ role: 'user', content: 'ping' }], max_tokens: 1, stream: false, }, { headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${warmupKey}` }, timeout: 8000, httpsAgent: kbHttpAgent, }).then(() => { console.log('[ToolExecutor] KB connection pool warmed up'); }).catch(() => { console.log('[ToolExecutor] KB connection warmup sent (pool established)'); }); } }, 2000); const contextKeywordTracker = require('./contextKeywordTracker'); const { hasCanonicalKnowledgeTerm: hasCanonicalKnowledgeTermMatch, extractKnowledgeEntityMatches, hasKeywordFromList, SYSTEM_ROUTE_KEYWORDS, COMPANY_ROUTE_KEYWORDS, PRODUCT_ROUTE_KEYWORDS, FAQ_ROUTE_KEYWORDS, SCIENCE_TRAINING_ROUTE_KEYWORDS, } = require('./knowledgeKeywords'); // KB查询缓存:相同effectiveQuery + datasetIds + userId在TTL内直接返回缓存结果 const KB_CACHE_TTL_MS = 5 * 60 * 1000; // 5分钟 (hit结果) const KB_CACHE_NOHIT_TTL_MS = 2 * 60 * 1000; // 2分钟 (no-hit结果,较短TTL) const KB_CACHE_MAX_SIZE = 200; const kbQueryCache = new Map(); function getKbCacheKey(query, datasetIds, profileScope = 'global') { const mode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer'; return `vdb2|${mode}|${String(profileScope || 'global').trim() || 'global'}|${(query || '').trim()}|${(datasetIds || []).sort().join(',')}`; } function getKbCache(key) { const entry = kbQueryCache.get(key); if (!entry) return null; // hit: 5min TTL; no-hit: 10s 短 TTL(仅防同一轮次重复查 VikingDB) const ttl = entry.hit ? KB_CACHE_TTL_MS : 10000; if (Date.now() - entry.timestamp > ttl) { kbQueryCache.delete(key); return null; } return entry.result; } function setKbCache(key, result) { if (kbQueryCache.size >= KB_CACHE_MAX_SIZE) { const oldest = kbQueryCache.keys().next().value; kbQueryCache.delete(oldest); } // hit: 正常缓存; no-hit: 内存 10s 去重(防止同一轮次重复查 VikingDB,不写 Redis) kbQueryCache.set(key, { result, timestamp: Date.now(), hit: !!result.hit }); } class ToolExecutor { static hasCanonicalKnowledgeTerm(query) { return hasCanonicalKnowledgeTermMatch(query); } static extractKnowledgeEntities(text) { return extractKnowledgeEntityMatches(text); } static classifyQuestionSlot(query) { const text = String(query || '').trim(); if (/(多少钱|价格|售价|费用|价钱)/.test(text)) return 'price'; if (/(成分|配方|原料|含什么|包含什么)/.test(text)) return 'ingredient'; if (/(规格|包装|剂型|形态|粉末|粉剂|粉状|胶囊|软胶囊|片剂|颗粒|喷雾|乳霜|乳液|凝胶|膏状|口服液|每盒|每袋|每瓶|每支|多少袋|多少粒|多少片|多少毫升|多大规格)/.test(text)) return 'specification'; if (/(怎么吃|怎么用|怎么服用|服用方法|用法|用量|一天几次|每天几次|每日几次|一天吃几次|每天吃几次|一天服用几次|每日服用几次)/.test(text)) return 'usage'; if (/(副作用|不良反应|好转反应|排毒反应|整应反应|皮肤发痒|皮肤微痒)/.test(text)) return 'side_effect'; if (/(多久见效|多久有效|多久能见效|多长时间见效|几天见效|什么时候见效)/.test(text)) return 'effect_time'; if (/(治病|治疗|能治|治愈|药品|药物|替代药|包治|治百病)/.test(text)) return 'medical_claim'; if (/(为什么.*(全套|搭配|三合一)|为什么要.*(全套|搭配|三合一)|为何.*(全套|搭配|三合一)|产品需要全套)/.test(text)) return 'bundle_reason'; if (/(如何发展PM事业|怎么发展PM事业|PM事业发展逻辑|商机|PM价值|为什么选择德国PM|为何选择德国PM|为什么选德国PM|为什么选PM|为何选PM|陌生客户|陌生人沟通|线上拓客|成交|拓客|邀约|自我介绍|故事分享|三大平台|四大Ai生态|AI智能生产力|AI生产力)/.test(text)) return 'business_growth'; if (/(功效|作用|有什么用|有什么好处|效果)/.test(text)) return 'benefit'; if (/(适合谁|适合什么人|哪些人|适用人群)/.test(text)) return 'audience'; if (/(正规吗|合法|是不是传销|传销|骗局|骗子|直销还是传销|合不合法|正不正规)/.test(text)) return 'legality'; if (/(地址|电话|联系方式|联系|总部|公司地址)/.test(text)) return 'address_contact'; if (/(区别|不同|哪个好|有什么区别|差别)/.test(text)) return 'difference'; return 'general'; } static buildQuestionSlotInstruction(query) { const slot = this.classifyQuestionSlot(query); const instructions = { price: '用户当前只关心价格或费用,请只回答价格、收费或是否未提及价格,不要扩展到产品总介绍。', ingredient: '用户当前只关心成分或配方,请只回答成分、原料或是否未提及成分,不要扩展到品牌背景。', specification: '用户当前只关心规格、包装、剂型、形态或每盒每袋等产品细节,请只回答这些明确规格信息;如果知识库没写,就直接说明未提及。', usage: '用户当前只关心用法、吃法、服用频次或剂量,请只回答这一点。', side_effect: '用户当前只关心副作用或好转反应,请只回答可能的不良反应、好转反应或注意事项。', effect_time: '用户当前只关心多久见效或效果周期,请只回答见效时间、周期或个体差异,不要扩展无关信息。', medical_claim: '用户当前只关心产品能不能治病、是不是药,请只回答是否属于药品、能否替代药物以及相关注意事项。', bundle_reason: '用户当前只关心为什么要全套、搭配或三合一,请只回答搭配原理、协同作用或NTC相关原因。', business_growth: '用户当前只关心PM事业发展、商机、PM价值、为何选择、线上拓客、陌生客户沟通、一成系统赋能、三大平台四大Ai生态或自我介绍,请只回答这类业务发展问题。凡涉及“一部手机做天下、0门槛启动、0成本高效率、足不出户梦想横扫全球、身未动梦已成、批发式晋级”等表述,必须明确这是“一成系统”的优势标签,是对德国PM事业的软件赋能,不要混同为德国PM公司或产品本身。', benefit: '用户当前只关心功效或作用,请只回答作用点,不要扩展到无关信息。', audience: '用户当前只关心适合人群,请只回答适用对象。', legality: '用户当前只关心正规性、合法性或是否传销,请只围绕合法合规问题直接回答。', address_contact: '用户当前只关心地址或联系方式,请只回答地址、电话、联系信息。', difference: '用户当前只关心区别或对比,请直接做差异对比,不要扩写成单个产品长介绍。', general: '请优先直接回答用户当前这一问,不要离题扩展。', }; return instructions[slot] || instructions.general; } static answerMatchesQuestionSlot(query, content) { const text = String(content || '').trim(); const lowerText = text.toLowerCase(); const slot = this.classifyQuestionSlot(query); const entities = this.extractKnowledgeEntities(query); // 中英文别名映射:改写后的query可能包含英文实体,但方舟回答用中文名 const ENTITY_ALIAS_MAP = { 'activize oxyplus': ['小红', 'activize', '艾特维'], 'activize': ['小红', '艾特维'], 'basics': ['大白', '倍适'], 'basic power': ['大白', 'basics'], 'restorate': ['小白', '维适多'], 'fitline': ['pm-fitline', 'pm细胞营养素', '细胞营养素'], 'pm-fitline': ['fitline', '细胞营养素'], 'ntc营养保送系统': ['ntc', '营养保送', '吸收利用'], 'ntc': ['ntc营养保送系统', '营养保送'], '儿童倍适': ['powercocktail junior', '儿童'], 'cc-cell': ['cc套装', 'cc胶囊', 'cc乳霜'], 'd-drink': ['小绿', '排毒饮', '排毒d饮料'], 'proshape amino': ['氨基酸', 'bcaa'], 'herbal tea': ['草本茶'], 'hair+': ['发宝', '发健'], 'med hair+': ['发宝', '发健'], 'fitness-drink': ['运动饮料', '健康饮品'], 'topshape': ['纤萃', '减肥'], 'generation 50+': ['乐活50+', '乐活'], 'apple antioxy': ['细胞抗氧素', '苹果'], 'zellschutz': ['细胞抗氧素'], 'women+': ['women'], 'men face': ['男士乳霜', '男士护肤'], 'med dental+': ['牙膏', '草本护理'], 'ib5': ['口腔免疫喷雾'], 'q10': ['辅酵素', '氧修护'], '一成系统': ['三大平台', '四大ai生态', 'ai众享', '数字化工作室', '盛咖学愿'], }; const expandedEntities = []; for (const entity of entities) { const lower = String(entity || '').toLowerCase(); expandedEntities.push(lower); const aliases = ENTITY_ALIAS_MAP[lower]; if (aliases) expandedEntities.push(...aliases); } const mentionsEntity = entities.length === 0 || expandedEntities.some((entity) => lowerText.includes(String(entity || '').toLowerCase())); if (/德国PM是一家1993年成立于德国的合法直销公司/.test(text) && slot !== 'legality') { return false; } if (!mentionsEntity && slot !== 'legality' && slot !== 'address_contact') { return false; } const slotPatterns = { price: /(元|价格|售价|费用|人民币|¥|¥)/, ingredient: /(成分|配方|原料|含有|包含|营养素|葡萄籽|白藜芦醇|益生菌|胶原蛋白肽|辅酵素|Q10)/, specification: /(规格|包装|剂型|形态|粉末|粉剂|粉状|胶囊|软胶囊|片剂|颗粒|喷雾|乳霜|乳液|凝胶|膏状|口服液|每盒|每袋|每瓶|每支|袋装|盒装|瓶装|支装|多少袋|多少粒|多少片|多少毫升|克|g|ml)/, usage: /(服用|用法|用量|每日|每天|一次|次|饭前|饭后|早餐|晚餐|早晚|空腹|睡前)/, side_effect: /(副作用|不良反应|好转反应|排毒|整应|皮肤.*痒|排便|反应|注意事项|正常现象)/, effect_time: /(见效|有效|几天|几周|几个月|周期|坚持|因人而异|吸收利用)/, medical_claim: /(不是药|不能替代药|不能代替药物|不是用于治疗|不能治疗|保健食品|营养补充|就医|医生)/, bundle_reason: /(全套|搭配|协同|三合一|组合|NTC|吸收|运输|利用|代谢|原理)/, business_growth: /(一成系统|PM事业|商机|价值|选择|拓客|成交|邀约|陌生客户|沟通|三大平台|四大Ai生态|数字化工作室|Ai众享|盛咖学愿|故事|自我介绍|赋能|智能生产力|软件赋能|一部手机|0门槛|零门槛|0成本|零成本|身未动梦已成|批发式晋级)/, benefit: /(功效|作用|帮助|支持|改善|提升|有助于)/, audience: /(适合|适用|人群|适宜|可以)/, legality: /(合法|正规|直销|认证|邓白氏|不是传销)/, address_contact: /(地址|电话|联系方式|联系|总部|香港|德国|美国|加拿大)/, difference: /(区别|不同|相比|分别|一个|另一个|而|更适合)/, }; if (slotPatterns[slot]) { return slotPatterns[slot].test(text); } return text.length >= 10; } static getKnowledgeBaseRoutingRules() { const raw = process.env.VOLC_ARK_KNOWLEDGE_BASE_ROUTING || process.env.VOLC_ARK_KNOWLEDGE_BASE_MAP; if (!raw) { return []; } try { const parsed = JSON.parse(raw); const entries = Array.isArray(parsed) ? parsed : Object.entries(parsed).map(([name, config]) => ({ name, ...(config || {}) })); return entries .map((item) => ({ name: String(item.name || '').trim(), dataset_ids: Array.isArray(item.dataset_ids) ? item.dataset_ids.map((id) => String(id || '').trim()).filter(Boolean) : String(item.dataset_ids || item.datasetIds || '') .split(',') .map((id) => id.trim()) .filter(Boolean), keywords: Array.isArray(item.keywords) ? item.keywords.map((keyword) => String(keyword || '').trim()).filter(Boolean) : String(item.keywords || '') .split(',') .map((keyword) => keyword.trim()) .filter(Boolean), })) .filter((item) => item.name && item.dataset_ids.length > 0 && item.keywords.length > 0); } catch (error) { console.warn('[ToolExecutor] parse knowledge base routing failed:', error.message); return []; } } static selectKnowledgeBaseTargets(query, context = []) { const defaultDatasetIds = String(process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS || '') .split(',') .map((id) => id.trim()) .filter(Boolean); const text = String(query || '').trim(); const recentContextText = (Array.isArray(context) ? context : []) .slice(-6) .map((item) => String(item?.content || '').trim()) .filter(Boolean) .join('\n'); const haystack = `${text}\n${recentContextText}`.toLowerCase(); // 5路意图检测:system > company > faq > science > product const hasSystemIntent = hasKeywordFromList(haystack, SYSTEM_ROUTE_KEYWORDS); const hasCompanyIntent = hasKeywordFromList(haystack, COMPANY_ROUTE_KEYWORDS); const hasProductIntent = hasKeywordFromList(haystack, PRODUCT_ROUTE_KEYWORDS); const hasFaqIntent = hasKeywordFromList(haystack, FAQ_ROUTE_KEYWORDS); const hasScienceIntent = hasKeywordFromList(haystack, SCIENCE_TRAINING_ROUTE_KEYWORDS); // 确定路由:多意图可并行,只排除真正冲突的组合 const priorityRouteNames = []; if (hasSystemIntent) priorityRouteNames.push('system'); if (hasProductIntent) { priorityRouteNames.push('product'); // 产品问题同时搜FAQ和科普,获取更全面的回答(好转反应、科普误区等补充信息) if (!hasFaqIntent) priorityRouteNames.push('faq'); if (!hasScienceIntent) priorityRouteNames.push('science'); } if (hasCompanyIntent) { priorityRouteNames.push('company'); // 公司问题同时搜产品和系统培训,test collection 内容有限 if (!hasProductIntent) priorityRouteNames.push('product'); if (!hasSystemIntent) priorityRouteNames.push('system'); } if (hasFaqIntent) priorityRouteNames.push('faq'); if (hasScienceIntent) priorityRouteNames.push('science'); if (priorityRouteNames.length > 0) { const routingRules = this.getKnowledgeBaseRoutingRules(); const priorityRules = routingRules.filter((rule) => priorityRouteNames.includes(rule.name)); const priorityDatasetIds = [...new Set(priorityRules.flatMap((rule) => rule.dataset_ids).filter(Boolean))]; if (priorityDatasetIds.length > 0) { console.log(`[ToolExecutor] KB 5-way route: intents=[${priorityRouteNames.join(',')}] datasets=[${priorityDatasetIds.join(',')}]`); return { datasetIds: priorityDatasetIds, matchedRoutes: [...new Set(priorityRouteNames)], }; } } // 通用env规则匹配回退 const matchedDatasetIds = []; const matchedRoutes = []; for (const rule of this.getKnowledgeBaseRoutingRules()) { if (rule.keywords.some((keyword) => haystack.includes(keyword.toLowerCase()))) { matchedRoutes.push(rule.name); matchedDatasetIds.push(...rule.dataset_ids); } } const datasetIds = [...new Set((matchedDatasetIds.length ? matchedDatasetIds : defaultDatasetIds).filter(Boolean))]; return { datasetIds, matchedRoutes: matchedRoutes.length ? [...new Set(matchedRoutes)] : (datasetIds.length ? ['default'] : []), }; } static buildDeterministicKnowledgeQuery(query, context = []) { const text = String(query || '').trim(); // ==================================================================== // 精简版:只保留 VikingDB 语义检索已知会失败的场景 // 产品/公司/认证等查询全部交给 VikingDB + reranker 处理原始语义 // 追问/代词由 enrichQueryWithContext + KB保护窗口 处理 // ==================================================================== // === 一成系统子话题分流(内部术语,向量检索难区分子话题) === if (/(一成系统|Ai众享|数字化工作室|盛咖学愿|三大平台|四大Ai生态|四大生态|智能生产力)/i.test(text)) { if (/(核心竞争力|竞争力|核心优势|优势)/i.test(text)) return '一成系统 核心竞争力 三大平台 四大Ai生态 零成本高效率'; if (/(发展|怎么做|怎么用|如何用|如何做|关键点|关键|方法|步骤)/i.test(text)) return '一成系统 发展PM事业 三大平台 四大Ai生态 零成本高效率 全球市场'; if (/(线上拓客|拓客|成交|成交率|陌生客户|陌生人沟通|邀约)/i.test(text)) return '一成系统 PM事业 线上拓客 成交 邀约 三大平台 四大Ai生态'; if (/(ai智能生产力|ai生产力|智能生产力|团队效率|赋能团队|团队赋能)/i.test(text)) return '一成系统 AI智能生产力 赋能团队 三大平台 四大Ai生态'; if (/(一部手机|0门槛|零门槛|0成本|零成本|足不出户|梦想横扫全球|一部手机做天下)/i.test(text)) return '一成系统 软件赋能 0成本高效率 一部手机做天下 足不出户梦想横扫全球'; if (/(故事|自我介绍|分享)/i.test(text)) return '一成系统 PM事业 故事分享 自我介绍'; if (/(邀约|话术)/i.test(text)) return '一成系统 邀约话术'; if (/文化/i.test(text)) return '一成系统 文化解析'; if (/(赋能团队|团队发展|AI赋能|ai赋能)/i.test(text)) return '一成系统用AI赋能团队发展'; if (/(三大平台|四大生态|Ai生态)/i.test(text)) return '一成系统 三大平台 四大Ai生态'; return '一成系统 德国PM事业发展的强大赋能工具 三大平台 四大Ai生态'; } if (/(一部手机做天下|一部手机即可运营全球市场|0门槛启动|零门槛启动|0成本高效率|零成本高效率|足不出户梦想横扫全球|身未动,?梦已成|批发式晋级)/i.test(text)) { return '一成系统 软件赋能 德国PM事业 0成本高效率 一部手机做天下 身未动梦已成 批发式晋级'; } if (/(身未动,?梦已成|批发式晋级)/i.test(text)) return '一成系统 身未动梦已成 批发式晋级 三大平台 四大Ai生态'; if (/行动圈/i.test(text)) return '一成系统 行动圈 数字化工作室 团队管理 目标考核'; if (/盟主社区/i.test(text)) return '一成系统 盟主社区 AI众享 社区盟主 引流 转化'; // === 一成系统相关业务话题 === if (/(招商|代理|加盟|事业机会|招商稿|代理政策)/i.test(text)) return '一成系统 PM事业 招商与代理 软件赋能 0成本高效率'; if (/(如何发展PM事业|怎么发展PM事业|PM事业发展逻辑|介绍PM事业|两分钟介绍PM事业)/i.test(text)) return '一成系统 PM事业 发展逻辑 商机 价值 软件赋能 三大平台 四大Ai生态 0成本高效率'; if (/(为什么选择德国PM|为何选择德国PM|为什么选德国PM|为什么选PM|为何选PM)/i.test(text)) return '一成系统 德国PM 选择理由 公司实力 产品优势 软件赋能 0成本高效率'; if (/(陌生客户|陌生人).*(沟通|开口|邀约|交流|切入).*(PM事业|德国PM|PM)/i.test(text)) return '一成系统 PM事业 陌生客户 沟通 邀约 话术 软件赋能'; if (/(线上拓客|线上成交|线上开发客户|线上获客|线上成交率)/i.test(text)) return '一成系统 PM事业 线上拓客 成交 获客'; if (/(一成AI|AI落地|ai落地|转观念|落地对比)/i.test(text)) return '2026一成Ai落地对比与转观念'; // === 敏感话题兜底(必须精确控制回复内容) === if (/(传销|骗局|骗子|正规吗|合法吗|正不正规|合不合法|是不是传销|直销还是传销|层级分销|非法集资|拉人头|下线|发展下线|报单|人头费)/i.test(text)) return '德国PM 1993年 创立 100多个国家 FitLine 公司介绍 邓白氏 99分 AAA+ 合法直销'; // === 别名纠正(向量检索不认的别名) === if (/暖炉原理/i.test(text)) return '火炉原理'; // 所有其它查询(产品/公司/认证/培训等):不做确定性改写 // 依赖 normalizeKnowledgeQueryAlias(别名归一化)+ enrichQueryWithContext(上下文补充)+ VikingDB + reranker return null; } static applyKnowledgeQueryAnchor(query) { let anchoredQuery = String(query || '').trim(); if (/一成系统/.test(anchoredQuery) && !/(德国PM|PM事业|赋能工具|Ai众享|数字化工作室|盛咖学愿)/i.test(anchoredQuery)) { anchoredQuery = anchoredQuery.replace(/一成系统/g, '一成系统 德国PM事业赋能工具'); } return anchoredQuery.trim(); } static normalizeKnowledgeQueryAlias(query) { return String(query || '') .replace(/^[啊哦嗯呢呀哎诶额,。!?、\s]+/g, '') .replace(/[啊哦嗯呢呀哎诶额,。!?、\s]+$/g, '') .replace(/^(你|你们|帮我|麻烦你|请你?|我想|我要|能不能|可以|可不可以|能否)[给帮]?(我)?(查一下|查查|查下|搜一下|搜搜|搜下|找一下|找找|找下|看一下|看看|看下|说一下|说说|说下|讲一下|讲讲|讲下|介绍一下|介绍下)?/g, '') .replace(/(的)?(相关|详细)?(内容|信息|资料|介绍|说明)[。??!]*$/g, '') .replace(/一成[,,、。!?\s]+系统/g, '一成系统') .replace(/X{2}系统/gi, '一成系统') .replace(/[\u4e00-\u9fff]{1,3}(?:成|城|程|诚|乘|声|生)[,,、\s]*系统/g, '一成系统') .replace(/(?:一城|逸城|一程|易成|一诚|亦成|艺成|溢成|义成|毅成|怡成|以成|已成|亿成|忆成|益成|益生|易诚|义诚|忆诚|以诚|一声|亿生|易乘)系统/g, '一成系统') .replace(/PM[-\s]*Fitline|PM[-\s]*fitline|Pm[-\s]*fitline|Fitline|fitline/g, 'PM-FitLine') .replace(/PM细胞营养|PM营养素|德国PM营养素/g, 'PM细胞营养素') .replace(/NTC科技/g, 'NTC营养保送系统') .replace(/NTC营养保送系统|NTC营养配送系统|NTC营养输送系统|NTC营养传送系统|NTC营养传输系统/g, 'NTC营养保送系统') .replace(/Nutrient Transport Concept/gi, 'NTC营养保送系统') .replace(/Activize Oxyplus|Activize/gi, 'Activize Oxyplus') .replace(/Restorate/gi, 'Restorate') .replace(/Basics/gi, 'Basics') .replace(/活力健|火力剑|火力健/g, 'Basics 活力健') .replace(/基础三合一|三合一基础套|大白小红小白|基础套装?/g, 'PM细胞营养素 基础套装') .replace(/儿童倍适|儿童产品/g, '儿童倍适') .replace(/小红精华液/g, 'Activize Serum 小红精华液') .replace(/小红产品/g, '小红产品 Activize Oxyplus') .replace(/大白产品/g, '大白产品 Basics') .replace(/小白产品/g, '小白产品 Restorate') .replace(/(?=60字)且不含无结果模式时,倾向判定为hit // 这避免了方舟LLM用同义词表达导致slot正则不匹配的误杀 if (text.length >= 60 && !strictNoHitPattern.test(text)) { console.log(`[ToolExecutor] slot_mismatch overridden by length fallback: query="${query}" len=${text.length}`); return { hit: true, reason: 'length_fallback', reply: text, }; } return { hit: false, reason: 'slot_mismatch', reply: `知识库中暂未找到与"${query}"直接相关的信息,请换个更具体的问法再试。`, }; } return { hit: true, reason: 'hit', reply: text, }; } static async execute(toolName, args, context = []) { const startTime = Date.now(); console.log(`[ToolExecutor] Executing: ${toolName}`, args); const handlers = { query_weather: this.queryWeather, query_order: this.queryOrder, search_knowledge: this.searchKnowledge, get_current_time: this.getCurrentTime, calculate: this.calculate, }; const handler = handlers[toolName]; if (!handler) { console.warn(`[ToolExecutor] Unknown tool: ${toolName}`); return { error: `未知的工具: ${toolName}` }; } try { const result = await handler.call(this, args, context); const ms = Date.now() - startTime; console.log(`[ToolExecutor] ${toolName} completed in ${ms}ms:`, JSON.stringify(result).substring(0, 200)); return result; } catch (error) { console.error(`[ToolExecutor] ${toolName} error:`, error); return { error: `工具执行失败: ${error.message}` }; } } static async queryWeather({ city }) { const mockData = { '北京': { temp: '22°C', weather: '晴', humidity: '45%', wind: '北风3级', aqi: 65, tips: '空气质量良好,适合户外活动' }, '上海': { temp: '26°C', weather: '多云', humidity: '72%', wind: '东南风2级', aqi: 78, tips: '注意防晒' }, '广州': { temp: '30°C', weather: '阵雨', humidity: '85%', wind: '南风1级', aqi: 55, tips: '记得带伞' }, '深圳': { temp: '29°C', weather: '多云', humidity: '80%', wind: '东风2级', aqi: 60, tips: '较为闷热,注意防暑' }, '杭州': { temp: '24°C', weather: '晴', humidity: '55%', wind: '西北风2级', aqi: 50, tips: '天气宜人' }, '成都': { temp: '20°C', weather: '阴', humidity: '70%', wind: '微风', aqi: 85, tips: '天气阴沉,适合室内活动' }, '武汉': { temp: '25°C', weather: '晴', humidity: '60%', wind: '东风3级', aqi: 72, tips: '适合出行' }, '南京': { temp: '23°C', weather: '多云', humidity: '58%', wind: '东北风2级', aqi: 68, tips: '温度适宜' }, '西安': { temp: '18°C', weather: '晴', humidity: '35%', wind: '西北风3级', aqi: 90, tips: '天气干燥,注意补水' }, '重庆': { temp: '27°C', weather: '阴转多云', humidity: '75%', wind: '微风', aqi: 80, tips: '注意防潮' }, }; const data = mockData[city]; if (data) { return { city, date: new Date().toLocaleDateString('zh-CN'), ...data }; } // 对未知城市生成随机数据 const weathers = ['晴', '多云', '阴', '小雨', '大风']; return { city, date: new Date().toLocaleDateString('zh-CN'), temp: `${Math.floor(Math.random() * 20 + 10)}°C`, weather: weathers[Math.floor(Math.random() * weathers.length)], humidity: `${Math.floor(Math.random() * 50 + 30)}%`, wind: '微风', aqi: Math.floor(Math.random() * 100 + 30), tips: '数据仅供参考', }; } static async queryOrder({ order_id }) { const statuses = ['待支付', '已支付', '拣货中', '已发货', '运输中', '已签收']; const hash = order_id.split('').reduce((a, c) => a + c.charCodeAt(0), 0); const statusIdx = hash % statuses.length; return { order_id, status: statuses[statusIdx], estimated_delivery: '2026-03-01', tracking_number: 'SF' + order_id.replace(/\D/g, '').padEnd(10, '0').substring(0, 10), items: [ { name: '智能音箱 Pro', quantity: 1, price: '¥299' }, ], create_time: '2026-02-20 14:30:00', }; } static async searchKnowledge({ query, response_mode = 'answer', context = [], session_id = null, original_text = '', _session = null, skipCache = false }) { const startTime = Date.now(); query = query || ''; const responseMode = response_mode === 'snippet' ? 'snippet' : 'answer'; const knowledgeEndpointId = process.env.VOLC_ARK_KNOWLEDGE_ENDPOINT_ID || process.env.VOLC_ARK_ENDPOINT_ID; const profileUserId = _session?.profileUserId || _session?.userId || null; const assistantProfileResult = await getAssistantProfile({ userId: profileUserId }); const assistantProfile = resolveAssistantProfile({ ...(assistantProfileResult?.profile || {}), ...(_session?.assistantProfile || {}), }); if (_session && assistantProfileResult?.profile) { _session.assistantProfile = assistantProfile; } const profileScope = profileUserId || 'global'; console.log(`[ToolExecutor] searchKnowledge called with query="${query}"`); if (!knowledgeEndpointId) { console.warn('[ToolExecutor] searchKnowledge skipped: knowledge endpoint not configured'); return { query, original_query: query, rewritten_query: query, results: [{ title: '配置缺失', content: `知识库中暂未找到与"${query}"直接相关的信息,请换个更具体的问法再试。` }], total: 1, source: 'ark_knowledge', hit: false, reason: 'endpoint_not_configured', }; } const rewrittenQuery = this.rewriteKnowledgeQuery(query, context, session_id, _session); // 全库检索:始终搜索所有 collection,由 VikingDB + reranker 判断相关性 const allDatasetIds = String(process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS || '') .split(',').map(id => id.trim()).filter(Boolean); const kbTarget = { datasetIds: allDatasetIds, matchedRoutes: ['all'] }; const effectiveQuery = rewrittenQuery || query; if (rewrittenQuery && rewrittenQuery !== query) { console.log(`[ToolExecutor] searchKnowledge rewritten query="${rewrittenQuery}"`); } console.log(`[ToolExecutor] searchKnowledge full-scan all ${allDatasetIds.length} collections`); const kbIds = process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS; if (kbIds && kbIds !== 'your_knowledge_base_dataset_id') { if (!knowledgeEndpointId || knowledgeEndpointId === 'your_ark_endpoint_id') { const latencyMs = Date.now() - startTime; console.warn('[ToolExecutor] Ark KB search skipped: knowledge endpoint not configured (knowledge base IDs are set but endpoint is missing)'); return { query, original_query: query, rewritten_query: effectiveQuery, selected_dataset_ids: kbTarget.datasetIds, selected_kb_routes: kbTarget.matchedRoutes, latency_ms: latencyMs, errorType: 'endpoint_not_configured', error: '知识库已配置但知识库方舟端点未配置,请检查 VOLC_ARK_KNOWLEDGE_ENDPOINT_ID 或 VOLC_ARK_ENDPOINT_ID', source: 'ark_knowledge', hit: false, reason: 'endpoint_not_configured', }; } try { // 缓存检查:优先 Redis,降级内存 Map(skipCache 时跳过) const cacheKey = getKbCacheKey(effectiveQuery, kbTarget.datasetIds, profileScope); const redisCached = skipCache ? null : await redisClient.getKbCache(cacheKey).catch(() => null); const cached = skipCache ? null : (redisCached || getKbCache(cacheKey)); if (cached) { const latencyMs = Date.now() - startTime; console.log(`[ToolExecutor] Ark KB cache hit in ${latencyMs}ms key="${cacheKey.slice(0, 60)}" source=${redisCached ? 'redis' : 'memory'}`); return { ...cached, original_query: query, rewritten_query: effectiveQuery, selected_dataset_ids: kbTarget.datasetIds, selected_kb_routes: kbTarget.matchedRoutes, latency_ms: latencyMs, cache_hit: true, }; } // 根据检索模式选择链路 const retrievalMode = process.env.VOLC_ARK_KB_RETRIEVAL_MODE || 'answer'; let arkResult; if (retrievalMode === 'raw') { // ★ 新链路:纯检索 + 重排,不经 LLM 加工 console.log('[ToolExecutor] Using RAW retrieval mode (kbRetriever)'); const rawResult = await kbRetriever.searchAndRerank(effectiveQuery, { datasetIds: kbTarget.datasetIds, sessionId: session_id, session: _session, originalQuery: query, }); // 转换为与旧格式兼容的结构 arkResult = { query: rawResult.query, results: rawResult.ragPayload.length > 0 ? rawResult.ragPayload.map(item => ({ title: item.title, content: item.content })) : [{ title: '未找到', content: `知识库中暂未找到与"${query}"直接相关的信息,请换个更具体的问法再试。` }], total: rawResult.ragPayload.length, source: 'ark_knowledge', hit: rawResult.hit, reason: rawResult.reason, retrieval_mode: 'raw', top_score: rawResult.topScore, chunks_count: rawResult.rerankedChunks?.length || 0, }; } else { // 旧链路:LLM 加工模式 console.log('[ToolExecutor] Using ANSWER retrieval mode (searchArkKnowledge)'); arkResult = await this.searchArkKnowledge(effectiveQuery, context, responseMode, kbTarget.datasetIds, query, assistantProfile); } const latencyMs = Date.now() - startTime; console.log(`[ToolExecutor] Ark KB search succeeded in ${latencyMs}ms mode=${retrievalMode}`); // 缓存到 Redis + 内存双写 setKbCache(cacheKey, arkResult); redisClient.setKbCache(cacheKey, arkResult).catch(() => {}); return { ...arkResult, original_query: query, rewritten_query: effectiveQuery, selected_dataset_ids: kbTarget.datasetIds, selected_kb_routes: kbTarget.matchedRoutes, latency_ms: latencyMs, }; } catch (error) { const latencyMs = Date.now() - startTime; console.warn('[ToolExecutor] Ark Knowledge Search failed:', error.message); return { query, original_query: query, rewritten_query: effectiveQuery, selected_dataset_ids: kbTarget.datasetIds, selected_kb_routes: kbTarget.matchedRoutes, latency_ms: latencyMs, errorType: error.code === 'ECONNABORTED' || /timeout/i.test(error.message) ? 'timeout' : 'request_failed', error: `知识库查询失败: ${error.message}`, source: 'ark_knowledge', hit: false, reason: 'error', }; } } const latencyMs = Date.now() - startTime; console.warn('[ToolExecutor] Ark knowledge base is not configured'); return { query, original_query: query, rewritten_query: effectiveQuery, selected_dataset_ids: kbTarget.datasetIds, selected_kb_routes: kbTarget.matchedRoutes, latency_ms: latencyMs, errorType: 'not_configured', error: '知识库未配置,请检查 VOLC_ARK_KNOWLEDGE_BASE_IDS', source: 'ark_knowledge', hit: false, reason: 'not_configured', }; } static rewriteKnowledgeQuery(query, context = [], sessionId = null, session = null) { const originalQuery = String(query || '').trim(); if (!originalQuery) { return ''; } // 先做别名归一化(ASR变体如"移程系统"→"一成系统"),再尝试确定性改写 const aliasNormalized = this.normalizeKnowledgeQueryAlias(originalQuery); const deterministicQuery = this.buildDeterministicKnowledgeQuery(aliasNormalized, context); if (deterministicQuery) { console.log(`[ToolExecutor] deterministic rewrite: "${originalQuery}" → "${deterministicQuery}"`); return deterministicQuery; } let normalizedQuery = this.applyKnowledgeQueryAnchor(aliasNormalized); if (sessionId) { normalizedQuery = contextKeywordTracker.enrichQueryWithContext(sessionId, normalizedQuery, session); } return this.sanitizeRewrittenQuery(normalizedQuery); } static sanitizeRewrittenQuery(query) { let cleaned = String(query || '').trim(); if (!cleaned) return cleaned; // 1. 清理口语填充词/语气词 cleaned = cleaned.replace(/[啊哦嗯呢呀哎诶额嘛吧啦哇噢]+/g, ' '); // 2. 清理连续标点 cleaned = cleaned.replace(/[,,。!?!?\s]{2,}/g, ' '); // 3. 去除重复的问句片段(如"怎么吃 怎么吃") cleaned = cleaned.replace(/(.{3,}?)[??!!。,,\s]+\1/g, '$1'); // 4. 按空格分词去重(保序) const parts = cleaned.split(/\s+/).filter(Boolean); const seen = new Set(); const deduped = parts.filter(p => { if (seen.has(p)) return false; seen.add(p); return true; }); cleaned = deduped.join(' ').trim(); // 5. 截断:最大80字符,避免过长query降低KB检索质量 if (cleaned.length > 80) { cleaned = cleaned.slice(0, 80).replace(/\s\S*$/, '').trim(); console.log(`[ToolExecutor] query truncated to 80 chars: "${cleaned}"`); } return cleaned; } /** * 通过方舟 Chat Completions API + knowledge_base metadata 进行知识检索 * 使用独立的 LLM 调用,专门用于知识库检索场景(如语音通话的工具回调) */ static async searchArkKnowledge(query, context = [], responseMode = 'answer', datasetIdsOverride = null, originalQuery = null, assistantProfile = null) { const endpointId = process.env.VOLC_ARK_KNOWLEDGE_ENDPOINT_ID || process.env.VOLC_ARK_ENDPOINT_ID; const kbModel = process.env.VOLC_ARK_KB_MODEL || endpointId; const authKey = process.env.VOLC_ARK_API_KEY || process.env.VOLC_ACCESS_KEY_ID; const kbIds = process.env.VOLC_ARK_KNOWLEDGE_BASE_IDS; if (!endpointId || endpointId === 'your_ark_endpoint_id') { console.warn('[ToolExecutor] searchArkKnowledge skipped: knowledge endpoint not configured'); return { query, results: [{ title: '配置缺失', content: `知识库中暂未找到与"${query}"直接相关的信息,请换个更具体的问法再试。` }], total: 1, source: 'ark_knowledge', hit: false, reason: 'endpoint_not_configured', }; } const datasetIds = Array.isArray(datasetIdsOverride) && datasetIdsOverride.length > 0 ? datasetIdsOverride.map((id) => String(id || '').trim()).filter(Boolean) : kbIds.split(',').map(id => id.trim()).filter(Boolean); const topK = parseInt(process.env.VOLC_ARK_KNOWLEDGE_TOP_K) || 3; const threshold = parseFloat(process.env.VOLC_ARK_KNOWLEDGE_THRESHOLD) || 0.3; // 当 query 为空时(FC 流式 chunks 乱序无法解析),使用简短的默认查询 const effectiveQuery = (query && query.trim()) ? query : '请介绍你们的产品和服务'; if (!query || !query.trim()) { console.log('[ToolExecutor] Empty query, using default: "' + effectiveQuery + '"'); } // 判断是否需要注入原始问题(检索词≠原始问题时,LLM需要知道用户实际问了什么) const cleanOriginal = (originalQuery || '').trim(); const answerTargetQuery = cleanOriginal || effectiveQuery; const hasRewrite = cleanOriginal && cleanOriginal !== effectiveQuery; if (hasRewrite) { console.log(`[ToolExecutor] searchArkKnowledge injecting original question: "${cleanOriginal}" (search query: "${effectiveQuery}")`); } // 提取最近2轮对话作为上下文(最多4条user/assistant消息),减少token量加速生成 const recentContext = context .filter(m => m.role === 'user' || m.role === 'assistant') .slice(responseMode === 'snippet' ? -2 : -4); const baseSnippetPrompt = '知识库片段提取助手。提取2-4条与问题最相关的简洁事实片段。只输出中文事实,不寒暄,不写"根据知识库",不补充未出现的内容,无相关内容则说未找到。'; const baseAnswerPrompt = buildKnowledgeAnswerPrompt(assistantProfile); let systemContent = responseMode === 'snippet' ? baseSnippetPrompt : baseAnswerPrompt; if (responseMode === 'answer' && answerTargetQuery) { systemContent += `\n\n当前必须优先直接回答用户当前这一个问题:“${answerTargetQuery}”。如果用户只问一个维度,例如成分、价格、用法、适合谁、区别、正规性、地址或联系方式,就只回答这个维度,不要扩展成整段产品或公司介绍。`; systemContent += `\n\n${this.buildQuestionSlotInstruction(answerTargetQuery)}`; } if (hasRewrite) { systemContent += `\n\n重要:用户的实际问题是"${cleanOriginal}",请围绕这个问题回答,不要偏离用户的真实意图。下方的检索词仅用于匹配知识库文档,不代表用户的真正提问。`; } const messages = [ { role: 'system', content: systemContent, }, ...recentContext, { role: 'user', content: effectiveQuery, }, ]; if (recentContext.length > 0) { console.log(`[ToolExecutor] Ark KB search with ${recentContext.length} context messages`); } const body = { model: kbModel, messages, metadata: { knowledge_base: { dataset_ids: datasetIds, top_k: topK, threshold: threshold, }, }, stream: false, max_tokens: 80, thinking: { type: 'disabled' }, }; const response = await axios.post( 'https://ark.cn-beijing.volces.com/api/v3/chat/completions', body, { headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${authKey}`, }, timeout: 15000, httpsAgent: kbHttpAgent, } ); const content = response.data?.choices?.[0]?.message?.content || '未找到相关信息'; const classifyQuery = [effectiveQuery, (originalQuery || '').trim()].filter(Boolean).join(' '); const classified = this.classifyKnowledgeAnswer(classifyQuery, content); return { query, results: [{ title: '方舟知识库检索结果', content: classified.reply, }], total: 1, source: 'ark_knowledge', hit: classified.hit, reason: classified.reason, }; } /** * 通过 Coze v3 Chat API 进行知识库检索 * 需要在 Coze 平台创建 Bot 并挂载知识库插件 */ static async searchCozeKnowledge(query) { const apiToken = process.env.COZE_API_TOKEN; const botId = process.env.COZE_BOT_ID; const baseUrl = 'https://api.coze.cn/v3'; const headers = { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiToken}`, }; // 1. 创建对话 const chatRes = await axios.post(`${baseUrl}/chat`, { bot_id: botId, user_id: 'kb_search_user', additional_messages: [ { role: 'user', content: query, content_type: 'text', }, ], stream: true, auto_save_history: false, }, { headers, timeout: 30000 }); const chatData = chatRes.data?.data; if (!chatData?.id || !chatData?.conversation_id) { throw new Error('Coze chat creation failed: ' + JSON.stringify(chatRes.data)); } const chatId = chatData.id; const conversationId = chatData.conversation_id; // 2. 轮询等待完成(最多 30 秒) const maxAttempts = 15; for (let i = 0; i < maxAttempts; i++) { await new Promise(r => setTimeout(r, 2000)); const statusRes = await axios.get( `${baseUrl}/chat/retrieve?chat_id=${chatId}&conversation_id=${conversationId}`, { headers, timeout: 10000 } ); const status = statusRes.data?.data?.status; if (status === 'completed') break; if (status === 'failed' || status === 'requires_action') { throw new Error(`Coze chat ended with status: ${status}`); } } // 3. 获取消息列表 const msgRes = await axios.get( `${baseUrl}/chat/message/list?chat_id=${chatId}&conversation_id=${conversationId}`, { headers, timeout: 10000 } ); const messages = msgRes.data?.data || []; const answerMsg = messages.find(m => m.role === 'assistant' && m.type === 'answer'); const content = answerMsg?.content || '未找到相关信息'; return { query, results: [{ title: 'Coze 知识库检索结果', content: content, }], total: 1, source: 'coze', }; } static async searchLocalKnowledge(query) { const knowledgeBase = { '退货': { title: '退货政策', content: '自签收之日起7天内可无理由退货,15天内可换货。请保持商品及包装完好。退货运费由买家承担(质量问题除外)。', }, '退款': { title: '退款流程', content: '退货审核通过后,退款将在3-5个工作日内原路返回。如超过时间未到账,请联系客服。', }, '配送': { title: '配送说明', content: '默认顺丰快递,普通订单1-3天送达,偏远地区3-7天。满99元免运费。', }, '保修': { title: '保修政策', content: '电子产品保修期1年,自购买之日起计算。人为损坏不在保修范围内。', }, '会员': { title: '会员权益', content: '会员享受9折优惠、免运费、专属客服、生日礼券等权益。年费128元。', }, }; const results = []; const q = query || ''; for (const [key, value] of Object.entries(knowledgeBase)) { if (q.includes(key) || key.includes(q)) { results.push(value); } } if (results.length === 0) { results.push({ title: '搜索结果', content: `未找到与"${query}"直接相关的知识库文档。建议联系人工客服获取更详细的帮助。`, }); } return { query, results, total: results.length, source: 'local' }; } static async getCurrentTime() { const now = new Date(); return { datetime: now.toLocaleString('zh-CN', { timeZone: 'Asia/Shanghai' }), timestamp: now.getTime(), timezone: 'Asia/Shanghai', weekday: ['周日', '周一', '周二', '周三', '周四', '周五', '周六'][now.getDay()], }; } static async calculate({ expression }) { try { // 仅允许数字和基本运算符,防止注入 const sanitized = expression.replace(/[^0-9+\-*/().% ]/g, ''); if (!sanitized || sanitized !== expression.replace(/\s/g, '')) { return { error: '表达式包含不支持的字符', expression }; } const result = Function('"use strict"; return (' + sanitized + ')')(); return { expression, result: Number(result), formatted: String(result) }; } catch (e) { return { error: '计算失败: ' + e.message, expression }; } } } module.exports = ToolExecutor;