feat(server): KB prompt优化、字幕修复、S2S重连、助手配置API

- assistantProfileConfig: KB answer prompt改为分层策略(严格产品信息+灵活常识补充)
- nativeVoiceGateway: S2S upstream自动重连(最多50次)、event 351字幕debounce(800ms取最长文本)
- toolExecutor: 确定性query改写增强、KB查询传递session上下文
- contextKeywordTracker: 支持KB话题记忆优先enrichment
- contentSafeGuard: 新增品牌安全内容过滤服务
- assistantProfileService: 新增助手配置CRUD服务
- routes/assistantProfile: 新增助手配置API路由
- knowledgeKeywords: 扩展KB关键词词典
- fastAsrCorrector: ASR纠错规则更新
- tests/: KB prompt测试、保护窗口测试、Viking性能测试
- docs/: 助手配置API文档、系统提示词目录
2026-03-24 17:19:36 +08:00
parent 57a03677a9
commit 9567eb7358
34 changed files with 7076 additions and 46 deletions
--- a/test2/server/tests/viking_retrieval_performance.js
+++ b/test2/server/tests/viking_retrieval_performance.js
@@ -0,0 +1,349 @@
+const fs = require('fs');
+const path = require('path');
+const { performance } = require('perf_hooks');
+
+require('dotenv').config({ path: path.join(__dirname, '../.env') });
+
+const ToolExecutor = require('../services/toolExecutor');
+
+class VikingRetrievalPerformanceTester {
+  constructor(options = {}) {
+    this.results = [];
+    this.outputDir = options.outputDir || path.join(__dirname, 'test_results');
+    this.verbose = options.verbose !== false;
+    this.warmupRuns = options.warmupRuns || 2;
+
+    if (!fs.existsSync(this.outputDir)) {
+      fs.mkdirSync(this.outputDir, { recursive: true });
+    }
+  }
+
+  log(msg) {
+    if (this.verbose) {
+      console.log(`[VikingTest] ${msg}`);
+    }
+  }
+
+  async warmup(queries) {
+    this.log('Warming up...');
+    for (let i = 0; i < this.warmupRuns; i++) {
+      for (const query of queries) {
+        try {
+          await ToolExecutor.searchKnowledge({ query, response_mode: 'answer' }, []);
+        } catch (e) {
+          // ignore warmup errors
+        }
+      }
+    }
+    this.log('Warmup complete');
+  }
+
+  async testLatency(testQueries, iterations = 10) {
+    this.log(`Starting latency test with ${iterations} iterations...`);
+    
+    const results = {};
+    
+    for (const { name, query } of testQueries) {
+      this.log(`Testing query: ${name}`);
+      const latencies = [];
+      const hits = [];
+      
+      for (let i = 0; i < iterations; i++) {
+        const start = performance.now();
+        let result;
+        try {
+          result = await ToolExecutor.searchKnowledge({ query, response_mode: 'answer' }, []);
+          const latency = performance.now() - start;
+          latencies.push(latency);
+          hits.push(!!result.hit);
+          this.log(`  Iteration ${i + 1}: ${latency.toFixed(2)}ms, hit=${result.hit}`);
+        } catch (e) {
+          this.log(`  Iteration ${i + 1} error: ${e.message}`);
+        }
+      }
+      
+      results[name] = {
+        query,
+        latencies,
+        hits,
+        avgLatency: latencies.length ? latencies.reduce((a, b) => a + b, 0) / latencies.length : 0,
+        minLatency: latencies.length ? Math.min(...latencies) : 0,
+        maxLatency: latencies.length ? Math.max(...latencies) : 0,
+        p50Latency: this.percentile(latencies, 50),
+        p95Latency: this.percentile(latencies, 95),
+        p99Latency: this.percentile(latencies, 99),
+        hitRate: hits.length ? hits.filter(h => h).length / hits.length : 0
+      };
+    }
+    
+    this.results.push({
+      type: 'latency',
+      timestamp: new Date().toISOString(),
+      iterations,
+      results
+    });
+    
+    return results;
+  }
+
+  async testCacheEfficiency(queries, cacheHitsIterations = 5) {
+    this.log('Testing cache efficiency...');
+    
+    const results = [];
+    
+    for (const { name, query } of queries) {
+      this.log(`Testing cache for query: ${name}`);
+      
+      const firstStart = performance.now();
+      const firstResult = await ToolExecutor.searchKnowledge({ query, response_mode: 'answer' }, []);
+      const firstLatency = performance.now() - firstStart;
+      
+      const cacheLatencies = [];
+      for (let i = 0; i < cacheHitsIterations; i++) {
+        const start = performance.now();
+        const result = await ToolExecutor.searchKnowledge({ query, response_mode: 'answer' }, []);
+        const latency = performance.now() - start;
+        cacheLatencies.push(latency);
+        this.log(`  Cache hit ${i + 1}: ${latency.toFixed(2)}ms, cache_hit=${!!result.cache_hit}`);
+      }
+      
+      const avgCacheLatency = cacheLatencies.reduce((a, b) => a + b, 0) / cacheLatencies.length;
+      
+      results.push({
+        name,
+        query,
+        firstHitLatency: firstLatency,
+        cacheHitLatencies,
+        avgCacheLatency,
+        speedup: firstLatency / avgCacheLatency,
+        firstHit: firstResult
+      });
+    }
+    
+    this.results.push({
+      type: 'cache',
+      timestamp: new Date().toISOString(),
+      cacheHitsIterations,
+      results
+    });
+    
+    return results;
+  }
+
+  async testConcurrency(queries, concurrencyLevels = [1, 5, 10, 20]) {
+    this.log('Testing concurrency...');
+    
+    const results = {};
+    
+    for (const concurrency of concurrencyLevels) {
+      this.log(`Testing concurrency level: ${concurrency}`);
+      
+      const startTime = performance.now();
+      const promises = [];
+      
+      for (let i = 0; i < concurrency; i++) {
+        const queryObj = queries[i % queries.length];
+        promises.push(
+          ToolExecutor.searchKnowledge({ query: queryObj.query, response_mode: 'answer' }, [])
+        );
+      }
+      
+      const allResults = await Promise.all(promises);
+      const totalTime = performance.now() - startTime;
+      
+      const successCount = allResults.filter(r => r && !r.error).length;
+      const latencies = allResults.map((r, i) => {
+        return totalTime / concurrency;
+      });
+      
+      results[concurrency] = {
+        concurrency,
+        totalTime,
+        throughput: concurrency / (totalTime / 1000),
+        successRate: successCount / concurrency,
+        results: allResults
+      };
+      
+      this.log(`  Throughput: ${results[concurrency].throughput.toFixed(2)} req/s`);
+      this.log(`  Success rate: ${(results[concurrency].successRate * 100).toFixed(1)}%`);
+    }
+    
+    this.results.push({
+      type: 'concurrency',
+      timestamp: new Date().toISOString(),
+      concurrencyLevels,
+      results
+    });
+    
+    return results;
+  }
+
+  async testQueryTypes(queryGroups) {
+    this.log('Testing different query types...');
+    
+    const results = {};
+    
+    for (const [groupName, queries] of Object.entries(queryGroups)) {
+      this.log(`Testing group: ${groupName}`);
+      
+      const groupResults = [];
+      
+      for (const query of queries) {
+        const start = performance.now();
+        const result = await ToolExecutor.searchKnowledge({ query, response_mode: 'answer' }, []);
+        const latency = performance.now() - start;
+        
+        groupResults.push({
+          query,
+          latency,
+          hit: !!result.hit,
+          result
+        });
+      }
+      
+      results[groupName] = {
+        queries: groupResults,
+        avgLatency: groupResults.reduce((a, b) => a + b.latency, 0) / groupResults.length,
+        hitRate: groupResults.filter(r => r.hit).length / groupResults.length
+      };
+    }
+    
+    this.results.push({
+      type: 'query_types',
+      timestamp: new Date().toISOString(),
+      results
+    });
+    
+    return results;
+  }
+
+  percentile(arr, p) {
+    if (arr.length === 0) return 0;
+    const sorted = [...arr].sort((a, b) => a - b);
+    const index = Math.ceil((p / 100) * sorted.length) - 1;
+    return sorted[Math.max(0, index)];
+  }
+
+  generateReport() {
+    const report = {
+      generatedAt: new Date().toISOString(),
+      summary: {},
+      tests: this.results
+    };
+
+    for (const test of this.results) {
+      if (test.type === 'latency') {
+        report.summary.latency = Object.fromEntries(
+          Object.entries(test.results).map(([name, data]) => [
+            name,
+            {
+              avg: data.avgLatency.toFixed(2),
+              p95: data.p95Latency.toFixed(2),
+              hitRate: (data.hitRate * 100).toFixed(1) + '%'
+            }
+          ])
+        );
+      } else if (test.type === 'cache') {
+        report.summary.cache = test.results.map(r => ({
+          name: r.name,
+          speedup: r.speedup.toFixed(2) + 'x'
+        }));
+      } else if (test.type === 'concurrency') {
+        report.summary.concurrency = Object.fromEntries(
+          Object.entries(test.results).map(([level, data]) => [
+            level,
+            {
+              throughput: data.throughput.toFixed(2) + ' req/s',
+              successRate: (data.successRate * 100).toFixed(1) + '%'
+            }
+          ])
+        );
+      }
+    }
+
+    return report;
+  }
+
+  saveReport(filename = null) {
+    const report = this.generateReport();
+    const filepath = path.join(
+      this.outputDir,
+      filename || `viking_performance_${Date.now()}.json`
+    );
+    fs.writeFileSync(filepath, JSON.stringify(report, null, 2));
+    this.log(`Report saved to ${filepath}`);
+    return filepath;
+  }
+
+  printSummary() {
+    console.log('\n' + '='.repeat(80));
+    console.log('VIKING RETRIEVAL PERFORMANCE TEST SUMMARY');
+    console.log('='.repeat(80));
+
+    const report = this.generateReport();
+
+    if (report.summary.latency) {
+      console.log('\n--- Latency Test ---');
+      for (const [name, data] of Object.entries(report.summary.latency)) {
+        console.log(`  ${name}:`);
+        console.log(`    Avg: ${data.avg}ms, P95: ${data.p95}ms, Hit Rate: ${data.hitRate}`);
+      }
+    }
+
+    if (report.summary.cache) {
+      console.log('\n--- Cache Efficiency ---');
+      for (const r of report.summary.cache) {
+        console.log(`  ${r.name}: Speedup ${r.speedup}`);
+      }
+    }
+
+    if (report.summary.concurrency) {
+      console.log('\n--- Concurrency Test ---');
+      for (const [level, data] of Object.entries(report.summary.concurrency)) {
+        console.log(`  ${level} concurrent:`);
+        console.log(`    Throughput: ${data.throughput}, Success: ${data.successRate}`);
+      }
+    }
+
+    console.log('\n' + '='.repeat(80));
+  }
+
+  async runFullSuite() {
+    this.log('Starting full Viking retrieval performance test suite...');
+
+    const testQueries = [
+      { name: 'Product Query - Xiaohong', query: '小红产品有什么功效' },
+      { name: 'Product Query - Dabai', query: '大白产品怎么吃' },
+      { name: 'Company Info', query: '德国PM公司介绍' },
+      { name: 'NTC Technology', query: 'NTC营养保送系统原理' },
+      { name: 'Hot Answer', query: '基础三合一怎么吃' },
+      { name: 'No Hit Query', query: '今天天气怎么样' }
+    ];
+
+    await this.warmup(testQueries.map(q => q.query));
+    await this.testLatency(testQueries, 10);
+    await this.testCacheEfficiency(testQueries.slice(0, 3), 5);
+    await this.testConcurrency(testQueries.slice(0, 3), [1, 3, 5]);
+
+    const queryGroups = {
+      product: ['小红有什么功效', '大白怎么吃', '小白的作用'],
+      company: ['PM公司介绍', '邓白氏认证', '总部在哪里'],
+      technical: ['NTC原理', '火炉原理', '好转反应']
+    };
+    await this.testQueryTypes(queryGroups);
+
+    this.printSummary();
+    this.saveReport();
+
+    return this.results;
+  }
+}
+
+module.exports = VikingRetrievalPerformanceTester;
+
+if (require.main === module) {
+  (async () => {
+    const tester = new VikingRetrievalPerformanceTester();
+    await tester.runFullSuite();
+  })();
+}