153 lines
5.2 KiB
JavaScript
153 lines
5.2 KiB
JavaScript
|
|
const path = require('path');
|
||
|
|
const { performance } = require('perf_hooks');
|
||
|
|
const axios = require('axios');
|
||
|
|
const https = require('https');
|
||
|
|
|
||
|
|
require('dotenv').config({ path: path.join(__dirname, '../.env') });
|
||
|
|
|
||
|
|
// Shared HTTPS agent handed to axios for every knowledge-base request, so
// consecutive benchmark calls can reuse already-open connections.
const kbHttpAgent = new https.Agent({
  // Socket inactivity timeout, in milliseconds.
  timeout: 15_000,
  // Upper bound on concurrent sockets per host.
  maxSockets: 6,
  // Keep sockets around between requests instead of closing them.
  keepAlive: true,
  // Initial delay for TCP keep-alive packets, in milliseconds.
  keepAliveMsecs: 30_000,
});
|
||
|
|
|
||
|
|
// Benchmark inputs as [label, question] pairs, expanded into the
// { name, query } records that the test runner iterates over.
const directTestQueries = [
  ['Q10 Direct', 'Q10辅酵素氧修护有什么独特功效'],
  ['IB5 Direct', 'IB5口腔免疫喷雾如何正确使用'],
  ['CC胶囊 Direct', 'CC胶囊的主要适用人群有哪些'],
  ['邓白氏 Direct', '德国PM邓白氏认证的具体含义是什么'],
  ['火炉原理 Direct', '请详细阐述一下火炉原理的核心思想'],
].map(([name, query]) => ({ name, query }));
|
||
|
|
|
||
|
|
/**
 * Sends a single non-streaming chat-completion request, with knowledge-base
 * retrieval metadata attached, to the Ark `chat/completions` endpoint.
 *
 * Configuration is read from the environment on every call:
 * - endpoint id: `VOLC_ARK_KNOWLEDGE_ENDPOINT_ID`, else `VOLC_ARK_ENDPOINT_ID`
 * - bearer token: `VOLC_ARK_API_KEY`, else `VOLC_ACCESS_KEY_ID`
 * - dataset ids: `VOLC_ARK_KNOWLEDGE_BASE_IDS` (comma separated), used only
 *   when `datasetIds` is not supplied
 * - `VOLC_ARK_KNOWLEDGE_TOP_K` (positive integer, default 3)
 * - `VOLC_ARK_KNOWLEDGE_THRESHOLD` (finite number, default 0.4; `0` is kept)
 *
 * @param {string} query - User message sent to the model.
 * @param {string[]} [datasetIds] - Knowledge-base dataset ids; overrides the
 *   environment list when provided.
 * @returns {Promise<object>} The response body (`response.data`) from axios.
 * @throws {Error} 'Missing required config' when the endpoint id, the auth
 *   key, or the dataset id list is missing or empty. Request failures from
 *   axios are propagated unchanged.
 */
async function callVikingDirectly(query, datasetIds) {
  const { env } = process;

  const endpointId = env.VOLC_ARK_KNOWLEDGE_ENDPOINT_ID || env.VOLC_ARK_ENDPOINT_ID;
  const authKey = env.VOLC_ARK_API_KEY || env.VOLC_ACCESS_KEY_ID;
  const kbIds =
    datasetIds ||
    (env.VOLC_ARK_KNOWLEDGE_BASE_IDS || '')
      .split(',')
      .map((id) => id.trim())
      .filter(Boolean);

  // Fall back to the default only for missing or invalid values. A plain
  // `|| default` would also discard a deliberately configured threshold of 0.
  const parsedTopK = Number.parseInt(env.VOLC_ARK_KNOWLEDGE_TOP_K, 10);
  const topK = Number.isInteger(parsedTopK) && parsedTopK > 0 ? parsedTopK : 3;

  const parsedThreshold = Number.parseFloat(env.VOLC_ARK_KNOWLEDGE_THRESHOLD);
  const threshold = Number.isFinite(parsedThreshold) ? parsedThreshold : 0.4;

  if (!endpointId || !authKey || kbIds.length === 0) {
    throw new Error('Missing required config');
  }

  const systemContent = '你是企业知识库问答助手。只依据知识库信息回答,不补充不脑补。';

  const body = {
    model: endpointId,
    messages: [
      { role: 'system', content: systemContent },
      { role: 'user', content: query },
    ],
    metadata: {
      knowledge_base: {
        dataset_ids: kbIds,
        top_k: topK,
        threshold,
      },
    },
    stream: false,
    max_tokens: 400,
  };

  const response = await axios.post(
    'https://ark.cn-beijing.volces.com/api/v3/chat/completions',
    body,
    {
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${authKey}`,
      },
      // Overall axios request timeout, in milliseconds.
      timeout: 30000,
      // Reuse the shared keep-alive agent across calls.
      httpsAgent: kbHttpAgent,
    }
  );

  return response.data;
}
|
||
|
|
|
||
|
|
/**
 * Runs the direct-API latency benchmark: every entry of `directTestQueries`
 * is sent through `callVikingDirectly` several times, sequentially, and the
 * wall-clock latency of each successful attempt is recorded and logged.
 * Failed attempts are logged and excluded from the statistics.
 *
 * After all queries have run, a summary is printed: the mean of the per-query
 * averages, nearest-rank P50/P95/P99 over all successful attempts, overall
 * min/max, and the queries ranked by average latency.
 *
 * @param {object} [options]
 * @param {number} [options.attempts=3] - Requests issued per query.
 * @returns {Promise<Array<{name: string, query: string, avgLatency: number,
 *   minLatency: number, maxLatency: number, latencies: number[]}>>} One entry
 *   per query that had at least one successful attempt, in the order of
 *   `directTestQueries`.
 */
async function runDirectAPITest({ attempts = 3 } = {}) {
  const banner = '='.repeat(80);

  console.log(banner);
  console.log('VIKING DIRECT API PERFORMANCE TEST');
  console.log('(No Query Rewrite, No Cache - Pure API Calls)');
  console.log(banner);
  console.log('');

  const allResults = [];

  for (const { name, query } of directTestQueries) {
    console.log(`Testing (Direct): ${name}`);
    const latencies = [];

    for (let i = 0; i < attempts; i++) {
      // Each attempt gets a distinct query text. Presumably this keeps any
      // cache from answering a repeated question; confirm against the API.
      const uniqueSuffix = ` [${Date.now()}-${Math.random()}]`;
      const uniqueQuery = query + uniqueSuffix;

      const start = performance.now();
      try {
        const result = await callVikingDirectly(uniqueQuery);
        const latency = performance.now() - start;
        latencies.push(latency);

        // Report 0 for a missing answer rather than the length of a
        // placeholder string.
        const content = result?.choices?.[0]?.message?.content;
        const contentLength = typeof content === 'string' ? content.length : 0;
        console.log(` Attempt ${i + 1}: ${latency.toFixed(2)}ms, content length=${contentLength}`);
      } catch (e) {
        console.log(` Attempt ${i + 1} error: ${e.message}`);
        if (e.response) {
          // JSON.stringify(undefined) is undefined; guard so that an error
          // response without a body cannot abort the whole benchmark.
          const dataPreview = (JSON.stringify(e.response.data) ?? '').substring(0, 200);
          console.log(` Status: ${e.response.status}, Data:`, dataPreview);
        }
      }
    }

    if (latencies.length > 0) {
      const avgLatency = latencies.reduce((a, b) => a + b, 0) / latencies.length;
      const minLatency = Math.min(...latencies);
      const maxLatency = Math.max(...latencies);

      allResults.push({
        name,
        query,
        avgLatency,
        minLatency,
        maxLatency,
        latencies,
      });

      console.log(` → Avg: ${avgLatency.toFixed(2)}ms, Min: ${minLatency.toFixed(2)}ms, Max: ${maxLatency.toFixed(2)}ms\n`);
    }
  }

  console.log(banner);
  console.log('DIRECT API SUMMARY');
  console.log(banner);

  if (allResults.length > 0) {
    const totalAvg = allResults.reduce((sum, r) => sum + r.avgLatency, 0) / allResults.length;

    // flatMap returns a fresh array, so sorting it leaves each result's own
    // `latencies` list in attempt order.
    const allLatencies = allResults.flatMap((r) => r.latencies).sort((a, b) => a - b);
    const totalMin = allLatencies[0];
    const totalMax = allLatencies[allLatencies.length - 1];

    // Nearest-rank percentile over the sorted samples.
    const percentile = (p) => allLatencies[Math.ceil(p * allLatencies.length) - 1];

    console.log(`\nOverall Average Latency: ${totalAvg.toFixed(2)}ms`);
    console.log(`Overall P50 Latency: ${percentile(0.5).toFixed(2)}ms`);
    console.log(`Overall P95 Latency: ${percentile(0.95).toFixed(2)}ms`);
    console.log(`Overall P99 Latency: ${percentile(0.99).toFixed(2)}ms`);
    console.log(`Overall Min Latency: ${totalMin.toFixed(2)}ms`);
    console.log(`Overall Max Latency: ${totalMax.toFixed(2)}ms`);

    console.log('\nAll queries sorted by latency:');
    // Rank a copy so the returned array keeps the original query order.
    [...allResults]
      .sort((a, b) => a.avgLatency - b.avgLatency)
      .forEach((r, i) => {
        console.log(` ${i + 1}. ${r.name}: ${r.avgLatency.toFixed(2)}ms`);
      });
  } else {
    console.log('\nNo successful requests - nothing to summarize.');
  }

  console.log('\n' + banner);

  return allResults;
}
|
||
|
|
|
||
|
|
// Script entry point: start the benchmark as soon as the file is executed.
// An unexpected rejection is logged and also reflected in the exit status,
// so shell scripts and CI can detect a failed run.
runDirectAPITest().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|