feat: add shadow AI benchmarking for admin model comparison
Add a new admin-only feature that allows the platform owner to benchmark the production AI model against up to 2 alternate models (any OpenAI-compatible API) using real tenant data, without impacting users. Backend: - Shared AI caller utility (ai-caller.ts) for OpenAI-compatible endpoints - Shadow AI module with service, controller, and 3 entities - 6 admin API endpoints for model config CRUD, run trigger, and history - Auto-creates shadow_ai_models, shadow_runs, shadow_run_results tables - Exposes health-scores and investment-planning prompt builders for reuse Frontend: - New admin page at /admin/shadow-ai with 3 tabs: - Model Configuration (production + 2 alternate slots) - Run Comparison (tenant select, feature select, side-by-side results) - History (filterable run log with detail drill-down) - Full side-by-side output display with diff highlighting - Sidebar navigation link for AI Benchmarking Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
101
backend/src/common/utils/ai-caller.ts
Normal file
101
backend/src/common/utils/ai-caller.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
/**
|
||||
* Shared utility for calling OpenAI-compatible chat completion APIs.
|
||||
* Used by both production AI features and shadow AI benchmarking.
|
||||
*/
|
||||
|
||||
/** Input for `callOpenAICompatible`: endpoint, credentials, and request options. */
export interface AICallerParams {
  /** Base URL of the OpenAI-compatible API (e.g. "https://host/v1"); "/chat/completions" is appended by the caller. */
  apiUrl: string;
  /** Bearer token sent in the `Authorization` header. */
  apiKey: string;
  /** Model identifier forwarded verbatim in the request body. */
  model: string;
  /** Chat history in OpenAI message format. */
  messages: Array<{ role: string; content: string }>;
  /** Sampling temperature forwarded to the API. */
  temperature: number;
  /** Upper bound on generated tokens (sent as `max_tokens`). */
  maxTokens: number;
  /** Request timeout in milliseconds; `callOpenAICompatible` defaults this to 600000 (10 minutes). */
  timeoutMs?: number;
}
|
||||
|
||||
/** Result of a successful `callOpenAICompatible` call. */
export interface AICallerResult {
  /** Assistant message after cleaning: markdown code fences and `<think>…</think>` blocks stripped. */
  content: string;
  /** Token accounting as reported by the API, when present in the response body. */
  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
  /** Elapsed wall-clock time for the call, in milliseconds (measured around the whole request, including setup). */
  responseTimeMs: number;
  /** The assistant message exactly as returned by the API, before any cleaning. */
  rawResponse: string;
}
|
||||
|
||||
export async function callOpenAICompatible(params: AICallerParams): Promise<AICallerResult> {
|
||||
const { apiUrl, apiKey, model, messages, temperature, maxTokens, timeoutMs = 600000 } = params;
|
||||
|
||||
const requestBody = {
|
||||
model,
|
||||
messages,
|
||||
temperature,
|
||||
max_tokens: maxTokens,
|
||||
};
|
||||
|
||||
const bodyString = JSON.stringify(requestBody);
|
||||
const startTime = Date.now();
|
||||
|
||||
const { URL } = await import('url');
|
||||
const https = await import('https');
|
||||
|
||||
const aiResult = await new Promise<{ status: number; body: string }>((resolve, reject) => {
|
||||
const url = new URL(`${apiUrl}/chat/completions`);
|
||||
|
||||
const options = {
|
||||
hostname: url.hostname,
|
||||
port: url.port || 443,
|
||||
path: url.pathname,
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${apiKey}`,
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': Buffer.byteLength(bodyString, 'utf-8'),
|
||||
},
|
||||
timeout: timeoutMs,
|
||||
};
|
||||
|
||||
const req = https.request(options, (res) => {
|
||||
let data = '';
|
||||
res.on('data', (chunk) => { data += chunk; });
|
||||
res.on('end', () => {
|
||||
resolve({ status: res.statusCode!, body: data });
|
||||
});
|
||||
});
|
||||
|
||||
req.on('error', (err) => reject(err));
|
||||
req.on('timeout', () => {
|
||||
req.destroy();
|
||||
reject(new Error(`Request timed out after ${timeoutMs / 1000}s`));
|
||||
});
|
||||
|
||||
req.write(bodyString);
|
||||
req.end();
|
||||
});
|
||||
|
||||
const responseTimeMs = Date.now() - startTime;
|
||||
|
||||
if (aiResult.status >= 400) {
|
||||
throw new Error(`AI API returned ${aiResult.status}: ${aiResult.body}`);
|
||||
}
|
||||
|
||||
const data = JSON.parse(aiResult.body);
|
||||
const content = data.choices?.[0]?.message?.content || null;
|
||||
|
||||
if (!content) {
|
||||
throw new Error('AI model returned empty content');
|
||||
}
|
||||
|
||||
// Clean response: strip markdown fences and thinking blocks
|
||||
let cleaned = content.trim();
|
||||
if (cleaned.startsWith('```')) {
|
||||
cleaned = cleaned.replace(/^```(?:json)?\s*\n?/, '').replace(/\n?```\s*$/, '');
|
||||
}
|
||||
cleaned = cleaned.replace(/<think>[\s\S]*?<\/think>\s*/g, '').trim();
|
||||
|
||||
const usage = data.usage || undefined;
|
||||
|
||||
return {
|
||||
content: cleaned,
|
||||
usage,
|
||||
responseTimeMs,
|
||||
rawResponse: content,
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user