feat: add shadow AI benchmarking for admin model comparison

Add a new admin-only feature that allows the platform owner to benchmark
the production AI model against up to 2 alternate models (any OpenAI-compatible
API) using real tenant data, without impacting users.

Backend:
- Shared AI caller utility (ai-caller.ts) for OpenAI-compatible endpoints
- Shadow AI module with service, controller, and 3 entities
- 6 admin API endpoints for model config CRUD, run trigger, and history
- Auto-creates shadow_ai_models, shadow_runs, shadow_run_results tables
- Exposes health-scores and investment-planning prompt builders for reuse

Frontend:
- New admin page at /admin/shadow-ai with 3 tabs:
  - Model Configuration (production + 2 alternate slots)
  - Run Comparison (tenant select, feature select, side-by-side results)
  - History (filterable run log with detail drill-down)
- Full side-by-side output display with diff highlighting
- Sidebar navigation link for AI Benchmarking

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
JoeBot
2026-04-05 07:16:38 -04:00
parent 629d112850
commit 4797669591
16 changed files with 2461 additions and 6 deletions

View File

@@ -0,0 +1,101 @@
/**
* Shared utility for calling OpenAI-compatible chat completion APIs.
* Used by both production AI features and shadow AI benchmarking.
*/
/**
 * Parameters for a single OpenAI-compatible chat-completion call.
 */
export interface AICallerParams {
/** Base URL of the OpenAI-compatible API; `/chat/completions` is appended to it. */
apiUrl: string;
/** API key sent as a `Bearer` token in the Authorization header. */
apiKey: string;
/** Model identifier forwarded in the request body. */
model: string;
/** Chat messages in OpenAI wire format (`{ role, content }`). */
messages: Array<{ role: string; content: string }>;
/** Sampling temperature, forwarded as-is. */
temperature: number;
/** Forwarded as `max_tokens` in the request body. */
maxTokens: number;
/** Request/socket timeout in milliseconds; the caller defaults this to 600000 (10 min). */
timeoutMs?: number;
}
/**
 * Result of a successful chat-completion call.
 */
export interface AICallerResult {
/** Model output with surrounding markdown code fences and `<think>` blocks stripped. */
content: string;
/** Token usage as reported by the API, when present in the response body. */
usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
/** Wall-clock duration of the HTTP request in milliseconds. */
responseTimeMs: number;
/** The unmodified message content exactly as the model returned it. */
rawResponse: string;
}
/**
 * Call an OpenAI-compatible `/chat/completions` endpoint and return the
 * cleaned assistant message content.
 *
 * @param params - Endpoint, credentials, model, messages, and sampling options.
 * @returns Cleaned content, token usage (when the API reports it), elapsed
 *          request time, and the raw (uncleaned) model output.
 * @throws Error when the request times out, the API responds with status >= 400,
 *         the response body is not valid JSON, or the model returns empty content.
 */
export async function callOpenAICompatible(params: AICallerParams): Promise<AICallerResult> {
const { apiUrl, apiKey, model, messages, temperature, maxTokens, timeoutMs = 600000 } = params;
const requestBody = {
model,
messages,
temperature,
max_tokens: maxTokens,
};
const bodyString = JSON.stringify(requestBody);
const startTime = Date.now();
const { URL } = await import('url');
// Strip trailing slashes so "https://host/v1/" does not become "…//chat/completions".
const url = new URL(`${apiUrl.replace(/\/+$/, '')}/chat/completions`);
// Support plain-HTTP endpoints (e.g. local llama.cpp/ollama servers) as well as HTTPS.
const isHttp = url.protocol === 'http:';
const transport = isHttp ? await import('http') : await import('https');
const aiResult = await new Promise<{ status: number; body: string }>((resolve, reject) => {
const options = {
hostname: url.hostname,
// url.port is a string; convert it, and pick the protocol's default when absent.
port: url.port ? Number(url.port) : (isHttp ? 80 : 443),
// Preserve any query string the caller included in apiUrl.
path: url.pathname + url.search,
method: 'POST',
headers: {
'Authorization': `Bearer ${apiKey}`,
'Content-Type': 'application/json',
'Content-Length': Buffer.byteLength(bodyString, 'utf-8'),
},
timeout: timeoutMs,
};
const req = transport.request(options, (res) => {
let data = '';
res.on('data', (chunk) => { data += chunk; });
res.on('end', () => {
resolve({ status: res.statusCode ?? 0, body: data });
});
});
req.on('error', (err) => reject(err));
req.on('timeout', () => {
// Destroy the socket so the 'error' handler does not also fire with a stale failure.
req.destroy();
reject(new Error(`Request timed out after ${timeoutMs / 1000}s`));
});
req.write(bodyString);
req.end();
});
const responseTimeMs = Date.now() - startTime;
if (aiResult.status >= 400) {
throw new Error(`AI API returned ${aiResult.status}: ${aiResult.body}`);
}
// Parse defensively: some gateways return HTML/plain-text error pages with a 200 status.
let data: { choices?: Array<{ message?: { content?: string } }>; usage?: AICallerResult['usage'] };
try {
data = JSON.parse(aiResult.body);
} catch {
throw new Error(`AI API returned a non-JSON response: ${aiResult.body.slice(0, 200)}`);
}
const content = data.choices?.[0]?.message?.content || null;
if (!content) {
throw new Error('AI model returned empty content');
}
// Clean response: strip a surrounding markdown fence (``` or ```json) if present.
let cleaned = content.trim();
if (cleaned.startsWith('```')) {
cleaned = cleaned.replace(/^```(?:json)?\s*\n?/, '').replace(/\n?```\s*$/, '');
}
// Remove <think>…</think> reasoning blocks that some models emit before the answer.
cleaned = cleaned.replace(/<think>[\s\S]*?<\/think>\s*/g, '').trim();
const usage = data.usage || undefined;
return {
content: cleaned,
usage,
responseTimeMs,
rawResponse: content,
};
}