feat: add shadow AI benchmarking for admin model comparison
Add a new admin-only feature that allows the platform owner to benchmark the production AI model against up to 2 alternate models (any OpenAI-compatible API) using real tenant data, without impacting users. Backend: - Shared AI caller utility (ai-caller.ts) for OpenAI-compatible endpoints - Shadow AI module with service, controller, and 3 entities - 6 admin API endpoints for model config CRUD, run trigger, and history - Auto-creates shadow_ai_models, shadow_runs, shadow_run_results tables - Exposes health-scores and investment-planning prompt builders for reuse Frontend: - New admin page at /admin/shadow-ai with 3 tabs: - Model Configuration (production + 2 alternate slots) - Run Comparison (tenant select, feature select, side-by-side results) - History (filterable run log with detail drill-down) - Full side-by-side output display with diff highlighting - Sidebar navigation link for AI Benchmarking Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
101
backend/src/common/utils/ai-caller.ts
Normal file
101
backend/src/common/utils/ai-caller.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
/**
|
||||
* Shared utility for calling OpenAI-compatible chat completion APIs.
|
||||
* Used by both production AI features and shadow AI benchmarking.
|
||||
*/
|
||||
|
||||
/** Input for `callOpenAICompatible`: endpoint, credentials, and request options. */
export interface AICallerParams {
  /** Base URL of the OpenAI-compatible API (e.g. "https://host/v1"); "/chat/completions" is appended by the caller. */
  apiUrl: string;
  /** Bearer token sent in the `Authorization` header. */
  apiKey: string;
  /** Model identifier forwarded verbatim in the request body. */
  model: string;
  /** Chat history in OpenAI message format. */
  messages: Array<{ role: string; content: string }>;
  /** Sampling temperature forwarded to the API. */
  temperature: number;
  /** Upper bound on generated tokens (sent as `max_tokens`). */
  maxTokens: number;
  /** Request timeout in milliseconds; `callOpenAICompatible` defaults this to 600000 (10 minutes). */
  timeoutMs?: number;
}
|
||||
|
||||
/** Result of a successful `callOpenAICompatible` call. */
export interface AICallerResult {
  /** Assistant message after cleaning: markdown code fences and `<think>…</think>` blocks stripped. */
  content: string;
  /** Token accounting as reported by the API, when present in the response body. */
  usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number };
  /** Elapsed wall-clock time for the call, in milliseconds (measured around the whole request, including setup). */
  responseTimeMs: number;
  /** The assistant message exactly as returned by the API, before any cleaning. */
  rawResponse: string;
}
|
||||
|
||||
export async function callOpenAICompatible(params: AICallerParams): Promise<AICallerResult> {
|
||||
const { apiUrl, apiKey, model, messages, temperature, maxTokens, timeoutMs = 600000 } = params;
|
||||
|
||||
const requestBody = {
|
||||
model,
|
||||
messages,
|
||||
temperature,
|
||||
max_tokens: maxTokens,
|
||||
};
|
||||
|
||||
const bodyString = JSON.stringify(requestBody);
|
||||
const startTime = Date.now();
|
||||
|
||||
const { URL } = await import('url');
|
||||
const https = await import('https');
|
||||
|
||||
const aiResult = await new Promise<{ status: number; body: string }>((resolve, reject) => {
|
||||
const url = new URL(`${apiUrl}/chat/completions`);
|
||||
|
||||
const options = {
|
||||
hostname: url.hostname,
|
||||
port: url.port || 443,
|
||||
path: url.pathname,
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${apiKey}`,
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': Buffer.byteLength(bodyString, 'utf-8'),
|
||||
},
|
||||
timeout: timeoutMs,
|
||||
};
|
||||
|
||||
const req = https.request(options, (res) => {
|
||||
let data = '';
|
||||
res.on('data', (chunk) => { data += chunk; });
|
||||
res.on('end', () => {
|
||||
resolve({ status: res.statusCode!, body: data });
|
||||
});
|
||||
});
|
||||
|
||||
req.on('error', (err) => reject(err));
|
||||
req.on('timeout', () => {
|
||||
req.destroy();
|
||||
reject(new Error(`Request timed out after ${timeoutMs / 1000}s`));
|
||||
});
|
||||
|
||||
req.write(bodyString);
|
||||
req.end();
|
||||
});
|
||||
|
||||
const responseTimeMs = Date.now() - startTime;
|
||||
|
||||
if (aiResult.status >= 400) {
|
||||
throw new Error(`AI API returned ${aiResult.status}: ${aiResult.body}`);
|
||||
}
|
||||
|
||||
const data = JSON.parse(aiResult.body);
|
||||
const content = data.choices?.[0]?.message?.content || null;
|
||||
|
||||
if (!content) {
|
||||
throw new Error('AI model returned empty content');
|
||||
}
|
||||
|
||||
// Clean response: strip markdown fences and thinking blocks
|
||||
let cleaned = content.trim();
|
||||
if (cleaned.startsWith('```')) {
|
||||
cleaned = cleaned.replace(/^```(?:json)?\s*\n?/, '').replace(/\n?```\s*$/, '');
|
||||
}
|
||||
cleaned = cleaned.replace(/<think>[\s\S]*?<\/think>\s*/g, '').trim();
|
||||
|
||||
const usage = data.usage || undefined;
|
||||
|
||||
return {
|
||||
content: cleaned,
|
||||
usage,
|
||||
responseTimeMs,
|
||||
rawResponse: content,
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user