feat: add shadow AI benchmarking for admin model comparison

Add a new admin-only feature that allows the platform owner to benchmark the production AI model against up to 2 alternate models (any OpenAI-compatible API) using real tenant data, without impacting users.

Backend:
- Shared AI caller utility (ai-caller.ts) for OpenAI-compatible endpoints
- Shadow AI module with service, controller, and 3 entities
- 6 admin API endpoints for model config CRUD, run trigger, and history
- Auto-creates shadow_ai_models, shadow_runs, shadow_run_results tables
- Exposes health-scores and investment-planning prompt builders for reuse

Frontend:
- New admin page at /admin/shadow-ai with 3 tabs:
  - Model Configuration (production + 2 alternate slots)
  - Run Comparison (tenant select, feature select, side-by-side results)
  - History (filterable run log with detail drill-down)
- Full side-by-side output display with diff highlighting
- Sidebar navigation link for AI Benchmarking

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-05 07:16:38 -04:00
parent 629d112850
commit 4797669591
16 changed files with 2461 additions and 6 deletions
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -30,6 +30,7 @@ import { UserPreferencesPage } from './pages/preferences/UserPreferencesPage';
 import { OrgMembersPage } from './pages/org-members/OrgMembersPage';
 import { AdminPage } from './pages/admin/AdminPage';
 import { AdminIdeasPage } from './pages/admin/AdminIdeasPage';
+import { AdminShadowAiPage } from './pages/admin/AdminShadowAiPage';
 import { AssessmentGroupsPage } from './pages/assessment-groups/AssessmentGroupsPage';
 import { CashFlowForecastPage } from './pages/cash-flow/CashFlowForecastPage';
 import { MonthlyActualsPage } from './pages/monthly-actuals/MonthlyActualsPage';
@@ -135,6 +136,7 @@ export function App() {
      >
        <Route index element={<AdminPage />} />
        <Route path="ideas" element={<AdminIdeasPage />} />
+        <Route path="shadow-ai" element={<AdminShadowAiPage />} />
      </Route>

      {/* Main app routes (require auth + org) */}
--- a/frontend/src/components/layout/Sidebar.tsx
+++ b/frontend/src/components/layout/Sidebar.tsx
@@ -140,6 +140,13 @@ export function Sidebar({ onNavigate }: SidebarProps) {
          onClick={() => go('/admin/ideas')}
          color="yellow"
        />
+        <NavLink
+          label="AI Benchmarking"
+          leftSection={<IconScale size={18} />}
+          active={location.pathname === '/admin/shadow-ai'}
+          onClick={() => go('/admin/shadow-ai')}
+          color="violet"
+        />
        {organizations && organizations.length > 0 && (
          <>
            <Divider my="sm" />
@@ -245,6 +252,13 @@ export function Sidebar({ onNavigate }: SidebarProps) {
            onClick={() => go('/admin/ideas')}
            color="yellow"
          />
+          <NavLink
+            label="AI Benchmarking"
+            leftSection={<IconScale size={18} />}
+            active={location.pathname === '/admin/shadow-ai'}
+            onClick={() => go('/admin/shadow-ai')}
+            color="violet"
+          />
        </>
      )}
    </ScrollArea>
--- a/frontend/src/pages/admin/AdminShadowAiPage.tsx
+++ b/frontend/src/pages/admin/AdminShadowAiPage.tsx
@@ -0,0 +1,780 @@
+import { useState, useEffect } from 'react';
+import {
+  Title, Text, Card, SimpleGrid, Group, Stack, Badge, Loader, Center,
+  Tabs, TextInput, Button, PasswordInput, Select, Table, Accordion,
+  Switch, Paper, RingProgress, Divider, Alert, Code, ScrollArea, Box,
+  Tooltip, ActionIcon,
+} from '@mantine/core';
+import {
+  IconScale, IconSettings, IconPlayerPlay, IconHistory,
+  IconCheck, IconX, IconAlertTriangle, IconClock, IconTrash,
+  IconRefresh, IconArrowRight, IconChevronDown,
+} from '@tabler/icons-react';
+import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
+import api from '../../services/api';
+
+// ── Interfaces ──
+
+/**
+ * An alternate model configuration row, as returned by
+ * `GET /admin/shadow-ai/models`.
+ */
+interface ShadowModel {
+  id: string;
+  /** Slot identifier; this page looks rows up by `'A'` and `'B'`. */
+  slot: string;
+  /** Display name shown and edited in the slot card. */
+  name: string;
+  api_url: string;
+  // NOTE(review): this value is loaded into the edit form as-is, so it looks like
+  // the backend returns the key to the browser — confirm whether it is masked.
+  api_key: string;
+  model_name: string;
+  is_active: boolean;
+  created_at: string;
+  updated_at: string;
+}
+
+/**
+ * One model's outcome within a shadow run; rendered as a single result card.
+ */
+interface ShadowRunResult {
+  id: string;
+  run_id: string;
+  /** Role key; this page has labels for `production`, `alternate_a` and `alternate_b`. */
+  model_role: string;
+  model_name: string;
+  api_url: string;
+  raw_response: string;
+  /** Structured model output handed to the health-score / investment renderers. */
+  parsed_response: any;
+  /** Displayed through `formatDuration`, i.e. interpreted as milliseconds. */
+  response_time_ms: number;
+  /** The UI reads `prompt_tokens` and `completion_tokens` from this object. */
+  token_usage: any;
+  /** The UI special-cases `'success'` and `'error'`; any other value gets a clock icon. */
+  status: string;
+  error_message: string;
+  created_at: string;
+}
+
+/**
+ * A benchmark run for one tenant and one feature, as returned by the
+ * `/admin/shadow-ai/runs` endpoints.
+ */
+interface ShadowRun {
+  id: string;
+  tenant_id: string;
+  tenant_name: string;
+  /** Feature key; `featureLabels` lists the values this page can label. */
+  feature: string;
+  /** The run tab keeps polling while this is `'running'`. */
+  status: string;
+  prompt_messages: any;
+  started_at: string;
+  completed_at: string;
+  created_at: string;
+  results: ShadowRunResult[];
+  // Rendered in the history table as "success_count/result_count".
+  // Presumably only populated by the list endpoint — TODO confirm.
+  result_count?: string;
+  success_count?: string;
+}
+
+/** Organization (tenant) entry as returned by `GET /admin/organizations`. */
+interface AdminOrg {
+  id: string;
+  name: string;
+  /** The run tab only offers organizations whose status is `'active'`. */
+  status: string;
+}
+
+// ── Helper Functions ──
+
+/** Human-readable labels for the feature keys a shadow run can target. */
+const featureLabels: Record<string, string> = {
+  operating_health: 'Operating Health',
+  reserve_health: 'Reserve Health',
+  investment_recommendations: 'Investment Recommendations',
+};
+
+/** Human-readable labels for `ShadowRunResult.model_role` values. */
+const roleLabels: Record<string, string> = {
+  production: 'Production',
+  alternate_a: 'Alternate A',
+  alternate_b: 'Alternate B',
+};
+
+/**
+ * Badge colors keyed by status string. The same table is indexed with both
+ * run statuses and per-result statuses, so it holds the union of both sets.
+ */
+const statusColor: Record<string, string> = {
+  running: 'blue',
+  completed: 'green',
+  partial: 'yellow',
+  failed: 'red',
+  pending: 'gray',
+  success: 'green',
+  error: 'red',
+};
+
+/**
+ * Formats a millisecond duration for display.
+ *
+ * @param ms Duration in milliseconds, or null/undefined when unknown.
+ * @returns `'-'` when the duration is missing or not a finite number,
+ *   `'<n>ms'` below one second, otherwise seconds with one decimal (e.g. `'1.5s'`).
+ */
+function formatDuration(ms: number | null | undefined): string {
+  // Only a missing/invalid value counts as "unknown"; a genuine 0 ms is shown as "0ms".
+  if (ms == null || !Number.isFinite(ms)) return '-';
+  if (ms < 1000) return `${ms}ms`;
+  return `${(ms / 1000).toFixed(1)}s`;
+}
+
+/**
+ * Formats a timestamp string in the viewer's locale.
+ *
+ * @param d Date string accepted by the `Date` constructor (e.g. ISO 8601).
+ * @returns The localized date/time, or `'-'` when `d` is empty or unparseable.
+ */
+function formatDate(d: string): string {
+  if (!d) return '-';
+  const parsed = new Date(d);
+  // An unparseable string would otherwise render the literal text "Invalid Date".
+  if (Number.isNaN(parsed.getTime())) return '-';
+  return parsed.toLocaleString();
+}
+
+// ── Model Configuration Tab ──
+
+/**
+ * "Model Configuration" tab: loads the alternate model rows from
+ * `GET /admin/shadow-ai/models` and renders the read-only production card
+ * next to one editable card per alternate slot (A and B).
+ */
+function ModelConfigTab() {
+  const { data: models, isLoading, isError } = useQuery<ShadowModel[]>({
+    queryKey: ['shadow-ai-models'],
+    queryFn: () => api.get('/admin/shadow-ai/models').then((r) => r.data),
+  });
+
+  const modelA = models?.find((m) => m.slot === 'A');
+  const modelB = models?.find((m) => m.slot === 'B');
+
+  return (
+    <Stack>
+      <Text size="sm" c="dimmed">
+        Configure alternate AI models to benchmark against the production model.
+        Each model can use any OpenAI-compatible API endpoint.
+      </Text>
+      {/* Without this, a failed load is indistinguishable from "Not configured" slots. */}
+      {isError && (
+        <Alert color="red" icon={<IconAlertTriangle size={16} />}>
+          Failed to load model configuration.
+        </Alert>
+      )}
+      <SimpleGrid cols={{ base: 1, md: 3 }}>
+        <ProductionModelCard />
+        <ModelSlotCard slot="A" model={modelA} isLoading={isLoading} />
+        <ModelSlotCard slot="B" model={modelB} isLoading={isLoading} />
+      </SimpleGrid>
+    </Stack>
+  );
+}
+
+/**
+ * Static, read-only card describing the production model. It shows fixed
+ * placeholder text only; no values are fetched or editable here.
+ */
+function ProductionModelCard() {
+  // Placeholder rows rendered as disabled inputs, in display order.
+  const envFields = [
+    { label: 'Model', value: '(from AI_MODEL env var)' },
+    { label: 'API URL', value: '(from AI_API_URL env var)' },
+  ];
+
+  const header = (
+    <Group justify="space-between">
+      <Text fw={600}>Production Model</Text>
+      <Badge color="green" variant="light">Active</Badge>
+    </Group>
+  );
+
+  return (
+    <Card withBorder shadow="sm">
+      <Stack gap="sm">
+        {header}
+        <Divider />
+        <Text size="sm" c="dimmed">Configured via environment variables</Text>
+        {envFields.map((field) => (
+          <TextInput
+            key={field.label}
+            label={field.label}
+            value={field.value}
+            readOnly
+            disabled
+            size="sm"
+          />
+        ))}
+        <Text size="xs" c="dimmed" mt="xs">
+          Production model settings are managed through server environment
+          variables and cannot be changed from the UI.
+        </Text>
+      </Stack>
+    </Card>
+  );
+}
+
+/**
+ * Editable card for one alternate model slot.
+ *
+ * Saves with `PUT /admin/shadow-ai/models/:slot` and removes with
+ * `DELETE /admin/shadow-ai/models/:slot`; both invalidate the model list query.
+ *
+ * @param slot Slot identifier used in the endpoint path and the card title.
+ * @param model Existing configuration for the slot, if any; seeds the form fields.
+ * @param isLoading While true, a loading placeholder is rendered instead of the form.
+ */
+function ModelSlotCard({ slot, model, isLoading }: { slot: string; model?: ShadowModel; isLoading: boolean }) {
+  const queryClient = useQueryClient();
+  const [name, setName] = useState('');
+  const [apiUrl, setApiUrl] = useState('');
+  const [apiKey, setApiKey] = useState('');
+  const [modelName, setModelName] = useState('');
+  const [isActive, setIsActive] = useState(true);
+
+  // Re-seed the form whenever the server copy of this slot changes.
+  // NOTE(review): the stored API key is copied into browser state here — confirm
+  // the backend masks it, or switch to a "leave blank to keep current key" flow.
+  useEffect(() => {
+    if (model) {
+      setName(model.name);
+      setApiUrl(model.api_url);
+      setApiKey(model.api_key);
+      setModelName(model.model_name);
+      setIsActive(model.is_active);
+    }
+  }, [model]);
+
+  const saveMutation = useMutation({
+    mutationFn: () => api.put(`/admin/shadow-ai/models/${slot}`, { name, apiUrl, apiKey, modelName, isActive }),
+    onSuccess: () => queryClient.invalidateQueries({ queryKey: ['shadow-ai-models'] }),
+  });
+
+  const deleteMutation = useMutation({
+    mutationFn: () => api.delete(`/admin/shadow-ai/models/${slot}`),
+    onSuccess: () => {
+      // The effect above only runs when a model exists, so clear the form explicitly.
+      setName(''); setApiUrl(''); setApiKey(''); setModelName(''); setIsActive(true);
+      queryClient.invalidateQueries({ queryKey: ['shadow-ai-models'] });
+    },
+  });
+
+  if (isLoading) return <Card withBorder shadow="sm"><Center h={200}><Loader size="sm" /></Center></Card>;
+
+  return (
+    <Card withBorder shadow="sm">
+      <Stack gap="sm">
+        <Group justify="space-between">
+          <Text fw={600}>Alternate {slot}</Text>
+          {model ? (
+            <Badge color={isActive ? 'blue' : 'gray'} variant="light">
+              {isActive ? 'Active' : 'Inactive'}
+            </Badge>
+          ) : (
+            <Badge color="gray" variant="light">Not configured</Badge>
+          )}
+        </Group>
+        <Divider />
+        <TextInput label="Display Name" placeholder="e.g. GPT-4o" value={name} onChange={(e) => setName(e.target.value)} size="sm" />
+        <TextInput label="API URL" placeholder="https://api.openai.com/v1" value={apiUrl} onChange={(e) => setApiUrl(e.target.value)} size="sm" />
+        <PasswordInput label="API Key" placeholder="sk-..." value={apiKey} onChange={(e) => setApiKey(e.target.value)} size="sm" />
+        <TextInput label="Model Name" placeholder="gpt-4o" value={modelName} onChange={(e) => setModelName(e.target.value)} size="sm" />
+        <Switch label="Active" checked={isActive} onChange={(e) => setIsActive(e.currentTarget.checked)} />
+        <Group>
+          <Button
+            size="sm"
+            onClick={() => saveMutation.mutate()}
+            loading={saveMutation.isPending}
+            disabled={!name || !apiUrl || !apiKey || !modelName}
+          >
+            Save
+          </Button>
+          {model && (
+            <Button
+              size="sm"
+              color="red"
+              variant="light"
+              // Icon-only button: give assistive technology a name to announce.
+              aria-label={`Delete Alternate ${slot} configuration`}
+              onClick={() => deleteMutation.mutate()}
+              loading={deleteMutation.isPending}
+            >
+              <IconTrash size={16} />
+            </Button>
+          )}
+        </Group>
+        {saveMutation.isError && <Text size="xs" c="red">Failed to save</Text>}
+        {saveMutation.isSuccess && <Text size="xs" c="green">Saved</Text>}
+        {/* Previously a failed delete produced no feedback at all. */}
+        {deleteMutation.isError && <Text size="xs" c="red">Failed to delete</Text>}
+      </Stack>
+    </Card>
+  );
+}
+
+// ── Run Comparison Tab ──
+
+/**
+ * "Run Comparison" tab: lets the admin pick a tenant and an AI feature, starts
+ * a shadow run with `POST /admin/shadow-ai/runs`, then polls
+ * `GET /admin/shadow-ai/runs/:id` every 3 seconds while the run reports
+ * `'running'` and finally renders the side-by-side results.
+ */
+function RunComparisonTab() {
+  const queryClient = useQueryClient();
+  const [tenantId, setTenantId] = useState<string | null>(null);
+  const [feature, setFeature] = useState<string | null>(null);
+  const [activeRunId, setActiveRunId] = useState<string | null>(null);
+
+  const { data: orgs } = useQuery<AdminOrg[]>({
+    queryKey: ['admin-orgs'],
+    queryFn: () => api.get('/admin/organizations').then((r) => r.data),
+  });
+
+  const triggerMutation = useMutation({
+    mutationFn: () => api.post('/admin/shadow-ai/runs', { tenantId, feature }),
+    onSuccess: (res) => {
+      setActiveRunId(res.data.runId);
+      // Make the freshly created run visible in the history list.
+      queryClient.invalidateQueries({ queryKey: ['shadow-ai-runs'] });
+    },
+  });
+
+  const { data: activeRun } = useQuery<ShadowRun>({
+    queryKey: ['shadow-ai-run', activeRunId],
+    queryFn: () => api.get(`/admin/shadow-ai/runs/${activeRunId}`).then((r) => r.data),
+    enabled: !!activeRunId,
+    // Poll only while the run is in flight; returning false stops the interval.
+    refetchInterval: (query) => {
+      const run = query.state.data;
+      return run?.status === 'running' ? 3000 : false;
+    },
+  });
+
+  // Once the run leaves the 'running' state, refresh the history list so its
+  // status, counts and duration are not stale.
+  const activeRunStatus = activeRun?.status;
+  useEffect(() => {
+    if (activeRunStatus && activeRunStatus !== 'running') {
+      queryClient.invalidateQueries({ queryKey: ['shadow-ai-runs'] });
+    }
+  }, [activeRunStatus, queryClient]);
+
+  const orgOptions = (orgs || [])
+    .filter((o) => o.status === 'active')
+    .map((o) => ({ value: o.id, label: o.name }));
+
+  const featureOptions = [
+    { value: 'operating_health', label: 'Operating Health Score' },
+    { value: 'reserve_health', label: 'Reserve Health Score' },
+    { value: 'investment_recommendations', label: 'Investment Recommendations' },
+  ];
+
+  return (
+    <Stack>
+      <Card withBorder shadow="sm">
+        <Stack gap="md">
+          <Text fw={600}>Run Shadow Comparison</Text>
+          <SimpleGrid cols={{ base: 1, sm: 3 }}>
+            <Select
+              label="Tenant"
+              placeholder="Select a tenant"
+              data={orgOptions}
+              value={tenantId}
+              onChange={setTenantId}
+              searchable
+            />
+            <Select
+              label="AI Feature"
+              placeholder="Select feature"
+              data={featureOptions}
+              value={feature}
+              onChange={setFeature}
+            />
+            <Stack justify="flex-end">
+              <Button
+                leftSection={<IconPlayerPlay size={16} />}
+                onClick={() => triggerMutation.mutate()}
+                loading={triggerMutation.isPending}
+                disabled={!tenantId || !feature}
+              >
+                Run Comparison
+              </Button>
+            </Stack>
+          </SimpleGrid>
+          {triggerMutation.isError && (
+            <Alert color="red" icon={<IconAlertTriangle size={16} />}>
+              Failed to start comparison. Ensure at least one alternate model is configured.
+            </Alert>
+          )}
+        </Stack>
+      </Card>
+
+      {activeRun && (
+        <Card withBorder shadow="sm">
+          <Stack gap="md">
+            <Group justify="space-between">
+              <Group>
+                <Text fw={600}>
+                  {featureLabels[activeRun.feature] || activeRun.feature}
+                </Text>
+                <Badge color={statusColor[activeRun.status]}>{activeRun.status}</Badge>
+              </Group>
+              {activeRun.tenant_name && (
+                <Text size="sm" c="dimmed">Tenant: {activeRun.tenant_name}</Text>
+              )}
+            </Group>
+
+            {activeRun.status === 'running' && (
+              <Center py="md">
+                <Stack align="center" gap="xs">
+                  <Loader size="md" />
+                  <Text size="sm" c="dimmed">Running models... This may take a few minutes.</Text>
+                  <Group gap="xs">
+                    {(activeRun.results || []).map((r) => (
+                      <Badge key={r.model_role} color={statusColor[r.status]} variant="light">
+                        {roleLabels[r.model_role]}: {r.status}
+                      </Badge>
+                    ))}
+                  </Group>
+                </Stack>
+              </Center>
+            )}
+
+            {activeRun.status !== 'running' && activeRun.results && (
+              <ComparisonResults results={activeRun.results} feature={activeRun.feature} />
+            )}
+          </Stack>
+        </Card>
+      )}
+    </Stack>
+  );
+}
+
+// ── History Tab ──
+
+/**
+ * "History" tab: lists up to 50 past shadow runs from
+ * `GET /admin/shadow-ai/runs` (optionally filtered by tenant and feature) and
+ * shows the full comparison for whichever row the admin clicks.
+ */
+function HistoryTab() {
+  const [selectedRunId, setSelectedRunId] = useState<string | null>(null);
+  const [tenantFilter, setTenantFilter] = useState<string | null>(null);
+  const [featureFilter, setFeatureFilter] = useState<string | null>(null);
+
+  const { data: orgs } = useQuery<AdminOrg[]>({
+    queryKey: ['admin-orgs'],
+    queryFn: () => api.get('/admin/organizations').then((r) => r.data),
+  });
+
+  const { data: historyData, isLoading, isError } = useQuery({
+    // Filters are part of the key so each filter combination is cached separately.
+    queryKey: ['shadow-ai-runs', tenantFilter, featureFilter],
+    queryFn: () => {
+      const params = new URLSearchParams();
+      if (tenantFilter) params.set('tenantId', tenantFilter);
+      if (featureFilter) params.set('feature', featureFilter);
+      params.set('limit', '50');
+      return api.get(`/admin/shadow-ai/runs?${params}`).then((r) => r.data);
+    },
+  });
+
+  const { data: selectedRun } = useQuery<ShadowRun>({
+    queryKey: ['shadow-ai-run', selectedRunId],
+    queryFn: () => api.get(`/admin/shadow-ai/runs/${selectedRunId}`).then((r) => r.data),
+    enabled: !!selectedRunId,
+  });
+
+  const orgOptions = [
+    { value: '', label: 'All Tenants' },
+    ...(orgs || []).map((o) => ({ value: o.id, label: o.name })),
+  ];
+
+  const featureOptions = [
+    { value: '', label: 'All Features' },
+    { value: 'operating_health', label: 'Operating Health' },
+    { value: 'reserve_health', label: 'Reserve Health' },
+    { value: 'investment_recommendations', label: 'Investment Recommendations' },
+  ];
+
+  const runs: ShadowRun[] = historyData?.runs || [];
+
+  return (
+    <Stack>
+      <Group>
+        <Select
+          size="sm"
+          placeholder="Filter by tenant"
+          data={orgOptions}
+          value={tenantFilter || ''}
+          onChange={(v) => setTenantFilter(v || null)}
+          clearable
+          w={200}
+        />
+        <Select
+          size="sm"
+          placeholder="Filter by feature"
+          data={featureOptions}
+          value={featureFilter || ''}
+          onChange={(v) => setFeatureFilter(v || null)}
+          clearable
+          w={200}
+        />
+      </Group>
+
+      {isLoading ? (
+        <Center py="xl"><Loader /></Center>
+      ) : isError ? (
+        // A failed request must not masquerade as an empty history.
+        <Alert color="red" icon={<IconAlertTriangle size={16} />}>
+          Failed to load shadow run history.
+        </Alert>
+      ) : runs.length === 0 ? (
+        <Text c="dimmed" ta="center" py="xl">No shadow runs found.</Text>
+      ) : (
+        <Table striped highlightOnHover>
+          <Table.Thead>
+            <Table.Tr>
+              <Table.Th>Date</Table.Th>
+              <Table.Th>Tenant</Table.Th>
+              <Table.Th>Feature</Table.Th>
+              <Table.Th>Status</Table.Th>
+              <Table.Th>Models</Table.Th>
+              <Table.Th>Duration</Table.Th>
+              <Table.Th></Table.Th>
+            </Table.Tr>
+          </Table.Thead>
+          <Table.Tbody>
+            {runs.map((run) => {
+              // Wall-clock duration; null until both timestamps are present.
+              const duration = run.completed_at && run.started_at
+                ? new Date(run.completed_at).getTime() - new Date(run.started_at).getTime()
+                : null;
+              return (
+                <Table.Tr
+                  key={run.id}
+                  style={{ cursor: 'pointer' }}
+                  onClick={() => setSelectedRunId(run.id)}
+                  bg={selectedRunId === run.id ? 'var(--mantine-color-blue-light)' : undefined}
+                >
+                  <Table.Td>{formatDate(run.created_at)}</Table.Td>
+                  <Table.Td>{run.tenant_name || '-'}</Table.Td>
+                  <Table.Td>{featureLabels[run.feature] || run.feature}</Table.Td>
+                  <Table.Td><Badge color={statusColor[run.status]} size="sm">{run.status}</Badge></Table.Td>
+                  <Table.Td>{run.success_count || '0'}/{run.result_count || '0'}</Table.Td>
+                  <Table.Td>{formatDuration(duration)}</Table.Td>
+                  <Table.Td><IconArrowRight size={14} /></Table.Td>
+                </Table.Tr>
+              );
+            })}
+          </Table.Tbody>
+        </Table>
+      )}
+
+      {selectedRun && selectedRun.results && (
+        <Card withBorder shadow="sm" mt="md">
+          <Stack gap="md">
+            <Group justify="space-between">
+              <Group>
+                <Text fw={600}>{featureLabels[selectedRun.feature] || selectedRun.feature}</Text>
+                <Badge color={statusColor[selectedRun.status]}>{selectedRun.status}</Badge>
+              </Group>
+              <Text size="sm" c="dimmed">
+                {selectedRun.tenant_name} | {formatDate(selectedRun.created_at)}
+              </Text>
+            </Group>
+            <ComparisonResults results={selectedRun.results} feature={selectedRun.feature} />
+          </Stack>
+        </Card>
+      )}
+    </Stack>
+  );
+}
+
+// ── Comparison Results Component ──
+
+/**
+ * Lays out one result card per model, side by side (at most three columns on
+ * medium and wider screens).
+ *
+ * @param results Per-model results of a single run.
+ * @param feature Feature key of the run; decides which renderer the cards use.
+ */
+function ComparisonResults({ results, feature }: { results: ShadowRunResult[]; feature: string }) {
+  const healthFeatures = ['operating_health', 'reserve_health'];
+  const isHealthScore = healthFeatures.includes(feature);
+
+  // Payloads of every successful model; the cards use them to flag differences.
+  const successfulPayloads: any[] = [];
+  for (const entry of results) {
+    if (entry.status === 'success' && entry.parsed_response) {
+      successfulPayloads.push(entry.parsed_response);
+    }
+  }
+
+  const columnCount = Math.min(results.length, 3);
+  const cards = results.map((entry) => (
+    <ResultCard
+      key={entry.model_role}
+      result={entry}
+      isHealthScore={isHealthScore}
+      allParsed={successfulPayloads}
+    />
+  ));
+
+  return <SimpleGrid cols={{ base: 1, md: columnCount }}>{cards}</SimpleGrid>;
+}
+
+/**
+ * Card showing a single model's result: role and status badges, model name,
+ * response time, token usage, then either the error message or the parsed
+ * output plus a collapsible JSON dump.
+ *
+ * @param result The model result to render.
+ * @param isHealthScore When true the parsed output is rendered as a health
+ *   score, otherwise as investment recommendations.
+ * @param allParsed Parsed outputs of all successful models in the same run,
+ *   forwarded to the renderers for difference highlighting.
+ */
+function ResultCard({
+  result,
+  isHealthScore,
+  allParsed,
+}: {
+  result: ShadowRunResult;
+  isHealthScore: boolean;
+  allParsed: any[];
+}) {
+  const roleColor: Record<string, string> = {
+    production: 'green',
+    alternate_a: 'blue',
+    alternate_b: 'violet',
+  };
+
+  return (
+    <Card withBorder shadow="xs" padding="md">
+      <Stack gap="sm">
+        <Group justify="space-between">
+          <Group gap="xs">
+            <Badge color={roleColor[result.model_role] || 'gray'} variant="filled">
+              {roleLabels[result.model_role]}
+            </Badge>
+          </Group>
+          <Badge
+            color={statusColor[result.status]}
+            variant="light"
+            leftSection={result.status === 'success' ? <IconCheck size={12} /> : result.status === 'error' ? <IconX size={12} /> : <IconClock size={12} />}
+          >
+            {result.status}
+          </Badge>
+        </Group>
+
+        <Text size="xs" c="dimmed" truncate>{result.model_name}</Text>
+
+        {/* Ternary instead of `&&`: `0 && <Badge/>` would make React print a stray "0". */}
+        {result.response_time_ms ? (
+          <Badge color="gray" variant="light" size="sm">
+            {formatDuration(result.response_time_ms)}
+          </Badge>
+        ) : null}
+
+        {result.token_usage && (
+          <Text size="xs" c="dimmed">
+            {/* `??` keeps a legitimate count of 0 instead of replacing it with "?". */}
+            Tokens: {result.token_usage.prompt_tokens ?? '?'} prompt / {result.token_usage.completion_tokens ?? '?'} completion
+          </Text>
+        )}
+
+        <Divider />
+
+        {result.status === 'error' && (
+          <Alert color="red" icon={<IconAlertTriangle size={16} />}>
+            <Text size="sm">{result.error_message || 'Unknown error'}</Text>
+          </Alert>
+        )}
+
+        {result.status === 'success' && result.parsed_response && (
+          isHealthScore
+            ? <HealthScoreDisplay data={result.parsed_response} allParsed={allParsed} />
+            : <InvestmentDisplay data={result.parsed_response} allParsed={allParsed} />
+        )}
+
+        {result.status === 'success' && (
+          <Accordion variant="contained">
+            <Accordion.Item value="raw">
+              <Accordion.Control>
+                <Text size="xs">Raw JSON Response</Text>
+              </Accordion.Control>
+              <Accordion.Panel>
+                <ScrollArea h={300}>
+                  {/* NOTE(review): this dumps parsed_response, not raw_response — confirm intent. */}
+                  <Code block style={{ fontSize: 11 }}>
+                    {JSON.stringify(result.parsed_response, null, 2)}
+                  </Code>
+                </ScrollArea>
+              </Accordion.Panel>
+            </Accordion.Item>
+          </Accordion>
+        )}
+      </Stack>
+    </Card>
+  );
+}
+
+// ── Health Score Display ──
+
+/**
+ * Renders a health-score style model output: score ring, label badge,
+ * summary, factors and recommendations.
+ *
+ * The payload comes from an arbitrary model, so list fields are validated
+ * before being iterated.
+ *
+ * @param data Parsed output of the model being rendered.
+ * @param allParsed Parsed outputs of all successful models in the run; the
+ *   score ring is highlighted when their numeric scores are not all equal.
+ */
+function HealthScoreDisplay({ data, allParsed }: { data: any; allParsed: any[] }) {
+  const score = data.score ?? data.raw_text;
+  const label = data.label || '';
+  const summary = data.summary || '';
+  // A model may return a string or object here; `.map` on a non-array would crash the page.
+  const factors: any[] = Array.isArray(data.factors) ? data.factors : [];
+  const recommendations: any[] = Array.isArray(data.recommendations) ? data.recommendations : [];
+
+  // Check if score differs from other models
+  const scores = allParsed.map((p) => p?.score).filter((s) => typeof s === 'number');
+  const scoreDiffers = scores.length > 1 && !scores.every((s) => s === scores[0]);
+
+  const labelColor: Record<string, string> = {
+    Excellent: 'green', Good: 'teal', Fair: 'yellow',
+    'Needs Attention': 'orange', 'At Risk': 'red', Critical: 'red',
+  };
+
+  // The ring is a percentage; clamp the drawn value, but still print the raw score.
+  const ringValue = typeof score === 'number' ? Math.min(100, Math.max(0, score)) : 0;
+
+  return (
+    <Stack gap="sm">
+      {typeof score === 'number' && (
+        <Group justify="center">
+          <Box bg={scoreDiffers ? 'yellow.0' : undefined} p="xs" style={{ borderRadius: 8 }}>
+            <RingProgress
+              size={100}
+              thickness={10}
+              roundCaps
+              sections={[{ value: ringValue, color: labelColor[label] || 'blue' }]}
+              label={
+                <Text ta="center" fw={700} size="lg">{score}</Text>
+              }
+            />
+          </Box>
+        </Group>
+      )}
+
+      {label && (
+        <Group justify="center">
+          <Badge color={labelColor[label] || 'gray'} size="lg">{label}</Badge>
+        </Group>
+      )}
+
+      {summary && <Text size="sm">{summary}</Text>}
+
+      {factors.length > 0 && (
+        <>
+          <Text size="xs" fw={600} c="dimmed" tt="uppercase">Factors</Text>
+          {factors.map((f: any, i: number) => (
+            <Group key={i} gap="xs" wrap="nowrap">
+              <Badge
+                size="xs"
+                variant="light"
+                color={f.impact === 'positive' ? 'green' : f.impact === 'negative' ? 'red' : 'gray'}
+              >
+                {f.impact}
+              </Badge>
+              <Text size="xs" style={{ flex: 1 }}><b>{f.name}:</b> {f.detail}</Text>
+            </Group>
+          ))}
+        </>
+      )}
+
+      {recommendations.length > 0 && (
+        <>
+          <Text size="xs" fw={600} c="dimmed" tt="uppercase">Recommendations</Text>
+          {recommendations.map((r: any, i: number) => (
+            <Group key={i} gap="xs" wrap="nowrap">
+              <Badge
+                size="xs"
+                variant="light"
+                color={r.priority === 'high' ? 'red' : r.priority === 'medium' ? 'yellow' : 'blue'}
+              >
+                {r.priority}
+              </Badge>
+              <Text size="xs" style={{ flex: 1 }}>{r.text}</Text>
+            </Group>
+          ))}
+        </>
+      )}
+    </Stack>
+  );
+}
+
+// ── Investment Display ──
+
+/**
+ * Renders an investment-recommendation style model output: overall
+ * assessment, recommendation cards and risk notes.
+ *
+ * The payload comes from an arbitrary model, so list fields are validated
+ * before being iterated.
+ *
+ * @param data Parsed output of the model being rendered.
+ * @param allParsed Parsed outputs of all successful models in the run; the
+ *   recommendation count badge is highlighted when the counts are not all equal.
+ */
+function InvestmentDisplay({ data, allParsed }: { data: any; allParsed: any[] }) {
+  // A model may return a non-array here; `.map` on it would crash the page.
+  const recommendations: any[] = Array.isArray(data.recommendations) ? data.recommendations : [];
+  const overall = data.overall_assessment || '';
+  const riskNotes: any[] = Array.isArray(data.risk_notes) ? data.risk_notes : [];
+
+  const recCounts = allParsed.map((p) => (Array.isArray(p?.recommendations) ? p.recommendations.length : 0));
+  const countDiffers = recCounts.length > 1 && !recCounts.every((c) => c === recCounts[0]);
+
+  const typeColors: Record<string, string> = {
+    cd_ladder: 'violet', new_investment: 'blue', reallocation: 'teal',
+    maturity_action: 'orange', liquidity_warning: 'red', general: 'gray',
+  };
+
+  return (
+    <Stack gap="sm">
+      {overall && (
+        <Paper p="xs" bg="gray.0" radius="sm">
+          <Text size="sm">{overall}</Text>
+        </Paper>
+      )}
+
+      {recommendations.length > 0 && (
+        <>
+          <Group gap="xs">
+            <Text size="xs" fw={600} c="dimmed" tt="uppercase">
+              Recommendations
+            </Text>
+            <Badge
+              size="xs"
+              variant="light"
+              color={countDiffers ? 'yellow' : 'gray'}
+            >
+              {recommendations.length}
+            </Badge>
+          </Group>
+          {recommendations.map((rec: any, i: number) => (
+            <Card key={i} withBorder padding="xs" radius="sm">
+              <Stack gap={4}>
+                <Group gap="xs">
+                  <Badge size="xs" color={typeColors[rec.type] || 'gray'}>{rec.type}</Badge>
+                  <Badge size="xs" variant="light" color={rec.priority === 'high' ? 'red' : rec.priority === 'medium' ? 'yellow' : 'blue'}>
+                    {rec.priority}
+                  </Badge>
+                  {rec.fund_type && <Badge size="xs" variant="outline">{rec.fund_type}</Badge>}
+                </Group>
+                <Text size="sm" fw={600}>{rec.title}</Text>
+                <Text size="xs">{rec.summary}</Text>
+                {/* Ternary instead of `&&`: an amount of 0 would make React print a stray "0". */}
+                {rec.suggested_amount ? (
+                  <Text size="xs" c="dimmed">
+                    Amount: ${rec.suggested_amount.toLocaleString()}
+                    {rec.suggested_rate ? ` | Rate: ${rec.suggested_rate}%` : ''}
+                    {rec.suggested_term ? ` | Term: ${rec.suggested_term}` : ''}
+                  </Text>
+                ) : null}
+              </Stack>
+            </Card>
+          ))}
+        </>
+      )}
+
+      {riskNotes.length > 0 && (
+        <>
+          <Text size="xs" fw={600} c="dimmed" tt="uppercase">Risk Notes</Text>
+          {riskNotes.map((note: string, i: number) => (
+            <Group key={i} gap="xs" wrap="nowrap">
+              <IconAlertTriangle size={14} color="orange" />
+              <Text size="xs">{note}</Text>
+            </Group>
+          ))}
+        </>
+      )}
+    </Stack>
+  );
+}
+
+// ── Main Page ──
+
+/**
+ * Admin "AI Benchmarking" page: a header plus three tabs (model
+ * configuration, run comparison, history). The run tab is selected initially.
+ */
+export function AdminShadowAiPage() {
+  // One entry per tab, in display order; drives both the tab strip and the panels.
+  const tabs = [
+    { value: 'config', label: 'Model Configuration', icon: <IconSettings size={16} />, panel: <ModelConfigTab /> },
+    { value: 'run', label: 'Run Comparison', icon: <IconPlayerPlay size={16} />, panel: <RunComparisonTab /> },
+    { value: 'history', label: 'History', icon: <IconHistory size={16} />, panel: <HistoryTab /> },
+  ];
+
+  return (
+    <Stack gap="lg" p="md">
+      <Group>
+        <IconScale size={28} />
+        <Title order={2}>AI Benchmarking</Title>
+      </Group>
+      <Text c="dimmed" size="sm">
+        Compare AI model outputs side-by-side using real tenant data.
+        Configure alternate models, run shadow comparisons, and review historical results.
+      </Text>
+
+      <Tabs defaultValue="run">
+        <Tabs.List>
+          {tabs.map((tab) => (
+            <Tabs.Tab key={tab.value} value={tab.value} leftSection={tab.icon}>
+              {tab.label}
+            </Tabs.Tab>
+          ))}
+        </Tabs.List>
+
+        {tabs.map((tab) => (
+          <Tabs.Panel key={tab.value} value={tab.value} pt="md">
+            {tab.panel}
+          </Tabs.Panel>
+        ))}
+      </Tabs>
+    </Stack>
+  );
+}