feat: add shadow AI benchmarking for admin model comparison

Add a new admin-only feature that allows the platform owner to benchmark
the production AI model against up to 2 alternate models (any OpenAI-compatible
API) using real tenant data, without impacting users.

Backend:
- Shared AI caller utility (ai-caller.ts) for OpenAI-compatible endpoints
- Shadow AI module with service, controller, and 3 entities
- 6 admin API endpoints for model config CRUD, run trigger, and history
- Auto-creates shadow_ai_models, shadow_runs, shadow_run_results tables
- Exposes health-scores and investment-planning prompt builders for reuse

Frontend:
- New admin page at /admin/shadow-ai with 3 tabs:
  - Model Configuration (production + 2 alternate slots)
  - Run Comparison (tenant select, feature select, side-by-side results)
  - History (filterable run log with detail drill-down)
- Full side-by-side output display with diff highlighting
- Sidebar navigation link for AI Benchmarking

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
JoeBot
2026-04-05 07:16:38 -04:00
parent 629d112850
commit 4797669591
16 changed files with 2461 additions and 6 deletions

View File

@@ -30,6 +30,7 @@ import { UserPreferencesPage } from './pages/preferences/UserPreferencesPage';
import { OrgMembersPage } from './pages/org-members/OrgMembersPage';
import { AdminPage } from './pages/admin/AdminPage';
import { AdminIdeasPage } from './pages/admin/AdminIdeasPage';
import { AdminShadowAiPage } from './pages/admin/AdminShadowAiPage';
import { AssessmentGroupsPage } from './pages/assessment-groups/AssessmentGroupsPage';
import { CashFlowForecastPage } from './pages/cash-flow/CashFlowForecastPage';
import { MonthlyActualsPage } from './pages/monthly-actuals/MonthlyActualsPage';
@@ -135,6 +136,7 @@ export function App() {
>
<Route index element={<AdminPage />} />
<Route path="ideas" element={<AdminIdeasPage />} />
<Route path="shadow-ai" element={<AdminShadowAiPage />} />
</Route>
{/* Main app routes (require auth + org) */}

View File

@@ -140,6 +140,13 @@ export function Sidebar({ onNavigate }: SidebarProps) {
onClick={() => go('/admin/ideas')}
color="yellow"
/>
<NavLink
label="AI Benchmarking"
leftSection={<IconScale size={18} />}
active={location.pathname === '/admin/shadow-ai'}
onClick={() => go('/admin/shadow-ai')}
color="violet"
/>
{organizations && organizations.length > 0 && (
<>
<Divider my="sm" />
@@ -245,6 +252,13 @@ export function Sidebar({ onNavigate }: SidebarProps) {
onClick={() => go('/admin/ideas')}
color="yellow"
/>
<NavLink
label="AI Benchmarking"
leftSection={<IconScale size={18} />}
active={location.pathname === '/admin/shadow-ai'}
onClick={() => go('/admin/shadow-ai')}
color="violet"
/>
</>
)}
</ScrollArea>

View File

@@ -0,0 +1,780 @@
import { useState, useEffect } from 'react';
import {
Title, Text, Card, SimpleGrid, Group, Stack, Badge, Loader, Center,
Tabs, TextInput, Button, PasswordInput, Select, Table, Accordion,
Switch, Paper, RingProgress, Divider, Alert, Code, ScrollArea, Box,
Tooltip, ActionIcon,
} from '@mantine/core';
import {
IconScale, IconSettings, IconPlayerPlay, IconHistory,
IconCheck, IconX, IconAlertTriangle, IconClock, IconTrash,
IconRefresh, IconArrowRight, IconChevronDown,
} from '@tabler/icons-react';
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import api from '../../services/api';
// ── Interfaces ──
interface ShadowModel {
id: string;
slot: string;
name: string;
api_url: string;
api_key: string;
model_name: string;
is_active: boolean;
created_at: string;
updated_at: string;
}
interface ShadowRunResult {
id: string;
run_id: string;
model_role: string;
model_name: string;
api_url: string;
raw_response: string;
parsed_response: any;
response_time_ms: number;
token_usage: any;
status: string;
error_message: string;
created_at: string;
}
interface ShadowRun {
id: string;
tenant_id: string;
tenant_name: string;
feature: string;
status: string;
prompt_messages: any;
started_at: string;
completed_at: string;
created_at: string;
results: ShadowRunResult[];
result_count?: string;
success_count?: string;
}
interface AdminOrg {
id: string;
name: string;
status: string;
}
// ── Helper Functions ──
const featureLabels: Record<string, string> = {
operating_health: 'Operating Health',
reserve_health: 'Reserve Health',
investment_recommendations: 'Investment Recommendations',
};
const roleLabels: Record<string, string> = {
production: 'Production',
alternate_a: 'Alternate A',
alternate_b: 'Alternate B',
};
const statusColor: Record<string, string> = {
running: 'blue',
completed: 'green',
partial: 'yellow',
failed: 'red',
pending: 'gray',
success: 'green',
error: 'red',
};
function formatDuration(ms: number | null): string {
if (!ms) return '-';
if (ms < 1000) return `${ms}ms`;
return `${(ms / 1000).toFixed(1)}s`;
}
function formatDate(d: string): string {
if (!d) return '-';
return new Date(d).toLocaleString();
}
// ── Model Configuration Tab ──
function ModelConfigTab() {
const queryClient = useQueryClient();
const { data: models, isLoading } = useQuery<ShadowModel[]>({
queryKey: ['shadow-ai-models'],
queryFn: () => api.get('/admin/shadow-ai/models').then((r) => r.data),
});
const modelA = models?.find((m) => m.slot === 'A');
const modelB = models?.find((m) => m.slot === 'B');
return (
<Stack>
<Text size="sm" c="dimmed">
Configure alternate AI models to benchmark against the production model.
Each model can use any OpenAI-compatible API endpoint.
</Text>
<SimpleGrid cols={{ base: 1, md: 3 }}>
<ProductionModelCard />
<ModelSlotCard slot="A" model={modelA} isLoading={isLoading} />
<ModelSlotCard slot="B" model={modelB} isLoading={isLoading} />
</SimpleGrid>
</Stack>
);
}
function ProductionModelCard() {
return (
<Card withBorder shadow="sm">
<Stack gap="sm">
<Group justify="space-between">
<Text fw={600}>Production Model</Text>
<Badge color="green" variant="light">Active</Badge>
</Group>
<Divider />
<Text size="sm" c="dimmed">Configured via environment variables</Text>
<TextInput label="Model" value="(from AI_MODEL env var)" readOnly disabled size="sm" />
<TextInput label="API URL" value="(from AI_API_URL env var)" readOnly disabled size="sm" />
<Text size="xs" c="dimmed" mt="xs">
Production model settings are managed through server environment
variables and cannot be changed from the UI.
</Text>
</Stack>
</Card>
);
}
function ModelSlotCard({ slot, model, isLoading }: { slot: string; model?: ShadowModel; isLoading: boolean }) {
const queryClient = useQueryClient();
const [name, setName] = useState('');
const [apiUrl, setApiUrl] = useState('');
const [apiKey, setApiKey] = useState('');
const [modelName, setModelName] = useState('');
const [isActive, setIsActive] = useState(true);
useEffect(() => {
if (model) {
setName(model.name);
setApiUrl(model.api_url);
setApiKey(model.api_key);
setModelName(model.model_name);
setIsActive(model.is_active);
}
}, [model]);
const saveMutation = useMutation({
mutationFn: () => api.put(`/admin/shadow-ai/models/${slot}`, { name, apiUrl, apiKey, modelName, isActive }),
onSuccess: () => queryClient.invalidateQueries({ queryKey: ['shadow-ai-models'] }),
});
const deleteMutation = useMutation({
mutationFn: () => api.delete(`/admin/shadow-ai/models/${slot}`),
onSuccess: () => {
setName(''); setApiUrl(''); setApiKey(''); setModelName(''); setIsActive(true);
queryClient.invalidateQueries({ queryKey: ['shadow-ai-models'] });
},
});
if (isLoading) return <Card withBorder shadow="sm"><Center h={200}><Loader size="sm" /></Center></Card>;
return (
<Card withBorder shadow="sm">
<Stack gap="sm">
<Group justify="space-between">
<Text fw={600}>Alternate {slot}</Text>
{model ? (
<Badge color={isActive ? 'blue' : 'gray'} variant="light">
{isActive ? 'Active' : 'Inactive'}
</Badge>
) : (
<Badge color="gray" variant="light">Not configured</Badge>
)}
</Group>
<Divider />
<TextInput label="Display Name" placeholder="e.g. GPT-4o" value={name} onChange={(e) => setName(e.target.value)} size="sm" />
<TextInput label="API URL" placeholder="https://api.openai.com/v1" value={apiUrl} onChange={(e) => setApiUrl(e.target.value)} size="sm" />
<PasswordInput label="API Key" placeholder="sk-..." value={apiKey} onChange={(e) => setApiKey(e.target.value)} size="sm" />
<TextInput label="Model Name" placeholder="gpt-4o" value={modelName} onChange={(e) => setModelName(e.target.value)} size="sm" />
<Switch label="Active" checked={isActive} onChange={(e) => setIsActive(e.currentTarget.checked)} />
<Group>
<Button
size="sm"
onClick={() => saveMutation.mutate()}
loading={saveMutation.isPending}
disabled={!name || !apiUrl || !apiKey || !modelName}
>
Save
</Button>
{model && (
<Button size="sm" color="red" variant="light" onClick={() => deleteMutation.mutate()} loading={deleteMutation.isPending}>
<IconTrash size={16} />
</Button>
)}
</Group>
{saveMutation.isError && <Text size="xs" c="red">Failed to save</Text>}
{saveMutation.isSuccess && <Text size="xs" c="green">Saved</Text>}
</Stack>
</Card>
);
}
// ── Run Comparison Tab ──
function RunComparisonTab() {
const queryClient = useQueryClient();
const [tenantId, setTenantId] = useState<string | null>(null);
const [feature, setFeature] = useState<string | null>(null);
const [activeRunId, setActiveRunId] = useState<string | null>(null);
const { data: orgs } = useQuery<AdminOrg[]>({
queryKey: ['admin-orgs'],
queryFn: () => api.get('/admin/organizations').then((r) => r.data),
});
const triggerMutation = useMutation({
mutationFn: () => api.post('/admin/shadow-ai/runs', { tenantId, feature }),
onSuccess: (res) => {
setActiveRunId(res.data.runId);
},
});
const { data: activeRun } = useQuery<ShadowRun>({
queryKey: ['shadow-ai-run', activeRunId],
queryFn: () => api.get(`/admin/shadow-ai/runs/${activeRunId}`).then((r) => r.data),
enabled: !!activeRunId,
refetchInterval: (query) => {
const run = query.state.data;
return run?.status === 'running' ? 3000 : false;
},
});
const orgOptions = (orgs || [])
.filter((o) => o.status === 'active')
.map((o) => ({ value: o.id, label: o.name }));
const featureOptions = [
{ value: 'operating_health', label: 'Operating Health Score' },
{ value: 'reserve_health', label: 'Reserve Health Score' },
{ value: 'investment_recommendations', label: 'Investment Recommendations' },
];
return (
<Stack>
<Card withBorder shadow="sm">
<Stack gap="md">
<Text fw={600}>Run Shadow Comparison</Text>
<SimpleGrid cols={{ base: 1, sm: 3 }}>
<Select
label="Tenant"
placeholder="Select a tenant"
data={orgOptions}
value={tenantId}
onChange={setTenantId}
searchable
/>
<Select
label="AI Feature"
placeholder="Select feature"
data={featureOptions}
value={feature}
onChange={setFeature}
/>
<Stack justify="flex-end">
<Button
leftSection={<IconPlayerPlay size={16} />}
onClick={() => triggerMutation.mutate()}
loading={triggerMutation.isPending}
disabled={!tenantId || !feature}
>
Run Comparison
</Button>
</Stack>
</SimpleGrid>
{triggerMutation.isError && (
<Alert color="red" icon={<IconAlertTriangle size={16} />}>
Failed to start comparison. Ensure at least one alternate model is configured.
</Alert>
)}
</Stack>
</Card>
{activeRun && (
<Card withBorder shadow="sm">
<Stack gap="md">
<Group justify="space-between">
<Group>
<Text fw={600}>
{featureLabels[activeRun.feature] || activeRun.feature}
</Text>
<Badge color={statusColor[activeRun.status]}>{activeRun.status}</Badge>
</Group>
{activeRun.tenant_name && (
<Text size="sm" c="dimmed">Tenant: {activeRun.tenant_name}</Text>
)}
</Group>
{activeRun.status === 'running' && (
<Center py="md">
<Stack align="center" gap="xs">
<Loader size="md" />
<Text size="sm" c="dimmed">Running models... This may take a few minutes.</Text>
<Group gap="xs">
{(activeRun.results || []).map((r) => (
<Badge key={r.model_role} color={statusColor[r.status]} variant="light">
{roleLabels[r.model_role]}: {r.status}
</Badge>
))}
</Group>
</Stack>
</Center>
)}
{activeRun.status !== 'running' && activeRun.results && (
<ComparisonResults results={activeRun.results} feature={activeRun.feature} />
)}
</Stack>
</Card>
)}
</Stack>
);
}
// ── History Tab ──
function HistoryTab() {
const [selectedRunId, setSelectedRunId] = useState<string | null>(null);
const [tenantFilter, setTenantFilter] = useState<string | null>(null);
const [featureFilter, setFeatureFilter] = useState<string | null>(null);
const { data: orgs } = useQuery<AdminOrg[]>({
queryKey: ['admin-orgs'],
queryFn: () => api.get('/admin/organizations').then((r) => r.data),
});
const { data: historyData, isLoading } = useQuery({
queryKey: ['shadow-ai-runs', tenantFilter, featureFilter],
queryFn: () => {
const params = new URLSearchParams();
if (tenantFilter) params.set('tenantId', tenantFilter);
if (featureFilter) params.set('feature', featureFilter);
params.set('limit', '50');
return api.get(`/admin/shadow-ai/runs?${params}`).then((r) => r.data);
},
});
const { data: selectedRun } = useQuery<ShadowRun>({
queryKey: ['shadow-ai-run', selectedRunId],
queryFn: () => api.get(`/admin/shadow-ai/runs/${selectedRunId}`).then((r) => r.data),
enabled: !!selectedRunId,
});
const orgOptions = [
{ value: '', label: 'All Tenants' },
...(orgs || []).map((o) => ({ value: o.id, label: o.name })),
];
const featureOptions = [
{ value: '', label: 'All Features' },
{ value: 'operating_health', label: 'Operating Health' },
{ value: 'reserve_health', label: 'Reserve Health' },
{ value: 'investment_recommendations', label: 'Investment Recommendations' },
];
const runs: ShadowRun[] = historyData?.runs || [];
return (
<Stack>
<Group>
<Select
size="sm"
placeholder="Filter by tenant"
data={orgOptions}
value={tenantFilter || ''}
onChange={(v) => setTenantFilter(v || null)}
clearable
w={200}
/>
<Select
size="sm"
placeholder="Filter by feature"
data={featureOptions}
value={featureFilter || ''}
onChange={(v) => setFeatureFilter(v || null)}
clearable
w={200}
/>
</Group>
{isLoading ? (
<Center py="xl"><Loader /></Center>
) : runs.length === 0 ? (
<Text c="dimmed" ta="center" py="xl">No shadow runs found.</Text>
) : (
<Table striped highlightOnHover>
<Table.Thead>
<Table.Tr>
<Table.Th>Date</Table.Th>
<Table.Th>Tenant</Table.Th>
<Table.Th>Feature</Table.Th>
<Table.Th>Status</Table.Th>
<Table.Th>Models</Table.Th>
<Table.Th>Duration</Table.Th>
<Table.Th></Table.Th>
</Table.Tr>
</Table.Thead>
<Table.Tbody>
{runs.map((run) => {
const duration = run.completed_at && run.started_at
? new Date(run.completed_at).getTime() - new Date(run.started_at).getTime()
: null;
return (
<Table.Tr
key={run.id}
style={{ cursor: 'pointer' }}
onClick={() => setSelectedRunId(run.id)}
bg={selectedRunId === run.id ? 'var(--mantine-color-blue-light)' : undefined}
>
<Table.Td>{formatDate(run.created_at)}</Table.Td>
<Table.Td>{run.tenant_name || '-'}</Table.Td>
<Table.Td>{featureLabels[run.feature] || run.feature}</Table.Td>
<Table.Td><Badge color={statusColor[run.status]} size="sm">{run.status}</Badge></Table.Td>
<Table.Td>{run.success_count || '0'}/{run.result_count || '0'}</Table.Td>
<Table.Td>{formatDuration(duration)}</Table.Td>
<Table.Td><IconArrowRight size={14} /></Table.Td>
</Table.Tr>
);
})}
</Table.Tbody>
</Table>
)}
{selectedRun && selectedRun.results && (
<Card withBorder shadow="sm" mt="md">
<Stack gap="md">
<Group justify="space-between">
<Group>
<Text fw={600}>{featureLabels[selectedRun.feature] || selectedRun.feature}</Text>
<Badge color={statusColor[selectedRun.status]}>{selectedRun.status}</Badge>
</Group>
<Text size="sm" c="dimmed">
{selectedRun.tenant_name} | {formatDate(selectedRun.created_at)}
</Text>
</Group>
<ComparisonResults results={selectedRun.results} feature={selectedRun.feature} />
</Stack>
</Card>
)}
</Stack>
);
}
// ── Comparison Results Component ──
function ComparisonResults({ results, feature }: { results: ShadowRunResult[]; feature: string }) {
const isHealthScore = feature === 'operating_health' || feature === 'reserve_health';
// Collect all parsed values for diff highlighting
const parsedValues = results
.filter((r) => r.status === 'success' && r.parsed_response)
.map((r) => r.parsed_response);
return (
<SimpleGrid cols={{ base: 1, md: Math.min(results.length, 3) }}>
{results.map((result) => (
<ResultCard
key={result.model_role}
result={result}
isHealthScore={isHealthScore}
allParsed={parsedValues}
/>
))}
</SimpleGrid>
);
}
function ResultCard({
result,
isHealthScore,
allParsed,
}: {
result: ShadowRunResult;
isHealthScore: boolean;
allParsed: any[];
}) {
const roleColor: Record<string, string> = {
production: 'green',
alternate_a: 'blue',
alternate_b: 'violet',
};
return (
<Card withBorder shadow="xs" padding="md">
<Stack gap="sm">
<Group justify="space-between">
<Group gap="xs">
<Badge color={roleColor[result.model_role] || 'gray'} variant="filled">
{roleLabels[result.model_role]}
</Badge>
</Group>
<Badge
color={statusColor[result.status]}
variant="light"
leftSection={result.status === 'success' ? <IconCheck size={12} /> : result.status === 'error' ? <IconX size={12} /> : <IconClock size={12} />}
>
{result.status}
</Badge>
</Group>
<Text size="xs" c="dimmed" truncate>{result.model_name}</Text>
{result.response_time_ms && (
<Badge color="gray" variant="light" size="sm">
{formatDuration(result.response_time_ms)}
</Badge>
)}
{result.token_usage && (
<Text size="xs" c="dimmed">
Tokens: {result.token_usage.prompt_tokens || '?'} prompt / {result.token_usage.completion_tokens || '?'} completion
</Text>
)}
<Divider />
{result.status === 'error' && (
<Alert color="red" icon={<IconAlertTriangle size={16} />}>
<Text size="sm">{result.error_message || 'Unknown error'}</Text>
</Alert>
)}
{result.status === 'success' && result.parsed_response && (
isHealthScore
? <HealthScoreDisplay data={result.parsed_response} allParsed={allParsed} />
: <InvestmentDisplay data={result.parsed_response} allParsed={allParsed} />
)}
{result.status === 'success' && (
<Accordion variant="contained">
<Accordion.Item value="raw">
<Accordion.Control>
<Text size="xs">Raw JSON Response</Text>
</Accordion.Control>
<Accordion.Panel>
<ScrollArea h={300}>
<Code block style={{ fontSize: 11 }}>
{JSON.stringify(result.parsed_response, null, 2)}
</Code>
</ScrollArea>
</Accordion.Panel>
</Accordion.Item>
</Accordion>
)}
</Stack>
</Card>
);
}
// ── Health Score Display ──
function HealthScoreDisplay({ data, allParsed }: { data: any; allParsed: any[] }) {
const score = data.score ?? data.raw_text;
const label = data.label || '';
const summary = data.summary || '';
const factors = data.factors || [];
const recommendations = data.recommendations || [];
// Check if score differs from other models
const scores = allParsed.map((p) => p.score).filter((s) => typeof s === 'number');
const scoreDiffers = scores.length > 1 && !scores.every((s) => s === scores[0]);
const labelColor: Record<string, string> = {
Excellent: 'green', Good: 'teal', Fair: 'yellow',
'Needs Attention': 'orange', 'At Risk': 'red', Critical: 'red',
};
return (
<Stack gap="sm">
{typeof score === 'number' && (
<Group justify="center">
<Box bg={scoreDiffers ? 'yellow.0' : undefined} p="xs" style={{ borderRadius: 8 }}>
<RingProgress
size={100}
thickness={10}
roundCaps
sections={[{ value: score, color: labelColor[label] || 'blue' }]}
label={
<Text ta="center" fw={700} size="lg">{score}</Text>
}
/>
</Box>
</Group>
)}
{label && (
<Group justify="center">
<Badge color={labelColor[label] || 'gray'} size="lg">{label}</Badge>
</Group>
)}
{summary && <Text size="sm">{summary}</Text>}
{factors.length > 0 && (
<>
<Text size="xs" fw={600} c="dimmed" tt="uppercase">Factors</Text>
{factors.map((f: any, i: number) => (
<Group key={i} gap="xs" wrap="nowrap">
<Badge
size="xs"
variant="light"
color={f.impact === 'positive' ? 'green' : f.impact === 'negative' ? 'red' : 'gray'}
>
{f.impact}
</Badge>
<Text size="xs" style={{ flex: 1 }}><b>{f.name}:</b> {f.detail}</Text>
</Group>
))}
</>
)}
{recommendations.length > 0 && (
<>
<Text size="xs" fw={600} c="dimmed" tt="uppercase">Recommendations</Text>
{recommendations.map((r: any, i: number) => (
<Group key={i} gap="xs" wrap="nowrap">
<Badge
size="xs"
variant="light"
color={r.priority === 'high' ? 'red' : r.priority === 'medium' ? 'yellow' : 'blue'}
>
{r.priority}
</Badge>
<Text size="xs" style={{ flex: 1 }}>{r.text}</Text>
</Group>
))}
</>
)}
</Stack>
);
}
// ── Investment Display ──
function InvestmentDisplay({ data, allParsed }: { data: any; allParsed: any[] }) {
const recommendations = data.recommendations || [];
const overall = data.overall_assessment || '';
const riskNotes = data.risk_notes || [];
const recCounts = allParsed.map((p) => (p.recommendations || []).length);
const countDiffers = recCounts.length > 1 && !recCounts.every((c) => c === recCounts[0]);
const typeColors: Record<string, string> = {
cd_ladder: 'violet', new_investment: 'blue', reallocation: 'teal',
maturity_action: 'orange', liquidity_warning: 'red', general: 'gray',
};
return (
<Stack gap="sm">
{overall && (
<Paper p="xs" bg="gray.0" radius="sm">
<Text size="sm">{overall}</Text>
</Paper>
)}
{recommendations.length > 0 && (
<>
<Group gap="xs">
<Text size="xs" fw={600} c="dimmed" tt="uppercase">
Recommendations
</Text>
<Badge
size="xs"
variant="light"
color={countDiffers ? 'yellow' : 'gray'}
>
{recommendations.length}
</Badge>
</Group>
{recommendations.map((rec: any, i: number) => (
<Card key={i} withBorder padding="xs" radius="sm">
<Stack gap={4}>
<Group gap="xs">
<Badge size="xs" color={typeColors[rec.type] || 'gray'}>{rec.type}</Badge>
<Badge size="xs" variant="light" color={rec.priority === 'high' ? 'red' : rec.priority === 'medium' ? 'yellow' : 'blue'}>
{rec.priority}
</Badge>
{rec.fund_type && <Badge size="xs" variant="outline">{rec.fund_type}</Badge>}
</Group>
<Text size="sm" fw={600}>{rec.title}</Text>
<Text size="xs">{rec.summary}</Text>
{rec.suggested_amount && (
<Text size="xs" c="dimmed">
Amount: ${rec.suggested_amount.toLocaleString()}
{rec.suggested_rate ? ` | Rate: ${rec.suggested_rate}%` : ''}
{rec.suggested_term ? ` | Term: ${rec.suggested_term}` : ''}
</Text>
)}
</Stack>
</Card>
))}
</>
)}
{riskNotes.length > 0 && (
<>
<Text size="xs" fw={600} c="dimmed" tt="uppercase">Risk Notes</Text>
{riskNotes.map((note: string, i: number) => (
<Group key={i} gap="xs" wrap="nowrap">
<IconAlertTriangle size={14} color="orange" />
<Text size="xs">{note}</Text>
</Group>
))}
</>
)}
</Stack>
);
}
// ── Main Page ──
export function AdminShadowAiPage() {
return (
<Stack gap="lg" p="md">
<Group>
<IconScale size={28} />
<Title order={2}>AI Benchmarking</Title>
</Group>
<Text c="dimmed" size="sm">
Compare AI model outputs side-by-side using real tenant data.
Configure alternate models, run shadow comparisons, and review historical results.
</Text>
<Tabs defaultValue="run">
<Tabs.List>
<Tabs.Tab value="config" leftSection={<IconSettings size={16} />}>
Model Configuration
</Tabs.Tab>
<Tabs.Tab value="run" leftSection={<IconPlayerPlay size={16} />}>
Run Comparison
</Tabs.Tab>
<Tabs.Tab value="history" leftSection={<IconHistory size={16} />}>
History
</Tabs.Tab>
</Tabs.List>
<Tabs.Panel value="config" pt="md">
<ModelConfigTab />
</Tabs.Panel>
<Tabs.Panel value="run" pt="md">
<RunComparisonTab />
</Tabs.Panel>
<Tabs.Panel value="history" pt="md">
<HistoryTab />
</Tabs.Panel>
</Tabs>
</Stack>
);
}