feat: add shadow AI benchmarking for admin model comparison
Add a new admin-only feature that allows the platform owner to benchmark the production AI model against up to 2 alternate models (any OpenAI-compatible API) using real tenant data, without impacting users.

Backend:
- Shared AI caller utility (ai-caller.ts) for OpenAI-compatible endpoints
- Shadow AI module with service, controller, and 3 entities
- 6 admin API endpoints for model config CRUD, run trigger, and history
- Auto-creates shadow_ai_models, shadow_runs, shadow_run_results tables
- Exposes health-scores and investment-planning prompt builders for reuse

Frontend:
- New admin page at /admin/shadow-ai with 3 tabs:
  - Model Configuration (production + 2 alternate slots)
  - Run Comparison (tenant select, feature select, side-by-side results)
  - History (filterable run log with detail drill-down)
- Full side-by-side output display with diff highlighting
- Sidebar navigation link for AI Benchmarking

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -30,6 +30,7 @@ import { UserPreferencesPage } from './pages/preferences/UserPreferencesPage';
|
||||
import { OrgMembersPage } from './pages/org-members/OrgMembersPage';
|
||||
import { AdminPage } from './pages/admin/AdminPage';
|
||||
import { AdminIdeasPage } from './pages/admin/AdminIdeasPage';
|
||||
import { AdminShadowAiPage } from './pages/admin/AdminShadowAiPage';
|
||||
import { AssessmentGroupsPage } from './pages/assessment-groups/AssessmentGroupsPage';
|
||||
import { CashFlowForecastPage } from './pages/cash-flow/CashFlowForecastPage';
|
||||
import { MonthlyActualsPage } from './pages/monthly-actuals/MonthlyActualsPage';
|
||||
@@ -135,6 +136,7 @@ export function App() {
|
||||
>
|
||||
<Route index element={<AdminPage />} />
|
||||
<Route path="ideas" element={<AdminIdeasPage />} />
|
||||
<Route path="shadow-ai" element={<AdminShadowAiPage />} />
|
||||
</Route>
|
||||
|
||||
{/* Main app routes (require auth + org) */}
|
||||
|
||||
@@ -140,6 +140,13 @@ export function Sidebar({ onNavigate }: SidebarProps) {
|
||||
onClick={() => go('/admin/ideas')}
|
||||
color="yellow"
|
||||
/>
|
||||
<NavLink
|
||||
label="AI Benchmarking"
|
||||
leftSection={<IconScale size={18} />}
|
||||
active={location.pathname === '/admin/shadow-ai'}
|
||||
onClick={() => go('/admin/shadow-ai')}
|
||||
color="violet"
|
||||
/>
|
||||
{organizations && organizations.length > 0 && (
|
||||
<>
|
||||
<Divider my="sm" />
|
||||
@@ -245,6 +252,13 @@ export function Sidebar({ onNavigate }: SidebarProps) {
|
||||
onClick={() => go('/admin/ideas')}
|
||||
color="yellow"
|
||||
/>
|
||||
<NavLink
|
||||
label="AI Benchmarking"
|
||||
leftSection={<IconScale size={18} />}
|
||||
active={location.pathname === '/admin/shadow-ai'}
|
||||
onClick={() => go('/admin/shadow-ai')}
|
||||
color="violet"
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
</ScrollArea>
|
||||
|
||||
780
frontend/src/pages/admin/AdminShadowAiPage.tsx
Normal file
780
frontend/src/pages/admin/AdminShadowAiPage.tsx
Normal file
@@ -0,0 +1,780 @@
|
||||
import { useState, useEffect } from 'react';
|
||||
import {
|
||||
Title, Text, Card, SimpleGrid, Group, Stack, Badge, Loader, Center,
|
||||
Tabs, TextInput, Button, PasswordInput, Select, Table, Accordion,
|
||||
Switch, Paper, RingProgress, Divider, Alert, Code, ScrollArea, Box,
|
||||
Tooltip, ActionIcon,
|
||||
} from '@mantine/core';
|
||||
import {
|
||||
IconScale, IconSettings, IconPlayerPlay, IconHistory,
|
||||
IconCheck, IconX, IconAlertTriangle, IconClock, IconTrash,
|
||||
IconRefresh, IconArrowRight, IconChevronDown,
|
||||
} from '@tabler/icons-react';
|
||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import api from '../../services/api';
|
||||
|
||||
// ── Interfaces ──
|
||||
|
||||
/** An alternate model configuration, as returned by `GET /admin/shadow-ai/models`. */
interface ShadowModel {
  id: string;
  /** Slot identifier; this page looks models up by `'A'` and `'B'`. */
  slot: string;
  /** Display name shown in the "Display Name" field. */
  name: string;
  /** Endpoint URL shown in the "API URL" field. */
  api_url: string;
  /** API key loaded into the (masked) "API Key" field. */
  api_key: string;
  /** Model identifier shown in the "Model Name" field. */
  model_name: string;
  /** Bound to the "Active" switch. */
  is_active: boolean;
  created_at: string;
  updated_at: string;
}
|
||||
|
||||
/** The outcome of one model within a shadow run. */
interface ShadowRunResult {
  id: string;
  run_id: string;
  /** Role key; labelled via `roleLabels` (`production`, `alternate_a`, `alternate_b`). */
  model_role: string;
  model_name: string;
  api_url: string;
  raw_response: string;
  /** Parsed model output; rendered by the health-score or investment display. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape depends on the benchmarked feature
  parsed_response: any;
  /** Response time in milliseconds (formatted with `formatDuration`). */
  response_time_ms: number;
  /** Usage object; this page reads `prompt_tokens` and `completion_tokens` from it. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- provider-defined payload
  token_usage: any;
  /** Result status; this page branches on `'success'` and `'error'`. */
  status: string;
  /** Error text shown when `status` is `'error'`. */
  error_message: string;
  created_at: string;
}
|
||||
|
||||
/** A shadow comparison run for one tenant and one AI feature. */
interface ShadowRun {
  id: string;
  tenant_id: string;
  tenant_name: string;
  /** Feature key; labelled via `featureLabels`. */
  feature: string;
  /** Run status; the Run Comparison tab keeps polling while this is `'running'`. */
  status: string;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- not read by this page
  prompt_messages: any;
  /** Start timestamp; together with `completed_at` it yields the history "Duration" column. */
  started_at: string;
  completed_at: string;
  created_at: string;
  /** Per-model results rendered side by side. */
  results: ShadowRunResult[];
  /** Total number of results; shown as the denominator of the history "Models" column. */
  result_count?: string;
  /** Number of successful results; shown as the numerator of the history "Models" column. */
  success_count?: string;
}
|
||||
|
||||
/** An organization (tenant) as returned by `GET /admin/organizations`. */
interface AdminOrg {
  id: string;
  name: string;
  /** Organization status; the Run Comparison tab only offers orgs whose status is `'active'`. */
  status: string;
}
|
||||
|
||||
// ── Helper Functions ──
|
||||
|
||||
/** Human-readable labels for the benchmarkable AI feature keys. */
const featureLabels: Record<string, string> = {
  operating_health: 'Operating Health',
  reserve_health: 'Reserve Health',
  investment_recommendations: 'Investment Recommendations',
};
|
||||
|
||||
/** Human-readable labels for the `model_role` values of a run result. */
const roleLabels: Record<string, string> = {
  production: 'Production',
  alternate_a: 'Alternate A',
  alternate_b: 'Alternate B',
};
|
||||
|
||||
/** Badge colors keyed by status string; used for both run and per-result statuses. */
const statusColor: Record<string, string> = {
  running: 'blue',
  completed: 'green',
  partial: 'yellow',
  failed: 'red',
  pending: 'gray',
  success: 'green',
  error: 'red',
};
|
||||
|
||||
/**
 * Formats a duration in milliseconds for display.
 *
 * @param ms - Duration in milliseconds, or `null` when it is unknown.
 * @returns `'-'` when the duration is missing or not a finite number, `"<n>ms"` below
 *   one second, otherwise seconds with one decimal place (e.g. `"1.5s"`).
 */
function formatDuration(ms: number | null): string {
  // Check for null/NaN explicitly: a truthiness test would report a real 0 ms as missing.
  if (ms == null || !Number.isFinite(ms)) return '-';
  if (ms < 1000) return `${ms}ms`;
  return `${(ms / 1000).toFixed(1)}s`;
}
|
||||
|
||||
/**
 * Formats a date string using the runtime's locale.
 *
 * @param d - A date string accepted by the `Date` constructor.
 * @returns The localized date/time, or `'-'` when `d` is empty or cannot be parsed.
 */
function formatDate(d: string): string {
  if (!d) return '-';
  const parsed = new Date(d);
  // An unparseable input yields an invalid Date, which would otherwise render "Invalid Date".
  if (Number.isNaN(parsed.getTime())) return '-';
  return parsed.toLocaleString();
}
|
||||
|
||||
// ── Model Configuration Tab ──
|
||||
|
||||
/**
 * "Model Configuration" tab: shows the read-only production model card next to the
 * two editable alternate slots (A and B).
 */
function ModelConfigTab() {
  const { data: models, isLoading } = useQuery<ShadowModel[]>({
    queryKey: ['shadow-ai-models'],
    queryFn: () => api.get('/admin/shadow-ai/models').then((r) => r.data),
  });

  // Each slot card receives its saved configuration, or undefined when none exists yet.
  const modelA = models?.find((m) => m.slot === 'A');
  const modelB = models?.find((m) => m.slot === 'B');

  return (
    <Stack>
      <Text size="sm" c="dimmed">
        Configure alternate AI models to benchmark against the production model.
        Each model can use any OpenAI-compatible API endpoint.
      </Text>
      <SimpleGrid cols={{ base: 1, md: 3 }}>
        <ProductionModelCard />
        <ModelSlotCard slot="A" model={modelA} isLoading={isLoading} />
        <ModelSlotCard slot="B" model={modelB} isLoading={isLoading} />
      </SimpleGrid>
    </Stack>
  );
}
|
||||
|
||||
/**
 * Static, read-only card describing the production model. It renders placeholder
 * text only; no values are fetched from the server.
 */
function ProductionModelCard() {
  return (
    <Card withBorder shadow="sm">
      <Stack gap="sm">
        <Group justify="space-between">
          <Text fw={600}>Production Model</Text>
          <Badge color="green" variant="light">Active</Badge>
        </Group>
        <Divider />
        <Text size="sm" c="dimmed">Configured via environment variables</Text>
        <TextInput label="Model" value="(from AI_MODEL env var)" readOnly disabled size="sm" />
        <TextInput label="API URL" value="(from AI_API_URL env var)" readOnly disabled size="sm" />
        <Text size="xs" c="dimmed" mt="xs">
          Production model settings are managed through server environment
          variables and cannot be changed from the UI.
        </Text>
      </Stack>
    </Card>
  );
}
|
||||
|
||||
/**
 * Editable card for one alternate model slot.
 *
 * @param slot - Slot identifier used in the API path (`/admin/shadow-ai/models/{slot}`).
 * @param model - The saved configuration for this slot, if any; seeds the form fields.
 * @param isLoading - While true, a loader is rendered instead of the form.
 */
function ModelSlotCard({ slot, model, isLoading }: { slot: string; model?: ShadowModel; isLoading: boolean }) {
  const queryClient = useQueryClient();
  const [name, setName] = useState('');
  const [apiUrl, setApiUrl] = useState('');
  const [apiKey, setApiKey] = useState('');
  const [modelName, setModelName] = useState('');
  const [isActive, setIsActive] = useState(true);

  // Copy the saved configuration into the form whenever it (re)loads.
  useEffect(() => {
    if (model) {
      setName(model.name);
      setApiUrl(model.api_url);
      setApiKey(model.api_key);
      setModelName(model.model_name);
      setIsActive(model.is_active);
    }
  }, [model]);

  const saveMutation = useMutation({
    mutationFn: () => api.put(`/admin/shadow-ai/models/${slot}`, { name, apiUrl, apiKey, modelName, isActive }),
    onSuccess: () => queryClient.invalidateQueries({ queryKey: ['shadow-ai-models'] }),
  });

  const deleteMutation = useMutation({
    mutationFn: () => api.delete(`/admin/shadow-ai/models/${slot}`),
    onSuccess: () => {
      // Clear the form so the slot reads as unconfigured again.
      setName('');
      setApiUrl('');
      setApiKey('');
      setModelName('');
      setIsActive(true);
      queryClient.invalidateQueries({ queryKey: ['shadow-ai-models'] });
    },
  });

  if (isLoading) {
    return (
      <Card withBorder shadow="sm">
        <Center h={200}><Loader size="sm" /></Center>
      </Card>
    );
  }

  // All four text fields are required before a save is allowed.
  const isIncomplete = !name || !apiUrl || !apiKey || !modelName;

  return (
    <Card withBorder shadow="sm">
      <Stack gap="sm">
        <Group justify="space-between">
          <Text fw={600}>Alternate {slot}</Text>
          {model ? (
            <Badge color={isActive ? 'blue' : 'gray'} variant="light">
              {isActive ? 'Active' : 'Inactive'}
            </Badge>
          ) : (
            <Badge color="gray" variant="light">Not configured</Badge>
          )}
        </Group>
        <Divider />
        <TextInput label="Display Name" placeholder="e.g. GPT-4o" value={name} onChange={(e) => setName(e.target.value)} size="sm" />
        <TextInput label="API URL" placeholder="https://api.openai.com/v1" value={apiUrl} onChange={(e) => setApiUrl(e.target.value)} size="sm" />
        <PasswordInput label="API Key" placeholder="sk-..." value={apiKey} onChange={(e) => setApiKey(e.target.value)} size="sm" />
        <TextInput label="Model Name" placeholder="gpt-4o" value={modelName} onChange={(e) => setModelName(e.target.value)} size="sm" />
        <Switch label="Active" checked={isActive} onChange={(e) => setIsActive(e.currentTarget.checked)} />
        <Group>
          <Button
            size="sm"
            onClick={() => saveMutation.mutate()}
            loading={saveMutation.isPending}
            disabled={isIncomplete}
          >
            Save
          </Button>
          {model && (
            <Button
              size="sm"
              color="red"
              variant="light"
              // Icon-only button: give assistive technology a name for it.
              aria-label={`Delete alternate ${slot} model`}
              onClick={() => deleteMutation.mutate()}
              loading={deleteMutation.isPending}
            >
              <IconTrash size={16} />
            </Button>
          )}
        </Group>
        {saveMutation.isError && <Text size="xs" c="red">Failed to save</Text>}
        {saveMutation.isSuccess && <Text size="xs" c="green">Saved</Text>}
        {/* Surface delete failures instead of leaving the button to silently reset. */}
        {deleteMutation.isError && <Text size="xs" c="red">Failed to delete</Text>}
      </Stack>
    </Card>
  );
}
|
||||
|
||||
// ── Run Comparison Tab ──
|
||||
|
||||
/**
 * "Run Comparison" tab: lets the admin pick a tenant and an AI feature, triggers a
 * shadow run, polls it every 3 seconds while its status is `'running'`, and then
 * shows the per-model results side by side.
 */
function RunComparisonTab() {
  const queryClient = useQueryClient();
  const [tenantId, setTenantId] = useState<string | null>(null);
  const [feature, setFeature] = useState<string | null>(null);
  const [activeRunId, setActiveRunId] = useState<string | null>(null);

  const { data: orgs } = useQuery<AdminOrg[]>({
    queryKey: ['admin-orgs'],
    queryFn: () => api.get('/admin/organizations').then((r) => r.data),
  });

  const triggerMutation = useMutation({
    mutationFn: () => api.post('/admin/shadow-ai/runs', { tenantId, feature }),
    onSuccess: (res) => {
      setActiveRunId(res.data.runId);
    },
  });

  const { data: activeRun } = useQuery<ShadowRun>({
    queryKey: ['shadow-ai-run', activeRunId],
    queryFn: () => api.get(`/admin/shadow-ai/runs/${activeRunId}`).then((r) => r.data),
    enabled: !!activeRunId,
    // Keep polling only while the run is still in progress.
    refetchInterval: (query) => {
      const run = query.state.data;
      return run?.status === 'running' ? 3000 : false;
    },
  });

  // Refresh the cached run list whenever the active run appears or changes status,
  // so the history view does not keep showing stale data.
  const activeStatus = activeRun?.status;
  useEffect(() => {
    if (activeStatus) {
      void queryClient.invalidateQueries({ queryKey: ['shadow-ai-runs'] });
    }
  }, [activeRunId, activeStatus, queryClient]);

  // Only active organizations can be benchmarked.
  const orgOptions = (orgs || [])
    .filter((o) => o.status === 'active')
    .map((o) => ({ value: o.id, label: o.name }));

  const featureOptions = [
    { value: 'operating_health', label: 'Operating Health Score' },
    { value: 'reserve_health', label: 'Reserve Health Score' },
    { value: 'investment_recommendations', label: 'Investment Recommendations' },
  ];

  return (
    <Stack>
      <Card withBorder shadow="sm">
        <Stack gap="md">
          <Text fw={600}>Run Shadow Comparison</Text>
          <SimpleGrid cols={{ base: 1, sm: 3 }}>
            <Select
              label="Tenant"
              placeholder="Select a tenant"
              data={orgOptions}
              value={tenantId}
              onChange={setTenantId}
              searchable
            />
            <Select
              label="AI Feature"
              placeholder="Select feature"
              data={featureOptions}
              value={feature}
              onChange={setFeature}
            />
            <Stack justify="flex-end">
              <Button
                leftSection={<IconPlayerPlay size={16} />}
                onClick={() => triggerMutation.mutate()}
                loading={triggerMutation.isPending}
                disabled={!tenantId || !feature}
              >
                Run Comparison
              </Button>
            </Stack>
          </SimpleGrid>
          {triggerMutation.isError && (
            <Alert color="red" icon={<IconAlertTriangle size={16} />}>
              Failed to start comparison. Ensure at least one alternate model is configured.
            </Alert>
          )}
        </Stack>
      </Card>

      {activeRun && (
        <Card withBorder shadow="sm">
          <Stack gap="md">
            <Group justify="space-between">
              <Group>
                <Text fw={600}>
                  {featureLabels[activeRun.feature] || activeRun.feature}
                </Text>
                <Badge color={statusColor[activeRun.status]}>{activeRun.status}</Badge>
              </Group>
              {activeRun.tenant_name && (
                <Text size="sm" c="dimmed">Tenant: {activeRun.tenant_name}</Text>
              )}
            </Group>

            {activeRun.status === 'running' && (
              <Center py="md">
                <Stack align="center" gap="xs">
                  <Loader size="md" />
                  <Text size="sm" c="dimmed">Running models... This may take a few minutes.</Text>
                  <Group gap="xs">
                    {(activeRun.results || []).map((r) => (
                      <Badge key={r.model_role} color={statusColor[r.status]} variant="light">
                        {/* Fall back to the raw role key when no label is defined for it. */}
                        {roleLabels[r.model_role] || r.model_role}: {r.status}
                      </Badge>
                    ))}
                  </Group>
                </Stack>
              </Center>
            )}

            {activeRun.status !== 'running' && activeRun.results && (
              <ComparisonResults results={activeRun.results} feature={activeRun.feature} />
            )}
          </Stack>
        </Card>
      )}
    </Stack>
  );
}
|
||||
|
||||
// ── History Tab ──
|
||||
|
||||
/**
 * "History" tab: lists up to 50 past shadow runs, optionally filtered by tenant and
 * feature, and shows the side-by-side results of the row the admin clicks.
 */
function HistoryTab() {
  const [selectedRunId, setSelectedRunId] = useState<string | null>(null);
  const [tenantFilter, setTenantFilter] = useState<string | null>(null);
  const [featureFilter, setFeatureFilter] = useState<string | null>(null);

  const { data: orgs } = useQuery<AdminOrg[]>({
    queryKey: ['admin-orgs'],
    queryFn: () => api.get('/admin/organizations').then((r) => r.data),
  });

  const { data: historyData, isLoading } = useQuery({
    queryKey: ['shadow-ai-runs', tenantFilter, featureFilter],
    queryFn: () => {
      // Only send the filters that are actually set.
      const params = new URLSearchParams();
      if (tenantFilter) params.set('tenantId', tenantFilter);
      if (featureFilter) params.set('feature', featureFilter);
      params.set('limit', '50');
      return api.get(`/admin/shadow-ai/runs?${params}`).then((r) => r.data);
    },
  });

  const { data: selectedRun, isLoading: isRunLoading } = useQuery<ShadowRun>({
    queryKey: ['shadow-ai-run', selectedRunId],
    queryFn: () => api.get(`/admin/shadow-ai/runs/${selectedRunId}`).then((r) => r.data),
    enabled: !!selectedRunId,
  });

  // The empty-string option stands for "no filter"; it is mapped back to null on change.
  const orgOptions = [
    { value: '', label: 'All Tenants' },
    ...(orgs || []).map((o) => ({ value: o.id, label: o.name })),
  ];

  const featureOptions = [
    { value: '', label: 'All Features' },
    { value: 'operating_health', label: 'Operating Health' },
    { value: 'reserve_health', label: 'Reserve Health' },
    { value: 'investment_recommendations', label: 'Investment Recommendations' },
  ];

  const runs: ShadowRun[] = historyData?.runs || [];

  return (
    <Stack>
      <Group>
        <Select
          size="sm"
          placeholder="Filter by tenant"
          data={orgOptions}
          value={tenantFilter || ''}
          onChange={(v) => setTenantFilter(v || null)}
          clearable
          w={200}
        />
        <Select
          size="sm"
          placeholder="Filter by feature"
          data={featureOptions}
          value={featureFilter || ''}
          onChange={(v) => setFeatureFilter(v || null)}
          clearable
          w={200}
        />
      </Group>

      {isLoading ? (
        <Center py="xl"><Loader /></Center>
      ) : runs.length === 0 ? (
        <Text c="dimmed" ta="center" py="xl">No shadow runs found.</Text>
      ) : (
        <Table striped highlightOnHover>
          <Table.Thead>
            <Table.Tr>
              <Table.Th>Date</Table.Th>
              <Table.Th>Tenant</Table.Th>
              <Table.Th>Feature</Table.Th>
              <Table.Th>Status</Table.Th>
              <Table.Th>Models</Table.Th>
              <Table.Th>Duration</Table.Th>
              <Table.Th></Table.Th>
            </Table.Tr>
          </Table.Thead>
          <Table.Tbody>
            {runs.map((run) => {
              // A duration is only known once both timestamps are present.
              const duration = run.completed_at && run.started_at
                ? new Date(run.completed_at).getTime() - new Date(run.started_at).getTime()
                : null;
              return (
                <Table.Tr
                  key={run.id}
                  style={{ cursor: 'pointer' }}
                  onClick={() => setSelectedRunId(run.id)}
                  bg={selectedRunId === run.id ? 'var(--mantine-color-blue-light)' : undefined}
                >
                  <Table.Td>{formatDate(run.created_at)}</Table.Td>
                  <Table.Td>{run.tenant_name || '-'}</Table.Td>
                  <Table.Td>{featureLabels[run.feature] || run.feature}</Table.Td>
                  <Table.Td><Badge color={statusColor[run.status]} size="sm">{run.status}</Badge></Table.Td>
                  <Table.Td>{run.success_count || '0'}/{run.result_count || '0'}</Table.Td>
                  <Table.Td>{formatDuration(duration)}</Table.Td>
                  <Table.Td><IconArrowRight size={14} /></Table.Td>
                </Table.Tr>
              );
            })}
          </Table.Tbody>
        </Table>
      )}

      {/* Give feedback between clicking a row and its detail arriving. */}
      {selectedRunId && isRunLoading && (
        <Center py="md"><Loader size="sm" /></Center>
      )}

      {selectedRun && selectedRun.results && (
        <Card withBorder shadow="sm" mt="md">
          <Stack gap="md">
            <Group justify="space-between">
              <Group>
                <Text fw={600}>{featureLabels[selectedRun.feature] || selectedRun.feature}</Text>
                <Badge color={statusColor[selectedRun.status]}>{selectedRun.status}</Badge>
              </Group>
              <Text size="sm" c="dimmed">
                {selectedRun.tenant_name || '-'} | {formatDate(selectedRun.created_at)}
              </Text>
            </Group>
            <ComparisonResults results={selectedRun.results} feature={selectedRun.feature} />
          </Stack>
        </Card>
      )}
    </Stack>
  );
}
|
||||
|
||||
// ── Comparison Results Component ──
|
||||
|
||||
/**
 * Lays out one `ResultCard` per model result, up to three columns wide.
 *
 * @param results - Per-model results of a single run.
 * @param feature - Feature key of the run; the two health features use the
 *   health-score display, everything else the investment display.
 */
function ComparisonResults({ results, feature }: { results: ShadowRunResult[]; feature: string }) {
  const isHealthScore = feature === 'operating_health' || feature === 'reserve_health';

  // Collect all parsed values for diff highlighting
  const parsedValues = results
    .filter((r) => r.status === 'success' && r.parsed_response)
    .map((r) => r.parsed_response);

  // Keep at least one column so an empty result list does not produce a zero-column grid.
  const columnCount = Math.max(1, Math.min(results.length, 3));

  return (
    <SimpleGrid cols={{ base: 1, md: columnCount }}>
      {results.map((result) => (
        <ResultCard
          key={result.model_role}
          result={result}
          isHealthScore={isHealthScore}
          allParsed={parsedValues}
        />
      ))}
    </SimpleGrid>
  );
}
|
||||
|
||||
/**
 * Card showing a single model's result: role, status, timing, token usage, the
 * feature-specific rendering of the parsed output, and the parsed JSON itself.
 *
 * @param result - The model result to render.
 * @param isHealthScore - Chooses the health-score display over the investment display.
 * @param allParsed - Parsed outputs of all successful results, used for diff highlighting.
 */
function ResultCard({
  result,
  isHealthScore,
  allParsed,
}: {
  result: ShadowRunResult;
  isHealthScore: boolean;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- unvalidated model output
  allParsed: any[];
}) {
  const roleColor: Record<string, string> = {
    production: 'green',
    alternate_a: 'blue',
    alternate_b: 'violet',
  };

  const isSuccess = result.status === 'success';
  const isError = result.status === 'error';

  let statusIcon = <IconClock size={12} />;
  if (isSuccess) statusIcon = <IconCheck size={12} />;
  else if (isError) statusIcon = <IconX size={12} />;

  return (
    <Card withBorder shadow="xs" padding="md">
      <Stack gap="sm">
        <Group justify="space-between">
          <Group gap="xs">
            <Badge color={roleColor[result.model_role] || 'gray'} variant="filled">
              {/* Fall back to the raw role key when no label is defined for it. */}
              {roleLabels[result.model_role] || result.model_role}
            </Badge>
          </Group>
          <Badge color={statusColor[result.status]} variant="light" leftSection={statusIcon}>
            {result.status}
          </Badge>
        </Group>

        <Text size="xs" c="dimmed" truncate>{result.model_name}</Text>

        {/* A bare `value && <X />` would render a stray "0" when the value is 0. */}
        {typeof result.response_time_ms === 'number' && (
          <Badge color="gray" variant="light" size="sm">
            {formatDuration(result.response_time_ms)}
          </Badge>
        )}

        {result.token_usage && (
          <Text size="xs" c="dimmed">
            {/* `??` keeps a genuine 0 token count instead of replacing it with "?". */}
            Tokens: {result.token_usage.prompt_tokens ?? '?'} prompt / {result.token_usage.completion_tokens ?? '?'} completion
          </Text>
        )}

        <Divider />

        {isError && (
          <Alert color="red" icon={<IconAlertTriangle size={16} />}>
            <Text size="sm">{result.error_message || 'Unknown error'}</Text>
          </Alert>
        )}

        {isSuccess && result.parsed_response && (
          isHealthScore
            ? <HealthScoreDisplay data={result.parsed_response} allParsed={allParsed} />
            : <InvestmentDisplay data={result.parsed_response} allParsed={allParsed} />
        )}

        {isSuccess && (
          <Accordion variant="contained">
            <Accordion.Item value="raw">
              <Accordion.Control>
                <Text size="xs">Raw JSON Response</Text>
              </Accordion.Control>
              <Accordion.Panel>
                <ScrollArea h={300}>
                  <Code block style={{ fontSize: 11 }}>
                    {JSON.stringify(result.parsed_response, null, 2)}
                  </Code>
                </ScrollArea>
              </Accordion.Panel>
            </Accordion.Item>
          </Accordion>
        )}
      </Stack>
    </Card>
  );
}
|
||||
|
||||
// ── Health Score Display ──
|
||||
|
||||
/**
 * Renders a parsed health-score response: score ring, label, summary, factors and
 * recommendations. The score is highlighted when the models disagree on it.
 *
 * @param data - Parsed model output. It is unvalidated, so every field is read defensively.
 * @param allParsed - Parsed outputs of all successful results in the run, used to
 *   detect differing scores.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- unvalidated model output
function HealthScoreDisplay({ data, allParsed }: { data: any; allParsed: any[] }) {
  const score = data.score ?? data.raw_text;
  const label = data.label || '';
  const summary = data.summary || '';
  // Model output may not follow the expected schema; only iterate real arrays.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const factors: any[] = Array.isArray(data.factors) ? data.factors : [];
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const recommendations: any[] = Array.isArray(data.recommendations) ? data.recommendations : [];

  // Check if score differs from other models
  const scores = allParsed.map((p) => p?.score).filter((s) => typeof s === 'number');
  const scoreDiffers = scores.length > 1 && !scores.every((s) => s === scores[0]);

  const labelColor: Record<string, string> = {
    Excellent: 'green', Good: 'teal', Fair: 'yellow',
    'Needs Attention': 'orange', 'At Risk': 'red', Critical: 'red',
  };

  return (
    <Stack gap="sm">
      {typeof score === 'number' && (
        <Group justify="center">
          <Box bg={scoreDiffers ? 'yellow.0' : undefined} p="xs" style={{ borderRadius: 8 }}>
            <RingProgress
              size={100}
              thickness={10}
              roundCaps
              // The ring is a percentage; clamp out-of-range scores for drawing only.
              sections={[{ value: Math.min(100, Math.max(0, score)), color: labelColor[label] || 'blue' }]}
              label={
                <Text ta="center" fw={700} size="lg">{score}</Text>
              }
            />
          </Box>
        </Group>
      )}

      {label && (
        <Group justify="center">
          <Badge color={labelColor[label] || 'gray'} size="lg">{label}</Badge>
        </Group>
      )}

      {summary && <Text size="sm">{summary}</Text>}

      {factors.length > 0 && (
        <>
          <Text size="xs" fw={600} c="dimmed" tt="uppercase">Factors</Text>
          {factors.map((f, i) => (
            <Group key={i} gap="xs" wrap="nowrap">
              <Badge
                size="xs"
                variant="light"
                color={f.impact === 'positive' ? 'green' : f.impact === 'negative' ? 'red' : 'gray'}
              >
                {f.impact}
              </Badge>
              <Text size="xs" style={{ flex: 1 }}><b>{f.name}:</b> {f.detail}</Text>
            </Group>
          ))}
        </>
      )}

      {recommendations.length > 0 && (
        <>
          <Text size="xs" fw={600} c="dimmed" tt="uppercase">Recommendations</Text>
          {recommendations.map((r, i) => (
            <Group key={i} gap="xs" wrap="nowrap">
              <Badge
                size="xs"
                variant="light"
                color={r.priority === 'high' ? 'red' : r.priority === 'medium' ? 'yellow' : 'blue'}
              >
                {r.priority}
              </Badge>
              <Text size="xs" style={{ flex: 1 }}>{r.text}</Text>
            </Group>
          ))}
        </>
      )}
    </Stack>
  );
}
|
||||
|
||||
// ── Investment Display ──
|
||||
|
||||
/**
 * Renders a parsed investment-recommendations response: overall assessment,
 * recommendation cards and risk notes. The recommendation count badge turns yellow
 * when the models returned different numbers of recommendations.
 *
 * @param data - Parsed model output. It is unvalidated, so every field is read defensively.
 * @param allParsed - Parsed outputs of all successful results in the run, used to
 *   compare recommendation counts.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- unvalidated model output
function InvestmentDisplay({ data, allParsed }: { data: any; allParsed: any[] }) {
  // Model output may not follow the expected schema; only iterate real arrays.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const recommendations: any[] = Array.isArray(data.recommendations) ? data.recommendations : [];
  const overall = data.overall_assessment || '';
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const riskNotes: any[] = Array.isArray(data.risk_notes) ? data.risk_notes : [];

  const recCounts = allParsed.map((p) => (Array.isArray(p?.recommendations) ? p.recommendations.length : 0));
  const countDiffers = recCounts.length > 1 && !recCounts.every((c) => c === recCounts[0]);

  const typeColors: Record<string, string> = {
    cd_ladder: 'violet', new_investment: 'blue', reallocation: 'teal',
    maturity_action: 'orange', liquidity_warning: 'red', general: 'gray',
  };

  return (
    <Stack gap="sm">
      {overall && (
        <Paper p="xs" bg="gray.0" radius="sm">
          <Text size="sm">{overall}</Text>
        </Paper>
      )}

      {recommendations.length > 0 && (
        <>
          <Group gap="xs">
            <Text size="xs" fw={600} c="dimmed" tt="uppercase">
              Recommendations
            </Text>
            <Badge
              size="xs"
              variant="light"
              color={countDiffers ? 'yellow' : 'gray'}
            >
              {recommendations.length}
            </Badge>
          </Group>
          {recommendations.map((rec, i) => (
            <Card key={i} withBorder padding="xs" radius="sm">
              <Stack gap={4}>
                <Group gap="xs">
                  <Badge size="xs" color={typeColors[rec.type] || 'gray'}>{rec.type}</Badge>
                  <Badge size="xs" variant="light" color={rec.priority === 'high' ? 'red' : rec.priority === 'medium' ? 'yellow' : 'blue'}>
                    {rec.priority}
                  </Badge>
                  {rec.fund_type && <Badge size="xs" variant="outline">{rec.fund_type}</Badge>}
                </Group>
                <Text size="sm" fw={600}>{rec.title}</Text>
                <Text size="xs">{rec.summary}</Text>
                {/* Coerce to boolean: a bare `0 && …` would render a stray "0". */}
                {!!rec.suggested_amount && (
                  <Text size="xs" c="dimmed">
                    Amount: ${rec.suggested_amount.toLocaleString()}
                    {rec.suggested_rate ? ` | Rate: ${rec.suggested_rate}%` : ''}
                    {rec.suggested_term ? ` | Term: ${rec.suggested_term}` : ''}
                  </Text>
                )}
              </Stack>
            </Card>
          ))}
        </>
      )}

      {riskNotes.length > 0 && (
        <>
          <Text size="xs" fw={600} c="dimmed" tt="uppercase">Risk Notes</Text>
          {riskNotes.map((note: string, i: number) => (
            <Group key={i} gap="xs" wrap="nowrap">
              <IconAlertTriangle size={14} color="orange" />
              <Text size="xs">{note}</Text>
            </Group>
          ))}
        </>
      )}
    </Stack>
  );
}
|
||||
|
||||
// ── Main Page ──
|
||||
|
||||
/**
 * Admin "AI Benchmarking" page. Hosts three tabs (model configuration, run
 * comparison, history) and opens on the "Run Comparison" tab.
 */
export function AdminShadowAiPage() {
  return (
    <Stack gap="lg" p="md">
      <Group>
        <IconScale size={28} />
        <Title order={2}>AI Benchmarking</Title>
      </Group>
      <Text c="dimmed" size="sm">
        Compare AI model outputs side-by-side using real tenant data.
        Configure alternate models, run shadow comparisons, and review historical results.
      </Text>

      <Tabs defaultValue="run">
        <Tabs.List>
          <Tabs.Tab value="config" leftSection={<IconSettings size={16} />}>
            Model Configuration
          </Tabs.Tab>
          <Tabs.Tab value="run" leftSection={<IconPlayerPlay size={16} />}>
            Run Comparison
          </Tabs.Tab>
          <Tabs.Tab value="history" leftSection={<IconHistory size={16} />}>
            History
          </Tabs.Tab>
        </Tabs.List>

        <Tabs.Panel value="config" pt="md">
          <ModelConfigTab />
        </Tabs.Panel>

        <Tabs.Panel value="run" pt="md">
          <RunComparisonTab />
        </Tabs.Panel>

        <Tabs.Panel value="history" pt="md">
          <HistoryTab />
        </Tabs.Panel>
      </Tabs>
    </Stack>
  );
}
|
||||
Reference in New Issue
Block a user