From 3e5185e725f1af12c7b48823d181f091c7542419 Mon Sep 17 00:00:00 2001 From: olsch01 Date: Mon, 23 Mar 2026 15:13:48 -0400 Subject: [PATCH] Deduplicate performance snapshots: one row per platform per day Added snapshot_date column with unique constraint on (platform_id, snapshot_date). Worker now upserts instead of inserting, so repeated scrapes on the same day update the existing row rather than creating duplicates that cause inflated chart values. Cleaned up existing duplicate rows in migration. Co-Authored-By: Claude Opus 4.6 --- apps/api/src/db/schema.ts | 5 ++++- apps/api/src/routes/performance.ts | 2 +- apps/frontend/src/pages/Performance.tsx | 4 ++-- apps/scraper/src/queue/worker.ts | 14 +++++++++++++- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/apps/api/src/db/schema.ts b/apps/api/src/db/schema.ts index 5ea9f6b..8e4e867 100644 --- a/apps/api/src/db/schema.ts +++ b/apps/api/src/db/schema.ts @@ -42,6 +42,7 @@ export const performanceSnapshots = pgTable('performance_snapshots', { .notNull() .references(() => platforms.id), capturedAt: timestamptz('captured_at').defaultNow().notNull(), + snapshotDate: date('snapshot_date').defaultNow().notNull(), periodLabel: varchar('period_label').notNull(), viewsSearch: integer('views_search'), viewsListing: integer('views_listing'), @@ -51,7 +52,9 @@ export const performanceSnapshots = pgTable('performance_snapshots', { avgDailyRate: numeric('avg_daily_rate'), revenueTotal: numeric('revenue_total'), rawJson: jsonb('raw_json'), -}); +}, (table) => [ + unique('perf_snap_platform_date_uniq').on(table.platformId, table.snapshotDate), +]); export const performanceSnapshotsRelations = relations( performanceSnapshots, diff --git a/apps/api/src/routes/performance.ts b/apps/api/src/routes/performance.ts index aec7b75..3d1b9b1 100644 --- a/apps/api/src/routes/performance.ts +++ b/apps/api/src/routes/performance.ts @@ -122,7 +122,7 @@ export default async function (fastify: FastifyInstance) { const rows = await db .select({ - date: performanceSnapshots.periodLabel, + date: performanceSnapshots.snapshotDate, capturedAt: performanceSnapshots.capturedAt, platformId: performanceSnapshots.platformId, viewsSearch: performanceSnapshots.viewsSearch, diff --git a/apps/frontend/src/pages/Performance.tsx b/apps/frontend/src/pages/Performance.tsx index 3c68906..06def8f 100644 --- a/apps/frontend/src/pages/Performance.tsx +++ b/apps/frontend/src/pages/Performance.tsx @@ -206,7 +206,7 @@ export default function Performance() { const trends: TrendPoint[] = rawTrends?.length ? rawTrends.map((r: any) => ({ - date: r.capturedAt ? r.capturedAt.split('T')[0].split(' ')[0] : r.date || r.periodLabel, + date: r.snapshotDate || r.date || (r.capturedAt ? r.capturedAt.split('T')[0].split(' ')[0] : r.periodLabel), platform: r.platformId || r.platform, views_search: Number(r.viewsSearch ?? r.views_search ?? 0), conversion_rate: Number(r.conversionRate ?? r.conversion_rate ?? 0), @@ -217,7 +217,7 @@ export default function Performance() { : MOCK_TRENDS; const snapshots: SnapshotRow[] = rawSnapshots?.length ? rawSnapshots.map((r: any) => ({ - date: r.capturedAt ? r.capturedAt.split('T')[0].split(' ')[0] : r.periodLabel || r.date, + date: r.snapshotDate || (r.capturedAt ? r.capturedAt.split('T')[0].split(' ')[0] : r.periodLabel || r.date), platform: r.platformId || r.platform, views: Number(r.viewsSearch ?? r.views ?? 0), clicks: Number(r.viewsListing ?? r.clicks ?? 0), diff --git a/apps/scraper/src/queue/worker.ts b/apps/scraper/src/queue/worker.ts index 0bbcad9..7ec3306 100644 --- a/apps/scraper/src/queue/worker.ts +++ b/apps/scraper/src/queue/worker.ts @@ -113,11 +113,13 @@ async function persistPerformanceSnapshot(snapshot: any): Promise { if (!db) return; try { + const snapshotDate = snapshot.capturedAt.split('T')[0]; await db` - INSERT INTO performance_snapshots (platform_id, captured_at, period_label, views_search, views_listing, conversion_rate, bookings_count, occupancy_rate, avg_daily_rate, revenue_total, raw_json) + INSERT INTO performance_snapshots (platform_id, captured_at, snapshot_date, period_label, views_search, views_listing, conversion_rate, bookings_count, occupancy_rate, avg_daily_rate, revenue_total, raw_json) VALUES ( ${snapshot.platformId}, ${snapshot.capturedAt}, + ${snapshotDate}, ${snapshot.periodLabel}, ${snapshot.viewsSearch}, ${snapshot.viewsListing}, @@ -128,6 +130,16 @@ async function persistPerformanceSnapshot(snapshot: any): Promise { ${snapshot.revenueTotal}, ${JSON.stringify(snapshot.rawJson)} ) + ON CONFLICT (platform_id, snapshot_date) DO UPDATE SET + captured_at = EXCLUDED.captured_at, + views_search = EXCLUDED.views_search, + views_listing = EXCLUDED.views_listing, + conversion_rate = EXCLUDED.conversion_rate, + bookings_count = EXCLUDED.bookings_count, + occupancy_rate = EXCLUDED.occupancy_rate, + avg_daily_rate = EXCLUDED.avg_daily_rate, + revenue_total = EXCLUDED.revenue_total, + raw_json = EXCLUDED.raw_json `; } catch (err) { console.error('[worker] Failed to persist performance snapshot:', err);