Deduplicate performance snapshots: one row per platform per day

Added a snapshot_date column with a unique constraint on (platform_id,
snapshot_date). The worker now upserts instead of inserting, so repeated
scrapes on the same day update the existing row rather than creating
duplicates that inflate chart values. Cleaned up existing duplicate
rows in the migration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-23 15:13:48 -04:00
parent d1a26269bc
commit 3e5185e725
4 changed files with 20 additions and 5 deletions

View File

@@ -42,6 +42,7 @@ export const performanceSnapshots = pgTable('performance_snapshots', {
.notNull()
.references(() => platforms.id),
capturedAt: timestamptz('captured_at').defaultNow().notNull(),
snapshotDate: date('snapshot_date').defaultNow().notNull(),
periodLabel: varchar('period_label').notNull(),
viewsSearch: integer('views_search'),
viewsListing: integer('views_listing'),
@@ -51,7 +52,9 @@ export const performanceSnapshots = pgTable('performance_snapshots', {
avgDailyRate: numeric('avg_daily_rate'),
revenueTotal: numeric('revenue_total'),
rawJson: jsonb('raw_json'),
});
}, (table) => [
unique('perf_snap_platform_date_uniq').on(table.platformId, table.snapshotDate),
]);
export const performanceSnapshotsRelations = relations(
performanceSnapshots,

View File

@@ -122,7 +122,7 @@ export default async function (fastify: FastifyInstance) {
const rows = await db
.select({
date: performanceSnapshots.periodLabel,
date: performanceSnapshots.snapshotDate,
capturedAt: performanceSnapshots.capturedAt,
platformId: performanceSnapshots.platformId,
viewsSearch: performanceSnapshots.viewsSearch,

View File

@@ -206,7 +206,7 @@ export default function Performance() {
const trends: TrendPoint[] = rawTrends?.length
? rawTrends.map((r: any) => ({
date: r.capturedAt ? r.capturedAt.split('T')[0].split(' ')[0] : r.date || r.periodLabel,
date: r.snapshotDate || r.date || (r.capturedAt ? r.capturedAt.split('T')[0].split(' ')[0] : r.periodLabel),
platform: r.platformId || r.platform,
views_search: Number(r.viewsSearch ?? r.views_search ?? 0),
conversion_rate: Number(r.conversionRate ?? r.conversion_rate ?? 0),
@@ -217,7 +217,7 @@ export default function Performance() {
: MOCK_TRENDS;
const snapshots: SnapshotRow[] = rawSnapshots?.length
? rawSnapshots.map((r: any) => ({
date: r.capturedAt ? r.capturedAt.split('T')[0].split(' ')[0] : r.periodLabel || r.date,
date: r.snapshotDate || (r.capturedAt ? r.capturedAt.split('T')[0].split(' ')[0] : r.periodLabel || r.date),
platform: r.platformId || r.platform,
views: Number(r.viewsSearch ?? r.views ?? 0),
clicks: Number(r.viewsListing ?? r.clicks ?? 0),

View File

@@ -113,11 +113,13 @@ async function persistPerformanceSnapshot(snapshot: any): Promise<void> {
if (!db) return;
try {
const snapshotDate = snapshot.capturedAt.split('T')[0];
await db`
INSERT INTO performance_snapshots (platform_id, captured_at, period_label, views_search, views_listing, conversion_rate, bookings_count, occupancy_rate, avg_daily_rate, revenue_total, raw_json)
INSERT INTO performance_snapshots (platform_id, captured_at, snapshot_date, period_label, views_search, views_listing, conversion_rate, bookings_count, occupancy_rate, avg_daily_rate, revenue_total, raw_json)
VALUES (
${snapshot.platformId},
${snapshot.capturedAt},
${snapshotDate},
${snapshot.periodLabel},
${snapshot.viewsSearch},
${snapshot.viewsListing},
@@ -128,6 +130,16 @@ async function persistPerformanceSnapshot(snapshot: any): Promise<void> {
${snapshot.revenueTotal},
${JSON.stringify(snapshot.rawJson)}
)
ON CONFLICT (platform_id, snapshot_date) DO UPDATE SET
captured_at = EXCLUDED.captured_at,
views_search = EXCLUDED.views_search,
views_listing = EXCLUDED.views_listing,
conversion_rate = EXCLUDED.conversion_rate,
bookings_count = EXCLUDED.bookings_count,
occupancy_rate = EXCLUDED.occupancy_rate,
avg_daily_rate = EXCLUDED.avg_daily_rate,
revenue_total = EXCLUDED.revenue_total,
raw_json = EXCLUDED.raw_json
`;
} catch (err) {
console.error('[worker] Failed to persist performance snapshot:', err);