Initial commit: STR Optimization Manager MVP

Full-stack short-term rental management platform with:
- React/Vite frontend with dark theme dashboard, performance, pricing,
  reservations, experiments, and settings pages
- Fastify API server with auth, platform management, performance tracking,
  pricing, reservations, experiments, and weekly report endpoints
- Playwright-based scraper service with Airbnb adapter (login with MFA,
  performance metrics, reservations, calendar pricing, price changes)
- VRBO adapter scaffold and mock adapter for development
- PostgreSQL with Drizzle ORM, migrations, and seed scripts
- Job queue with worker for async scraping tasks
- AES-256-GCM credential encryption for platform credentials
- Session cookie persistence for scraper browser sessions
- Docker Compose for PostgreSQL database

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-23 15:03:21 -04:00
parent 4735c73b3a
commit d4c714fadc
76 changed files with 18465 additions and 0 deletions

25
apps/scraper/package.json Normal file
View File

@@ -0,0 +1,25 @@
{
"name": "@str/scraper",
"version": "0.1.0",
"private": true,
"type": "module",
"scripts": {
"dev": "tsx watch src/index.ts",
"build": "tsc",
"start": "node dist/index.js"
},
"dependencies": {
"@str/shared-types": "*",
"fastify": "^5.2.0",
"drizzle-orm": "^0.38.0",
"postgres": "^3.4.0",
"playwright": "^1.49.0",
"dotenv": "^16.4.0",
"zod": "^3.24.0"
},
"devDependencies": {
"@types/node": "^22.0.0",
"tsx": "^4.19.0",
"typescript": "^5.7.0"
}
}

View File

@@ -0,0 +1,215 @@
import { chromium, type Browser, type BrowserContext, type Page } from 'playwright';
import { PlatformAdapter } from '../base/PlatformAdapter.js';
import {
loginFlow,
checkSessionFlow,
scrapePerformanceFlow,
scrapeReservationsFlow,
scrapePricingFlow,
applyPriceChangesFlow,
} from './airbnb.flows.js';
// Directory holding the persisted session cookie file (cookies.json).
// Taken from AIRBNB_SESSION_DIR when set (and non-empty), otherwise './.airbnb-session'.
const SESSION_DIR = process.env.AIRBNB_SESSION_DIR || './.airbnb-session';
/**
 * Reports whether the browser should be launched headless.
 *
 * AIRBNB_HEADLESS is read on every call rather than once at import time, so a
 * value set after this module was loaded is still honoured. Only the exact
 * string 'false' turns headless mode off; any other value (or no value) keeps
 * it on.
 */
function isHeadless(): boolean {
  const flag = process.env.AIRBNB_HEADLESS;
  if (flag === 'false') {
    return false;
  }
  return true;
}
/**
 * Platform adapter that drives the Airbnb host dashboard through a Playwright
 * Chromium browser.
 *
 * The adapter lazily owns one browser, one browser context and one page.
 * Session cookies are written to `${SESSION_DIR}/cookies.json` after login and
 * on shutdown, and loaded back into every freshly created context.
 */
export class AirbnbAdapter extends PlatformAdapter {
  readonly platformId = 'airbnb';
  readonly displayName = 'Airbnb';

  private browser: Browser | null = null;
  private context: BrowserContext | null = null;
  private page: Page | null = null;

  // ── Browser Lifecycle ──────────────────────────────────────────────────

  /**
   * Returns the adapter's page, creating the browser, context and page on
   * demand.
   *
   * An open page is reused as-is. Otherwise the browser is (re)launched when
   * missing or disconnected, any previous context is disposed, and a new
   * context is created and seeded with the saved cookies.
   */
  async ensureBrowser(): Promise<Page> {
    if (this.page && !this.page.isClosed()) {
      return this.page;
    }

    if (!this.browser || !this.browser.isConnected()) {
      const headless = isHeadless();
      console.log(`[airbnb] Launching browser (headless=${headless}, env=${process.env.AIRBNB_HEADLESS})`);
      this.browser = await chromium.launch({
        headless,
        args: ['--disable-blink-features=AutomationControlled'],
      });
    }

    // A context left over from a closed page would otherwise stay alive until
    // the whole browser is closed. Release it before creating its replacement.
    if (this.context) {
      await this.context.close().catch(() => {
        // Already closed, or owned by a browser that is gone: nothing to release.
      });
      this.context = null;
    }

    // This is a regular (non-persistent) context; continuity across runs comes
    // from the cookie file restored below.
    this.context = await this.browser.newContext({
      userAgent:
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
      viewport: { width: 1440, height: 900 },
      locale: 'en-US',
      timezoneId: 'America/New_York',
    });

    // Restore saved cookies when a cookie file exists. A missing file is the
    // normal first-run case; a corrupt or rejected file is reported but must
    // not prevent the browser from starting.
    try {
      const fs = await import('fs');
      const cookiePath = `${SESSION_DIR}/cookies.json`;
      if (fs.existsSync(cookiePath)) {
        const cookies = JSON.parse(fs.readFileSync(cookiePath, 'utf-8'));
        await this.context.addCookies(cookies);
        console.log('[airbnb] Restored saved session cookies');
      }
    } catch (err) {
      console.warn('[airbnb] Failed to restore saved cookies, continuing without them:', err);
    }

    this.page = await this.context.newPage();
    return this.page;
  }

  /** Writes the current context's cookies to the cookie file; failures are logged, never thrown. */
  private async saveCookies(): Promise<void> {
    if (!this.context) return;
    try {
      const fs = await import('fs');
      const cookies = await this.context.cookies();
      fs.mkdirSync(SESSION_DIR, { recursive: true });
      fs.writeFileSync(`${SESSION_DIR}/cookies.json`, JSON.stringify(cookies, null, 2));
      console.log('[airbnb] Session cookies saved');
    } catch (err) {
      console.warn('[airbnb] Failed to save cookies:', err);
    }
  }

  /**
   * Saves cookies, then closes page, context and browser.
   *
   * The browser is closed and the fields are reset even when closing the page
   * or the context throws; the first error still propagates to the caller.
   */
  async closeBrowser(): Promise<void> {
    await this.saveCookies();
    try {
      if (this.page && !this.page.isClosed()) await this.page.close();
      if (this.context) await this.context.close();
    } finally {
      try {
        if (this.browser) await this.browser.close();
      } finally {
        this.page = null;
        this.context = null;
        this.browser = null;
      }
    }
  }

  /**
   * Returns the page after confirming the Airbnb session is logged in.
   *
   * @throws Error when the session check reports that no host session is active.
   */
  private async requireSessionPage(): Promise<Page> {
    const page = await this.ensureBrowser();
    const valid = await checkSessionFlow(page);
    if (!valid) {
      throw new Error('Airbnb session is not valid. Please log in first.');
    }
    return page;
  }

  // ── Adapter Interface ──────────────────────────────────────────────────

  /** Runs the login flow with the given credentials and persists the resulting cookies. */
  async login(credentials: { email: string; password: string }): Promise<void> {
    const page = await this.ensureBrowser();
    await loginFlow(page, credentials.email, credentials.password);
    await this.saveCookies();
  }

  /** Returns true when the session check succeeds; any error is reported as `false`. */
  async isSessionValid(): Promise<boolean> {
    try {
      const page = await this.ensureBrowser();
      return await checkSessionFlow(page);
    } catch {
      return false;
    }
  }

  /** Scrapes performance metrics; requires a valid session. */
  async scrapePerformanceMetrics(): Promise<any> {
    const page = await this.requireSessionPage();
    return await scrapePerformanceFlow(page);
  }

  /** Scrapes reservations; requires a valid session. */
  async scrapeReservations(): Promise<any[]> {
    const page = await this.requireSessionPage();
    return await scrapeReservationsFlow(page);
  }

  /** Scrapes calendar pricing for `dateRange`; requires a valid session. */
  async scrapePricing(dateRange: { from: string; to: string }): Promise<any[]> {
    const page = await this.requireSessionPage();
    return await scrapePricingFlow(page, dateRange);
  }

  /**
   * Computes the difference each proposed change would make, without touching
   * the browser or Airbnb. A missing current price is treated as 0, in which
   * case the percentage difference is reported as 0.
   */
  async previewPriceChanges(changes: any[]): Promise<any> {
    return {
      platformId: this.platformId,
      previewedAt: new Date().toISOString(),
      changesCount: changes.length,
      changes: changes.map((c) => {
        const currentPrice = c.currentPrice ?? 0;
        return {
          date: c.date,
          currentPrice,
          proposedPrice: c.newPrice,
          diff: c.newPrice - currentPrice,
          diffPercent:
            currentPrice > 0
              ? Number((((c.newPrice - currentPrice) / currentPrice) * 100).toFixed(1))
              : 0,
        };
      }),
    };
  }

  /**
   * Applies the price changes through the calendar UI; requires a valid
   * session. `success` is true only when every requested change was applied.
   */
  async applyPriceChanges(changes: any[]): Promise<any> {
    const page = await this.requireSessionPage();
    const results = await applyPriceChangesFlow(page, changes);
    const appliedCount = results.filter((r) => r.applied).length;
    return {
      platformId: this.platformId,
      appliedAt: new Date().toISOString(),
      success: appliedCount === changes.length,
      appliedCount,
      results,
    };
  }

  /**
   * Health probe: reports healthy only when a browser can be obtained and the
   * session check passes. Never throws; failures are folded into the result.
   */
  async selfTest(): Promise<{
    platformId: string;
    healthy: boolean;
    message: string;
    checkedAt: string;
  }> {
    try {
      const page = await this.ensureBrowser();
      const sessionValid = await checkSessionFlow(page);
      return {
        platformId: this.platformId,
        healthy: sessionValid,
        message: sessionValid
          ? 'Airbnb adapter operational — session active'
          : 'Airbnb adapter operational but session expired — login required',
        checkedAt: new Date().toISOString(),
      };
    } catch (err: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      const detail = err instanceof Error ? err.message : String(err);
      return {
        platformId: this.platformId,
        healthy: false,
        message: `Airbnb adapter error: ${detail}`,
        checkedAt: new Date().toISOString(),
      };
    }
  }
}

View File

@@ -0,0 +1,663 @@
import type { Page } from 'playwright';
import { URLS, SELECTORS } from './airbnb.selectors.js';
// ── Helpers ──────────────────────────────────────────────────────────────────
/**
 * Waits for the page to reach the `networkidle` load state, giving up quietly
 * after `timeoutMs` milliseconds.
 *
 * Any rejection (including the timeout) is swallowed on purpose because
 * `networkidle` can be flaky. No other load state is awaited afterwards, so
 * callers simply continue with whatever has loaded.
 */
async function waitForNavigation(page: Page, timeoutMs = 15000): Promise<void> {
  try {
    await page.waitForLoadState('networkidle', { timeout: timeoutMs });
  } catch {
    // Best effort only.
  }
}
/**
 * Extracts a number from currency-like text such as "$1,234.56".
 *
 * Every character other than digits, '.' and '-' is removed before
 * conversion. Text that does not convert to a number (empty, no digits,
 * several dots, ...) yields 0.
 */
function parseCurrency(text: string): number {
  const numericPart = text.replace(/[^0-9.\-]/g, '');
  const amount = Number(numericPart);
  return amount ? amount : 0;
}
/**
 * Normalises an Airbnb date string to `YYYY-MM-DD`.
 *
 * Handled formats:
 *   "May 21, 2026"            — standard
 *   "Mar 2, 20269:09 PM ET"   — time glued to the year
 *   "Mar 2, 2026 9:09 PM ET"  — time separated by a space
 *
 * Human-readable dates are parsed by `Date` as local midnight, so the result
 * is built from the local calendar fields. Going through `toISOString()`
 * instead would shift the date to the previous day whenever the process runs
 * east of UTC. Input that already starts with an ISO `YYYY-MM-DD` date keeps
 * the UTC-based conversion.
 *
 * @param text - Raw date text scraped from the page.
 * @returns The date as `YYYY-MM-DD`, or `text` unchanged when it cannot be parsed.
 */
function parseDate(text: string): string {
  // Strip the time portion (everything after the 4-digit year).
  const cleaned = text
    .replace(/(\d{4})\d{1,2}:\d{2}.*/, '$1')
    .replace(/(\d{4})\s+\d{1,2}:\d{2}.*/, '$1')
    .trim();

  // Try the cleaned text first, then the original text.
  for (const candidate of [cleaned, text]) {
    const parsed = new Date(candidate);
    if (isNaN(parsed.getTime())) continue;

    if (/^\s*\d{4}-\d{2}-\d{2}/.test(candidate)) {
      return parsed.toISOString().slice(0, 10);
    }

    const year = String(parsed.getFullYear()).padStart(4, '0');
    const month = String(parsed.getMonth() + 1).padStart(2, '0');
    const day = String(parsed.getDate()).padStart(2, '0');
    return `${year}-${month}-${day}`;
  }

  return text;
}
/**
 * Number of nights between two date strings, rounded to whole days and never
 * less than 1 (same-day or reversed ranges yield 1).
 *
 * When either string cannot be parsed by `Date` the result is NaN, because
 * `Math.max` propagates NaN.
 */
function computeNights(checkIn: string, checkOut: string): number {
  const MS_PER_DAY = 86400000;
  const elapsedMs = new Date(checkOut).getTime() - new Date(checkIn).getTime();
  const wholeDays = Math.round(elapsedMs / MS_PER_DAY);
  return Math.max(1, wholeDays);
}
// ── Login Flow ───────────────────────────────────────────────────────────────
/**
 * Logs in to Airbnb through the web login form.
 *
 * Steps, in order: open the login page; click an "email" entry button if one
 * is visible; fill the email (falling back to the first text input); click a
 * continue-style button; fill and submit the password when a password field
 * is visible; if a one-time-code input is visible, wait for the code to be
 * entered manually; finally, if the URL still contains /login or /signup,
 * wait for the user to finish logging in.
 *
 * NOTE(review): Playwright's documentation describes the `timeout` option of
 * `isVisible` as ignored (the check returns immediately) — confirm. If so,
 * the fixed `waitForTimeout` pauses below are what actually give the page
 * time to render, not the `timeout` values passed to `isVisible`.
 *
 * @param page - Playwright page used for the whole flow.
 * @param email - Account email typed into the email (or first text) input.
 * @param password - Account password typed into the password input, if shown.
 * @throws Error when no email or text input is found, or when the page does
 *   not reach a hosting/dashboard/account/users URL within 3 minutes.
 */
export async function loginFlow(
page: Page,
email: string,
password: string,
): Promise<void> {
console.log('[airbnb] Navigating to login page...');
await page.goto('https://www.airbnb.com/login', { waitUntil: 'domcontentloaded' });
// Fixed pause to let the login UI render after DOMContentLoaded
await page.waitForTimeout(3000);
// Log the URL we landed on for debugging (no screenshot is taken)
console.log('[airbnb] Login page loaded, URL:', page.url());
// Click "Continue with email" if present (Airbnb sometimes shows social login first)
// Only the first visible candidate is clicked
for (const text of ['Continue with email', 'Email', 'Use email']) {
const btn = page.locator(`button:has-text("${text}")`).first();
if (await btn.isVisible({ timeout: 2000 }).catch(() => false)) {
console.log(`[airbnb] Clicking "${text}" button`);
await btn.click();
await page.waitForTimeout(2000);
break;
}
}
// Look for email input with broader selectors
const emailInput = page.locator(
'input[type="email"], input[name="email"], input[autocomplete="email"], input[autocomplete="username"], input[data-testid*="email"]'
).first();
// If no email input is visible, fall back to filling the first visible text input
if (!(await emailInput.isVisible({ timeout: 5000 }).catch(() => false))) {
console.log('[airbnb] Email input not found with standard selectors, trying text input...');
const textInput = page.locator('input[type="text"]').first();
if (await textInput.isVisible({ timeout: 3000 }).catch(() => false)) {
await textInput.fill(email);
} else {
// Last resort: log the first 500 characters of the page text for debugging, then fail
const bodyText = await page.locator('body').textContent().catch(() => '');
console.log('[airbnb] Page text preview:', bodyText?.substring(0, 500));
throw new Error('Could not find email input on login page. The page structure may have changed.');
}
} else {
console.log('[airbnb] Found email input, filling...');
await emailInput.fill(email);
}
// Click Continue / Next / Submit after email (first visible candidate only)
for (const selector of [
'button:has-text("Continue")',
'button:has-text("Next")',
'button[type="submit"]',
]) {
const btn = page.locator(selector).first();
if (await btn.isVisible({ timeout: 2000 }).catch(() => false)) {
console.log(`[airbnb] Clicking: ${selector}`);
await btn.click();
await page.waitForTimeout(3000);
break;
}
}
// Enter password
const passwordInput = page.locator(
'input[type="password"], input[name="password"], input[autocomplete="current-password"]'
).first();
if (await passwordInput.isVisible({ timeout: 10000 }).catch(() => false)) {
console.log('[airbnb] Found password input, filling...');
await passwordInput.fill(password);
// Submit the password form (first visible candidate only)
for (const selector of [
'button:has-text("Log in")',
'button:has-text("Login")',
'button:has-text("Continue")',
'button[type="submit"]',
]) {
const btn = page.locator(selector).first();
if (await btn.isVisible({ timeout: 2000 }).catch(() => false)) {
console.log(`[airbnb] Submitting with: ${selector}`);
await btn.click();
break;
}
}
} else {
// Not treated as an error: the flow continues to the MFA / URL checks below
console.log('[airbnb] No password field — may be a passwordless flow or MFA-only');
}
// Fixed pause to give the MFA prompt or the post-login redirect time to appear
await page.waitForTimeout(5000);
console.log('[airbnb] Post-submit URL:', page.url());
// Check for MFA
const mfaInput = page.locator('input[inputmode="numeric"], input[autocomplete="one-time-code"], input[name*="code"]').first();
if (await mfaInput.isVisible({ timeout: 5000 }).catch(() => false)) {
console.log('[airbnb] MFA required. Waiting for manual code entry (up to 3 minutes)...');
// The code is never typed by this flow; success is detected by the URL changing
await page.waitForURL(/\/(hosting|dashboard|account|users)/, { timeout: 180000 }).catch(() => {
throw new Error('MFA timeout: code was not entered within 3 minutes');
});
}
// Check if we're on a logged-in page
const currentUrl = page.url();
if (currentUrl.includes('/login') || currentUrl.includes('/signup')) {
// Still on login page — maybe waiting for user action in non-headless mode
console.log('[airbnb] Still on login page. Waiting for user to complete login (up to 3 minutes)...');
await page.waitForURL(/\/(hosting|dashboard|account|users)/, { timeout: 180000 }).catch(() => {
throw new Error('Login timeout: did not reach a logged-in page within 3 minutes');
});
}
console.log('[airbnb] Login successful, URL:', page.url());
}
// ── Session Check ────────────────────────────────────────────────────────────
/**
 * Checks whether the page holds a logged-in host session.
 *
 * Opens the host home page, pauses two seconds, and reports whether the
 * primary host navigation is visible. Navigation errors and visibility-check
 * errors are both reported as `false`; this function never throws.
 */
export async function checkSessionFlow(page: Page): Promise<boolean> {
  let loggedIn = false;
  try {
    await page.goto(URLS.HOST_HOME, { waitUntil: 'domcontentloaded', timeout: 15000 });
    await page.waitForTimeout(2000);
    // Seeing the host nav means we are logged in.
    loggedIn = await page.locator(SELECTORS.NAV_PRIMARY).isVisible({ timeout: 5000 });
  } catch {
    loggedIn = false;
  }
  return loggedIn;
}
// ── Discover Listing ID ──────────────────────────────────────────────────────
/**
 * Finds the numeric ID of a listing by inspecting links on the listings page.
 *
 * Strategies, tried in order until one yields an ID:
 *   1. the first visible link matching a known listing URL pattern;
 *   2. any link on the page whose href contains a run of 5+ digits;
 *   3. the first visible link whose href contains "/calendar";
 *   4. the AIRBNB_LISTING_ID environment variable.
 *
 * @throws Error when none of the strategies produces an ID.
 */
export async function discoverListingId(page: Page): Promise<string> {
  await page.goto(URLS.LISTINGS, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(3000);

  // 1. Known link patterns that embed the listing ID.
  const knownLinkSelectors = [
    'a[href*="/hosting/listings/editor/"]',
    'a[href*="/multicalendar/"]',
    'a[href*="/hosting/listings/"]',
    'a[href*="/rooms/"]',
  ];
  for (const selector of knownLinkSelectors) {
    const anchor = page.locator(selector).first();
    const shown = await anchor.isVisible({ timeout: 3000 }).catch(() => false);
    if (!shown) continue;

    const href = await anchor.getAttribute('href');
    if (!href) continue;

    const idMatch = href.match(/\/(?:editor|multicalendar|rooms|listings)\/(\d+)/);
    if (!idMatch) continue;

    console.log(`[airbnb] Discovered listing ID: ${idMatch[1]} (from ${selector})`);
    return idMatch[1];
  }

  // 2. Any href on the page that contains a long numeric path segment.
  const numericHrefs = await page.evaluate(() =>
    Array.from(document.querySelectorAll('a[href]'))
      .map((a) => a.getAttribute('href') || '')
      .filter((h) => /\/\d{5,}/.test(h)),
  );
  for (const href of numericHrefs) {
    const idMatch = href.match(/\/(\d{5,})/);
    if (idMatch) {
      console.log(`[airbnb] Discovered listing ID: ${idMatch[1]} (from page scan: ${href})`);
      return idMatch[1];
    }
  }

  // 3. Calendar link in the navigation.
  const calendarAnchor = page.locator('a[href*="/calendar"]').first();
  const calendarShown = await calendarAnchor.isVisible({ timeout: 2000 }).catch(() => false);
  if (calendarShown) {
    const href = await calendarAnchor.getAttribute('href');
    const idMatch = href?.match(/\/(\d{5,})/);
    if (idMatch) {
      console.log(`[airbnb] Discovered listing ID: ${idMatch[1]} (from calendar nav)`);
      return idMatch[1];
    }
  }

  // 4. Operator-supplied ID from the environment.
  const envListingId = process.env.AIRBNB_LISTING_ID;
  if (envListingId) {
    console.log(`[airbnb] Using fallback listing ID from env: ${envListingId}`);
    return envListingId;
  }

  throw new Error('Could not find listing ID from listings page. Set AIRBNB_LISTING_ID env var as fallback.');
}
// ── Scrape Performance Metrics ───────────────────────────────────────────────
/**
 * Finds the first "<count><label>" occurrence in `text` (case-insensitive) and
 * returns the count. Thousands separators are accepted, so "1,234 Views"
 * yields 1234 rather than the trailing "234".
 */
function matchCount(text: string, label: RegExp): number | undefined {
  const pattern = new RegExp(`(\\d{1,3}(?:,\\d{3})+|\\d+)\\s*${label.source}`, 'i');
  const found = text.match(pattern);
  return found ? parseInt(found[1].replace(/,/g, ''), 10) : undefined;
}

/**
 * Scrapes performance metrics from the earnings, views and reviews pages.
 *
 * All values are pulled with regular expressions from the text content of
 * each page's `<main>` element; anything that cannot be found stays 0. The
 * views page is skipped when no listing ID can be discovered.
 *
 * Derived values: average daily rate is revenue divided by nights booked, and
 * occupancy is nights booked over a fixed 30-day window.
 *
 * NOTE(review): `bookingsCount` falls back to nights booked when no new
 * bookings were found — confirm this is intended, since the two measure
 * different things.
 *
 * @returns A metrics object for platform 'airbnb' labelled 'last_30_days'.
 */
export async function scrapePerformanceFlow(page: Page): Promise<any> {
  // The listing ID is only needed for the views page, so failure is not fatal.
  let listingId = '';
  try {
    listingId = await discoverListingId(page);
  } catch (err) {
    console.warn('[airbnb] Could not discover listing ID; views metrics will be skipped:', err);
  }

  // ── Scrape Earnings / Performance ────────────────────────────────────
  await page.goto(URLS.EARNINGS_PERFORMANCE, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(3000);
  const earningsText = (await page.locator('main').textContent()) ?? '';

  let revenueTotal = 0;
  const totalMatch = earningsText.match(/Total \(USD\)\s*\$?([\d,]+\.?\d*)/);
  if (totalMatch) {
    revenueTotal = parseCurrency(totalMatch[1]);
  } else {
    // Fallback: the first currency amount immediately followed by "Paid".
    const paidMatch = earningsText.match(/\$([\d,]+\.?\d*)\s*Paid/);
    if (paidMatch) revenueTotal = parseCurrency(paidMatch[1]);
  }

  // Expand "Performance stats" if the toggle is present, then re-read the text.
  const perfStatsBtn = page.locator('button:has-text("Performance stats")');
  if (await perfStatsBtn.isVisible({ timeout: 2000 }).catch(() => false)) {
    await perfStatsBtn.click();
    await page.waitForTimeout(500);
  }
  const updatedText = (await page.locator('main').textContent()) ?? '';

  const nightsBooked = matchCount(updatedText, /Nights? booked/) ?? 0;
  // The average stay may be fractional ("3.5"), so keep the decimals.
  const avgStayMatch = updatedText.match(/(\d+(?:\.\d+)?)\s*Avg night stay/i);
  const avgNightStay = avgStayMatch ? parseFloat(avgStayMatch[1]) : 0;

  // ── Scrape Insights / Views ──────────────────────────────────────────
  let viewsSearch = 0;
  let newBookings = 0;
  let bookingRate = 0;
  if (listingId) {
    await page.goto(URLS.INSIGHTS_VIEWS(listingId), { waitUntil: 'domcontentloaded' });
    await page.waitForTimeout(3000);
    const viewsPageText = (await page.locator('main').textContent()) ?? '';

    // e.g. "161" Views, past 30 days
    viewsSearch = matchCount(viewsPageText, /Views,?\s*past 30 days/) ?? 0;
    // e.g. "2" New bookings, past 30 days
    newBookings = matchCount(viewsPageText, /New bookings,?\s*past 30 days/) ?? 0;
    // e.g. "1.2%" Booking rate
    const rateMatch = viewsPageText.match(/([\d.]+)%\s*Booking rate/i);
    if (rateMatch) bookingRate = parseFloat(rateMatch[1]);
  }

  // ── Scrape Insights / Reviews ────────────────────────────────────────
  await page.goto(URLS.INSIGHTS_REVIEWS, { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(3000);
  const reviewsText = (await page.locator('main').textContent()) ?? '';

  const ratingMatch = reviewsText.match(/([\d.]+)\s*overall rating/i);
  const overallRating = ratingMatch ? parseFloat(ratingMatch[1]) : 0;
  const reviewCount = matchCount(reviewsText, /reviews/) ?? 0;

  // ── Compute derived metrics ──────────────────────────────────────────
  const avgDailyRate = nightsBooked > 0 ? revenueTotal / nightsBooked : 0;
  // Occupancy: nights booked / 30 days * 100
  const occupancyRate = (nightsBooked / 30) * 100;

  // One timestamp for the whole snapshot.
  const capturedAt = new Date().toISOString();

  return {
    platformId: 'airbnb',
    capturedAt,
    periodLabel: 'last_30_days',
    viewsSearch,
    viewsListing: viewsSearch, // Airbnb doesn't split search vs listing views
    conversionRate: bookingRate,
    bookingsCount: newBookings || nightsBooked,
    occupancyRate: Number(occupancyRate.toFixed(1)),
    avgDailyRate: Number(avgDailyRate.toFixed(2)),
    revenueTotal: Number(revenueTotal.toFixed(2)),
    rawJson: {
      source: 'airbnb',
      scrapedAt: capturedAt,
      listingId,
      nightsBooked,
      avgNightStay,
      overallRating,
      reviewCount,
      bookingRate,
    },
  };
}
// ── Scrape Reservations ──────────────────────────────────────────────────────
/**
 * Scrapes reservations from the reservation tables of the host dashboard.
 *
 * Visits the completed-reservations page and then the default reservations
 * page, reads every table row that has at least 8 cells, and converts each
 * row into a reservation record. A page without a visible table is skipped;
 * a row that fails to parse is logged and skipped.
 *
 * @param page - Playwright page used for navigation and scraping.
 * @returns Reservation records, de-duplicated by `platformReservationId`
 *   (first occurrence wins) and sorted by check-in date ascending.
 */
export async function scrapeReservationsFlow(page: Page): Promise<any[]> {
const reservations: any[] = [];
// Scrape both completed and upcoming
for (const url of [URLS.RESERVATIONS_COMPLETED, URLS.RESERVATIONS]) {
await page.goto(url, { waitUntil: 'domcontentloaded' });
await page.waitForTimeout(3000);
// Skip this page when no table is visible
const table = page.locator('table');
if (!(await table.isVisible({ timeout: 5000 }).catch(() => false))) {
continue;
}
// Get all rows that contain at least one data cell (excludes header-only rows)
const rows = page.locator('table tbody tr, table tr').filter({
has: page.locator('td'),
});
const rowCount = await rows.count();
for (let i = 0; i < rowCount; i++) {
try {
const row = rows.nth(i);
const cells = row.locator('td');
const cellCount = await cells.count();
// Rows with fewer than 8 cells cannot hold the columns read below
if (cellCount < 8) continue;
// Actual Airbnb columns (10 cols):
// [0]=Status [1]=Guests [2]=Contact [3]=Check-in [4]=Checkout
// [5]=Booked [6]=Listing [7]=Confirmation Code [8]=Total Payout [9]=Actions
const status = (await cells.nth(0).textContent())?.trim().toLowerCase() ?? '';
const guestText = (await cells.nth(1).textContent())?.trim() ?? '';
const checkInText = (await cells.nth(3).textContent())?.trim() ?? '';
const checkOutText = (await cells.nth(4).textContent())?.trim() ?? '';
const bookedText = (await cells.nth(5).textContent())?.trim() ?? '';
const confirmationCode = (await cells.nth(7).textContent())?.trim() ?? '';
// The payout column is only read when a ninth cell exists
const payoutText = cellCount > 8 ? (await cells.nth(8).textContent())?.trim() ?? '' : '';
// Parse guest name from profile link (clean text) or fall back to cell text
let guestName = 'Unknown';
const profileLink = cells.nth(1).locator('a[href*="/users/profile/"], a[href*="/users/show/"]').first();
if (await profileLink.count() > 0) {
guestName = (await profileLink.textContent())?.trim() || 'Unknown';
}
// Also fall back when the link text itself looks like a guest count ("2 adults")
if (guestName === 'Unknown' || /\d+\s*(adult|guest)/i.test(guestName)) {
// Fallback: split merged "NameNadults" text — e.g., "Cassie Graham7 adults"
const nameMatch = guestText.match(/^(.+?)(\d+\s*(?:adult|guest|infant|child|pet))/i);
guestName = nameMatch ? nameMatch[1].trim() : guestText.split('\n')[0]?.trim() || 'Unknown';
}
// Guest count defaults to 1 when no "<n> adult/guest" text is found
const guestsCountMatch = guestText.match(/(\d+)\s*(adult|guest)/i);
const guestsCount = guestsCountMatch ? parseInt(guestsCountMatch[1], 10) : 1;
const checkIn = parseDate(checkInText);
const checkOut = parseDate(checkOutText);
const nights = computeNights(checkIn, checkOut);
const totalPayout = parseCurrency(payoutText);
// Map status text to an internal status by substring; the first matching branch wins,
// and unrecognised text is passed through as-is ('unknown' when empty)
let mappedStatus: string;
if (status.includes('past guest') || status.includes('completed')) {
mappedStatus = 'completed';
} else if (status.includes('confirmed') || status.includes('upcoming')) {
mappedStatus = 'confirmed';
} else if (status.includes('cancel')) {
mappedStatus = 'cancelled';
} else if (status.includes('check')) {
mappedStatus = 'checked_in';
} else {
mappedStatus = status || 'unknown';
}
// Estimate nightly rate from total payout (total / nights is rough)
const nightlyRate = nights > 0 ? Number((totalPayout / nights).toFixed(2)) : 0;
// Safely parse bookedAt — fallback to now if invalid
let bookedAt: string;
try {
const parsed = new Date(parseDate(bookedText));
bookedAt = isNaN(parsed.getTime()) ? new Date().toISOString() : parsed.toISOString();
} catch {
bookedAt = new Date().toISOString();
}
reservations.push({
platformId: 'airbnb',
// Without a confirmation code a synthetic ID is generated from the time and row index
platformReservationId: confirmationCode || `ABB-${Date.now()}-${i}`,
guestName,
checkIn,
checkOut,
nights,
guestsCount,
nightlyRate,
cleaningFee: 0, // Not available in table view; could be scraped from detail
platformFee: 0, // Not available in table view
totalPayout,
status: mappedStatus,
bookedAt,
rawJson: {
source: 'airbnb',
confirmationCode,
scrapedAt: new Date().toISOString(),
},
});
} catch (err) {
// One bad row must not abort the whole scrape
console.warn(`[airbnb] Failed to parse reservation row ${i}:`, err);
}
}
}
// Deduplicate by platformReservationId (the confirmation code when present); first occurrence wins
const seen = new Set<string>();
const unique = reservations.filter((r) => {
if (seen.has(r.platformReservationId)) return false;
seen.add(r.platformReservationId);
return true;
});
console.log(`[airbnb] Scraped ${unique.length} reservations`);
// Sort by check-in date, earliest first
return unique.sort((a, b) => new Date(a.checkIn).getTime() - new Date(b.checkIn).getTime());
}
// ── Scrape Pricing from Calendar ─────────────────────────────────────────────
/**
 * Scrapes nightly prices from the listing's multicalendar for a date range.
 *
 * The calendar is a virtualized scroll list, so the function repeatedly parses
 * the text currently rendered, keeps the days inside `dateRange`, and scrolls
 * (or clicks a "next month" button) to load more, for at most 20 rounds or
 * until the last parsed day reaches `dateRange.to`.
 *
 * Day text follows the pattern "Wednesday 4 Mar4Nightly price$275", with
 * "Unavailable" inserted for blocked days and no price text for reserved
 * days. Prices with thousands separators ("$1,250") are read in full.
 *
 * NOTE(review): the year is inferred, not read from the page — a month more
 * than one month behind the current month is assumed to belong to next year.
 * NOTE(review): `minStayNights` is a fixed 3; it is not scraped.
 *
 * @param page - Playwright page used for navigation and scraping.
 * @param dateRange - Inclusive `YYYY-MM-DD` bounds, compared as strings.
 * @returns One entry per date (first occurrence wins), sorted by date.
 */
export async function scrapePricingFlow(
  page: Page,
  dateRange: { from: string; to: string },
): Promise<any[]> {
  const listingId = await discoverListingId(page);
  await page.goto(URLS.CALENDAR(listingId), { waitUntil: 'domcontentloaded' });
  await page.waitForTimeout(5000);

  const prices: any[] = [];
  const targetEnd = new Date(dateRange.to);
  let scrollAttempts = 0;
  const maxScrolls = 20;
  // Year used as the baseline for every parsed day.
  const currentYear = new Date().getFullYear();

  while (scrollAttempts < maxScrolls) {
    // Runs in the browser: parse every day entry currently present in the calendar text.
    const dayData = await page.evaluate((year: number) => {
      const results: { dateText: string; price: number; available: boolean }[] = [];
      const mainEl = document.querySelector('[data-testid="listing-calendar"]') || document.querySelector('main');
      if (!mainEl) return results;
      const text = mainEl.textContent || '';

      // Entries look like "Sunday 1 Feb1Nightly price$275" or
      // "Monday 20 Mar20UnavailableNightly price$275", i.e.
      // DayName DD MonDD[Unavailable][Nightly price$NNN]
      const months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
      const dayNames = '(?:Sunday|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday)';
      const monthAbbr = '(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)';
      // The price group accepts either comma-grouped thousands or plain digits.
      const dayRegex = new RegExp(
        `(?:Today, )?${dayNames}\\s+(\\d{1,2})\\s+(${monthAbbr})\\1(Unavailable)?(?:Nightly price\\$(\\d{1,3}(?:,\\d{3})+|\\d+))?`,
        'g',
      );

      let match: RegExpExecArray | null;
      while ((match = dayRegex.exec(text)) !== null) {
        const day = parseInt(match[1], 10);
        const monthAbbreviation = match[2];
        const isUnavailable = !!match[3];
        const price = match[4] ? parseInt(match[4].replace(/,/g, ''), 10) : 0;
        const monthIndex = months.indexOf(monthAbbreviation);
        if (monthIndex === -1) continue;

        // Determine the year — if month is before current month, it might be next year
        const currentMonth = new Date().getMonth();
        let dateYear = year;
        if (monthIndex < currentMonth - 1) {
          dateYear = year + 1;
        }

        const dateStr = `${dateYear}-${String(monthIndex + 1).padStart(2, '0')}-${String(day).padStart(2, '0')}`;
        results.push({
          dateText: dateStr,
          price,
          available: !isUnavailable && price > 0,
        });
      }
      return results;
    }, currentYear);

    // Keep only the days inside the requested range (string comparison on YYYY-MM-DD).
    for (const d of dayData) {
      if (d.dateText >= dateRange.from && d.dateText <= dateRange.to) {
        prices.push({
          platformId: 'airbnb',
          date: d.dateText,
          price: d.price,
          isAvailable: d.available,
          minStayNights: 3,
          syncedAt: new Date().toISOString(),
        });
      }
    }

    // Stop once the last parsed day reaches the target end date. With nothing
    // parsed, "now" is used as the latest date.
    const latestDate = dayData.length > 0
      ? new Date(dayData[dayData.length - 1].dateText)
      : new Date();
    if (latestDate >= targetEnd) break;

    // Scroll the virtuoso scroller down to load more months.
    const scrolled = await page.evaluate(() => {
      const scroller = document.querySelector('[data-testid="virtuoso-scroller"]');
      if (scroller) {
        const prevTop = scroller.scrollTop;
        scroller.scrollTop += 800;
        return scroller.scrollTop > prevTop;
      }
      return false;
    });

    if (!scrolled) {
      // Try clicking the next-month button as a fallback; give up when there is none.
      const nextBtn = page.locator('button[aria-label*="Move forward"], button[aria-label*="next month"]').first();
      if (await nextBtn.isVisible({ timeout: 2000 }).catch(() => false)) {
        await nextBtn.click();
        await page.waitForTimeout(2000);
      } else {
        break;
      }
    } else {
      await page.waitForTimeout(1500);
    }
    scrollAttempts++;
  }

  // Deduplicate by date: overlapping scroll positions yield the same days more than once.
  const seen = new Set<string>();
  const unique = prices.filter((p) => {
    if (seen.has(p.date)) return false;
    seen.add(p.date);
    return true;
  });
  console.log(`[airbnb] Scraped ${unique.length} daily prices`);
  return unique.sort((a, b) => a.date.localeCompare(b.date));
}
// ── Apply Price Changes via Calendar ─────────────────────────────────────────
/**
 * Applies nightly price changes one date at a time through the calendar UI.
 *
 * For each change the calendar is reloaded, the day cell is clicked, the price
 * input is overwritten and "Save" is clicked. A failure on one date is
 * recorded in that date's result and does not stop the remaining changes.
 *
 * Date-only ISO strings (`YYYY-MM-DD`) are parsed by `Date` as UTC midnight,
 * so their label and day number are read in UTC. Reading them in local time
 * would target the previous calendar day whenever the process runs west of
 * UTC. Any other date format is read in local time, as it is parsed.
 *
 * NOTE(review): the `td:has-text("<day>")` fallback is a substring match, so
 * day 5 may also match cells for 15 or 25 — confirm against the live DOM.
 *
 * @param page - Playwright page used for navigation and editing.
 * @param changes - Dates and the new price to set for each.
 * @returns One result per change, in input order, with `applied` and, on
 *   failure, an `error` description.
 * @throws Error when the listing ID cannot be discovered.
 */
export async function applyPriceChangesFlow(
  page: Page,
  changes: Array<{ date: string; newPrice: number }>,
): Promise<Array<{ date: string; newPrice: number; applied: boolean; error?: string }>> {
  const listingId = await discoverListingId(page);
  const results: Array<{ date: string; newPrice: number; applied: boolean; error?: string }> = [];

  for (const change of changes) {
    try {
      const dateObj = new Date(change.date);
      if (isNaN(dateObj.getTime())) {
        results.push({ ...change, applied: false, error: 'Invalid date' });
        continue;
      }

      // Read the calendar day in the same zone the string was parsed in.
      const isDateOnlyIso = /^\d{4}-\d{2}-\d{2}$/.test(change.date.trim());
      const label = dateObj.toLocaleDateString('en-US', {
        month: 'long',
        day: 'numeric',
        year: 'numeric',
        ...(isDateOnlyIso ? { timeZone: 'UTC' } : {}),
      });
      const dayOfMonth = isDateOnlyIso ? dateObj.getUTCDate() : dateObj.getDate();

      // Navigate to the calendar
      await page.goto(URLS.CALENDAR(listingId), { waitUntil: 'domcontentloaded' });
      await page.waitForTimeout(2000);

      // Click on the specific date cell
      const dayCell = page.locator(`td[aria-label*="${label}"], td:has-text("${dayOfMonth}")`).first();
      if (!(await dayCell.isVisible({ timeout: 3000 }).catch(() => false))) {
        results.push({ ...change, applied: false, error: 'Date cell not found' });
        continue;
      }
      await dayCell.click();
      await page.waitForTimeout(1000);

      // Look for price input in the sidebar
      const priceInput = page.locator('input[aria-label*="price"], input[name*="price"]').first();
      if (!(await priceInput.isVisible({ timeout: 3000 }).catch(() => false))) {
        results.push({ ...change, applied: false, error: 'Price input not found' });
        continue;
      }
      await priceInput.click({ clickCount: 3 }); // Select all
      await priceInput.fill(String(change.newPrice));

      // Save
      const saveBtn = page.locator('button:has-text("Save")').first();
      if (await saveBtn.isVisible({ timeout: 2000 }).catch(() => false)) {
        await saveBtn.click();
        await page.waitForTimeout(2000);
        results.push({ ...change, applied: true });
      } else {
        results.push({ ...change, applied: false, error: 'Save button not found' });
      }
    } catch (err: unknown) {
      // Thrown values are not guaranteed to be Error instances.
      const message = err instanceof Error ? err.message : String(err);
      results.push({ ...change, applied: false, error: message });
    }
  }

  return results;
}

View File

@@ -0,0 +1,86 @@
// Airbnb Host Dashboard selectors mapped from live DOM exploration (March 2026)
// These target the accessibility tree structure rather than fragile CSS classes.
/** Origin shared by every Airbnb URL in the table below. */
const AIRBNB_ORIGIN = 'https://www.airbnb.com';

/**
 * Absolute Airbnb URLs used by the scraper flows. Entries that depend on a
 * listing ID or a confirmation code are functions that build the URL.
 */
export const URLS = {
  HOST_HOME: `${AIRBNB_ORIGIN}/hosting`,
  CALENDAR: (listingId: string) => `${AIRBNB_ORIGIN}/multicalendar/${listingId}`,
  LISTINGS: `${AIRBNB_ORIGIN}/hosting/listings`,
  LISTING_EDITOR: (listingId: string) =>
    `${AIRBNB_ORIGIN}/hosting/listings/editor/${listingId}/details/photo-tour`,
  RESERVATIONS: `${AIRBNB_ORIGIN}/hosting/reservations`,
  RESERVATIONS_COMPLETED: `${AIRBNB_ORIGIN}/hosting/reservations/completed`,
  RESERVATIONS_ALL: `${AIRBNB_ORIGIN}/hosting/reservations/all`,
  RESERVATION_DETAIL: (confirmationCode: string) =>
    `${AIRBNB_ORIGIN}/hosting/reservations/details/${confirmationCode}`,
  EARNINGS_PERFORMANCE: `${AIRBNB_ORIGIN}/users/transaction_history`,
  INSIGHTS_REVIEWS: `${AIRBNB_ORIGIN}/progress/reviews`,
  INSIGHTS_VIEWS: (listingId: string) => `${AIRBNB_ORIGIN}/progress/views/${listingId}`,
} as const;
/**
 * Selector strings used by the Airbnb scraping flows, grouped by the host page they target.
 *
 * Tab selectors match on the ARIA role (`[role="tab"]`) with a `button` fallback.
 * A bare `tab` type selector would only match a literal `<tab>` element, which HTML does
 * not define, so that alternative could never match.
 */
export const SELECTORS = {
  // ── Navigation ───────────────────────────────────────────────────────
  NAV_PRIMARY: 'nav[aria-label="Primary"]',
  NAV_TODAY: 'a[href="/hosting"]',
  NAV_CALENDAR: 'a[href="/calendar-router"]',
  NAV_LISTINGS: 'a[href="/hosting/listings"]',
  NAV_MESSAGES: 'a[href="/hosting/messages"]',
  NAV_MENU_BUTTON: 'button[aria-label="Main navigation menu"]',
  // ── Login ────────────────────────────────────────────────────────────
  LOGIN_EMAIL: 'input[type="email"], input[name="email"]',
  LOGIN_PASSWORD: 'input[type="password"], input[name="password"]',
  LOGIN_SUBMIT: 'button[type="submit"]',
  LOGIN_CONTINUE: 'button:has-text("Continue")',
  MFA_INPUT: 'input[inputmode="numeric"]',
  // ── Reservations Page ────────────────────────────────────────────────
  RESERVATIONS_TAB_UPCOMING: '[role="tab"]:has-text("Upcoming"), button:has-text("Upcoming")',
  RESERVATIONS_TAB_COMPLETED: '[role="tab"]:has-text("Completed"), button:has-text("Completed")',
  RESERVATIONS_TAB_CANCELLED: '[role="tab"]:has-text("Cancelled"), button:has-text("Cancelled")',
  RESERVATIONS_TAB_ALL: '[role="tab"]:has-text("All"), button:has-text("All")',
  RESERVATIONS_TABLE: 'table',
  RESERVATIONS_TABLE_ROWS: 'table tbody tr',
  RESERVATIONS_DETAIL_BUTTON: 'button:has-text("Details"), a:has-text("Details")',
  RESERVATIONS_EXPORT_BUTTON: 'button:has-text("Export")',
  // Reservation detail modal
  RESERVATION_DETAIL_MODAL: '[role="dialog"], [aria-modal="true"]',
  RESERVATION_DETAIL_CLOSE: 'button:has-text("×"), button[aria-label="Close"]',
  // ── Earnings / Performance Page ──────────────────────────────────────
  EARNINGS_NAV_PERFORMANCE: 'text=Performance',
  EARNINGS_NAV_UPCOMING: 'text=Upcoming',
  EARNINGS_NAV_PAID: 'text=Paid',
  EARNINGS_NAV_REPORTS: 'text=Reports',
  EARNINGS_MONTH_LABEL: 'text=/^\\w+ \\d{4}$/', // e.g., "March 2026"
  EARNINGS_PAID_AMOUNT: 'text=/^\\$[\\d,]+\\.\\d{2}$/',
  EARNINGS_PERFORMANCE_STATS: 'button:has-text("Performance stats")',
  EARNINGS_PAID_BREAKDOWN: 'button:has-text("Paid breakdown")',
  // ── Insights / Views Page ────────────────────────────────────────────
  INSIGHTS_TAB_REVIEWS: '[role="tab"]:has-text("Reviews"), button:has-text("Reviews")',
  INSIGHTS_TAB_VIEWS: '[role="tab"]:has-text("Views"), button:has-text("Views")',
  INSIGHTS_TAB_OPPORTUNITIES: '[role="tab"]:has-text("Opportunities"), button:has-text("Opportunities")',
  INSIGHTS_TAB_SUPERHOST: '[role="tab"]:has-text("Superhost"), button:has-text("Superhost")',
  INSIGHTS_VIEWS_COUNT: 'text=/^\\d+$/', // "161"
  INSIGHTS_VIEWS_LABEL: 'text="Views, past 30 days"',
  INSIGHTS_BOOKINGS_LABEL: 'text="New bookings, past 30 days"',
  INSIGHTS_BOOKING_RATE_LABEL: 'text="Booking rate"',
  INSIGHTS_OVERALL_RATING: 'text=/★ [\\d.]+ overall rating/',
  // ── Calendar / Pricing ───────────────────────────────────────────────
  CALENDAR_DAY_CELL: 'td[data-testid]',
  CALENDAR_PRICE_DISPLAY: '[data-testid="price-item-container"]',
  CALENDAR_SIDEBAR: '[data-testid="calendar-sidebar"]',
  CALENDAR_PRICE_INPUT: 'input[aria-label*="price"], input[name*="price"]',
  CALENDAR_SAVE_BUTTON: 'button:has-text("Save")',
  CALENDAR_NEXT_MONTH: 'button[aria-label="Move forward to switch to the next month"]',
  CALENDAR_PREV_MONTH: 'button[aria-label="Move backward to switch to the previous month"]',
  // ── Listings Page ────────────────────────────────────────────────────
  LISTING_CARD: 'a[href*="/hosting/listings/editor/"]',
  LISTING_STATUS_BADGE: 'text="Listed"',
  LISTING_TITLE: 'h1, [data-testid="listing-title"]',
  // ── General ──────────────────────────────────────────────────────────
  LOADING_SPINNER: '[role="progressbar"], [aria-busy="true"]',
  PAGE_MAIN: 'main',
} as const;

View File

@@ -0,0 +1,31 @@
import type { PlatformAdapterInterface } from './PlatformAdapter.js';
/**
 * In-memory lookup table of platform adapters, keyed by each adapter's `platformId`.
 * Registering a second adapter with the same id replaces the first.
 */
class AdapterRegistry {
  private adapters = new Map<string, PlatformAdapterInterface>();

  /** Stores `adapter` under its own `platformId`. */
  register(adapter: PlatformAdapterInterface): void {
    const { platformId } = adapter;
    this.adapters.set(platformId, adapter);
  }

  /** Returns the adapter registered for `platformId`, or `undefined` when there is none. */
  get(platformId: string): PlatformAdapterInterface | undefined {
    return this.adapters.get(platformId);
  }

  /**
   * Returns the adapter registered for `platformId`.
   * @throws Error when no adapter is registered under that id.
   */
  getOrThrow(platformId: string): PlatformAdapterInterface {
    const found = this.get(platformId);
    if (found) {
      return found;
    }
    throw new Error(`No adapter registered for platform: ${platformId}`);
  }

  /** Lists the registered platform ids in registration order. */
  list(): string[] {
    return [...this.adapters.keys()];
  }

  /** Reports whether an adapter is registered for `platformId`. */
  has(platformId: string): boolean {
    return this.adapters.has(platformId);
  }
}

/** Process-wide registry instance shared by the routes and the worker. */
export const registry = new AdapterRegistry();

View File

@@ -0,0 +1,36 @@
/**
 * Contract shared by every platform adapter. The adapter registry stores instances under
 * `platformId`; the job worker and the HTTP routes call the methods below.
 * Scrape and price payloads are typed `any`, so their shape is whatever each adapter returns.
 */
export interface PlatformAdapterInterface {
/** Key the adapter is registered and looked up under. */
readonly platformId: string;
/** Human-readable platform name. */
readonly displayName: string;
/** Signs in with the given credentials. The login route reports a rejection as a failed login. */
login(credentials: { email: string; password: string }): Promise<void>;
/** Resolves to whether the adapter currently considers its session usable. */
isSessionValid(): Promise<boolean>;
/** Returns one performance snapshot; the worker persists it as a single row. */
scrapePerformanceMetrics(): Promise<any>;
/** Returns the scraped reservations; the worker upserts one row per element. */
scrapeReservations(): Promise<any[]>;
/** Returns per-day pricing for the range; the worker passes `from`/`to` as YYYY-MM-DD strings. */
scrapePricing(dateRange: { from: string; to: string }): Promise<any[]>;
/** Describes the effect of `changes` — presumably without applying them; confirm per adapter. */
previewPriceChanges(changes: any[]): Promise<any>;
/** Applies `changes` on the platform; called by the price-apply route. */
applyPriceChanges(changes: any[]): Promise<any>;
/** Reports adapter health; called by the platform test route. */
selfTest(): Promise<{
platformId: string;
healthy: boolean;
message: string;
checkedAt: string;
}>;
}
/**
 * Abstract base class for concrete adapters. It declares every member of
 * `PlatformAdapterInterface` as abstract and contributes no behavior of its own,
 * so each subclass must supply all of them.
 */
export abstract class PlatformAdapter implements PlatformAdapterInterface {
/** Key the adapter is registered under. */
abstract readonly platformId: string;
/** Human-readable platform name. */
abstract readonly displayName: string;
/** Signs in with the given credentials. */
abstract login(credentials: { email: string; password: string }): Promise<void>;
/** Resolves to whether the adapter currently considers its session usable. */
abstract isSessionValid(): Promise<boolean>;
/** Returns one performance snapshot (adapter-defined shape). */
abstract scrapePerformanceMetrics(): Promise<any>;
/** Returns the scraped reservations (adapter-defined element shape). */
abstract scrapeReservations(): Promise<any[]>;
/** Returns per-day pricing for the given `from`/`to` range. */
abstract scrapePricing(dateRange: { from: string; to: string }): Promise<any[]>;
/** Describes the effect of `changes` — presumably without applying them; confirm per adapter. */
abstract previewPriceChanges(changes: any[]): Promise<any>;
/** Applies `changes` on the platform. */
abstract applyPriceChanges(changes: any[]): Promise<any>;
/** Reports adapter health. */
abstract selfTest(): Promise<{
platformId: string;
healthy: boolean;
message: string;
checkedAt: string;
}>;
}

View File

@@ -0,0 +1,79 @@
import { PlatformAdapter } from '../base/PlatformAdapter.js';
import {
generatePerformanceSnapshot,
generateReservations,
generateDailyPrices,
} from './mock-data.js';
/**
 * Development adapter that never touches a real platform. Scrape methods return generated
 * data from `mock-data`, and price operations echo the requested changes back.
 */
export class MockAdapter extends PlatformAdapter {
  readonly platformId = 'mock';
  readonly displayName = 'Mock Platform';

  /** Accepts any credentials; there is nothing to log in to. */
  async login(_credentials: { email: string; password: string }): Promise<void> {
    // No-op for mock adapter
  }

  /** The mock session is always valid. */
  async isSessionValid(): Promise<boolean> {
    return true;
  }

  /** Returns a freshly generated performance snapshot. */
  async scrapePerformanceMetrics(): Promise<any> {
    return generatePerformanceSnapshot(this.platformId);
  }

  /** Returns a freshly generated list of reservations. */
  async scrapeReservations(): Promise<any[]> {
    return generateReservations(this.platformId);
  }

  /** Returns generated daily prices for every day from `from` to `to`. */
  async scrapePricing(dateRange: { from: string; to: string }): Promise<any[]> {
    return generateDailyPrices(this.platformId, dateRange.from, dateRange.to);
  }

  /**
   * Builds a preview of the requested price changes.
   *
   * When a change carries no `currentPrice`, a random stand-in is generated once and used
   * for `currentPrice`, `diff` and `diffPercent` alike, so the three reported numbers always
   * agree with each other.
   *
   * @param changes Items read as `{ date, newPrice, currentPrice? }`.
   */
  async previewPriceChanges(changes: any[]): Promise<any> {
    return {
      platformId: this.platformId,
      previewedAt: new Date().toISOString(),
      changesCount: changes.length,
      changes: changes.map((c) => {
        const currentPrice = c.currentPrice ?? Math.round(Math.random() * 100 + 150);
        const diff = c.newPrice - currentPrice;
        return {
          date: c.date,
          currentPrice,
          proposedPrice: c.newPrice,
          diff,
          diffPercent: Number(((diff / currentPrice) * 100).toFixed(1)),
        };
      }),
    };
  }

  /** Pretends to apply the changes after a short delay and reports every one as applied. */
  async applyPriceChanges(changes: any[]): Promise<any> {
    // Simulate network delay
    await new Promise((resolve) => setTimeout(resolve, 500));
    return {
      platformId: this.platformId,
      appliedAt: new Date().toISOString(),
      success: true,
      appliedCount: changes.length,
      results: changes.map((c) => ({
        date: c.date,
        newPrice: c.newPrice,
        applied: true,
      })),
    };
  }

  /** Always reports the adapter as healthy. */
  async selfTest(): Promise<{
    platformId: string;
    healthy: boolean;
    message: string;
    checkedAt: string;
  }> {
    return {
      platformId: this.platformId,
      healthy: true,
      message: 'Mock adapter is operational',
      checkedAt: new Date().toISOString(),
    };
  }
}

View File

@@ -0,0 +1,151 @@
// Pool of placeholder guest names that generated reservations pick from at random.
const GUEST_NAMES = [
'Sarah Johnson',
'Michael Chen',
'Emily Rodriguez',
'James Williams',
'Olivia Martinez',
'David Kim',
'Sophia Brown',
'Daniel Taylor',
'Isabella Anderson',
'Matthew Thomas',
'Ava Wilson',
'Christopher Lee',
'Mia Garcia',
'Andrew Jackson',
'Charlotte White',
];
// Status values a generated reservation can carry; used for the type of `status` in generateReservations.
const RESERVATION_STATUSES = ['confirmed', 'checked_in', 'completed', 'cancelled'] as const;
/** Returns a random integer in the inclusive range [min, max]. */
function randomBetween(min: number, max: number): number {
  const span = max - min + 1;
  return min + Math.floor(Math.random() * span);
}
/** Returns a random number in [min, max), rounded to `decimals` fraction digits. */
function randomFloat(min: number, max: number, decimals = 2): number {
  const raw = min + Math.random() * (max - min);
  const rounded = raw.toFixed(decimals);
  return Number(rounded);
}
/** Picks one element of `arr` uniformly at random. */
function randomItem<T>(arr: readonly T[]): T {
  const index = Math.floor(Math.random() * arr.length);
  return arr[index];
}
/**
 * Returns the YYYY-MM-DD date that lies `days` days after `dateStr` (negative moves back).
 *
 * A date-only string is parsed as UTC midnight and the result is formatted from UTC, so the
 * day arithmetic is done in UTC as well. Using the local-time setters here lets a
 * daylight-saving change shift the instant by an hour, which in time zones west of UTC can
 * return the same calendar day again instead of the next one.
 *
 * @throws RangeError when `dateStr` cannot be parsed as a date.
 */
function addDays(dateStr: string, days: number): string {
  const d = new Date(dateStr);
  d.setUTCDate(d.getUTCDate() + days);
  return d.toISOString().split('T')[0];
}
/**
 * Returns the weekday of `dateStr` as 0 (Sunday) through 6 (Saturday).
 *
 * A date-only string is parsed as UTC midnight, so the weekday is read in UTC too. Reading
 * it in local time would report the previous day in every time zone west of UTC.
 */
function getDayOfWeek(dateStr: string): number {
  return new Date(dateStr).getUTCDay();
}
/**
 * Builds one randomly generated performance snapshot for `platformId`.
 * Listing views are drawn from 30–70% of search views, and total revenue from
 * 2–5 times (average daily rate × bookings).
 */
export function generatePerformanceSnapshot(platformId: string) {
  const viewsSearch = randomBetween(500, 2000);
  const listingFloor = Math.floor(viewsSearch * 0.3);
  const listingCeiling = Math.floor(viewsSearch * 0.7);
  const viewsListing = randomBetween(listingFloor, listingCeiling);
  const bookingsCount = randomBetween(5, 25);
  const conversionRate = randomFloat(1.5, 8.0);
  const occupancyRate = randomFloat(60, 85);
  const avgDailyRate = randomFloat(150, 250);

  const revenuePerNightBlock = avgDailyRate * bookingsCount;
  const revenueTotal = randomFloat(revenuePerNightBlock * 2, revenuePerNightBlock * 5);

  const capturedAt = new Date().toISOString();
  const rawJson = {
    source: 'mock',
    generatedAt: new Date().toISOString(),
  };

  return {
    platformId,
    capturedAt,
    periodLabel: 'last_30_days',
    viewsSearch,
    viewsListing,
    conversionRate,
    bookingsCount,
    occupancyRate,
    avgDailyRate,
    revenueTotal,
    rawJson,
  };
}
/**
 * Generates `count` random reservations for `platformId`, sorted by check-in date.
 *
 * Check-in falls between 30 days ago and 60 days ahead; stays last 2–7 nights.
 * The generated fields are kept consistent with each other:
 * - `status` is `completed` once check-out lies before today, `checked_in` while a stay that
 *   started earlier is still running, and otherwise `confirmed` (about 90%) or `cancelled`.
 * - `bookedAt` is 14–90 days before whichever is earlier, now or check-in, so a reservation
 *   is never booked after its own check-in or in the future.
 *
 * @param platformId Platform the reservations are attributed to.
 * @param count Number of reservations; defaults to a random value from 5 to 10.
 */
export function generateReservations(platformId: string, count: number = randomBetween(5, 10)) {
  const MS_PER_DAY = 86400000;
  const reservations = [];
  const today = new Date().toISOString().split('T')[0];

  for (let i = 0; i < count; i++) {
    const daysOffset = randomBetween(-30, 60);
    const checkIn = addDays(today, daysOffset);
    const nights = randomBetween(2, 7);
    const checkOut = addDays(checkIn, nights);
    const nightlyRate = randomFloat(150, 300);
    const cleaningFee = randomFloat(75, 150);
    const platformFee = randomFloat(nightlyRate * nights * 0.03, nightlyRate * nights * 0.05);
    const totalPayout = Number((nightlyRate * nights + cleaningFee - platformFee).toFixed(2));
    const guestsCount = randomBetween(1, 6);

    // Days from today to check-out; negative means the stay has already ended.
    const checkOutOffset = daysOffset + nights;
    let status: (typeof RESERVATION_STATUSES)[number];
    if (checkOutOffset < 0) {
      status = 'completed';
    } else if (daysOffset < 0) {
      status = 'checked_in';
    } else {
      status = Math.random() > 0.1 ? 'confirmed' : 'cancelled';
    }

    const bookedDaysAgo = randomBetween(14, 90);
    // Anchor the booking lead time to check-in for past stays and to "now" for future ones.
    const bookingAnchorMs = Math.min(Date.now(), Date.parse(checkIn));

    reservations.push({
      platformId,
      platformReservationId: `MOCK-${platformId.toUpperCase()}-${Date.now()}-${i}`,
      guestName: randomItem(GUEST_NAMES),
      checkIn,
      checkOut,
      nights,
      guestsCount,
      nightlyRate,
      cleaningFee,
      platformFee: Number(platformFee.toFixed(2)),
      totalPayout,
      status,
      bookedAt: new Date(bookingAnchorMs - bookedDaysAgo * MS_PER_DAY).toISOString(),
      rawJson: {
        source: 'mock',
        generatedAt: new Date().toISOString(),
      },
    });
  }

  return reservations.sort(
    (a, b) => new Date(a.checkIn).getTime() - new Date(b.checkIn).getTime(),
  );
}
/**
 * Generates one random price entry per day from `from` through `to` (both inclusive,
 * YYYY-MM-DD). Days whose weekday index is 5 or 6 get a higher price band and a 2–3 night
 * minimum stay; roughly 85% of days are marked available.
 * Returns an empty list when `from` sorts after `to`.
 */
export function generateDailyPrices(platformId: string, from: string, to: string) {
  const prices = [];

  for (let day = from; day <= to; day = addDays(day, 1)) {
    const weekday = getDayOfWeek(day);
    const isWeekend = weekday === 5 || weekday === 6;

    let price: number;
    if (isWeekend) {
      price = randomFloat(200, 300);
    } else {
      price = randomFloat(150, 220);
    }
    const isAvailable = Math.random() > 0.15;
    let minStayNights = 1;
    if (isWeekend) {
      minStayNights = randomBetween(2, 3);
    }

    prices.push({
      platformId,
      date: day,
      price,
      isAvailable,
      minStayNights,
      syncedAt: new Date().toISOString(),
    });
  }

  return prices;
}

View File

@@ -0,0 +1,48 @@
import { PlatformAdapter } from '../base/PlatformAdapter.js';
/** Message carried by every error the unimplemented VRBO adapter raises. */
const VRBO_NOT_IMPLEMENTED = 'VRBO adapter not yet implemented';

/**
 * Placeholder adapter for VRBO. Every operation rejects with a "not yet implemented" error,
 * except `selfTest`, which resolves with an unhealthy status so the platform can be listed
 * and probed without throwing.
 */
export class VrboAdapter extends PlatformAdapter {
  readonly platformId = 'vrbo';
  readonly displayName = 'VRBO';

  /** Not implemented; always rejects. */
  async login(_credentials: { email: string; password: string }): Promise<void> {
    throw new Error(VRBO_NOT_IMPLEMENTED);
  }

  /** Not implemented; always rejects. */
  async isSessionValid(): Promise<boolean> {
    throw new Error(VRBO_NOT_IMPLEMENTED);
  }

  /** Not implemented; always rejects. */
  async scrapePerformanceMetrics(): Promise<any> {
    throw new Error(VRBO_NOT_IMPLEMENTED);
  }

  /** Not implemented; always rejects. */
  async scrapeReservations(): Promise<any[]> {
    throw new Error(VRBO_NOT_IMPLEMENTED);
  }

  /** Not implemented; always rejects. */
  async scrapePricing(_dateRange: { from: string; to: string }): Promise<any[]> {
    throw new Error(VRBO_NOT_IMPLEMENTED);
  }

  /** Not implemented; always rejects. */
  async previewPriceChanges(_changes: any[]): Promise<any> {
    throw new Error(VRBO_NOT_IMPLEMENTED);
  }

  /** Not implemented; always rejects. */
  async applyPriceChanges(_changes: any[]): Promise<any> {
    throw new Error(VRBO_NOT_IMPLEMENTED);
  }

  /** Resolves with `healthy: false` and the message 'Not implemented'. */
  async selfTest(): Promise<{
    platformId: string;
    healthy: boolean;
    message: string;
    checkedAt: string;
  }> {
    const checkedAt = new Date().toISOString();
    return {
      platformId: this.platformId,
      healthy: false,
      message: 'Not implemented',
      checkedAt,
    };
  }
}

View File

@@ -0,0 +1,20 @@
import type { Page } from 'playwright';
/** VRBO login flow placeholder: always returns a promise rejected with a not-implemented error. */
export function loginFlow(_page: Page, _email: string, _password: string): Promise<void> {
  return Promise.reject(new Error('VRBO login flow not yet implemented'));
}
/** VRBO performance flow placeholder: always returns a promise rejected with a not-implemented error. */
export function scrapePerformanceFlow(_page: Page): Promise<any> {
  return Promise.reject(new Error('VRBO scrapePerformance flow not yet implemented'));
}
/** VRBO pricing flow placeholder: always returns a promise rejected with a not-implemented error. */
export function scrapePricingFlow(
  _page: Page,
  _dateRange: { from: string; to: string },
): Promise<any[]> {
  return Promise.reject(new Error('VRBO scrapePricing flow not yet implemented'));
}
/** VRBO reservations flow placeholder: always returns a promise rejected with a not-implemented error. */
export function scrapeReservationsFlow(_page: Page): Promise<any[]> {
  return Promise.reject(new Error('VRBO scrapeReservations flow not yet implemented'));
}

View File

@@ -0,0 +1,18 @@
/**
 * Selector names reserved for the VRBO flows. Every value is still an empty string,
 * so none of these can locate an element yet; they need real selectors before use.
 */
export const SELECTORS = {
LOGIN_EMAIL: '',
LOGIN_PASSWORD: '',
LOGIN_SUBMIT: '',
DASHBOARD_NAV: '',
PERFORMANCE_TAB: '',
RESERVATIONS_TAB: '',
PRICING_TAB: '',
DATE_PICKER_FROM: '',
DATE_PICKER_TO: '',
METRICS_CONTAINER: '',
RESERVATIONS_TABLE: '',
PRICING_CALENDAR: '',
PRICE_INPUT: '',
SAVE_PRICE_BUTTON: '',
NEXT_PAGE_BUTTON: '',
LOADING_SPINNER: '',
} as const;

270
apps/scraper/src/index.ts Normal file
View File

@@ -0,0 +1,270 @@
import dotenv from 'dotenv';
import { fileURLToPath } from 'url';
import { dirname, resolve } from 'path';
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Load the .env file located three directories above this module.
// NOTE: static imports are hoisted, so the modules imported below are evaluated before this
// line runs; anything they read from process.env at import time will not see these values.
dotenv.config({ path: resolve(__dirname, '../../../.env') });
import Fastify from 'fastify';
import { registry } from './adapters/base/AdapterRegistry.js';
import { MockAdapter } from './adapters/mock/MockAdapter.js';
import { AirbnbAdapter } from './adapters/airbnb/AirbnbAdapter.js';
import { VrboAdapter } from './adapters/vrbo/VrboAdapter.js';
import { jobQueue, type JobType } from './queue/jobQueue.js';
import { startWorker } from './queue/worker.js';
// Register adapters: instantiate each adapter class in turn and add it to the shared registry
[MockAdapter, AirbnbAdapter, VrboAdapter].forEach((AdapterClass) => {
  registry.register(new AdapterClass());
});

// HTTP server instance, with Fastify's built-in logger enabled
const app = Fastify({ logger: true });
// ── Health Check ──────────────────────────────────────────────────────────────
// Reports that the service is up, the current time, and the registered adapter ids.
app.get('/health', async () => ({
  status: 'ok',
  service: 'scraper',
  timestamp: new Date().toISOString(),
  adapters: registry.list(),
}));
// ── Create Scrape Job ─────────────────────────────────────────────────────────
// Validates the request and enqueues a job for the worker; responds 201 with the new job.
// Responds 400 when a field is missing, the platform is unknown, or the job type is invalid.
app.post<{
  Body: { platformId: string; jobType: JobType; triggeredBy: string };
}>('/jobs', async (request, reply) => {
  // request.body is undefined when the client sends no payload; treat that as "all fields
  // missing" so the caller gets the 400 below instead of a destructuring TypeError (500).
  const {
    platformId,
    jobType,
    triggeredBy,
  }: Partial<{ platformId: string; jobType: JobType; triggeredBy: string }> = request.body ?? {};

  if (!platformId || !jobType || !triggeredBy) {
    return reply.status(400).send({ error: 'Missing required fields: platformId, jobType, triggeredBy' });
  }

  if (!registry.has(platformId)) {
    return reply.status(400).send({ error: `Unknown platform: ${platformId}` });
  }

  // The Body type is not enforced at runtime, so the job type is checked explicitly.
  const validJobTypes: JobType[] = ['performance', 'reservations', 'pricing', 'full'];
  if (!validJobTypes.includes(jobType)) {
    return reply.status(400).send({ error: `Invalid jobType. Must be one of: ${validJobTypes.join(', ')}` });
  }

  const job = jobQueue.enqueue({ platformId, jobType, triggeredBy });
  return reply.status(201).send(job);
});
// ── Get Job Status ────────────────────────────────────────────────────────────
// Returns the queued job with the given id, or 404 when the queue does not know it.
app.get<{ Params: { id: string } }>('/jobs/:id', async (request, reply) => {
  const found = jobQueue.getJob(request.params.id);
  return found ?? reply.status(404).send({ error: 'Job not found' });
});
// ── Platform Self-Test ────────────────────────────────────────────────────────
// Runs the adapter's selfTest and returns its result; a thrown error becomes a 500
// response shaped like an unhealthy self-test result.
app.post<{ Params: { id: string } }>('/platforms/:id/test', async (request, reply) => {
  const { id } = request.params;
  const adapter = registry.get(id);
  if (!adapter) {
    return reply.status(404).send({ error: `Unknown platform: ${id}` });
  }

  try {
    return await adapter.selfTest();
  } catch (err: any) {
    const failure = {
      platformId: id,
      healthy: false,
      message: err.message,
      checkedAt: new Date().toISOString(),
    };
    return reply.status(500).send(failure);
  }
});
// ── Platform Login ───────────────────────────────────────────────────────────
// Logs the adapter in with credentials taken from the request body, falling back to the
// <PLATFORM>_EMAIL / <PLATFORM>_PASSWORD environment variables. 400 when neither source
// provides both values; 500 with status 'login_failed' when the adapter rejects.
app.post<{
  Params: { id: string };
  Body: { email?: string; password?: string };
}>('/platforms/:id/login', async (request, reply) => {
  const { id } = request.params;
  const adapter = registry.get(id);
  if (!adapter) {
    return reply.status(404).send({ error: `Unknown platform: ${id}` });
  }

  const envPrefix = id.toUpperCase();
  const email = request.body?.email || process.env[`${envPrefix}_EMAIL`] || '';
  const password = request.body?.password || process.env[`${envPrefix}_PASSWORD`] || '';

  const haveCredentials = Boolean(email) && Boolean(password);
  if (!haveCredentials) {
    return reply.status(400).send({
      error: `Missing credentials. Provide email/password in body or set ${envPrefix}_EMAIL and ${envPrefix}_PASSWORD env vars.`,
    });
  }

  try {
    await adapter.login({ email, password });
  } catch (err: any) {
    return reply.status(500).send({
      platformId: id,
      status: 'login_failed',
      message: err.message,
      at: new Date().toISOString(),
    });
  }

  return {
    platformId: id,
    status: 'logged_in',
    message: 'Login successful. Session cookies saved.',
    at: new Date().toISOString(),
  };
});
// ── Platform Session Check ──────────────────────────────────────────────────
// Asks the adapter whether its session is valid; a thrown error becomes a 500 response
// with sessionValid: false and the error message.
app.get<{ Params: { id: string } }>('/platforms/:id/session', async (request, reply) => {
  const { id } = request.params;
  const adapter = registry.get(id);
  if (!adapter) {
    return reply.status(404).send({ error: `Unknown platform: ${id}` });
  }

  let sessionValid: boolean;
  try {
    sessionValid = await adapter.isSessionValid();
  } catch (err: any) {
    return reply.status(500).send({
      platformId: id,
      sessionValid: false,
      error: err.message,
      checkedAt: new Date().toISOString(),
    });
  }

  return {
    platformId: id,
    sessionValid,
    checkedAt: new Date().toISOString(),
  };
});
// ── Debug DOM Inspection ────────────────────────────────────────────────────
// Navigates the adapter's browser page to ?url= and returns a structural summary of the DOM
// (table headers/rows, data-testid values, price-looking text, links with numeric ids, and
// the first 2000 characters of <main>).
//
// SECURITY NOTE(review): this endpoint drives the adapter's browser to a caller-supplied URL
// and returns page content. Only http(s) URLs are accepted below, but the route itself is
// unauthenticated here — confirm it is not reachable in production deployments.
app.get<{
  Params: { id: string };
  Querystring: { url: string };
}>('/platforms/:id/debug-dom', async (request, reply) => {
  const adapter = registry.get(request.params.id);
  if (!adapter) {
    return reply.status(404).send({ error: `Unknown platform: ${request.params.id}` });
  }

  const url = (request.query as any).url;
  if (!url) {
    return reply.status(400).send({ error: 'Provide ?url= parameter' });
  }

  // Reject anything that is not an absolute http(s) URL before handing it to the browser.
  let target: URL;
  try {
    target = new URL(String(url));
  } catch {
    return reply.status(400).send({ error: 'Invalid ?url= parameter: must be an absolute URL' });
  }
  if (target.protocol !== 'http:' && target.protocol !== 'https:') {
    return reply.status(400).send({ error: 'Invalid ?url= parameter: only http(s) URLs are allowed' });
  }

  // ensureBrowser is not part of the adapter interface; only adapters that provide it can
  // be inspected. Answer clearly instead of failing with "is not a function".
  const ensureBrowser = (adapter as any).ensureBrowser;
  if (typeof ensureBrowser !== 'function') {
    return reply.status(501).send({
      error: `Platform ${request.params.id} does not support DOM inspection`,
    });
  }

  try {
    const page = await ensureBrowser.call(adapter);
    await page.goto(url, { waitUntil: 'domcontentloaded' });
    // Fixed pause to let client-side rendering settle before inspecting the DOM.
    await page.waitForTimeout(5000);

    const result = await page.evaluate(() => {
      // Table structure
      const headers: string[] = [];
      document.querySelectorAll('table th, table thead td').forEach((th: any) => {
        headers.push(th.textContent?.trim() || '');
      });

      const rows: string[][] = [];
      document.querySelectorAll('table tbody tr, table tr').forEach((tr: any) => {
        const cells: string[] = [];
        tr.querySelectorAll('td').forEach((td: any) => {
          cells.push(td.textContent?.trim().replace(/\n/g, ' | ') || '');
        });
        if (cells.length > 0) rows.push(cells);
      });

      // data-testid values
      const testIds = new Set<string>();
      document.querySelectorAll('[data-testid]').forEach(el => {
        testIds.add(el.getAttribute('data-testid') || '');
      });

      // Price text
      const priceTexts: string[] = [];
      const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);
      while (walker.nextNode()) {
        const text = walker.currentNode.textContent?.trim() || '';
        if (/^\$\d+$/.test(text)) priceTexts.push(text);
      }

      // All links with numeric IDs
      const links: { href: string; text: string }[] = [];
      document.querySelectorAll('a[href]').forEach((a: any) => {
        const href = a.getAttribute('href') || '';
        if (/\/\d{5,}/.test(href)) {
          links.push({ href, text: a.textContent?.trim().substring(0, 100) || '' });
        }
      });

      // Main text
      const mainText = document.querySelector('main')?.textContent?.substring(0, 2000) || '';

      return { headers, rows: rows.slice(0, 5), testIds: Array.from(testIds), priceTexts: priceTexts.slice(0, 20), links, mainText };
    });

    return reply.send({ url, ...result });
  } catch (err: any) {
    return reply.status(500).send({ error: err.message });
  }
});
// ── Apply Price Changes ───────────────────────────────────────────────────────
// Forwards the `changes` array to the adapter and returns whatever it reports.
// 404 for an unknown platform, 400 when `changes` is missing or not an array,
// 500 with the error message when the adapter throws.
app.post<{
  Params: { id: string };
  Body: { changes: any[] };
}>('/platforms/:id/price-apply', async (request, reply) => {
  const adapter = registry.get(request.params.id);
  if (!adapter) {
    return reply.status(404).send({ error: `Unknown platform: ${request.params.id}` });
  }

  // request.body is undefined when no payload is sent; optional chaining turns that into
  // the 400 below rather than a destructuring TypeError (500).
  const changes: unknown = request.body?.changes;
  if (!Array.isArray(changes)) {
    return reply.status(400).send({ error: 'Request body must include a changes array' });
  }

  try {
    const result = await adapter.applyPriceChanges(changes);
    return result;
  } catch (err: any) {
    return reply.status(500).send({ error: err.message });
  }
});
// ── Start Server ──────────────────────────────────────────────────────────────
// Port comes from SCRAPER_PORT; an unset, non-numeric or zero value falls back to 3001.
const PORT = Number(process.env.SCRAPER_PORT) || 3001;

/**
 * Starts the job worker, then the HTTP listener on all interfaces.
 * Any startup failure is logged and terminates the process with exit code 1.
 */
async function start(): Promise<void> {
  const listenOptions = { port: PORT, host: '0.0.0.0' };
  try {
    startWorker();
    await app.listen(listenOptions);
    console.log(`Scraper service running on port ${PORT}`);
  } catch (failure) {
    app.log.error(failure);
    process.exit(1);
  }
}

start();

View File

@@ -0,0 +1,78 @@
import { randomUUID } from 'node:crypto';
/** Lifecycle state of a job: created as 'pending', set to 'running' on dequeue, then 'completed' or 'failed'. */
export type JobStatus = 'pending' | 'running' | 'completed' | 'failed';
/** What a job scrapes; the worker runs all three scrapes for 'full'. */
export type JobType = 'performance' | 'reservations' | 'pricing' | 'full';
/** A scrape job as tracked by the in-memory queue. */
export interface Job {
/** Random UUID assigned when the job is enqueued. */
id: string;
/** Id of the adapter that should run the job. */
platformId: string;
jobType: JobType;
/** Caller-supplied label for who or what requested the job. */
triggeredBy: string;
status: JobStatus;
/** ISO timestamp set when the job is enqueued. */
createdAt: string;
/** ISO timestamp set when the job is dequeued; null until then. */
startedAt: string | null;
/** ISO timestamp set by the worker when the job completes or fails; null until then. */
completedAt: string | null;
/** Error message recorded by the worker on failure; null otherwise. */
errorMessage: string | null;
/** Number of rows the worker counted for the job; null until it completes. */
rowsCollected: number | null;
/** Scrape output stored by the worker on completion; null until then. */
result: any | null;
}
/**
 * In-memory FIFO job queue plus a lookup of every job by id.
 *
 * Finished jobs keep their full scrape `result`, so retaining all of them forever would grow
 * without bound in a long-running service. The queue therefore keeps at most
 * `maxFinishedJobs` completed/failed jobs and drops the oldest finished ones beyond that;
 * pending and running jobs are never dropped.
 */
class JobQueue {
  private jobs = new Map<string, Job>();
  private pending: string[] = [];

  /**
   * @param maxFinishedJobs How many completed/failed jobs stay retrievable via `getJob`.
   */
  constructor(private readonly maxFinishedJobs = 1000) {}

  /** Creates a pending job, appends it to the queue, and returns it. */
  enqueue(params: { platformId: string; jobType: JobType; triggeredBy: string }): Job {
    const job: Job = {
      id: randomUUID(),
      platformId: params.platformId,
      jobType: params.jobType,
      triggeredBy: params.triggeredBy,
      status: 'pending',
      createdAt: new Date().toISOString(),
      startedAt: null,
      completedAt: null,
      errorMessage: null,
      rowsCollected: null,
      result: null,
    };
    this.jobs.set(job.id, job);
    this.pending.push(job.id);
    return job;
  }

  /**
   * Removes the oldest pending job from the queue, marks it running with a start time,
   * and returns it. Returns `undefined` when nothing is pending.
   */
  dequeue(): Job | undefined {
    const id = this.pending.shift();
    if (!id) return undefined;
    const job = this.jobs.get(id);
    if (job) {
      job.status = 'running';
      job.startedAt = new Date().toISOString();
    }
    return job;
  }

  /** Returns the job with the given id, or `undefined` if unknown (or already evicted). */
  getJob(id: string): Job | undefined {
    return this.jobs.get(id);
  }

  /**
   * Merges `updates` into the job and returns it, or `undefined` for an unknown id.
   * When the job ends up completed or failed, old finished jobs beyond the retention
   * limit are evicted.
   */
  updateJob(id: string, updates: Partial<Pick<Job, 'status' | 'completedAt' | 'errorMessage' | 'rowsCollected' | 'result'>>): Job | undefined {
    const job = this.jobs.get(id);
    if (!job) return undefined;
    Object.assign(job, updates);
    if (JobQueue.isFinished(job)) {
      this.evictOldestFinished();
    }
    return job;
  }

  /** Number of jobs still waiting to be dequeued. */
  pendingCount(): number {
    return this.pending.length;
  }

  /** All retained jobs, in the order they were enqueued. */
  listJobs(): Job[] {
    return Array.from(this.jobs.values());
  }

  /** True for jobs in a terminal state. */
  private static isFinished(job: Job): boolean {
    return job.status === 'completed' || job.status === 'failed';
  }

  /** Drops the oldest finished jobs until at most `maxFinishedJobs` remain. */
  private evictOldestFinished(): void {
    let finished = 0;
    for (const job of this.jobs.values()) {
      if (JobQueue.isFinished(job)) finished++;
    }
    if (finished <= this.maxFinishedJobs) return;

    // Map iteration follows insertion order, so the first finished jobs seen are the oldest.
    for (const [id, job] of this.jobs) {
      if (finished <= this.maxFinishedJobs) break;
      if (JobQueue.isFinished(job)) {
        this.jobs.delete(id);
        finished--;
      }
    }
  }
}

/** Process-wide queue instance shared by the HTTP routes and the worker. */
export const jobQueue = new JobQueue();

View File

@@ -0,0 +1,288 @@
import postgres from 'postgres';
import { jobQueue, type Job } from './jobQueue.js';
import { registry } from '../adapters/base/AdapterRegistry.js';
// Convert any value to an ISO-8601 timestamp string. Falsy, unparseable or throwing input
// yields `fallback` when it is a non-empty string, otherwise the current time.
function safeDate(val: any, fallback?: string): string {
  const substitute = (): string => fallback || new Date().toISOString();

  if (!val) {
    return substitute();
  }
  try {
    const parsed = new Date(val);
    return isNaN(parsed.getTime()) ? substitute() : parsed.toISOString();
  } catch {
    return substitute();
  }
}
// Convert any value to a YYYY-MM-DD string taken from its UTC timestamp. Falsy, unparseable
// or throwing input yields `fallback` when it is a non-empty string, otherwise today's date.
function safeDateOnly(val: any, fallback?: string): string {
  const substitute = (): string => fallback || new Date().toISOString().split('T')[0];

  if (!val) {
    return substitute();
  }
  try {
    const parsed = new Date(val);
    return isNaN(parsed.getTime()) ? substitute() : parsed.toISOString().split('T')[0];
  } catch {
    return substitute();
  }
}
// Shared postgres client, created on first use (tagged template = auto-parameterized)
let sql: ReturnType<typeof postgres> | null = null;

/**
 * Returns the shared postgres client, creating it from DATABASE_URL on first use.
 * Returns null (after logging a warning) when DATABASE_URL is not set, which callers
 * treat as "skip the database write".
 */
function getSql() {
  if (sql) {
    return sql;
  }

  const connectionString = process.env.DATABASE_URL;
  if (!connectionString) {
    console.warn('[worker] DATABASE_URL not set - DB writes will be skipped');
    return null;
  }

  sql = postgres(connectionString);
  return sql;
}
/**
 * Builds the YYYY-MM-DD window used for pricing scrapes: today through `daysAhead` days
 * from now. Both ends are computed and formatted in UTC so they are exactly `daysAhead`
 * calendar days apart regardless of the server's time zone or daylight-saving changes.
 */
function pricingDateRange(daysAhead = 90): { from: string; to: string } {
  const start = new Date();
  const end = new Date(start);
  end.setUTCDate(end.getUTCDate() + daysAhead);
  return {
    from: start.toISOString().split('T')[0],
    to: end.toISOString().split('T')[0],
  };
}

/**
 * Runs one dequeued job: scrapes via the platform's adapter, persists what was scraped,
 * then marks the job completed in the in-memory queue and in the database.
 *
 * `rowsCollected` is 1 for a performance snapshot, the array length for reservations or
 * pricing, and the sum of all three for a 'full' job.
 *
 * @throws Error when no adapter is registered for the job's platform, when the job type is
 *   unknown, or when the adapter's scrape rejects. The caller records the failure.
 */
async function processJob(job: Job): Promise<void> {
  const adapter = registry.getOrThrow(job.platformId);
  let result: any;
  let rowsCollected = 0;

  switch (job.jobType) {
    case 'performance': {
      result = await adapter.scrapePerformanceMetrics();
      rowsCollected = 1;
      await persistPerformanceSnapshot(result);
      break;
    }
    case 'reservations': {
      result = await adapter.scrapeReservations();
      rowsCollected = result.length;
      await persistReservations(result);
      break;
    }
    case 'pricing': {
      result = await adapter.scrapePricing(pricingDateRange());
      rowsCollected = result.length;
      await persistDailyPrices(result);
      break;
    }
    case 'full': {
      // Each scrape is persisted as soon as it finishes, so earlier data is kept even if a
      // later step throws and fails the job.
      const perfResult = await adapter.scrapePerformanceMetrics();
      await persistPerformanceSnapshot(perfResult);

      const reservationsResult = await adapter.scrapeReservations();
      await persistReservations(reservationsResult);

      const pricingResult = await adapter.scrapePricing(pricingDateRange());
      await persistDailyPrices(pricingResult);

      rowsCollected = 1 + reservationsResult.length + pricingResult.length;
      result = {
        performance: perfResult,
        reservations: reservationsResult,
        pricing: pricingResult,
      };
      break;
    }
    default:
      throw new Error(`Unknown job type: ${job.jobType}`);
  }

  jobQueue.updateJob(job.id, {
    status: 'completed',
    completedAt: new Date().toISOString(),
    rowsCollected,
    result,
  });
  await persistJobStatus(job.id, 'completed', rowsCollected);
}
/**
 * Inserts one row into performance_snapshots from the scraped snapshot.
 * Does nothing when no database client is available. Insert errors are logged and
 * swallowed, so a failed write does not fail the job.
 */
async function persistPerformanceSnapshot(snapshot: any): Promise<void> {
const db = getSql();
if (!db) return;
try {
// NOTE(review): rawJson is passed already stringified. Depending on the raw_json column
// type and the driver's JSON serializer this may be stored as a JSON string rather than
// an object — confirm against the schema.
await db`
INSERT INTO performance_snapshots (platform_id, captured_at, period_label, views_search, views_listing, conversion_rate, bookings_count, occupancy_rate, avg_daily_rate, revenue_total, raw_json)
VALUES (
${snapshot.platformId},
${snapshot.capturedAt},
${snapshot.periodLabel},
${snapshot.viewsSearch},
${snapshot.viewsListing},
${snapshot.conversionRate},
${snapshot.bookingsCount},
${snapshot.occupancyRate},
${snapshot.avgDailyRate},
${snapshot.revenueTotal},
${JSON.stringify(snapshot.rawJson)}
)
`;
} catch (err) {
console.error('[worker] Failed to persist performance snapshot:', err);
}
}
/**
 * Upserts each reservation into the reservations table, one statement per reservation,
 * keyed on (platform_id, platform_reservation_id).
 * Does nothing when no database client is available. A failure on one reservation is
 * logged and the loop continues with the next.
 */
async function persistReservations(reservations: any[]): Promise<void> {
const db = getSql();
if (!db) return;
for (const r of reservations) {
try {
// Normalize scraped values: missing or unparseable dates fall back to the current date/time.
const checkIn = safeDateOnly(r.checkIn);
const checkOut = safeDateOnly(r.checkOut);
const bookedAt = safeDate(r.bookedAt);
// Non-numeric counts default to 1 and non-numeric amounts to 0.
const nights = isNaN(Number(r.nights)) ? 1 : Number(r.nights);
const guestsCount = isNaN(Number(r.guestsCount)) ? 1 : Number(r.guestsCount);
const nightlyRate = isNaN(Number(r.nightlyRate)) ? 0 : Number(r.nightlyRate);
const cleaningFee = isNaN(Number(r.cleaningFee)) ? 0 : Number(r.cleaningFee);
const platformFee = isNaN(Number(r.platformFee)) ? 0 : Number(r.platformFee);
const totalPayout = isNaN(Number(r.totalPayout)) ? 0 : Number(r.totalPayout);
// On conflict the existing row is refreshed; booked_at and raw_json are not in the
// update list, so they keep their originally inserted values.
// NOTE(review): rawJson is passed already stringified — confirm it is not stored as a
// JSON string scalar for the raw_json column type in use.
await db`
INSERT INTO reservations (platform_id, platform_reservation_id, guest_name, check_in, check_out, nights, guests_count, nightly_rate, cleaning_fee, platform_fee, total_payout, status, booked_at, raw_json)
VALUES (
${r.platformId},
${r.platformReservationId},
${r.guestName},
${checkIn},
${checkOut},
${nights},
${guestsCount},
${nightlyRate},
${cleaningFee},
${platformFee},
${totalPayout},
${r.status},
${bookedAt},
${JSON.stringify(r.rawJson)}
)
ON CONFLICT (platform_id, platform_reservation_id) DO UPDATE SET
guest_name = EXCLUDED.guest_name,
check_in = EXCLUDED.check_in,
check_out = EXCLUDED.check_out,
nights = EXCLUDED.nights,
guests_count = EXCLUDED.guests_count,
nightly_rate = EXCLUDED.nightly_rate,
cleaning_fee = EXCLUDED.cleaning_fee,
platform_fee = EXCLUDED.platform_fee,
total_payout = EXCLUDED.total_payout,
status = EXCLUDED.status,
synced_at = NOW()
`;
} catch (err) {
console.error('[worker] Failed to persist reservation:', err);
}
}
}
/**
 * Upserts each daily price into daily_prices, one statement per day, keyed on
 * (platform_id, date). Does nothing when no database client is available.
 * A failure on one row is logged and the loop continues with the next.
 */
async function persistDailyPrices(prices: any[]): Promise<void> {
const db = getSql();
if (!db) return;
for (const p of prices) {
try {
// A new row stores the adapter-supplied syncedAt; an updated row gets the database's NOW().
await db`
INSERT INTO daily_prices (platform_id, date, price, is_available, min_stay_nights, synced_at)
VALUES (
${p.platformId},
${p.date},
${p.price},
${p.isAvailable},
${p.minStayNights},
${p.syncedAt}
)
ON CONFLICT (platform_id, date) DO UPDATE SET
price = EXCLUDED.price,
is_available = EXCLUDED.is_available,
min_stay_nights = EXCLUDED.min_stay_nights,
synced_at = NOW()
`;
} catch (err) {
console.error('[worker] Failed to persist daily price:', err);
}
}
}
/**
 * Records a job's final status and row count on its scrape_jobs row, stamping
 * completed_at with the database's NOW(). Does nothing when no database client is
 * available; update errors are logged as warnings and swallowed.
 */
async function persistJobStatus(jobId: string, status: string, rowsCollected: number): Promise<void> {
const db = getSql();
if (!db) return;
try {
await db`
UPDATE scrape_jobs SET status = ${status}, completed_at = NOW(), rows_collected = ${rowsCollected}
WHERE id = ${jobId}::uuid
`;
} catch (err) {
// Job might not be in DB (e.g., in-memory only mode)
console.warn('[worker] Could not update job in DB:', err);
}
}
// True while a poll() invocation is in progress; makes overlapping timer ticks return early.
let polling = false;
// Handle of the polling timer; null while the worker is stopped.
let pollInterval: ReturnType<typeof setInterval> | null = null;
/**
 * Processes at most one job from the queue.
 * Returns immediately if a previous invocation is still running or the queue is empty.
 * A job failure is recorded on the in-memory job and, when a database client is available,
 * on its scrape_jobs row; it is never rethrown.
 */
async function poll(): Promise<void> {
if (polling) return;
polling = true;
try {
const job = jobQueue.dequeue();
if (!job) return;
console.log(`[worker] Processing job ${job.id} (${job.jobType} for ${job.platformId})`);
try {
await processJob(job);
console.log(`[worker] Job ${job.id} completed`);
} catch (err: any) {
console.error(`[worker] Job ${job.id} failed:`, err.message);
jobQueue.updateJob(job.id, {
status: 'failed',
completedAt: new Date().toISOString(),
errorMessage: err.message,
});
const db = getSql();
if (db) {
try {
await db`
UPDATE scrape_jobs SET status = 'failed', completed_at = NOW(), error_message = ${err.message}
WHERE id = ${job.id}::uuid
`;
} catch {
// Ignore DB errors for job status
}
}
}
} finally {
// Always release the guard, including on the early return for an empty queue.
polling = false;
}
}
/**
 * Starts the polling worker: schedules `poll` every `intervalMs` milliseconds and also
 * runs it once right away. Calling it while the worker is already running does nothing.
 */
export function startWorker(intervalMs = 2000): void {
  if (!pollInterval) {
    console.log(`[worker] Starting worker (polling every ${intervalMs}ms)`);
    pollInterval = setInterval(poll, intervalMs);
    // Kick off the first poll immediately instead of waiting for the first tick
    void poll();
  }
}
/** Stops the polling timer if it is running; a job already in progress is not interrupted. */
export function stopWorker(): void {
  if (!pollInterval) {
    return;
  }
  clearInterval(pollInterval);
  pollInterval = null;
  console.log('[worker] Worker stopped');
}

View File

@@ -0,0 +1,54 @@
import { chromium, type Browser } from 'playwright';
// User-agent string applied to the browser context when the caller does not supply one.
const DEFAULT_USER_AGENT =
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36';
/** Optional overrides for createBrowser; every field has a default there. */
export interface BrowserOptions {
/** Launch without a visible window (default true). */
headless?: boolean;
/** User-agent for the browser context (default DEFAULT_USER_AGENT). */
userAgent?: string;
/** Viewport and window width in pixels (default 1920). */
viewportWidth?: number;
/** Viewport and window height in pixels (default 1080). */
viewportHeight?: number;
}
/**
 * Launches Chromium and prepares one browser context configured with the given user agent,
 * viewport, en-US locale, America/New_York time zone and a script that hides
 * `navigator.webdriver`.
 *
 * Only the `Browser` is returned. The configured context is the one this function created
 * on it — presumably reachable as `browser.contexts()[0]`; a context created later with
 * `browser.newContext()` does not inherit these settings.
 *
 * If preparing the context fails, the launched browser is closed before the error is
 * rethrown so no Chromium process is left running.
 *
 * NOTE(review): `--no-sandbox` / `--disable-setuid-sandbox` turn off Chromium's sandbox;
 * confirm this is intended for the environments where untrusted pages are loaded.
 *
 * @param options Optional overrides; see `BrowserOptions` for defaults.
 * @returns The launched browser.
 */
export async function createBrowser(options: BrowserOptions = {}): Promise<Browser> {
  const {
    headless = true,
    userAgent = DEFAULT_USER_AGENT,
    viewportWidth = 1920,
    viewportHeight = 1080,
  } = options;

  const browser = await chromium.launch({
    headless,
    args: [
      '--disable-blink-features=AutomationControlled',
      '--disable-features=IsolateOrigins,site-per-process',
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      `--window-size=${viewportWidth},${viewportHeight}`,
    ],
  });

  try {
    const context = await browser.newContext({
      userAgent,
      viewport: { width: viewportWidth, height: viewportHeight },
      locale: 'en-US',
      timezoneId: 'America/New_York',
      permissions: [],
      javaScriptEnabled: true,
    });

    // Remove the webdriver flag to avoid detection
    await context.addInitScript(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
    });

    // Defensive: if the context already has a page, close the first one so callers start
    // from pages they open themselves.
    const pages = context.pages();
    if (pages.length > 0) {
      await pages[0].close();
    }
  } catch (err) {
    // Do not leak the launched browser process when context setup fails.
    await browser.close().catch(() => undefined);
    throw err;
  }

  return browser;
}

View File

@@ -0,0 +1,8 @@
/**
 * Waits for a randomly chosen whole number of milliseconds in the inclusive range
 * [min, max], then resolves. Intended to give browser automation human-like pacing.
 */
export function randomDelay(min: number, max: number): Promise<void> {
  const span = max - min + 1;
  const waitMs = min + Math.floor(Math.random() * span);
  return new Promise<void>((done) => {
    setTimeout(done, waitMs);
  });
}

View File

@@ -0,0 +1,62 @@
import { createCipheriv, createDecipheriv, randomBytes } from 'node:crypto';
// Cipher used for encrypt/decrypt.
const ALGORITHM = 'aes-256-gcm';
// Byte length of the random IV generated per encryption and required on decryption.
const IV_LENGTH = 12;
// Byte length the authentication tag must have on decryption.
const TAG_LENGTH = 16;
// Text encoding used for the key and for every part of the encrypted string.
const ENCODING = 'base64' as const;
/**
 * Reads the encryption key from the ENCRYPTION_KEY environment variable and decodes it.
 * @returns The decoded 32-byte key.
 * @throws Error when the variable is unset/empty or does not decode to exactly 32 bytes.
 */
function getEncryptionKey(): Buffer {
  const encoded = process.env.ENCRYPTION_KEY;
  if (!encoded) {
    throw new Error('ENCRYPTION_KEY environment variable is not set');
  }

  const decoded = Buffer.from(encoded, ENCODING);
  if (decoded.length === 32) {
    return decoded;
  }
  throw new Error('ENCRYPTION_KEY must be exactly 32 bytes (base64-encoded)');
}
/**
 * Encrypts `plaintext` with the configured key and a fresh random IV.
 * @returns `iv:tag:ciphertext`, each part encoded with ENCODING and joined by colons.
 * @throws Error when the encryption key is missing or malformed.
 */
export function encrypt(plaintext: string): string {
  const key = getEncryptionKey();
  const iv = randomBytes(IV_LENGTH);
  const cipher = createCipheriv(ALGORITHM, key, iv);

  const ciphertext = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()]);
  // The auth tag is only available after final() has been called.
  const tag = cipher.getAuthTag();

  // Format: iv:tag:ciphertext (all base64)
  return [iv, tag, ciphertext].map((part) => part.toString(ENCODING)).join(':');
}
/**
 * Decrypts a string produced by `encrypt` (`iv:tag:ciphertext`, colon-separated).
 * @returns The original plaintext.
 * @throws Error when the key is missing or malformed, when the input does not have exactly
 *   three parts, when the IV or tag has the wrong length, or when authentication fails
 *   (wrong key or tampered data).
 */
export function decrypt(encryptedStr: string): string {
  const key = getEncryptionKey();

  const segments = encryptedStr.split(':');
  if (segments.length !== 3) {
    throw new Error('Invalid encrypted string format');
  }
  const [iv, tag, ciphertext] = segments.map((segment) => Buffer.from(segment, ENCODING));

  if (iv.length !== IV_LENGTH) {
    throw new Error('Invalid IV length');
  }
  if (tag.length !== TAG_LENGTH) {
    throw new Error('Invalid auth tag length');
  }

  const decipher = createDecipheriv(ALGORITHM, key, iv);
  decipher.setAuthTag(tag);
  // final() verifies the auth tag and throws if the data does not authenticate.
  const plaintext = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
  return plaintext.toString('utf8');
}

View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.base.json",
"compilerOptions": {
"outDir": "./dist",
"rootDir": "./src"
},
"include": ["src"]
}