#!/usr/bin/env tsx
/**
 * Market Rate Fetcher Script
 *
 * Scrapes the top CD, Money Market, and High Yield Savings rates from
 * Bankrate.com and stores them in the shared.cd_rates table in PostgreSQL.
 * Designed to run standalone via cron (once per day).
 *
 * Historical data is preserved — each fetch adds new rows with the current
 * timestamp. The application queries only the latest batch per rate type.
 *
 * Usage:
 *   cd scripts
 *   npm install
 *   npx tsx fetch-cd-rates.ts
 *
 * Environment:
 *   DATABASE_URL - PostgreSQL connection string (reads from ../.env)
 */

import * as dotenv from 'dotenv';
import { resolve } from 'path';
import { Pool } from 'pg';
import puppeteer, { type Browser, type Page } from 'puppeteer';

// Load .env from project root (one directory above this script).
dotenv.config({ path: resolve(__dirname, '..', '.env') });

/** Upper bound on the number of rates kept per rate type. */
const MAX_RATES = 25;

/**
 * Rate source configurations: one entry per page to scrape.
 * `type` is stored as the row's rate_type, `label` is used for log output,
 * and `url` is both the page to visit and the stored source_url.
 */
const RATE_SOURCES = [
  {
    type: 'cd',
    label: 'CD Rates',
    url: 'https://www.bankrate.com/banking/cds/cd-rates/',
  },
  {
    type: 'high_yield_savings',
    label: 'High Yield Savings',
    url: 'https://www.bankrate.com/banking/savings/best-high-yield-interests-savings-accounts/',
  },
  {
    type: 'money_market',
    label: 'Money Market',
    url: 'https://www.bankrate.com/banking/money-market/rates/',
  },
];

/** One normalized rate row, shaped like the columns written to shared.cd_rates. */
interface MarketRate {
  /** Institution name, or a synthetic "Top CD Rate - <term>" label for CDs. */
  bank_name: string;
  /** Annual percentage yield as a plain number (4.5 means 4.50%). */
  apy: number;
  /** Minimum deposit amount, or null when none could be parsed. */
  min_deposit: number | null;
  /** Term text as scraped; 'N/A' when no term applies or none was found. */
  term: string;
  /** Term length in months; null for non-CD products or unparseable terms. */
  term_months: number | null;
  /** Matches the `type` of the RATE_SOURCES entry the row came from. */
  rate_type: string;
}
/**
 * Parse a term string like "3 months", "1 year", "18 months" or "1.5 years"
 * into a whole number of months.
 *
 * A month figure takes precedence over a year figure when both appear.
 * Fractional years are rounded to the nearest month.
 *
 * @param term - Free-form term text.
 * @returns The term length in months, or null when no month/year figure is found.
 */
function parseTermMonths(term: string): number | null {
  const lower = term.toLowerCase().trim();

  const monthMatch = lower.match(/(\d+)\s*month/);
  if (monthMatch) return parseInt(monthMatch[1], 10);

  // A single pattern covers whole and fractional years. Matching the integer
  // form first would read "1.5 years" as "5 years".
  const yearMatch = lower.match(/(\d+(?:\.\d+)?)\s*year/);
  if (yearMatch) return Math.round(parseFloat(yearMatch[1]) * 12);

  return null;
}

/**
 * Parse a currency string like "$500", "$1,000", "$0", "No minimum" into a
 * number or null.
 *
 * Only the first numeric token is used, so text such as "$1,000 - $5,000"
 * yields 1000 rather than the digits of both amounts run together.
 *
 * @param raw - Scraped deposit text; may be empty.
 * @returns The parsed amount, or null when the text contains no number.
 */
function parseMinDeposit(raw: string): number | null {
  if (!raw) return null;

  const token = raw.match(/\d[\d,]*(?:\.\d+)?|\.\d+/);
  if (!token) return null;

  const val = parseFloat(token[0].replace(/,/g, ''));
  return isNaN(val) ? null : val;
}

/**
 * Parse an APY string like "4.50%", "4.50% APY" into a number.
 *
 * The first number followed by a percent sign wins, so trailing text with
 * other digits (for example a date) cannot corrupt the value. If there is no
 * percent sign, the first number in the string is used.
 *
 * @param raw - Scraped APY text.
 * @returns The APY as a number, or 0 when nothing numeric is found.
 */
function parseApy(raw: string): number {
  const percent = raw.match(/(\d*\.?\d+)\s*%/);
  const token = percent ? percent[1] : raw.match(/\d*\.?\d+/)?.[0];
  if (!token) return 0;
  return parseFloat(token) || 0;
}

/**
 * Pause execution for a given number of milliseconds.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that resolves once the delay has elapsed.
 */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
 * Navigate to a Bankrate URL and scrape rate data.
 * Reuses an existing browser instance; the page opened here is always closed
 * before returning.
 *
 * @param browser - Running browser in which a new page is opened.
 * @param sourceUrl - Page to scrape.
 * @param rateType - Value stored as rate_type; 'cd' enables term handling.
 * @param label - Human-readable name used in log output.
 * @returns Up to MAX_RATES rates, de-duplicated by bank name + term and
 *          sorted by APY in descending order. Empty when nothing was found.
 */
async function fetchRatesFromPage(
  browser: Browser,
  sourceUrl: string,
  rateType: string,
  label: string,
): Promise<MarketRate[]> {
  const page: Page = await browser.newPage();

  try {
    // Inside the try so the page is closed even if this call fails.
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    );

    console.log(`\n--- Fetching ${label} ---`);
    console.log(`Navigating to ${sourceUrl}...`);
    await page.goto(sourceUrl, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    // Wait for rate content to render. A timeout is not fatal: the extraction
    // below has fallbacks that scan the page more broadly.
    console.log('Waiting for rate data to render...');
    await page
      .waitForSelector(
        'table, [data-testid*="rate"], .brc-table, [class*="ComparisonTable"], [class*="rate-table"]',
        { timeout: 30000 },
      )
      .catch(() => {
        console.log('Primary selectors not found, proceeding with page scan...');
      });

    // Extra wait for dynamic content
    await sleep(3000);

    // Scroll down in steps to load all content, then return to the top.
    console.log('Scrolling to load all content...');
    await page.evaluate(async () => {
      for (let i = 0; i < 10; i++) {
        window.scrollBy(0, 800);
        await new Promise((r) => setTimeout(r, 500));
      }
      window.scrollTo(0, 0);
    });
    await sleep(2000);

    // Extract rate data from the page. This callback runs in the browser
    // context, so it must be self-contained and cannot call helpers defined
    // in this file; only `maxRates` is passed in.
    const rawRates = await page.evaluate((maxRates: number) => {
      const results: Array<{
        bank_name: string;
        apy_raw: string;
        min_deposit_raw: string;
        term_raw: string;
      }> = [];

      // Strategy 1: Look for detailed bank comparison tables
      const tables = document.querySelectorAll('table');
      for (const table of tables) {
        const rows = table.querySelectorAll('tbody tr');
        if (rows.length < 3) continue;

        for (const row of rows) {
          const cells = row.querySelectorAll('td, th');
          if (cells.length < 3) continue;

          const texts = Array.from(cells).map((c) => c.textContent?.trim() || '');
          const apyCell = texts.find((t) => /\d+\.\d+\s*%/.test(t));
          if (!apyCell) continue;

          // First cell that looks like a name rather than a rate, amount or term.
          const bankCell = texts.find(
            (t) =>
              t.length > 3 &&
              !/^\d/.test(t) &&
              !t.includes('%') &&
              !t.startsWith('$') &&
              !/^\d+\s*(month|year)/i.test(t),
          );

          // Prefer a link's text or a logo's alt text over the cell guess.
          const linkEl = row.querySelector('a[href*="review"], a[href*="bank"], img[alt]');
          const linkName =
            linkEl?.textContent?.trim() || (linkEl as HTMLImageElement)?.alt || '';
          const name = linkName.length > 3 ? linkName : bankCell || '';
          if (!name) continue;

          results.push({
            bank_name: name,
            apy_raw: apyCell,
            min_deposit_raw: texts.find((t) => t.includes('$') || /no min/i.test(t)) || '',
            term_raw: texts.find((t) => /\d+\s*(month|year)/i.test(t)) || '',
          });

          if (results.length >= maxRates) break;
        }

        // Stop at the first table that produced enough entries.
        if (results.length >= 5) break;
      }

      // Strategy 2: Look for card/list layouts
      if (results.length < 5) {
        const cardSelectors = [
          '[class*="product"]',
          '[class*="offer-card"]',
          '[class*="rate-card"]',
          '[class*="ComparisonRow"]',
          '[class*="comparison-row"]',
          '[data-testid*="product"]',
          '[class*="partner"]',
        ];

        for (const selector of cardSelectors) {
          const cards = document.querySelectorAll(selector);
          if (cards.length < 3) continue;

          for (const card of cards) {
            const text = card.textContent || '';
            if (text.length < 20 || text.length > 2000) continue;

            const apyMatch = text.match(/([\d.]+)\s*%/);
            if (!apyMatch) continue;

            const nameEl = card.querySelector(
              'h2, h3, h4, h5, strong, [class*="name"], [class*="bank"], [class*="title"], a[href*="review"], img[alt]',
            );
            const bankName =
              nameEl?.textContent?.trim() || (nameEl as HTMLImageElement)?.alt || '';
            if (
              !bankName ||
              bankName.length < 3 ||
              /^\d/.test(bankName) ||
              bankName.includes('%')
            ) {
              continue;
            }

            const depositMatch = text.match(/\$[\d,]+/);
            const termMatch = text.match(/\d+\s*(?:month|year)s?/i);

            results.push({
              bank_name: bankName,
              apy_raw: apyMatch[0],
              min_deposit_raw: depositMatch?.[0] || '',
              term_raw: termMatch?.[0] || '',
            });

            if (results.length >= maxRates) break;
          }

          if (results.length >= 5) break;
        }
      }

      // Strategy 3: Broad scan for rate-bearing elements
      if (results.length < 5) {
        const allElements = document.querySelectorAll('div, section, article, li');

        for (const el of allElements) {
          // Skip large containers; only compact elements are considered.
          if (el.children.length > 20) continue;
          const text = el.textContent || '';
          if (text.length < 20 || text.length > 500) continue;

          const apyMatch = text.match(/([\d.]+)\s*%\s*(?:APY)?/i);
          if (!apyMatch) continue;

          const bankEl = el.querySelector('h2, h3, h4, h5, strong, b, a[href*="review"]');
          const bankName = bankEl?.textContent?.trim() || '';
          if (!bankName || bankName.length < 3 || /^\d/.test(bankName)) continue;

          const depositMatch = text.match(/\$[\d,]+/);
          const termMatch = text.match(/\d+\s*(?:month|year)s?/i);

          results.push({
            bank_name: bankName,
            apy_raw: apyMatch[0],
            min_deposit_raw: depositMatch?.[0] || '',
            term_raw: termMatch?.[0] || '',
          });

          if (results.length >= maxRates) break;
        }
      }

      return results;
    }, MAX_RATES);

    console.log(`Raw extraction found ${rawRates.length} rate entries.`);

    // Parse and normalize. Only CDs carry a term; other products get 'N/A'.
    const isTermProduct = rateType === 'cd';
    const parsed: MarketRate[] = rawRates
      .map((r) => {
        let bankName = r.bank_name.replace(/\s+/g, ' ').trim();
        const term = isTermProduct ? r.term_raw || 'N/A' : 'N/A';

        // For CDs: if bank name looks like a term, label it descriptively
        if (isTermProduct) {
          const termText = r.term_raw || bankName;
          if (
            /^\d+\s*(month|year)/i.test(bankName) ||
            /no\s*min/i.test(bankName) ||
            /^\$/.test(bankName) ||
            bankName.length < 4
          ) {
            // Insert a space after the leading number ("6months" -> "6 months"),
            // then collapse any doubled whitespace that results.
            bankName = `Top CD Rate - ${termText.replace(/^\d+/, (m: string) => m + ' ')}`
              .replace(/\s+/g, ' ')
              .trim();
          }
        }

        return {
          bank_name: bankName,
          apy: parseApy(r.apy_raw),
          min_deposit: parseMinDeposit(r.min_deposit_raw),
          term,
          term_months: isTermProduct ? parseTermMonths(r.term_raw || bankName) : null,
          rate_type: rateType,
        };
      })
      .filter((r) => r.bank_name && r.apy > 0);

    // Deduplicate by bank name + term (keep highest APY)
    const seen = new Map<string, MarketRate>();
    for (const rate of parsed) {
      const key = `${rate.bank_name}|${rate.term}`;
      const existing = seen.get(key);
      if (!existing || rate.apy > existing.apy) {
        seen.set(key, rate);
      }
    }

    return Array.from(seen.values())
      .sort((a, b) => b.apy - a.apy)
      .slice(0, MAX_RATES);
  } finally {
    await page.close();
  }
}

/**
 * Store scraped rates into shared.cd_rates.
 * Historical data is preserved — previous rows are not deleted.
 * All rows of one call share a single fetched_at timestamp and are written
 * in one transaction, which is rolled back if any insert fails.
 *
 * @param rates - Rows to insert.
 * @param sourceUrl - URL recorded in the source_url column of every row.
 * @throws Rethrows any connection or query error after attempting a rollback.
 */
async function storeRates(rates: MarketRate[], sourceUrl: string): Promise<void> {
  const connectionString =
    process.env.DATABASE_URL ||
    'postgresql://hoafinance:change_me@localhost:5432/hoafinance';

  const pool = new Pool({ connectionString });

  try {
    const client = await pool.connect();

    try {
      await client.query('BEGIN');

      const now = new Date().toISOString();

      for (const rate of rates) {
        await client.query(
          `INSERT INTO shared.cd_rates
             (bank_name, apy, min_deposit, term, term_months, rate_type, fetched_at, source_url)
           VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
          [
            rate.bank_name,
            rate.apy,
            rate.min_deposit,
            rate.term,
            rate.term_months,
            rate.rate_type,
            now,
            sourceUrl,
          ],
        );
      }

      await client.query('COMMIT');
      console.log(` Stored ${rates.length} ${rates[0]?.rate_type || ''} rates at ${now}`);
    } catch (err) {
      // A failing ROLLBACK must not hide the error that caused it.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackErr) {
        console.error('ROLLBACK failed:', rollbackErr);
      }
      throw err;
    } finally {
      client.release();
    }
  } finally {
    // Runs even when connect() itself rejects, so the pool never lingers.
    await pool.end();
  }
}
/**
 * Main entry point.
 *
 * Launches one headless browser, scrapes every entry in RATE_SOURCES in turn
 * (pausing between pages), prints the extracted rates and stores each batch.
 * A failure for one rate type is logged and does not stop the others.
 *
 * The process exit code is set to 1 when nothing was stored or a fatal error
 * occurred. `process.exitCode` is used instead of `process.exit()` so the
 * `finally` block still gets to close the browser.
 */
async function main(): Promise<void> {
  console.log('=== Market Rate Fetcher ===');
  console.log(`Fetching rates from Bankrate.com...`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log(`Rate types: ${RATE_SOURCES.map((s) => s.label).join(', ')}`);

  let browser: Browser | null = null;

  try {
    console.log('\nLaunching headless browser...');
    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
      ],
    });

    let totalStored = 0;

    for (const [i, source] of RATE_SOURCES.entries()) {
      // Pause between fetches to avoid rate limiting (skip for first)
      if (i > 0) {
        const pauseSeconds = 8 + Math.floor(Math.random() * 5); // 8-12 seconds
        console.log(`\nPausing ${pauseSeconds} seconds before next fetch...`);
        await sleep(pauseSeconds * 1000);
      }

      try {
        const rates = await fetchRatesFromPage(browser, source.url, source.type, source.label);

        if (rates.length === 0) {
          console.warn(`\nWARNING: No ${source.label} rates were extracted.`);
          console.warn('This may mean Bankrate changed their page structure.');
          continue; // Don't abort the whole run — try other rate types
        }

        console.log(`\nExtracted ${rates.length} ${source.label}:`);
        console.log('\u2500'.repeat(80));
        for (const r of rates) {
          // Keep columns aligned even when the product has no term.
          const termStr = r.term !== 'N/A' ? r.term.padEnd(15) : ''.padEnd(15);
          console.log(
            ` ${r.bank_name.padEnd(35)} ${String(r.apy + '%').padEnd(8)} ${termStr} ${r.min_deposit != null ? '$' + r.min_deposit.toLocaleString() : 'N/A'}`,
          );
        }
        console.log('\u2500'.repeat(80));

        console.log(`\nStoring ${source.label} to database...`);
        await storeRates(rates, source.url);
        totalStored += rates.length;
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : String(err);
        console.error(`\nERROR fetching ${source.label}: ${message}`);
        // Continue to next rate type
      }
    }

    if (totalStored === 0) {
      console.warn('\nWARNING: No rates were stored for any type.');
      console.warn('Review Bankrate page structure and update selectors.');
      process.exitCode = 1;
      return;
    }

    console.log(`\nDone. Total rates stored: ${totalStored}`);
  } catch (err) {
    console.error('\nFATAL ERROR:', err);
    process.exitCode = 1;
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}

// Anything that escapes main() (for example a failing browser.close())
// is reported here instead of becoming an unhandled rejection.
main().catch((err: unknown) => {
  console.error('\nFATAL ERROR:', err);
  process.exitCode = 1;
});