#!/usr/bin/env tsx
/**
 * Market Rate Fetcher Script
 *
 * Scrapes the top CD, Money Market, and High Yield Savings rates from
 * Bankrate.com and stores them in the shared.cd_rates table in PostgreSQL.
 * Designed to run standalone via cron (once per day).
 *
 * Historical data is preserved — each fetch adds new rows with the current
 * timestamp. The application queries only the latest batch per rate type.
 *
 * Usage:
 *   cd scripts
 *   npm install
 *   npx tsx fetch-cd-rates.ts
 *
 * Environment:
 *   DATABASE_URL - PostgreSQL connection string (reads from ../.env)
 */

import * as dotenv from 'dotenv';
import { resolve } from 'path';
import { Pool } from 'pg';
import puppeteer, { type Browser, type Page } from 'puppeteer';

// Load .env from project root
dotenv.config({ path: resolve(__dirname, '..', '.env') });

// Upper bound on the number of rates kept per rate type. It is passed into the
// in-page extraction and applied again after parsing/deduplication.
const MAX_RATES = 25;

// Rate source configurations
const RATE_SOURCES = [
  {
    type: 'cd',
    label: 'CD Rates',
    url: 'https://www.bankrate.com/banking/cds/cd-rates/',
  },
  {
    type: 'high_yield_savings',
    label: 'High Yield Savings',
    url: 'https://www.bankrate.com/banking/savings/best-high-yield-interests-savings-accounts/',
  },
  {
    type: 'money_market',
    label: 'Money Market',
    url: 'https://www.bankrate.com/banking/money-market/rates/',
  },
];

/** One normalized rate row, shaped like the columns inserted into shared.cd_rates. */
interface MarketRate {
  bank_name: string;
  /** Annual percentage yield as a plain number, e.g. 4.5 for "4.50%". */
  apy: number;
  min_deposit: number | null;
  /** Raw term text for CDs; the literal 'N/A' for products without a term. */
  term: string;
  /** Term length in months; null when unknown or not applicable. */
  term_months: number | null;
  /** The `type` of the RATE_SOURCES entry this row came from. */
  rate_type: string;
}

/**
 * Parse a term string like "3 months", "1 year", "18 months", "1.5 yr" or
 * "14mo" into a month count.
 *
 * A year part and a month part are added together, so "1 year 6 months"
 * yields 18. Fractional years are rounded to the nearest whole month.
 *
 * @param term - Free-form term text.
 * @returns The number of months, or null when no year/month quantity is found.
 */
function parseTermMonths(term: string): number | null {
  const lower = term.toLowerCase().trim();

  // Require at least one digit so a stray "." (as in "approx. year") cannot be
  // captured as the quantity and turn into NaN.
  const yearMatch = lower.match(/(\d+(?:\.\d+)?)\s*y(?:ear|r)/);
  const monthMatch = lower.match(/(\d+)\s*mo(?:nth)?/);
  if (!yearMatch && !monthMatch) return null;

  const years = yearMatch ? parseFloat(yearMatch[1]) : 0;
  const months = monthMatch ? parseInt(monthMatch[1], 10) : 0;
  return Math.round(years * 12) + months;
}

/**
 * Parse a currency string like "$500", "$1,000", "$0", "No minimum" into a number or null.
/**
 * Parse a minimum-deposit string such as "$500", "$1,000", "$0" or
 * "No minimum" into a number.
 *
 * Only the first digit-led number in the text is used. Surrounding
 * punctuation (the period in "Min. deposit $500") and any later numbers
 * (the upper bound in "$500 - $1,000") are ignored rather than being glued
 * into the result.
 *
 * @param raw - Raw deposit text; may be empty.
 * @returns The amount, or null when `raw` is empty or contains no number.
 */
function parseMinDeposit(raw: string): number | null {
  if (!raw) return null;

  // A digit, then digits/thousands separators, then an optional decimal part.
  const match = raw.match(/\d[\d,]*(?:\.\d+)?/);
  if (!match) return null;

  const val = parseFloat(match[0].replace(/,/g, ''));
  return isNaN(val) ? null : val;
}

/**
 * Parse an APY string like "4.50%", "4.50% APY" into a number.
 * Handles edge cases like ".4.50%" (leading period from adjacent text).
 *
 * @param raw - Raw APY text.
 * @returns The first digit-led decimal number in `raw`, or 0 when there is none.
 */
function parseApy(raw: string): number {
  // Extract the first valid decimal number (digit-leading) from the string
  const match = raw.match(/(\d+\.?\d*)/);
  if (!match) return 0;

  const val = parseFloat(match[1]);
  return isNaN(val) ? 0 : val;
}

/**
 * Pause execution for a given number of milliseconds.
 *
 * @param ms - Delay in milliseconds.
 * @returns A promise that resolves (with no value) once the delay has elapsed.
 */
function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Navigate to a Bankrate URL and scrape rate data from individual bank offer cards.
 *
 * Bankrate uses a card-based layout with two sections:
 *   - .wrt-RateSections-sponsoredoffers (sponsored bank offers)
 *   - .wrt-RateSections-additionaloffers (additional bank offers)
 *
 * Each card (.rounded-md) contains:
 *   - Bank name in img[alt] (the logo)
 *   - APY after "APY as of" text
 *   - Min. deposit (CDs) or Min. balance for APY (savings/MM)
 *   - Term (CDs only): e.g. "1yr", "14mo"
 *
 * The page also has a summary table (.wealth-product-rate-list) with "best rates"
 * per term but NO bank names — we explicitly skip this table.
/**
 * Open `sourceUrl` in a new page, scrape the bank offer cards and return
 * normalized rates.
 *
 * Cards are read from the children of .wrt-RateSections-sponsoredoffers and
 * .wrt-RateSections-additionaloffers. If none yield a result, a looser
 * class-name based fallback scan is used instead.
 *
 * @param browser - Running Puppeteer browser; a page is opened and always closed again.
 * @param sourceUrl - Page to scrape.
 * @param rateType - Stored as `rate_type`; the value 'cd' enables term parsing.
 * @param label - Human-readable name used in log output only.
 * @returns At most MAX_RATES rates, deduplicated by bank name + term (highest
 *          APY wins) and sorted by APY descending.
 */
async function fetchRatesFromPage(
  browser: Browser,
  sourceUrl: string,
  rateType: string,
  label: string,
): Promise<MarketRate[]> {
  const page: Page = await browser.newPage();

  try {
    // Inside the try so the page is still closed if this call fails.
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    );

    console.log(`\n--- Fetching ${label} ---`);
    console.log(`Navigating to ${sourceUrl}...`);
    await page.goto(sourceUrl, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    // Wait for rate card sections to render. A timeout here is not fatal:
    // the fallback extraction below may still find data.
    console.log('Waiting for rate cards to render...');
    try {
      await page.waitForSelector(
        '.wrt-RateSections-sponsoredoffers .rounded-md, .wrt-RateSections-additionaloffers .rounded-md',
        { timeout: 30000 },
      );
    } catch {
      console.log('Bankrate card selectors not found, will try fallback...');
    }

    // Extra wait for dynamic content
    await sleep(3000);

    // Scroll down to load all content
    console.log('Scrolling to load all content...');
    await page.evaluate(async () => {
      for (let i = 0; i < 10; i++) {
        window.scrollBy(0, 800);
        await new Promise((r) => setTimeout(r, 500));
      }
      window.scrollTo(0, 0);
    });
    await sleep(2000);

    // Extract rate data from individual bank offer cards.
    // This callback runs inside the browser page, so it cannot use any
    // helper defined in this file; only `maxRates` is passed in.
    const rawRates = await page.evaluate((maxRates: number) => {
      const results: Array<{
        bank_name: string;
        apy_raw: string;
        min_deposit_raw: string;
        term_raw: string;
      }> = [];

      // Primary strategy: extract from Bankrate offer cards
      // Both sponsored and additional offer sections use the same card structure
      const cards = [
        ...document.querySelectorAll('.wrt-RateSections-sponsoredoffers > .rounded-md'),
        ...document.querySelectorAll('.wrt-RateSections-additionaloffers > .rounded-md'),
      ];

      for (const card of cards) {
        const text = card.textContent || '';

        // Bank name: from the logo img alt attribute (most reliable)
        const img = card.querySelector('img[alt]');
        let bankName = img ? (img as HTMLImageElement).alt.trim() : '';

        // Fallback: extract from text before "Add to compare"
        if (!bankName) {
          const addIdx = text.indexOf('Add to compare');
          if (addIdx > 0) {
            bankName = text.substring(0, addIdx)
              .replace(/Editor's pick/gi, '')
              .trim();
          }
        }

        // Fallback: extract from product name pattern (e.g. "NexBank CD")
        if (!bankName) {
          const productMatch = text.match(/^(?:Editor's pick)?\s*([A-Z][\w\s®*.'&-]+?(?:CD|Account|Savings|Money Market))/);
          if (productMatch) bankName = productMatch[1].trim();
        }

        if (!bankName || bankName.length < 2) continue;

        // NOTE(review): the original statements between this point and the
        // `maxRates` check were lost from the source under review. The
        // extraction below is a reconstruction from the surrounding comments
        // — verify every pattern against the live card text before relying
        // on it.

        // APY: find the percentage that appears after "APY as of" context.
        // Avoid picking up the Bankrate score (e.g. "4.5 Bankrate CD score").
        // The "as of" date is blanked out first so its digits cannot run into
        // the rate when the card text has no whitespace between elements.
        const anchorIdx = text.search(/APY as of/i);
        const apyScope = (anchorIdx >= 0 ? text.slice(anchorIdx) : text)
          .replace(/\d{1,2}\/\d{1,2}\/\d{2,4}/g, ' ');
        const apyMatch =
          apyScope.match(/(?<![\d.])(\d{1,2}\.\d{1,2})\s*%/) ||
          apyScope.match(/(\d+\.?\d*)\s*%/);
        if (!apyMatch) continue;

        // Min. deposit (CDs) or Min. balance for APY (savings/MM)
        const depositMatch = text.match(
          /Min\.?\s*(?:deposit|balance(?:\s*for\s*APY)?)\s*:?\s*(\$\s?[\d,]+(?:\.\d+)?|No minimum|None)/i,
        );

        // Term (CDs only): e.g. "1yr", "14mo"
        const termMatch = text.match(/(?<![\d.])\d+(?:\.\d+)?\s*(?:years?|yrs?|months?|mos?)/i);

        results.push({
          bank_name: bankName,
          apy_raw: apyMatch[1] + '%',
          min_deposit_raw: depositMatch?.[1] || '',
          term_raw: termMatch?.[0] || '',
        });

        if (results.length >= maxRates) break;
      }

      // Fallback strategy: if card-based extraction found nothing,
      // scan for any elements with bank-like names and APY percentages.
      // This guards against future Bankrate layout changes.
      if (results.length === 0) {
        const fallbackCards = document.querySelectorAll(
          '[class*="product"], [class*="offer"], [class*="rate-card"], [class*="ComparisonRow"]',
        );

        for (const card of fallbackCards) {
          const text = card.textContent || '';
          // Skip fragments too short to be a card and containers too large to be one.
          if (text.length < 20 || text.length > 2000) continue;

          const apyMatch = text.match(/(\d+\.?\d*)\s*%\s*(?:APY)?/);
          if (!apyMatch) continue;

          const nameEl = card.querySelector('img[alt], h2, h3, h4, h5, [class*="name"], [class*="bank"]');
          const bankName = (nameEl as HTMLImageElement)?.alt || nameEl?.textContent?.trim() || '';
          // Reject "names" that are really a number or a rate.
          if (!bankName || bankName.length < 2 || /^\d/.test(bankName) || bankName.includes('%')) continue;

          const depositMatch = text.match(/\$[\d,]+/);
          const termMatch = text.match(/(\d+)\s*(?:month|year)s?/i);

          results.push({
            bank_name: bankName,
            apy_raw: apyMatch[1] + '%',
            min_deposit_raw: depositMatch?.[0] || '',
            term_raw: termMatch?.[0] || '',
          });

          if (results.length >= maxRates) break;
        }
      }

      return results;
    }, MAX_RATES);

    console.log(`Raw extraction found ${rawRates.length} rate entries.`);

    // Parse and normalize
    const isTermProduct = rateType === 'cd';

    const parsed: MarketRate[] = rawRates
      .map((r): MarketRate | null => {
        let bankName = r.bank_name
          .replace(/\s+/g, ' ')
          .replace(/Editor's pick/gi, '')
          .trim();

        // Strip trailing product suffixes to normalize bank name
        // e.g. "Marcus by Goldman Sachs CD" → "Marcus by Goldman Sachs"
        bankName = bankName
          .replace(/\s+(CD|Certificate of Deposit|Money Market|Savings|High[- ]Yield Savings)\s*$/i, '')
          .trim();

        const term = isTermProduct ? (r.term_raw || 'N/A') : 'N/A';

        // Skip entries where bank_name still looks like a term or number (not a real bank)
        if (
          /^\d+\s*(month|year)/i.test(bankName) ||
          /^\$/.test(bankName) ||
          bankName.length < 2
        ) {
          return null;
        }

        return {
          bank_name: bankName,
          apy: parseApy(r.apy_raw),
          min_deposit: parseMinDeposit(r.min_deposit_raw),
          term,
          term_months: isTermProduct ? parseTermMonths(r.term_raw) : null,
          rate_type: rateType,
        };
      })
      // Drop rejected rows and APYs outside (0, 20].
      .filter((r): r is MarketRate => r !== null && r.bank_name.length > 0 && r.apy > 0 && r.apy <= 20);

    // Deduplicate by bank name + term (keep highest APY)
    const seen = new Map<string, MarketRate>();
    for (const rate of parsed) {
      const key = `${rate.bank_name}|${rate.term}`;
      const existing = seen.get(key);
      if (!existing || rate.apy > existing.apy) {
        seen.set(key, rate);
      }
    }

    return Array.from(seen.values())
      .sort((a, b) => b.apy - a.apy)
      .slice(0, MAX_RATES);
  } finally {
    await page.close();
  }
}

/**
 * Store scraped rates into shared.cd_rates.
 * Historical data is preserved — we no longer delete previous rows.
 * Each fetch batch shares a common fetched_at timestamp per rate_type.
/**
 * Insert one batch of rates into shared.cd_rates inside a single transaction.
 *
 * Existing rows are never deleted. Every row of the batch gets the same
 * `fetched_at` timestamp, taken once before the first insert.
 *
 * The connection string comes from DATABASE_URL, falling back to a local
 * development default when that is unset or empty.
 *
 * @param rates - Rows to insert.
 * @param sourceUrl - Stored in `source_url` on every row.
 * @throws Whatever the connection or a query throws; the transaction is rolled back first.
 */
async function storeRates(rates: MarketRate[], sourceUrl: string): Promise<void> {
  const connectionString =
    process.env.DATABASE_URL ||
    'postgresql://hoafinance:change_me@localhost:5432/hoafinance';

  const pool = new Pool({ connectionString });

  try {
    // Connecting inside the try guarantees the pool is ended even when the
    // connection attempt itself fails.
    const client = await pool.connect();

    try {
      await client.query('BEGIN');

      const now = new Date().toISOString();

      for (const rate of rates) {
        await client.query(
          `INSERT INTO shared.cd_rates
             (bank_name, apy, min_deposit, term, term_months, rate_type, fetched_at, source_url)
           VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
          [
            rate.bank_name,
            rate.apy,
            rate.min_deposit,
            rate.term,
            rate.term_months,
            rate.rate_type,
            now,
            sourceUrl,
          ],
        );
      }

      await client.query('COMMIT');
      console.log(` Stored ${rates.length} ${rates[0]?.rate_type || ''} rates at ${now}`);
    } catch (err) {
      // A failing ROLLBACK must not replace the error that caused it.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackErr) {
        console.error('ROLLBACK failed:', rollbackErr);
      }
      throw err;
    } finally {
      client.release();
    }
  } finally {
    await pool.end();
  }
}

/**
 * Main entry point.
 *
 * Launches one headless browser, then fetches and stores each RATE_SOURCES
 * entry in turn with a randomized pause between fetches. A failure for one
 * rate type is logged and the run moves on to the next type. The process
 * exits with status 1 when nothing was stored or a fatal error occurred —
 * but only after the browser has been closed.
 */
async function main(): Promise<void> {
  console.log('=== Market Rate Fetcher ===');
  console.log(`Fetching rates from Bankrate.com...`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log(`Rate types: ${RATE_SOURCES.map((s) => s.label).join(', ')}`);

  let browser: Browser | null = null;
  // Recorded instead of calling process.exit() inside the try block:
  // process.exit() would skip the finally block and leave the browser running.
  let failed = false;

  try {
    // Use system Chromium if PUPPETEER_EXECUTABLE_PATH is set,
    // or auto-detect common locations on Linux servers.
    const { accessSync } = await import('fs');
    const systemBrowsers = ['/usr/bin/chromium-browser', '/usr/bin/chromium', '/usr/bin/google-chrome'];
    const executablePath =
      process.env.PUPPETEER_EXECUTABLE_PATH ||
      systemBrowsers.find((p) => {
        try {
          accessSync(p);
          return true;
        } catch {
          return false;
        }
      }) ||
      undefined;

    console.log('\nLaunching headless browser...');
    if (executablePath) console.log(`Using browser: ${executablePath}`);

    browser = await puppeteer.launch({
      headless: true,
      executablePath,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
      ],
    });

    let totalStored = 0;

    for (let i = 0; i < RATE_SOURCES.length; i++) {
      const source = RATE_SOURCES[i];

      // Pause between fetches to avoid rate limiting (skip for first)
      if (i > 0) {
        const pauseSeconds = 8 + Math.floor(Math.random() * 5); // 8-12 seconds
        console.log(`\nPausing ${pauseSeconds} seconds before next fetch...`);
        await sleep(pauseSeconds * 1000);
      }

      try {
        const rates = await fetchRatesFromPage(browser, source.url, source.type, source.label);

        if (rates.length === 0) {
          console.warn(`\nWARNING: No ${source.label} rates were extracted.`);
          console.warn('This may mean Bankrate changed their page structure.');
          continue; // Don't abort the whole run — try other rate types
        }

        console.log(`\nExtracted ${rates.length} ${source.label}:`);
        console.log('\u2500'.repeat(80));
        for (const r of rates) {
          const termStr = r.term !== 'N/A' ? r.term.padEnd(15) : ''.padEnd(15);
          console.log(
            ` ${r.bank_name.padEnd(35)} ${String(r.apy + '%').padEnd(8)} ${termStr} ${r.min_deposit != null ? '$' + r.min_deposit.toLocaleString() : 'N/A'}`,
          );
        }
        console.log('\u2500'.repeat(80));

        console.log(`\nStoring ${source.label} to database...`);
        await storeRates(rates, source.url);
        totalStored += rates.length;
      } catch (err: unknown) {
        // Covers both scraping and storing failures for this rate type.
        const message = err instanceof Error ? err.message : String(err);
        console.error(`\nERROR fetching ${source.label}: ${message}`);
        // Continue to next rate type
      }
    }

    if (totalStored === 0) {
      console.warn('\nWARNING: No rates were stored for any type.');
      console.warn('Review Bankrate page structure and update selectors.');
      failed = true;
    } else {
      console.log(`\nDone. Total rates stored: ${totalStored}`);
    }
  } catch (err) {
    console.error('\nFATAL ERROR:', err);
    failed = true;
  } finally {
    if (browser) {
      await browser.close();
    }
  }

  if (failed) {
    process.exit(1);
  }
}

// Report anything main() itself could not handle (e.g. browser.close() failing)
// instead of leaving an unhandled rejection.
main().catch((err: unknown) => {
  console.error('\nFATAL ERROR:', err);
  process.exit(1);
});