#!/usr/bin/env tsx
/**
 * CD Rate Fetcher Script
 *
 * Scrapes the top CD rates from Bankrate.com and stores them in the
 * shared.cd_rates table in PostgreSQL. Designed to run standalone via cron.
 *
 * Bankrate renders rate data dynamically via JavaScript, so this script
 * uses Puppeteer (headless Chrome) to fully render the page before scraping.
 *
 * Usage:
 *   cd scripts
 *   npm install
 *   npx tsx fetch-cd-rates.ts
 *
 * Environment:
 *   DATABASE_URL - PostgreSQL connection string (reads from ../.env)
 */

import * as dotenv from 'dotenv';
import { resolve } from 'path';
import { Pool } from 'pg';
import puppeteer, { type Browser } from 'puppeteer';

// Load .env from project root
dotenv.config({ path: resolve(__dirname, '..', '.env') });

const BANKRATE_URL = 'https://www.bankrate.com/banking/cds/cd-rates/';
const MAX_RATES = 25;

/** A normalized CD rate row, matching the columns written to shared.cd_rates. */
interface CdRate {
  bank_name: string;
  apy: number;
  min_deposit: number | null;
  term: string;
  term_months: number | null;
}

/** Unparsed text fragments pulled out of the rendered page for one rate entry. */
interface RawRate {
  bank_name: string;
  apy_raw: string;
  min_deposit_raw: string;
  term_raw: string;
}

/** Matches a non-negative decimal number such as "4", "4.50" or ".5". */
const NUMBER_PATTERN = '\\d+(?:\\.\\d+)?|\\.\\d+';

/**
 * Resolve after `ms` milliseconds.
 *
 * Kept as a named helper so call sites do not shadow the `resolve` imported
 * from 'path'.
 */
function sleep(ms: number): Promise<void> {
  return new Promise((done) => setTimeout(done, ms));
}

/**
 * Parse a term string like "3 months", "1 year", "18 months" or "1.5 years"
 * into a month count.
 *
 * @param term - Free-form term text.
 * @returns The term in whole months, or null when no month/year quantity is found.
 */
function parseTermMonths(term: string): number | null {
  const lower = term.toLowerCase().trim();

  const monthMatch = lower.match(/(\d+)\s*month/);
  if (monthMatch) return parseInt(monthMatch[1], 10);

  // A single decimal-aware pattern handles both "2 years" and "1.5 years".
  // Matching integers first would read "1.5 years" as "5 years".
  const yearMatch = lower.match(/(\d*\.?\d+)\s*year/);
  if (yearMatch) {
    const years = parseFloat(yearMatch[1]);
    return Number.isNaN(years) ? null : Math.round(years * 12);
  }

  return null;
}

/**
 * Parse a currency string like "$500", "$1,000", "$0", "No minimum" into a
 * number or null.
 *
 * Only one amount is read: the first "$"-prefixed number if there is one,
 * otherwise the first number in the text. Unrelated digits elsewhere in the
 * string (e.g. a term length) are therefore not merged into the amount.
 *
 * @param raw - Free-form deposit text; may be empty.
 * @returns The amount, or null when the text contains no number.
 */
function parseMinDeposit(raw: string): number | null {
  if (!raw) return null;

  const amountPattern = '\\d[\\d,]*(?:\\.\\d+)?|\\.\\d+';
  const match =
    raw.match(new RegExp(`\\$\\s*(${amountPattern})`)) ??
    raw.match(new RegExp(`(${amountPattern})`));
  if (!match) return null;

  const val = parseFloat(match[1].replace(/,/g, ''));
  return Number.isNaN(val) ? null : val;
}

/**
 * Parse an APY string like "4.50%", "4.50% APY" into a number.
 *
 * The number immediately preceding a "%" sign is preferred; if there is no
 * "%" the first number in the text is used. Trailing digits such as an
 * "as of" date are ignored rather than concatenated onto the rate.
 *
 * @param raw - Free-form APY text.
 * @returns The APY as a number, or 0 when nothing parseable is found.
 */
function parseApy(raw: string): number {
  const match =
    raw.match(new RegExp(`(${NUMBER_PATTERN})\\s*%`)) ??
    raw.match(new RegExp(`(${NUMBER_PATTERN})`));
  if (!match) return 0;
  return parseFloat(match[1]) || 0;
}

/**
 * Convert one raw scraped entry into a CdRate.
 *
 * Whitespace in the bank name is collapsed. When the "name" is really a term,
 * a deposit amount, "no minimum" text, or shorter than four characters, the
 * entry is treated as a summary card and relabelled "Top CD Rate - <term>".
 */
function normalizeRate(r: RawRate): CdRate {
  let bankName = r.bank_name.replace(/\s+/g, ' ').trim();
  const term = r.term_raw || 'N/A';

  // If the bank name looks like a term or deposit info, it's a
  // summary card — label it more descriptively using the term
  const termText = r.term_raw || bankName;
  const looksLikeSummary =
    /^\d+\s*(month|year)/i.test(bankName) ||
    /no\s*min/i.test(bankName) ||
    /^\$/.test(bankName) ||
    bankName.length < 4;

  if (looksLikeSummary) {
    bankName = `Top CD Rate - ${termText.replace(/^\d+/, (m: string) => m + ' ')}`
      .replace(/\s+/g, ' ')
      .trim();
  }

  return {
    bank_name: bankName,
    apy: parseApy(r.apy_raw),
    min_deposit: parseMinDeposit(r.min_deposit_raw),
    term,
    term_months: parseTermMonths(r.term_raw || bankName),
  };
}

/**
 * Launch headless Chrome, navigate to Bankrate, and scrape CD rate data.
 *
 * Three extraction strategies are tried in order (tables, card layouts, broad
 * element scan); later strategies only run while fewer than five entries have
 * been collected. Results are normalized, deduplicated by bank name + term
 * (keeping the highest APY), sorted by APY descending and capped at MAX_RATES.
 *
 * @returns The normalized rates; may be empty if nothing could be extracted.
 * @throws Whatever Puppeteer throws on launch or navigation failure. The
 *         browser is always closed before the error propagates.
 */
async function fetchRates(): Promise<CdRate[]> {
  let browser: Browser | null = null;

  try {
    console.log('Launching headless browser...');
    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
      ],
    });

    const page = await browser.newPage();
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    );

    console.log(`Navigating to ${BANKRATE_URL}...`);
    await page.goto(BANKRATE_URL, {
      waitUntil: 'networkidle2',
      timeout: 60000,
    });

    // Wait for rate content to render
    // Bankrate uses various table/card patterns; we'll try multiple selectors
    console.log('Waiting for rate data to render...');
    try {
      await page.waitForSelector(
        'table, [data-testid*="rate"], .brc-table, [class*="ComparisonTable"], [class*="rate-table"]',
        { timeout: 30000 },
      );
    } catch {
      // Best effort: the extraction strategies below scan the page regardless.
      console.log('Primary selectors not found, proceeding with page scan...');
    }

    // Extra wait for dynamic content
    await sleep(3000);

    // Scroll down to load all content (rate tables may be below the fold)
    console.log('Scrolling to load all content...');
    await page.evaluate(async () => {
      for (let i = 0; i < 10; i++) {
        window.scrollBy(0, 800);
        await new Promise((r) => setTimeout(r, 500));
      }
      window.scrollTo(0, 0);
    });
    await sleep(2000);

    // Extract rate data from the page using multiple strategies.
    // NOTE: this callback executes inside the page, not in Node, so it cannot
    // reference module-level values; MAX_RATES is passed in as an argument.
    const rates = await page.evaluate((maxRates: number) => {
      const results: Array<{
        bank_name: string;
        apy_raw: string;
        min_deposit_raw: string;
        term_raw: string;
      }> = [];

      // Strategy 1: Look for detailed bank comparison tables with named banks
      // These typically have 4+ columns: Bank, APY, Min Deposit, Term
      const tables = document.querySelectorAll('table');
      for (const table of tables) {
        const rows = table.querySelectorAll('tbody tr');
        if (rows.length < 3) continue; // Skip small tables

        for (const row of rows) {
          const cells = row.querySelectorAll('td, th');
          if (cells.length < 3) continue;

          const texts = Array.from(cells).map((c) => c.textContent?.trim() || '');
          const apyCell = texts.find((t) => /\d+\.\d+\s*%/.test(t));
          if (!apyCell) continue;

          // Bank name: look for a cell with a real name (not just number/percent/dollar)
          const bankCell = texts.find(
            (t) =>
              t.length > 3 &&
              !/^\d/.test(t) &&
              !t.includes('%') &&
              !t.startsWith('$') &&
              !/^\d+\s*(month|year)/i.test(t),
          );

          // Also try to find the bank name from links or images in the row
          const linkEl = row.querySelector('a[href*="review"], a[href*="bank"], img[alt]');
          const linkName =
            linkEl?.textContent?.trim() || (linkEl as HTMLImageElement)?.alt || '';
          const name = linkName.length > 3 ? linkName : bankCell || '';
          if (!name) continue;

          results.push({
            bank_name: name,
            apy_raw: apyCell,
            min_deposit_raw: texts.find((t) => t.includes('$') || /no min/i.test(t)) || '',
            term_raw: texts.find((t) => /\d+\s*(month|year)/i.test(t)) || '',
          });

          if (results.length >= maxRates) break;
        }

        if (results.length >= 5) break; // Found a good table
      }

      // Strategy 2: Look for card/list layouts with bank names and rates
      if (results.length < 5) {
        const cardSelectors = [
          '[class*="product"]',
          '[class*="offer-card"]',
          '[class*="rate-card"]',
          '[class*="ComparisonRow"]',
          '[class*="comparison-row"]',
          '[data-testid*="product"]',
          '[class*="partner"]',
        ];

        for (const selector of cardSelectors) {
          const cards = document.querySelectorAll(selector);
          if (cards.length < 3) continue;

          for (const card of cards) {
            const text = card.textContent || '';
            if (text.length < 20 || text.length > 2000) continue;

            const apyMatch = text.match(/([\d.]+)\s*%/);
            if (!apyMatch) continue;

            // Try to find bank name from heading, link, or image alt text
            const nameEl = card.querySelector(
              'h2, h3, h4, h5, strong, [class*="name"], [class*="bank"], [class*="title"], a[href*="review"], img[alt]',
            );
            const bankName =
              nameEl?.textContent?.trim() || (nameEl as HTMLImageElement)?.alt || '';

            // Skip if the "name" is just a rate or term
            if (
              !bankName ||
              bankName.length < 3 ||
              /^\d/.test(bankName) ||
              bankName.includes('%')
            )
              continue;

            const depositMatch = text.match(/\$[\d,]+/);
            const termMatch = text.match(/\d+\s*(?:month|year)s?/i);

            results.push({
              bank_name: bankName,
              apy_raw: apyMatch[0],
              min_deposit_raw: depositMatch?.[0] || '',
              term_raw: termMatch?.[0] || '',
            });

            if (results.length >= maxRates) break;
          }

          if (results.length >= 5) break;
        }
      }

      // Strategy 3: Broad scan for rate-bearing elements
      if (results.length < 5) {
        const allElements = document.querySelectorAll(
          'div, section, article, li',
        );

        for (const el of allElements) {
          if (el.children.length > 20) continue;

          const text = el.textContent || '';
          if (text.length < 20 || text.length > 500) continue;

          const apyMatch = text.match(/([\d.]+)\s*%\s*(?:APY)?/i);
          if (!apyMatch) continue;

          const bankEl = el.querySelector(
            'h2, h3, h4, h5, strong, b, a[href*="review"]',
          );
          const bankName = bankEl?.textContent?.trim() || '';
          if (!bankName || bankName.length < 3 || /^\d/.test(bankName)) continue;

          const depositMatch = text.match(/\$[\d,]+/);
          const termMatch = text.match(/\d+\s*(?:month|year)s?/i);

          results.push({
            bank_name: bankName,
            apy_raw: apyMatch[0],
            min_deposit_raw: depositMatch?.[0] || '',
            term_raw: termMatch?.[0] || '',
          });

          if (results.length >= maxRates) break;
        }
      }

      return results;
    }, MAX_RATES);

    console.log(`Raw extraction found ${rates.length} rate entries.`);

    // Parse and normalize the scraped data; drop entries with no usable name or APY
    const parsed: CdRate[] = rates
      .map(normalizeRate)
      .filter((r) => r.bank_name && r.apy > 0);

    // Deduplicate by bank name + term (keep highest APY)
    const seen = new Map<string, CdRate>();
    for (const rate of parsed) {
      const key = `${rate.bank_name}|${rate.term}`;
      const existing = seen.get(key);
      if (!existing || rate.apy > existing.apy) {
        seen.set(key, rate);
      }
    }

    return Array.from(seen.values())
      .sort((a, b) => b.apy - a.apy)
      .slice(0, MAX_RATES);
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}

/**
 * Store scraped rates into shared.cd_rates, replacing all previous data.
 *
 * The delete and all inserts run in a single transaction, so a failure leaves
 * the previous batch in place. The pool is always shut down, including when
 * acquiring a connection fails.
 *
 * @param rates - Rows to insert; every row gets the same fetched_at timestamp.
 * @throws The original database error after attempting a ROLLBACK.
 */
async function storeRates(rates: CdRate[]): Promise<void> {
  // Falls back to a local development connection string when DATABASE_URL is
  // unset or empty.
  const connectionString =
    process.env.DATABASE_URL ||
    'postgresql://hoafinance:change_me@localhost:5432/hoafinance';

  const pool = new Pool({ connectionString });

  try {
    const client = await pool.connect();

    try {
      await client.query('BEGIN');

      // Clear previous batch (we only keep the latest fetch)
      await client.query('DELETE FROM shared.cd_rates');

      const now = new Date().toISOString();

      for (const rate of rates) {
        await client.query(
          `INSERT INTO shared.cd_rates (bank_name, apy, min_deposit, term, term_months, fetched_at, source_url) VALUES ($1, $2, $3, $4, $5, $6, $7)`,
          [
            rate.bank_name,
            rate.apy,
            rate.min_deposit,
            rate.term,
            rate.term_months,
            now,
            BANKRATE_URL,
          ],
        );
      }

      await client.query('COMMIT');
      console.log(`Successfully stored ${rates.length} CD rates at ${now}`);
    } catch (err) {
      // A failing ROLLBACK must not hide the error that triggered it.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackErr) {
        console.error('ROLLBACK failed:', rollbackErr);
      }
      throw err;
    } finally {
      client.release();
    }
  } finally {
    await pool.end();
  }
}

/**
 * Main entry point.
 *
 * Fetches rates, prints them as a table, and stores them. Exits with status 1
 * when no rates were extracted (nothing is written to the database in that
 * case) or when any step throws.
 */
async function main(): Promise<void> {
  console.log('=== CD Rate Fetcher ===');
  console.log(`Fetching top CD rates from Bankrate.com...`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log('');

  try {
    const rates = await fetchRates();

    if (rates.length === 0) {
      console.warn('');
      console.warn('WARNING: No CD rates were extracted from Bankrate.');
      console.warn(
        'This likely means Bankrate changed their page structure.',
      );
      console.warn(
        'Review the page DOM and update selectors in fetch-cd-rates.ts.',
      );
      process.exit(1);
    }

    console.log(`\nExtracted ${rates.length} rates:`);
    console.log('─'.repeat(70));
    for (const r of rates) {
      console.log(
        ` ${r.bank_name.padEnd(30)} ${String(r.apy + '%').padEnd(8)} ${r.term.padEnd(15)} ${r.min_deposit != null ? '$' + r.min_deposit.toLocaleString() : 'N/A'}`,
      );
    }
    console.log('─'.repeat(70));

    console.log('\nStoring to database...');
    await storeRates(rates);
    console.log('Done.');
  } catch (err) {
    console.error('\nFATAL ERROR:', err);
    process.exit(1);
  }
}

// main() handles its own errors; `void` marks the promise as intentionally unawaited.
void main();