Files
HOA_Financial_Platform/scripts/fetch-cd-rates.ts
olsch01 2fed5d6ce1 Phase 6: Expand market rates and enhance AI investment recommendations
- Rate fetcher now scrapes CD, Money Market, and High Yield Savings rates
  from Bankrate.com with pauses between fetches to avoid rate limiting
- Historical rate data is preserved (no longer deleted on each fetch)
- Database migration adds rate_type column and tenant ai_recommendations table
- Backend returns market rates grouped by type with latest-batch-only queries
- AI prompt now includes all three rate types for comprehensive analysis
- AI recommendations are saved per-tenant for retrieval on page load
- Frontend: "Market CD Rates" replaced with "Today's Market Rates" tabbed view
- Rates section is collapsible (expanded by default) to save screen space
- Saved recommendations load automatically with "Last Updated" timestamp

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 13:39:53 -05:00

459 lines
14 KiB
TypeScript

#!/usr/bin/env tsx
/**
* Market Rate Fetcher Script
*
* Scrapes the top CD, Money Market, and High Yield Savings rates from
* Bankrate.com and stores them in the shared.cd_rates table in PostgreSQL.
* Designed to run standalone via cron (once per day).
*
* Historical data is preserved — each fetch adds new rows with the current
* timestamp. The application queries only the latest batch per rate type.
*
* Usage:
* cd scripts
* npm install
* npx tsx fetch-cd-rates.ts
*
* Environment:
* DATABASE_URL - PostgreSQL connection string (reads from ../.env)
*/
import * as dotenv from 'dotenv';
import { resolve } from 'path';
import { Pool } from 'pg';
import puppeteer, { type Browser, type Page } from 'puppeteer';
// Load .env from the project root (one directory above this script) so that
// values such as DATABASE_URL are present in process.env before they are read.
dotenv.config({ path: resolve(__dirname, '..', '.env') });
// Upper bound on rate entries per source page: passed into the page-side
// extraction as its collection cap and applied again when the final,
// APY-sorted list is trimmed.
const MAX_RATES = 25;
// Rate source configurations, fetched in the order listed.
//   type  - value written to each scraped row's rate_type
//   label - human-readable name used in log output
//   url   - Bankrate page that is loaded and scraped for this rate type
const RATE_SOURCES = [
  { type: 'cd', label: 'CD Rates', url: 'https://www.bankrate.com/banking/cds/cd-rates/' },
  { type: 'high_yield_savings', label: 'High Yield Savings', url: 'https://www.bankrate.com/banking/savings/best-high-yield-interests-savings-accounts/' },
  { type: 'money_market', label: 'Money Market', url: 'https://www.bankrate.com/banking/money-market/rates/' },
];
/**
 * One normalized rate entry as produced by the scraper and inserted into
 * shared.cd_rates.
 */
interface MarketRate {
// Whitespace-collapsed institution name; for CDs it may be a synthesized
// "Top CD Rate - <term>" label when the scraped name was really a term/amount.
bank_name: string;
// Annual percentage yield as a plain number (e.g. 4.5 for "4.50%").
apy: number;
// Minimum deposit in dollars, or null when none could be parsed.
min_deposit: number | null;
// Raw term text for CDs; the literal 'N/A' for non-term products.
term: string;
// Term length in months for CDs when parseable; always null for other rate types.
term_months: number | null;
// Rate category, copied from the source configuration's `type`.
rate_type: string;
}
/**
 * Parse a term string like "3 months", "1 year", "18 months" or "1.5 years"
 * into a whole number of months.
 *
 * Matching is case-insensitive and tolerates a hyphen or whitespace between
 * the number and the unit ("18-month CD"). Months are checked before years.
 * Fractional values are rounded to the nearest whole month.
 *
 * @param term - Free-form term text scraped from the page.
 * @returns The term length in months, or null when no month/year term is found.
 */
function parseTermMonths(term: string): number | null {
  const lower = term.toLowerCase().trim();

  const monthMatch = lower.match(/(\d+(?:\.\d+)?)[\s-]*month/);
  if (monthMatch) return Math.round(parseFloat(monthMatch[1]));

  // A single decimal-aware pattern handles both "2 years" and "1.5 years".
  // Matching integers first would read "1.5 years" as "5 years" (60 months).
  const yearMatch = lower.match(/(\d+(?:\.\d+)?)[\s-]*year/);
  if (yearMatch) return Math.round(parseFloat(yearMatch[1]) * 12);

  return null;
}
/**
 * Parse a currency string like "$500", "$1,000", "$0", "No minimum" into a number or null.
 *
 * Only one amount is read: the first "$"-prefixed number if there is one,
 * otherwise the first number in the string. Thousands separators are removed.
 * This keeps surrounding text that happens to contain other digits
 * ("12 months, $2,500 minimum", "$500 - $1,000") from being fused into one
 * bogus value.
 *
 * @param raw - Free-form deposit text scraped from the page.
 * @returns The amount in dollars, or null when the text is empty or has no number.
 */
function parseMinDeposit(raw: string): number | null {
  if (!raw) return null;

  const amountPattern = /(\d[\d,]*(?:\.\d+)?|\.\d+)/;
  const dollarMatch = raw.match(new RegExp('\\$\\s*' + amountPattern.source));
  const match = dollarMatch ?? raw.match(amountPattern);
  if (!match) return null;

  const val = parseFloat(match[1].replace(/,/g, ''));
  return Number.isNaN(val) ? null : val;
}
/**
 * Parse an APY string like "4.50%", "4.50% APY" into a number.
 *
 * The number immediately preceding a "%" sign is preferred; if the text has
 * no percent sign, the first number found is used. Reading a single number
 * (rather than stripping every non-digit) keeps other digits in the same
 * text — dates, term lengths, ratings — from being merged into the rate.
 *
 * @param raw - Free-form APY text scraped from the page.
 * @returns The APY as a plain number (4.5 for "4.50%"), or 0 when nothing parseable is present.
 */
function parseApy(raw: string): number {
  if (!raw) return 0;

  const percentMatch = raw.match(/(\d+(?:\.\d+)?|\.\d+)\s*%/);
  const numberText = percentMatch
    ? percentMatch[1]
    : raw.match(/\d+(?:\.\d+)?|\.\d+/)?.[0];
  if (numberText === undefined) return 0;

  const value = parseFloat(numberText);
  return Number.isFinite(value) ? value : 0;
}
/**
 * Return a promise that settles after roughly `ms` milliseconds.
 * Await it to pause an async function without blocking the event loop.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Navigate to a Bankrate URL and scrape rate data.
 * Reuses an existing browser instance: a new page is opened for this fetch and
 * is always closed in the `finally` block.
 *
 * The in-page extraction tries three strategies in order — HTML tables,
 * card/list layouts, then a broad element scan. A later strategy only runs
 * while fewer than 5 entries have been collected; entries found by earlier
 * strategies are kept. The raw text is then parsed, filtered to entries with a
 * name and an APY above 0, de-duplicated by bank name + term (highest APY
 * wins), sorted by APY descending and capped at MAX_RATES.
 *
 * Errors from navigation or evaluation are not caught here and propagate to
 * the caller; only the initial selector wait is allowed to time out silently.
 *
 * @param browser - Browser used to open the page.
 * @param sourceUrl - URL to load and scrape.
 * @param rateType - Copied into each result's `rate_type`; only 'cd' receives term handling.
 * @param label - Human-readable name used in log output.
 * @returns The normalized rates found on the page (possibly empty).
 */
async function fetchRatesFromPage(
browser: Browser,
sourceUrl: string,
rateType: string,
label: string,
): Promise<MarketRate[]> {
const page: Page = await browser.newPage();
// Present a regular desktop Chrome user agent instead of the headless default.
await page.setUserAgent(
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
);
try {
console.log(`\n--- Fetching ${label} ---`);
console.log(`Navigating to ${sourceUrl}...`);
await page.goto(sourceUrl, {
waitUntil: 'networkidle2',
timeout: 60000,
});
// Wait for rate content to render
console.log('Waiting for rate data to render...');
// Best effort: a timeout here is logged and ignored so the extraction
// strategies below still get a chance to scan whatever did render.
await page.waitForSelector(
'table, [data-testid*="rate"], .brc-table, [class*="ComparisonTable"], [class*="rate-table"]',
{ timeout: 30000 },
).catch(() => {
console.log('Primary selectors not found, proceeding with page scan...');
});
// Extra wait for dynamic content
await sleep(3000);
// Scroll down to load all content
console.log('Scrolling to load all content...');
// Ten 800px steps with a 500ms pause each, then back to the top of the page.
await page.evaluate(async () => {
for (let i = 0; i < 10; i++) {
window.scrollBy(0, 800);
await new Promise((r) => setTimeout(r, 500));
}
window.scrollTo(0, 0);
});
await sleep(2000);
// Extract rate data from the page
// The callback uses `document`, so it cannot see this module's helpers or
// constants; MAX_RATES is handed in as the `maxRates` argument and only raw
// text is returned for parsing below.
const rawRates = await page.evaluate((maxRates: number) => {
const results: Array<{
bank_name: string;
apy_raw: string;
min_deposit_raw: string;
term_raw: string;
}> = [];
// Strategy 1: Look for detailed bank comparison tables
const tables = document.querySelectorAll('table');
for (const table of tables) {
const rows = table.querySelectorAll('tbody tr');
// Skip tables with fewer than 3 body rows.
if (rows.length < 3) continue;
for (const row of rows) {
const cells = row.querySelectorAll('td, th');
if (cells.length < 3) continue;
const texts = Array.from(cells).map((c) => c.textContent?.trim() || '');
// The APY cell is the first one containing a decimal percentage such as "4.50%".
const apyCell = texts.find((t) => /\d+\.\d+\s*%/.test(t));
if (!apyCell) continue;
// Fallback bank name: first cell that does not look like a number, rate,
// dollar amount or term.
const bankCell = texts.find(
(t) =>
t.length > 3 &&
!/^\d/.test(t) &&
!t.includes('%') &&
!t.startsWith('$') &&
!/^\d+\s*(month|year)/i.test(t),
);
// Prefer a name taken from a review/bank link or an image's alt text.
const linkEl = row.querySelector('a[href*="review"], a[href*="bank"], img[alt]');
const linkName = linkEl?.textContent?.trim() || (linkEl as HTMLImageElement)?.alt || '';
const name = linkName.length > 3 ? linkName : bankCell || '';
if (!name) continue;
results.push({
bank_name: name,
apy_raw: apyCell,
min_deposit_raw:
texts.find((t) => t.includes('$') || /no min/i.test(t)) || '',
term_raw: texts.find((t) => /\d+\s*(month|year)/i.test(t)) || '',
});
if (results.length >= maxRates) break;
}
// Stop looking at further tables once at least 5 entries have been collected.
if (results.length >= 5) break;
}
// Strategy 2: Look for card/list layouts
if (results.length < 5) {
const cardSelectors = [
'[class*="product"]',
'[class*="offer-card"]',
'[class*="rate-card"]',
'[class*="ComparisonRow"]',
'[class*="comparison-row"]',
'[data-testid*="product"]',
'[class*="partner"]',
];
for (const selector of cardSelectors) {
const cards = document.querySelectorAll(selector);
// A selector is only considered when it matches at least 3 elements.
if (cards.length < 3) continue;
for (const card of cards) {
const text = card.textContent || '';
// Skip elements whose text is under 20 or over 2000 characters.
if (text.length < 20 || text.length > 2000) continue;
const apyMatch = text.match(/([\d.]+)\s*%/);
if (!apyMatch) continue;
const nameEl =
card.querySelector(
'h2, h3, h4, h5, strong, [class*="name"], [class*="bank"], [class*="title"], a[href*="review"], img[alt]',
);
let bankName = nameEl?.textContent?.trim() || (nameEl as HTMLImageElement)?.alt || '';
if (!bankName || bankName.length < 3 || /^\d/.test(bankName) || bankName.includes('%')) continue;
const depositMatch = text.match(/\$[\d,]+/);
const termMatch = text.match(/\d+\s*(?:month|year)s?/i);
results.push({
bank_name: bankName,
apy_raw: apyMatch[0],
min_deposit_raw: depositMatch?.[0] || '',
term_raw: termMatch?.[0] || '',
});
if (results.length >= maxRates) break;
}
if (results.length >= 5) break;
}
}
// Strategy 3: Broad scan for rate-bearing elements
if (results.length < 5) {
const allElements = document.querySelectorAll(
'div, section, article, li',
);
for (const el of allElements) {
// Skip elements with more than 20 direct children, or with text outside 20-500 characters.
if (el.children.length > 20) continue;
const text = el.textContent || '';
if (text.length < 20 || text.length > 500) continue;
const apyMatch = text.match(/([\d.]+)\s*%\s*(?:APY)?/i);
if (!apyMatch) continue;
const bankEl = el.querySelector(
'h2, h3, h4, h5, strong, b, a[href*="review"]',
);
let bankName = bankEl?.textContent?.trim() || '';
if (!bankName || bankName.length < 3 || /^\d/.test(bankName)) continue;
const depositMatch = text.match(/\$[\d,]+/);
const termMatch = text.match(/\d+\s*(?:month|year)s?/i);
results.push({
bank_name: bankName,
apy_raw: apyMatch[0],
min_deposit_raw: depositMatch?.[0] || '',
term_raw: termMatch?.[0] || '',
});
if (results.length >= maxRates) break;
}
}
return results;
}, MAX_RATES);
console.log(`Raw extraction found ${rawRates.length} rate entries.`);
// Parse and normalize
// Only the 'cd' rate type carries a term; every other type gets term 'N/A'
// and term_months null.
const isTermProduct = rateType === 'cd';
const parsed: MarketRate[] = rawRates
.map((r) => {
let bankName = r.bank_name.replace(/\s+/g, ' ').trim();
const term = isTermProduct ? (r.term_raw || 'N/A') : 'N/A';
// For CDs: if bank name looks like a term, label it descriptively
if (isTermProduct) {
const termText = r.term_raw || bankName;
if (
/^\d+\s*(month|year)/i.test(bankName) ||
/no\s*min/i.test(bankName) ||
/^\$/.test(bankName) ||
bankName.length < 4
) {
// Insert a space after the leading number (e.g. "12month" -> "12 month"),
// then collapse any doubled whitespace that produces.
bankName = `Top CD Rate - ${termText.replace(/^\d+/, (m: string) => m + ' ')}`.replace(/\s+/g, ' ').trim();
}
}
return {
bank_name: bankName,
apy: parseApy(r.apy_raw),
min_deposit: parseMinDeposit(r.min_deposit_raw),
term,
term_months: isTermProduct ? parseTermMonths(r.term_raw || bankName) : null,
rate_type: rateType,
};
})
// Drop entries without a name or without a positive APY.
.filter((r) => r.bank_name && r.apy > 0);
// Deduplicate by bank name + term (keep highest APY)
const seen = new Map<string, MarketRate>();
for (const rate of parsed) {
const key = `${rate.bank_name}|${rate.term}`;
const existing = seen.get(key);
if (!existing || rate.apy > existing.apy) {
seen.set(key, rate);
}
}
// Highest APY first, capped at MAX_RATES entries.
return Array.from(seen.values())
.sort((a, b) => b.apy - a.apy)
.slice(0, MAX_RATES);
} finally {
// Always close the page, whether scraping succeeded or threw.
await page.close();
}
}
/**
 * Store scraped rates into shared.cd_rates.
 * Historical data is preserved — previous rows are never deleted.
 * All rows written by one call share a single fetched_at timestamp and are
 * inserted inside one transaction, so a failed insert leaves no partial batch.
 *
 * A dedicated connection pool is created for the call and is always shut
 * down again, including when connecting or inserting fails.
 *
 * @param rates - Rates to insert (one row each).
 * @param sourceUrl - Page the rates were scraped from; stored in source_url.
 * @throws Re-throws any connection or query error after attempting a rollback.
 */
async function storeRates(rates: MarketRate[], sourceUrl: string): Promise<void> {
  // NOTE(review): the fallback embeds default credentials; consider requiring
  // DATABASE_URL outside of local development.
  const connectionString =
    process.env.DATABASE_URL ||
    'postgresql://hoafinance:change_me@localhost:5432/hoafinance';
  const pool = new Pool({ connectionString });
  try {
    // Connecting inside the try ensures pool.end() still runs if this fails.
    const client = await pool.connect();
    try {
      await client.query('BEGIN');
      const now = new Date().toISOString();
      for (const rate of rates) {
        await client.query(
          `INSERT INTO shared.cd_rates
(bank_name, apy, min_deposit, term, term_months, rate_type, fetched_at, source_url)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
          [
            rate.bank_name,
            rate.apy,
            rate.min_deposit,
            rate.term,
            rate.term_months,
            rate.rate_type,
            now,
            sourceUrl,
          ],
        );
      }
      await client.query('COMMIT');
      console.log(` Stored ${rates.length} ${rates[0]?.rate_type || ''} rates at ${now}`);
    } catch (err) {
      // A failing ROLLBACK (e.g. dropped connection) must not hide the error
      // that caused it, so it is logged and the original error is re-thrown.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackErr) {
        console.error(' ROLLBACK failed:', rollbackErr);
      }
      throw err;
    } finally {
      client.release();
    }
  } finally {
    await pool.end();
  }
}
/**
 * Main entry point.
 *
 * Launches one headless browser, then for each configured rate source:
 * pauses 8-12 seconds (except before the first), scrapes the page, prints the
 * extracted rates and stores them. A failure or empty result for one source
 * is logged and does not stop the remaining sources.
 *
 * The process exits with status 1 when nothing was stored for any source or
 * when an unexpected error occurs. The exit is deferred until after the
 * browser has been closed: calling process.exit() inside the try block would
 * terminate immediately and skip the `finally` cleanup.
 */
async function main(): Promise<void> {
  console.log('=== Market Rate Fetcher ===');
  console.log(`Fetching rates from Bankrate.com...`);
  console.log(`Time: ${new Date().toISOString()}`);
  console.log(`Rate types: ${RATE_SOURCES.map((s) => s.label).join(', ')}`);

  let exitCode = 0;
  let browser: Browser | null = null;
  try {
    console.log('\nLaunching headless browser...');
    browser = await puppeteer.launch({
      headless: true,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
      ],
    });

    let totalStored = 0;
    for (const [i, source] of RATE_SOURCES.entries()) {
      // Pause between fetches to avoid rate limiting (skip for first)
      if (i > 0) {
        const pauseSeconds = 8 + Math.floor(Math.random() * 5); // 8-12 seconds
        console.log(`\nPausing ${pauseSeconds} seconds before next fetch...`);
        await sleep(pauseSeconds * 1000);
      }

      try {
        const rates = await fetchRatesFromPage(browser, source.url, source.type, source.label);
        if (rates.length === 0) {
          console.warn(`\nWARNING: No ${source.label} rates were extracted.`);
          console.warn('This may mean Bankrate changed their page structure.');
          continue; // Don't abort the whole run — try other rate types
        }

        console.log(`\nExtracted ${rates.length} ${source.label}:`);
        console.log('\u2500'.repeat(80));
        for (const r of rates) {
          const termStr = r.term !== 'N/A' ? r.term.padEnd(15) : ''.padEnd(15);
          console.log(
            ` ${r.bank_name.padEnd(35)} ${String(r.apy + '%').padEnd(8)} ${termStr} ${r.min_deposit != null ? '$' + r.min_deposit.toLocaleString() : 'N/A'}`,
          );
        }
        console.log('\u2500'.repeat(80));

        console.log(`\nStoring ${source.label} to database...`);
        await storeRates(rates, source.url);
        totalStored += rates.length;
      } catch (err: unknown) {
        // Thrown values are not guaranteed to be Error instances.
        const message = err instanceof Error ? err.message : String(err);
        console.error(`\nERROR fetching ${source.label}: ${message}`);
        // Continue to next rate type
      }
    }

    if (totalStored === 0) {
      console.warn('\nWARNING: No rates were stored for any type.');
      console.warn('Review Bankrate page structure and update selectors.');
      exitCode = 1;
    } else {
      console.log(`\nDone. Total rates stored: ${totalStored}`);
    }
  } catch (err) {
    console.error('\nFATAL ERROR:', err);
    exitCode = 1;
  } finally {
    if (browser) {
      // A failure while closing must not turn into an unhandled rejection
      // or change the exit status decided above.
      try {
        await browser.close();
      } catch (closeErr) {
        console.error('\nWARNING: failed to close browser:', closeErr);
      }
    }
  }

  // Exit explicitly on failure, but only now that cleanup has finished.
  if (exitCode !== 0) {
    process.exit(exitCode);
  }
}
// Start the run. main() logs its own failures and exits with status 1 when
// nothing was stored or an unexpected error occurred.
main();