Phase 6: Expand market rates and enhance AI investment recommendations
- Rate fetcher now scrapes CD, Money Market, and High Yield Savings rates from Bankrate.com, with pauses between fetches to avoid rate limiting
- Historical rate data is preserved (no longer deleted on each fetch)
- Database migration adds a rate_type column and a tenant ai_recommendations table
- Backend returns market rates grouped by type with latest-batch-only queries
- AI prompt now includes all three rate types for comprehensive analysis
- AI recommendations are saved per-tenant for retrieval on page load
- Frontend: "Market CD Rates" replaced with a tabbed "Today's Market Rates" view
- Rates section is collapsible (expanded by default) to save screen space
- Saved recommendations load automatically with a "Last Updated" timestamp

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,12 +1,13 @@
|
||||
#!/usr/bin/env tsx
|
||||
/**
|
||||
* CD Rate Fetcher Script
|
||||
* Market Rate Fetcher Script
|
||||
*
|
||||
* Scrapes the top CD rates from Bankrate.com and stores them in the
|
||||
* shared.cd_rates table in PostgreSQL. Designed to run standalone via cron.
|
||||
* Scrapes the top CD, Money Market, and High Yield Savings rates from
|
||||
* Bankrate.com and stores them in the shared.cd_rates table in PostgreSQL.
|
||||
* Designed to run standalone via cron (once per day).
|
||||
*
|
||||
* Bankrate renders rate data dynamically via JavaScript, so this script
|
||||
* uses Puppeteer (headless Chrome) to fully render the page before scraping.
|
||||
* Historical data is preserved — each fetch adds new rows with the current
|
||||
* timestamp. The application queries only the latest batch per rate type.
|
||||
*
|
||||
* Usage:
|
||||
* cd scripts
|
||||
@@ -20,20 +21,39 @@
|
||||
import * as dotenv from 'dotenv';
|
||||
import { resolve } from 'path';
|
||||
import { Pool } from 'pg';
|
||||
import puppeteer, { type Browser } from 'puppeteer';
|
||||
import puppeteer, { type Browser, type Page } from 'puppeteer';
|
||||
|
||||
// Load .env from project root
|
||||
dotenv.config({ path: resolve(__dirname, '..', '.env') });
|
||||
|
||||
const BANKRATE_URL = 'https://www.bankrate.com/banking/cds/cd-rates/';
|
||||
const MAX_RATES = 25;
|
||||
|
||||
interface CdRate {
|
||||
// Rate source configurations
|
||||
// One entry per Bankrate page that main() scrapes, in the order they are fetched.
//   type  - passed to fetchRatesFromPage as rateType and written to the rate_type column
//   label - human-readable name used only in console output
//   url   - page that is loaded and scraped; also stored as source_url by storeRates
const RATE_SOURCES = [
|
||||
{
|
||||
// 'cd' is the only type treated as a term product: term and term_months
// are kept for it and forced to 'N/A' / null for the other types.
type: 'cd',
|
||||
label: 'CD Rates',
|
||||
url: 'https://www.bankrate.com/banking/cds/cd-rates/',
|
||||
},
|
||||
{
|
||||
type: 'high_yield_savings',
|
||||
label: 'High Yield Savings',
|
||||
url: 'https://www.bankrate.com/banking/savings/best-high-yield-interests-savings-accounts/',
|
||||
},
|
||||
{
|
||||
type: 'money_market',
|
||||
label: 'Money Market',
|
||||
url: 'https://www.bankrate.com/banking/money-market/rates/',
|
||||
},
|
||||
];
|
||||
|
||||
// A single normalized rate row, as produced by fetchRatesFromPage and
// inserted into shared.cd_rates by storeRates.
interface MarketRate {
|
||||
// Whitespace-collapsed name scraped from the page. For CDs, a name that
// looks like a term or deposit amount is replaced by "Top CD Rate - <term>".
bank_name: string;
|
||||
// Result of parseApy on the scraped text; rows with apy <= 0 are filtered out.
apy: number;
|
||||
// Result of parseMinDeposit; printed as 'N/A' when null.
// NOTE(review): presumably null when no amount could be parsed — confirm in parseMinDeposit.
min_deposit: number | null;
|
||||
// Raw term text for CDs (or 'N/A' if none was scraped); always 'N/A' for non-CD types.
term: string;
|
||||
// Result of parseTermMonths for CDs; always null for non-CD types.
term_months: number | null;
|
||||
// The `type` of the RATE_SOURCES entry the row came from
// ('cd', 'high_yield_savings' or 'money_market').
rate_type: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -71,35 +91,36 @@ function parseApy(raw: string): number {
|
||||
}
|
||||
|
||||
/**
|
||||
* Launch headless Chrome, navigate to Bankrate, and scrape CD rate data.
|
||||
* Pause execution for a given number of milliseconds.
|
||||
*/
|
||||
async function fetchRates(): Promise<CdRate[]> {
|
||||
let browser: Browser | null = null;
|
||||
// Returns a promise that resolves (with no value) once setTimeout fires after
// `ms` milliseconds. Used for the post-render waits and the pause between sources.
function sleep(ms: number): Promise<void> {
|
||||
// Note: the callback parameter `resolve` shadows the `resolve` imported from
// 'path' at the top of the file; inside this arrow it is the promise resolver.
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
|
||||
* Navigate to a Bankrate URL and scrape rate data.
|
||||
* Reuses an existing browser instance.
|
||||
*/
|
||||
async function fetchRatesFromPage(
|
||||
browser: Browser,
|
||||
sourceUrl: string,
|
||||
rateType: string,
|
||||
label: string,
|
||||
): Promise<MarketRate[]> {
|
||||
const page: Page = await browser.newPage();
|
||||
await page.setUserAgent(
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
);
|
||||
|
||||
try {
|
||||
console.log('Launching headless browser...');
|
||||
browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: [
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
],
|
||||
});
|
||||
|
||||
const page = await browser.newPage();
|
||||
await page.setUserAgent(
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||
);
|
||||
|
||||
console.log(`Navigating to ${BANKRATE_URL}...`);
|
||||
await page.goto(BANKRATE_URL, {
|
||||
console.log(`\n--- Fetching ${label} ---`);
|
||||
console.log(`Navigating to ${sourceUrl}...`);
|
||||
await page.goto(sourceUrl, {
|
||||
waitUntil: 'networkidle2',
|
||||
timeout: 60000,
|
||||
});
|
||||
|
||||
// Wait for rate content to render
|
||||
// Bankrate uses various table/card patterns; we'll try multiple selectors
|
||||
console.log('Waiting for rate data to render...');
|
||||
await page.waitForSelector(
|
||||
'table, [data-testid*="rate"], .brc-table, [class*="ComparisonTable"], [class*="rate-table"]',
|
||||
@@ -109,9 +130,9 @@ async function fetchRates(): Promise<CdRate[]> {
|
||||
});
|
||||
|
||||
// Extra wait for dynamic content
|
||||
await new Promise((resolve) => setTimeout(resolve, 3000));
|
||||
await sleep(3000);
|
||||
|
||||
// Scroll down to load all content (rate tables may be below the fold)
|
||||
// Scroll down to load all content
|
||||
console.log('Scrolling to load all content...');
|
||||
await page.evaluate(async () => {
|
||||
for (let i = 0; i < 10; i++) {
|
||||
@@ -120,10 +141,10 @@ async function fetchRates(): Promise<CdRate[]> {
|
||||
}
|
||||
window.scrollTo(0, 0);
|
||||
});
|
||||
await new Promise((resolve) => setTimeout(resolve, 2000));
|
||||
await sleep(2000);
|
||||
|
||||
// Extract rate data from the page using multiple strategies
|
||||
const rates = await page.evaluate((maxRates: number) => {
|
||||
// Extract rate data from the page
|
||||
const rawRates = await page.evaluate((maxRates: number) => {
|
||||
const results: Array<{
|
||||
bank_name: string;
|
||||
apy_raw: string;
|
||||
@@ -131,12 +152,11 @@ async function fetchRates(): Promise<CdRate[]> {
|
||||
term_raw: string;
|
||||
}> = [];
|
||||
|
||||
// Strategy 1: Look for detailed bank comparison tables with named banks
|
||||
// These typically have 4+ columns: Bank, APY, Min Deposit, Term
|
||||
// Strategy 1: Look for detailed bank comparison tables
|
||||
const tables = document.querySelectorAll('table');
|
||||
for (const table of tables) {
|
||||
const rows = table.querySelectorAll('tbody tr');
|
||||
if (rows.length < 3) continue; // Skip small tables
|
||||
if (rows.length < 3) continue;
|
||||
|
||||
for (const row of rows) {
|
||||
const cells = row.querySelectorAll('td, th');
|
||||
@@ -146,7 +166,6 @@ async function fetchRates(): Promise<CdRate[]> {
|
||||
const apyCell = texts.find((t) => /\d+\.\d+\s*%/.test(t));
|
||||
if (!apyCell) continue;
|
||||
|
||||
// Bank name: look for a cell with a real name (not just number/percent/dollar)
|
||||
const bankCell = texts.find(
|
||||
(t) =>
|
||||
t.length > 3 &&
|
||||
@@ -156,7 +175,6 @@ async function fetchRates(): Promise<CdRate[]> {
|
||||
!/^\d+\s*(month|year)/i.test(t),
|
||||
);
|
||||
|
||||
// Also try to find the bank name from links or images in the row
|
||||
const linkEl = row.querySelector('a[href*="review"], a[href*="bank"], img[alt]');
|
||||
const linkName = linkEl?.textContent?.trim() || (linkEl as HTMLImageElement)?.alt || '';
|
||||
|
||||
@@ -173,10 +191,10 @@ async function fetchRates(): Promise<CdRate[]> {
|
||||
|
||||
if (results.length >= maxRates) break;
|
||||
}
|
||||
if (results.length >= 5) break; // Found a good table
|
||||
if (results.length >= 5) break;
|
||||
}
|
||||
|
||||
// Strategy 2: Look for card/list layouts with bank names and rates
|
||||
// Strategy 2: Look for card/list layouts
|
||||
if (results.length < 5) {
|
||||
const cardSelectors = [
|
||||
'[class*="product"]',
|
||||
@@ -199,14 +217,12 @@ async function fetchRates(): Promise<CdRate[]> {
|
||||
const apyMatch = text.match(/([\d.]+)\s*%/);
|
||||
if (!apyMatch) continue;
|
||||
|
||||
// Try to find bank name from heading, link, or image alt text
|
||||
const nameEl =
|
||||
card.querySelector(
|
||||
'h2, h3, h4, h5, strong, [class*="name"], [class*="bank"], [class*="title"], a[href*="review"], img[alt]',
|
||||
);
|
||||
let bankName = nameEl?.textContent?.trim() || (nameEl as HTMLImageElement)?.alt || '';
|
||||
|
||||
// Skip if the "name" is just a rate or term
|
||||
if (!bankName || bankName.length < 3 || /^\d/.test(bankName) || bankName.includes('%')) continue;
|
||||
|
||||
const depositMatch = text.match(/\$[\d,]+/);
|
||||
@@ -261,24 +277,27 @@ async function fetchRates(): Promise<CdRate[]> {
|
||||
return results;
|
||||
}, MAX_RATES);
|
||||
|
||||
console.log(`Raw extraction found ${rates.length} rate entries.`);
|
||||
console.log(`Raw extraction found ${rawRates.length} rate entries.`);
|
||||
|
||||
// Parse and normalize the scraped data
|
||||
const parsed: CdRate[] = rates
|
||||
// Parse and normalize
|
||||
const isTermProduct = rateType === 'cd';
|
||||
|
||||
const parsed: MarketRate[] = rawRates
|
||||
.map((r) => {
|
||||
let bankName = r.bank_name.replace(/\s+/g, ' ').trim();
|
||||
const term = r.term_raw || 'N/A';
|
||||
const term = isTermProduct ? (r.term_raw || 'N/A') : 'N/A';
|
||||
|
||||
// If the bank name looks like a term or deposit info, it's a
|
||||
// summary card — label it more descriptively using the term
|
||||
const termText = r.term_raw || bankName;
|
||||
if (
|
||||
/^\d+\s*(month|year)/i.test(bankName) ||
|
||||
/no\s*min/i.test(bankName) ||
|
||||
/^\$/.test(bankName) ||
|
||||
bankName.length < 4
|
||||
) {
|
||||
bankName = `Top CD Rate - ${termText.replace(/^\d+/, (m: string) => m + ' ')}`.replace(/\s+/g, ' ').trim();
|
||||
// For CDs: if bank name looks like a term, label it descriptively
|
||||
if (isTermProduct) {
|
||||
const termText = r.term_raw || bankName;
|
||||
if (
|
||||
/^\d+\s*(month|year)/i.test(bankName) ||
|
||||
/no\s*min/i.test(bankName) ||
|
||||
/^\$/.test(bankName) ||
|
||||
bankName.length < 4
|
||||
) {
|
||||
bankName = `Top CD Rate - ${termText.replace(/^\d+/, (m: string) => m + ' ')}`.replace(/\s+/g, ' ').trim();
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
@@ -286,13 +305,14 @@ async function fetchRates(): Promise<CdRate[]> {
|
||||
apy: parseApy(r.apy_raw),
|
||||
min_deposit: parseMinDeposit(r.min_deposit_raw),
|
||||
term,
|
||||
term_months: parseTermMonths(r.term_raw || bankName),
|
||||
term_months: isTermProduct ? parseTermMonths(r.term_raw || bankName) : null,
|
||||
rate_type: rateType,
|
||||
};
|
||||
})
|
||||
.filter((r) => r.bank_name && r.apy > 0);
|
||||
|
||||
// Deduplicate by bank name + term (keep highest APY)
|
||||
const seen = new Map<string, CdRate>();
|
||||
const seen = new Map<string, MarketRate>();
|
||||
for (const rate of parsed) {
|
||||
const key = `${rate.bank_name}|${rate.term}`;
|
||||
const existing = seen.get(key);
|
||||
@@ -305,16 +325,16 @@ async function fetchRates(): Promise<CdRate[]> {
|
||||
.sort((a, b) => b.apy - a.apy)
|
||||
.slice(0, MAX_RATES);
|
||||
} finally {
|
||||
if (browser) {
|
||||
await browser.close();
|
||||
}
|
||||
await page.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Store scraped rates into shared.cd_rates, replacing all previous data.
|
||||
* Store scraped rates into shared.cd_rates.
|
||||
* Historical data is preserved — we no longer delete previous rows.
|
||||
* Each fetch batch shares a common fetched_at timestamp per rate_type.
|
||||
*/
|
||||
async function storeRates(rates: CdRate[]): Promise<void> {
|
||||
async function storeRates(rates: MarketRate[], sourceUrl: string): Promise<void> {
|
||||
const connectionString =
|
||||
process.env.DATABASE_URL ||
|
||||
'postgresql://hoafinance:change_me@localhost:5432/hoafinance';
|
||||
@@ -325,30 +345,28 @@ async function storeRates(rates: CdRate[]): Promise<void> {
|
||||
try {
|
||||
await client.query('BEGIN');
|
||||
|
||||
// Clear previous batch (we only keep the latest fetch)
|
||||
await client.query('DELETE FROM shared.cd_rates');
|
||||
|
||||
const now = new Date().toISOString();
|
||||
|
||||
for (const rate of rates) {
|
||||
await client.query(
|
||||
`INSERT INTO shared.cd_rates
|
||||
(bank_name, apy, min_deposit, term, term_months, fetched_at, source_url)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7)`,
|
||||
(bank_name, apy, min_deposit, term, term_months, rate_type, fetched_at, source_url)
|
||||
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
|
||||
[
|
||||
rate.bank_name,
|
||||
rate.apy,
|
||||
rate.min_deposit,
|
||||
rate.term,
|
||||
rate.term_months,
|
||||
rate.rate_type,
|
||||
now,
|
||||
BANKRATE_URL,
|
||||
sourceUrl,
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
await client.query('COMMIT');
|
||||
console.log(`Successfully stored ${rates.length} CD rates at ${now}`);
|
||||
console.log(` Stored ${rates.length} ${rates[0]?.rate_type || ''} rates at ${now}`);
|
||||
} catch (err) {
|
||||
await client.query('ROLLBACK');
|
||||
throw err;
|
||||
@@ -362,41 +380,78 @@ async function storeRates(rates: CdRate[]): Promise<void> {
|
||||
* Main entry point.
|
||||
*/
|
||||
async function main() {
|
||||
console.log('=== CD Rate Fetcher ===');
|
||||
console.log(`Fetching top CD rates from Bankrate.com...`);
|
||||
console.log('=== Market Rate Fetcher ===');
|
||||
console.log(`Fetching rates from Bankrate.com...`);
|
||||
console.log(`Time: ${new Date().toISOString()}`);
|
||||
console.log('');
|
||||
console.log(`Rate types: ${RATE_SOURCES.map((s) => s.label).join(', ')}`);
|
||||
|
||||
let browser: Browser | null = null;
|
||||
|
||||
try {
|
||||
const rates = await fetchRates();
|
||||
console.log('\nLaunching headless browser...');
|
||||
browser = await puppeteer.launch({
|
||||
headless: true,
|
||||
args: [
|
||||
'--no-sandbox',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
],
|
||||
});
|
||||
|
||||
if (rates.length === 0) {
|
||||
console.warn('');
|
||||
console.warn('WARNING: No CD rates were extracted from Bankrate.');
|
||||
console.warn(
|
||||
'This likely means Bankrate changed their page structure.',
|
||||
);
|
||||
console.warn(
|
||||
'Review the page DOM and update selectors in fetch-cd-rates.ts.',
|
||||
);
|
||||
let totalStored = 0;
|
||||
|
||||
for (let i = 0; i < RATE_SOURCES.length; i++) {
|
||||
const source = RATE_SOURCES[i];
|
||||
|
||||
// Pause between fetches to avoid rate limiting (skip for first)
|
||||
if (i > 0) {
|
||||
const pauseSeconds = 8 + Math.floor(Math.random() * 5); // 8-12 seconds
|
||||
console.log(`\nPausing ${pauseSeconds} seconds before next fetch...`);
|
||||
await sleep(pauseSeconds * 1000);
|
||||
}
|
||||
|
||||
try {
|
||||
const rates = await fetchRatesFromPage(browser, source.url, source.type, source.label);
|
||||
|
||||
if (rates.length === 0) {
|
||||
console.warn(`\nWARNING: No ${source.label} rates were extracted.`);
|
||||
console.warn('This may mean Bankrate changed their page structure.');
|
||||
continue; // Don't abort the whole run — try other rate types
|
||||
}
|
||||
|
||||
console.log(`\nExtracted ${rates.length} ${source.label}:`);
|
||||
console.log('\u2500'.repeat(80));
|
||||
for (const r of rates) {
|
||||
const termStr = r.term !== 'N/A' ? r.term.padEnd(15) : ''.padEnd(15);
|
||||
console.log(
|
||||
` ${r.bank_name.padEnd(35)} ${String(r.apy + '%').padEnd(8)} ${termStr} ${r.min_deposit != null ? '$' + r.min_deposit.toLocaleString() : 'N/A'}`,
|
||||
);
|
||||
}
|
||||
console.log('\u2500'.repeat(80));
|
||||
|
||||
console.log(`\nStoring ${source.label} to database...`);
|
||||
await storeRates(rates, source.url);
|
||||
totalStored += rates.length;
|
||||
} catch (err: any) {
|
||||
console.error(`\nERROR fetching ${source.label}: ${err.message}`);
|
||||
// Continue to next rate type
|
||||
}
|
||||
}
|
||||
|
||||
if (totalStored === 0) {
|
||||
console.warn('\nWARNING: No rates were stored for any type.');
|
||||
console.warn('Review Bankrate page structure and update selectors.');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(`\nExtracted ${rates.length} rates:`);
|
||||
console.log('─'.repeat(70));
|
||||
for (const r of rates) {
|
||||
console.log(
|
||||
` ${r.bank_name.padEnd(30)} ${String(r.apy + '%').padEnd(8)} ${r.term.padEnd(15)} ${r.min_deposit != null ? '$' + r.min_deposit.toLocaleString() : 'N/A'}`,
|
||||
);
|
||||
}
|
||||
console.log('─'.repeat(70));
|
||||
|
||||
console.log('\nStoring to database...');
|
||||
await storeRates(rates);
|
||||
console.log('Done.');
|
||||
console.log(`\nDone. Total rates stored: ${totalStored}`);
|
||||
} catch (err) {
|
||||
console.error('\nFATAL ERROR:', err);
|
||||
process.exit(1);
|
||||
} finally {
|
||||
if (browser) {
|
||||
await browser.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user