- Add shared.cd_rates table for cross-tenant market data (CD rates from Bankrate) - Create standalone Puppeteer scraper script (scripts/fetch-cd-rates.ts) for cron-based rate fetching - Add investment-planning backend module with 3 endpoints: snapshot, cd-rates, recommendations - AI service gathers tenant financial data (accounts, investments, budgets, projects, cash flow) and calls OpenAI-compatible API (NVIDIA endpoint) for structured investment recommendations - Create InvestmentPlanningPage with summary cards, current investments table, market CD rates table, and AI recommendation accordion - Add Investment Planning to sidebar under Planning menu - Configure AI_API_URL, AI_API_KEY, AI_MODEL environment variables Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
404 lines
13 KiB
TypeScript
404 lines
13 KiB
TypeScript
#!/usr/bin/env tsx
|
|
/**
|
|
* CD Rate Fetcher Script
|
|
*
|
|
* Scrapes the top CD rates from Bankrate.com and stores them in the
|
|
* shared.cd_rates table in PostgreSQL. Designed to run standalone via cron.
|
|
*
|
|
* Bankrate renders rate data dynamically via JavaScript, so this script
|
|
* uses Puppeteer (headless Chrome) to fully render the page before scraping.
|
|
*
|
|
* Usage:
|
|
* cd scripts
|
|
* npm install
|
|
* npx tsx fetch-cd-rates.ts
|
|
*
|
|
* Environment:
|
|
* DATABASE_URL - PostgreSQL connection string (reads from ../.env)
|
|
*/
|
|
|
|
import * as dotenv from 'dotenv';
|
|
import { resolve } from 'path';
|
|
import { Pool } from 'pg';
|
|
import puppeteer, { type Browser } from 'puppeteer';
|
|
|
|
// Load .env from project root
|
|
// The path is resolved relative to this script's directory (not the current
// working directory), so the same .env is read wherever the script is started from.
dotenv.config({ path: resolve(__dirname, '..', '.env') });
|
|
|
|
// Page that gets scraped; also written to the source_url column of every stored row.
const BANKRATE_URL = 'https://www.bankrate.com/banking/cds/cd-rates/';
|
|
// Upper bound on entries collected inside the page and on entries returned
// after de-duplication and sorting.
const MAX_RATES = 25;
|
|
|
|
/**
 * One normalized CD offer, shaped like a row of shared.cd_rates
 * (minus the fetched_at / source_url columns added at insert time).
 */
interface CdRate {
  /** Institution name, or a synthetic "Top CD Rate - <term>" label for summary cards. */
  bank_name: string;

  /** Annual percentage yield as a plain number of percent (e.g. 4.5 for "4.50%"). */
  apy: number;

  /** Minimum opening deposit in dollars; null when none could be parsed. */
  min_deposit: number | null;

  /** Term text as scraped (e.g. "18 months"), or "N/A" when the page gave none. */
  term: string;

  /** Term length in months; null when the term text could not be interpreted. */
  term_months: number | null;
}
|
|
|
|
/**
 * Convert a human-readable CD term into a whole number of months.
 *
 * Recognizes a number (integer or decimal) followed by "month…" or "year…",
 * case-insensitively, anywhere in the string. A month figure takes precedence
 * over a year figure when both are present. Fractional values are rounded to
 * the nearest month.
 *
 * @param term - Raw term text, e.g. "3 months", "1 year", "1.5 years".
 * @returns Term length in months, or `null` when no term is recognized.
 */
function parseTermMonths(term: string): number | null {
  const lower = term.toLowerCase().trim();

  // The number is captured as a whole decimal. An integer-only pattern would
  // read "1.5 years" as "5 years", because the digits after the decimal point
  // satisfy `\d+` on their own.
  const months = lower.match(/(\d+(?:\.\d+)?|\.\d+)\s*month/)?.[1];
  if (months !== undefined) return Math.round(parseFloat(months));

  const years = lower.match(/(\d+(?:\.\d+)?|\.\d+)\s*year/)?.[1];
  if (years !== undefined) return Math.round(parseFloat(years) * 12);

  return null;
}
|
|
|
|
/**
 * Parse a minimum-deposit string into a dollar amount.
 *
 * Only the first numeric token in the text is used, so surrounding words,
 * abbreviations with periods, or additional numbers (such as a term length)
 * cannot bleed into the amount. Thousands separators are ignored.
 *
 * @param raw - Text such as "$500", "$1,000", "$0", "Min. deposit $500" or "No minimum".
 * @returns The amount as a number, or `null` when the text contains no number.
 */
function parseMinDeposit(raw: string): number | null {
  if (!raw) return null;

  // First run of digits (with optional commas and a decimal part), or a bare ".50".
  const token = raw.match(/\d[\d,]*(?:\.\d+)?|\.\d+/)?.[0];
  if (token === undefined) return null;

  const val = parseFloat(token.replace(/,/g, ''));
  return Number.isNaN(val) ? null : val;
}
|
|
|
|
/**
 * Parse an APY string into a number of percent.
 *
 * The number directly preceding a "%" sign is preferred, so other figures in
 * the same text (for example a term length) are not merged into the rate.
 * When there is no "%" sign, the first number in the text is used instead.
 *
 * @param raw - Text such as "4.50%", "4.50% APY" or "12-month CD: 4.50% APY".
 * @returns The rate (e.g. 4.5), or 0 when no number can be found.
 */
function parseApy(raw: string): number {
  const beforePercent = raw.match(/(\d+(?:\.\d+)?|\.\d+)\s*%/)?.[1];
  const token = beforePercent ?? raw.match(/\d+(?:\.\d+)?|\.\d+/)?.[0];
  if (token === undefined) return 0;

  const value = parseFloat(token);
  return Number.isNaN(value) ? 0 : value;
}
|
|
|
|
/**
 * Render the Bankrate CD rates page in headless Chrome and scrape rate entries.
 *
 * The page is loaded, given time to settle, and scrolled so content further
 * down the page has a chance to render. Entries are then extracted inside the
 * page by three strategies; the second and third only run while fewer than
 * five entries have been collected:
 *   1. `<table>` body rows with at least three cells, one holding a decimal percentage;
 *   2. card-like elements matched by a list of class / data-testid selectors;
 *   3. a broad scan of small `div` / `section` / `article` / `li` elements.
 *
 * Raw strings are normalized with parseApy, parseMinDeposit and
 * parseTermMonths. Entries without a name or a positive APY are dropped,
 * entries sharing bank name and term collapse to the highest APY, and the
 * result is sorted by APY (descending) and capped at MAX_RATES.
 *
 * @returns Normalized rates, best APY first; empty when nothing was recognized.
 * @throws Re-throws any browser launch or navigation error after closing the browser.
 */
async function fetchRates(): Promise<CdRate[]> {
  let browser: Browser | null = null;

  try {
    console.log('Launching headless browser...');
    browser = await puppeteer.launch({
      headless: true,
      args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'],
    });

    const page = await browser.newPage();
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    );

    console.log(`Navigating to ${BANKRATE_URL}...`);
    await page.goto(BANKRATE_URL, { waitUntil: 'networkidle2', timeout: 60000 });

    // Best effort: several table/card patterns are accepted, and a timeout is
    // not fatal because the extraction below scans the page regardless.
    console.log('Waiting for rate data to render...');
    try {
      await page.waitForSelector(
        'table, [data-testid*="rate"], .brc-table, [class*="ComparisonTable"], [class*="rate-table"]',
        { timeout: 30000 },
      );
    } catch {
      console.log('Primary selectors not found, proceeding with page scan...');
    }

    // Grace period for dynamic content.
    await new Promise((wake) => setTimeout(wake, 3000));

    // Step down the page (rate tables may be below the fold), then return to the top.
    console.log('Scrolling to load all content...');
    await page.evaluate(async () => {
      let step = 0;
      while (step < 10) {
        window.scrollBy(0, 800);
        await new Promise((r) => setTimeout(r, 500));
        step += 1;
      }
      window.scrollTo(0, 0);
    });
    await new Promise((wake) => setTimeout(wake, 2000));

    // This callback runs inside the page, so it must be self-contained: only
    // its serialized argument (the entry cap) crosses over from Node.
    const rates = await page.evaluate((maxRates: number) => {
      const found: {
        bank_name: string;
        apy_raw: string;
        min_deposit_raw: string;
        term_raw: string;
      }[] = [];

      // Strategy 1: comparison tables with named banks (Bank, APY, Min Deposit, Term).
      for (const table of document.querySelectorAll('table')) {
        const bodyRows = table.querySelectorAll('tbody tr');
        if (bodyRows.length < 3) continue; // too small to be a rate table

        for (const row of bodyRows) {
          const cells = row.querySelectorAll('td, th');
          if (cells.length < 3) continue;

          const cellTexts = Array.from(cells, (cell) => cell.textContent?.trim() || '');
          const apyText = cellTexts.find((t) => /\d+\.\d+\s*%/.test(t));
          if (!apyText) continue;

          // A cell that reads like a name rather than a number, percentage, amount or term.
          const plainName = cellTexts.find(
            (t) =>
              !(
                t.length <= 3 ||
                /^\d/.test(t) ||
                t.includes('%') ||
                t.startsWith('$') ||
                /^\d+\s*(month|year)/i.test(t)
              ),
          );

          // A review/bank link or a logo's alt text wins over the plain cell text.
          const nameSource = row.querySelector('a[href*="review"], a[href*="bank"], img[alt]');
          const sourceName =
            nameSource?.textContent?.trim() || (nameSource as HTMLImageElement)?.alt || '';

          let name: string;
          if (sourceName.length > 3) {
            name = sourceName;
          } else {
            name = plainName || '';
          }
          if (!name) continue;

          const depositText = cellTexts.find((t) => t.includes('$') || /no min/i.test(t)) || '';
          const termText = cellTexts.find((t) => /\d+\s*(month|year)/i.test(t)) || '';
          found.push({
            bank_name: name,
            apy_raw: apyText,
            min_deposit_raw: depositText,
            term_raw: termText,
          });

          if (found.length >= maxRates) break;
        }

        if (found.length >= 5) break; // this table was good enough
      }

      // Strategy 2: card / list layouts carrying a bank name and a rate.
      if (found.length < 5) {
        const cardSelectors = [
          '[class*="product"]',
          '[class*="offer-card"]',
          '[class*="rate-card"]',
          '[class*="ComparisonRow"]',
          '[class*="comparison-row"]',
          '[data-testid*="product"]',
          '[class*="partner"]',
        ];

        for (const selector of cardSelectors) {
          const cards = document.querySelectorAll(selector);
          if (cards.length < 3) continue;

          for (const card of cards) {
            const cardText = card.textContent || '';
            if (cardText.length < 20 || cardText.length > 2000) continue;

            const apyHit = cardText.match(/([\d.]+)\s*%/);
            if (!apyHit) continue;

            // Name comes from a heading, link, or image alt text.
            const nameNode = card.querySelector(
              'h2, h3, h4, h5, strong, [class*="name"], [class*="bank"], [class*="title"], a[href*="review"], img[alt]',
            );
            const cardName =
              nameNode?.textContent?.trim() || (nameNode as HTMLImageElement)?.alt || '';

            // Reject "names" that are really a rate or a term.
            if (!cardName || cardName.length < 3 || /^\d/.test(cardName) || cardName.includes('%')) {
              continue;
            }

            const depositHit = cardText.match(/\$[\d,]+/);
            const termHit = cardText.match(/\d+\s*(?:month|year)s?/i);
            found.push({
              bank_name: cardName,
              apy_raw: apyHit[0],
              min_deposit_raw: depositHit?.[0] || '',
              term_raw: termHit?.[0] || '',
            });

            if (found.length >= maxRates) break;
          }

          if (found.length >= 5) break;
        }
      }

      // Strategy 3: broad scan for small rate-bearing elements.
      if (found.length < 5) {
        for (const el of document.querySelectorAll('div, section, article, li')) {
          if (el.children.length > 20) continue;

          const elText = el.textContent || '';
          if (elText.length < 20 || elText.length > 500) continue;

          const apyHit = elText.match(/([\d.]+)\s*%\s*(?:APY)?/i);
          if (!apyHit) continue;

          const nameNode = el.querySelector('h2, h3, h4, h5, strong, b, a[href*="review"]');
          const elName = nameNode?.textContent?.trim() || '';
          if (!elName || elName.length < 3 || /^\d/.test(elName)) continue;

          const depositHit = elText.match(/\$[\d,]+/);
          const termHit = elText.match(/\d+\s*(?:month|year)s?/i);
          found.push({
            bank_name: elName,
            apy_raw: apyHit[0],
            min_deposit_raw: depositHit?.[0] || '',
            term_raw: termHit?.[0] || '',
          });

          if (found.length >= maxRates) break;
        }
      }

      return found;
    }, MAX_RATES);

    console.log(`Raw extraction found ${rates.length} rate entries.`);

    // Normalize the raw strings into CdRate records.
    const parsed: CdRate[] = [];
    for (const raw of rates) {
      let bankName = raw.bank_name.replace(/\s+/g, ' ').trim();
      const term = raw.term_raw || 'N/A';
      const termText = raw.term_raw || bankName;

      // A "name" that looks like a term or deposit figure means this was a
      // summary card; give it a descriptive label built from the term.
      const looksLikeSummary =
        /^\d+\s*(month|year)/i.test(bankName) ||
        /no\s*min/i.test(bankName) ||
        /^\$/.test(bankName) ||
        bankName.length < 4;
      if (looksLikeSummary) {
        const spacedTerm = termText.replace(/^\d+/, (m: string) => m + ' ');
        bankName = `Top CD Rate - ${spacedTerm}`.replace(/\s+/g, ' ').trim();
      }

      const entry: CdRate = {
        bank_name: bankName,
        apy: parseApy(raw.apy_raw),
        min_deposit: parseMinDeposit(raw.min_deposit_raw),
        term,
        term_months: parseTermMonths(raw.term_raw || bankName),
      };
      if (entry.bank_name && entry.apy > 0) {
        parsed.push(entry);
      }
    }

    // De-duplicate on bank name + term, keeping the highest APY for each pair.
    const bestByKey = new Map<string, CdRate>();
    for (const candidate of parsed) {
      const key = `${candidate.bank_name}|${candidate.term}`;
      const current = bestByKey.get(key);
      if (current && !(candidate.apy > current.apy)) continue;
      bestByKey.set(key, candidate);
    }

    const ranked = Array.from(bestByKey.values());
    ranked.sort((x, y) => y.apy - x.apy);
    return ranked.slice(0, MAX_RATES);
  } finally {
    if (browser !== null) {
      await browser.close();
    }
  }
}
|
|
|
|
/**
 * Replace the contents of shared.cd_rates with the given batch.
 *
 * The DELETE and the per-rate INSERTs run in one transaction, so a failure
 * part-way through rolls back and leaves the previous batch in place. Every
 * row of a batch gets the same fetched_at timestamp and BANKRATE_URL as its
 * source_url.
 *
 * The connection string comes from DATABASE_URL, falling back to a local
 * development default when that variable is unset or empty.
 *
 * @param rates - Normalized rates to store. An empty array clears the table.
 * @throws Re-throws the connection or query error that aborted the write.
 */
async function storeRates(rates: CdRate[]): Promise<void> {
  const connectionString =
    process.env.DATABASE_URL ||
    'postgresql://hoafinance:change_me@localhost:5432/hoafinance';

  const pool = new Pool({ connectionString });

  // Connecting happens inside the outer try so the pool is shut down even
  // when no client could be obtained.
  try {
    const client = await pool.connect();

    try {
      await client.query('BEGIN');

      // Clear previous batch (we only keep the latest fetch)
      await client.query('DELETE FROM shared.cd_rates');

      const now = new Date().toISOString();

      for (const rate of rates) {
        await client.query(
          `INSERT INTO shared.cd_rates
            (bank_name, apy, min_deposit, term, term_months, fetched_at, source_url)
           VALUES ($1, $2, $3, $4, $5, $6, $7)`,
          [
            rate.bank_name,
            rate.apy,
            rate.min_deposit,
            rate.term,
            rate.term_months,
            now,
            BANKRATE_URL,
          ],
        );
      }

      await client.query('COMMIT');
      console.log(`Successfully stored ${rates.length} CD rates at ${now}`);
    } catch (err) {
      // A failing ROLLBACK (e.g. the connection dropped) must not replace the
      // error that caused it; report it and re-throw the original.
      try {
        await client.query('ROLLBACK');
      } catch (rollbackErr) {
        console.error('ROLLBACK failed:', rollbackErr);
      }
      throw err;
    } finally {
      client.release();
    }
  } finally {
    await pool.end();
  }
}
|
|
|
|
/**
 * Script entry point: scrape the rates, print them as a table, store them.
 *
 * Exits the process with status 1 when no rates were extracted (so existing
 * data is not overwritten with nothing) or when any step throws.
 */
async function main() {
  const banner = [
    '=== CD Rate Fetcher ===',
    `Fetching top CD rates from Bankrate.com...`,
    `Time: ${new Date().toISOString()}`,
    '',
  ];
  for (const line of banner) {
    console.log(line);
  }

  try {
    const rates = await fetchRates();

    if (rates.length === 0) {
      const warnings = [
        '',
        'WARNING: No CD rates were extracted from Bankrate.',
        'This likely means Bankrate changed their page structure.',
        'Review the page DOM and update selectors in fetch-cd-rates.ts.',
      ];
      for (const line of warnings) {
        console.warn(line);
      }
      process.exit(1);
    }

    const rule = '─'.repeat(70);

    console.log(`\nExtracted ${rates.length} rates:`);
    console.log(rule);
    for (const r of rates) {
      const nameCol = r.bank_name.padEnd(30);
      const apyCol = String(r.apy + '%').padEnd(8);
      const termCol = r.term.padEnd(15);
      let depositCol = 'N/A';
      if (r.min_deposit != null) {
        depositCol = '$' + r.min_deposit.toLocaleString();
      }
      console.log(` ${nameCol} ${apyCol} ${termCol} ${depositCol}`);
    }
    console.log(rule);

    console.log('\nStoring to database...');
    await storeRates(rates);
    console.log('Done.');
  } catch (err) {
    console.error('\nFATAL ERROR:', err);
    process.exit(1);
  }
}
|
|
|
|
// Start the run. main() catches its own errors and exits with status 1 on
// failure, so the returned promise is intentionally not awaited here.
main();
|