""" Market data service with rate limiting, caching, and batch processing. This service handles fetching market prices from Yahoo Finance with: - Database-backed caching to survive restarts - Rate limiting with exponential backoff - Batch processing to reduce API calls - Stale-while-revalidate pattern for better UX """ import time import yfinance as yf from sqlalchemy.orm import Session from sqlalchemy import and_ from typing import Dict, List, Optional from decimal import Decimal from datetime import datetime, timedelta import logging from app.models.market_price import MarketPrice logger = logging.getLogger(__name__) class MarketDataService: """Service for fetching and caching market prices with rate limiting.""" def __init__(self, db: Session, cache_ttl_seconds: int = 300): """ Initialize market data service. Args: db: Database session cache_ttl_seconds: How long cached prices are considered fresh (default: 5 minutes) """ self.db = db self.cache_ttl = cache_ttl_seconds self._rate_limit_delay = 0.5 # Start with 500ms between requests self._last_request_time = 0.0 self._consecutive_errors = 0 self._max_retries = 3 @staticmethod def _is_valid_stock_symbol(symbol: str) -> bool: """ Check if a symbol is a valid stock ticker (not an option symbol or CUSIP). Args: symbol: Symbol to check Returns: True if it looks like a valid stock ticker """ if not symbol or len(symbol) > 5: return False # Stock symbols should start with a letter, not a number # Numbers indicate CUSIP codes or option symbols if symbol[0].isdigit(): return False # Should be mostly uppercase letters # Allow $ for preferred shares (e.g., BRK.B becomes BRK-B) return symbol.replace('-', '').replace('.', '').isalpha() def get_price(self, symbol: str, allow_stale: bool = True) -> Optional[Decimal]: """ Get current price for a symbol with caching. Args: symbol: Stock ticker symbol allow_stale: If True, return stale cache data instead of None Returns: Price or None if unavailable """ # Skip invalid symbols (option symbols, CUSIPs, etc.) if not self._is_valid_stock_symbol(symbol): logger.debug(f"Skipping invalid symbol: {symbol} (not a stock ticker)") return None # Check database cache first cached = self._get_cached_price(symbol) if cached: price, age_seconds = cached if age_seconds < self.cache_ttl: # Fresh cache hit logger.debug(f"Cache HIT (fresh): {symbol} = ${price} (age: {age_seconds}s)") return price elif allow_stale: # Stale cache hit, but we'll return it logger.debug(f"Cache HIT (stale): {symbol} = ${price} (age: {age_seconds}s)") return price # Cache miss or expired - fetch from Yahoo Finance logger.info(f"Cache MISS: {symbol}, fetching from Yahoo Finance...") fresh_price = self._fetch_from_yahoo(symbol) if fresh_price is not None: self._update_cache(symbol, fresh_price) return fresh_price # If fetch failed and we have stale data, return it if cached and allow_stale: price, age_seconds = cached logger.warning(f"Yahoo fetch failed, using stale cache: {symbol} = ${price} (age: {age_seconds}s)") return price return None def get_prices_batch( self, symbols: List[str], allow_stale: bool = True, max_fetches: int = 10 ) -> Dict[str, Optional[Decimal]]: """ Get prices for multiple symbols with rate limiting. Args: symbols: List of ticker symbols allow_stale: Return stale cache data if available max_fetches: Maximum number of API calls to make (remaining use cache) Returns: Dictionary mapping symbol to price (or None if unavailable) """ results = {} symbols_to_fetch = [] # First pass: Check cache for all symbols for symbol in symbols: # Skip invalid symbols if not self._is_valid_stock_symbol(symbol): logger.debug(f"Skipping invalid symbol in batch: {symbol}") results[symbol] = None continue cached = self._get_cached_price(symbol) if cached: price, age_seconds = cached if age_seconds < self.cache_ttl: # Fresh cache - use it results[symbol] = price elif allow_stale: # Stale but usable results[symbol] = price if age_seconds < self.cache_ttl * 2: # Not TOO stale symbols_to_fetch.append(symbol) else: # Stale and not allowing stale - need to fetch symbols_to_fetch.append(symbol) else: # No cache at all symbols_to_fetch.append(symbol) # Second pass: Fetch missing/stale symbols (with limit) if symbols_to_fetch: logger.info(f"Batch fetching {len(symbols_to_fetch)} symbols (max: {max_fetches})") for i, symbol in enumerate(symbols_to_fetch[:max_fetches]): if i > 0: # Rate limiting delay time.sleep(self._rate_limit_delay) price = self._fetch_from_yahoo(symbol) if price is not None: results[symbol] = price self._update_cache(symbol, price) elif symbol not in results: # No cached value and fetch failed results[symbol] = None return results def refresh_stale_prices(self, min_age_seconds: int = 300, limit: int = 20) -> int: """ Background task to refresh stale prices. Args: min_age_seconds: Only refresh prices older than this limit: Maximum number of prices to refresh Returns: Number of prices refreshed """ cutoff_time = datetime.utcnow() - timedelta(seconds=min_age_seconds) # Get stale prices ordered by oldest first stale_prices = ( self.db.query(MarketPrice) .filter(MarketPrice.fetched_at < cutoff_time) .order_by(MarketPrice.fetched_at.asc()) .limit(limit) .all() ) refreshed = 0 for cached_price in stale_prices: time.sleep(self._rate_limit_delay) fresh_price = self._fetch_from_yahoo(cached_price.symbol) if fresh_price is not None: self._update_cache(cached_price.symbol, fresh_price) refreshed += 1 logger.info(f"Refreshed {refreshed}/{len(stale_prices)} stale prices") return refreshed def _get_cached_price(self, symbol: str) -> Optional[tuple[Decimal, float]]: """ Get cached price from database. Returns: Tuple of (price, age_in_seconds) or None if not cached """ cached = ( self.db.query(MarketPrice) .filter(MarketPrice.symbol == symbol) .first() ) if cached: age = (datetime.utcnow() - cached.fetched_at).total_seconds() return (cached.price, age) return None def _update_cache(self, symbol: str, price: Decimal) -> None: """Update or insert price in database cache.""" cached = ( self.db.query(MarketPrice) .filter(MarketPrice.symbol == symbol) .first() ) if cached: cached.price = price cached.fetched_at = datetime.utcnow() else: new_price = MarketPrice( symbol=symbol, price=price, fetched_at=datetime.utcnow() ) self.db.add(new_price) self.db.commit() def _fetch_from_yahoo(self, symbol: str) -> Optional[Decimal]: """ Fetch price from Yahoo Finance with rate limiting and retries. Returns: Price or None if fetch failed """ for attempt in range(self._max_retries): try: # Rate limiting elapsed = time.time() - self._last_request_time if elapsed < self._rate_limit_delay: time.sleep(self._rate_limit_delay - elapsed) self._last_request_time = time.time() # Fetch from Yahoo ticker = yf.Ticker(symbol) info = ticker.info # Try different price fields for field in ["currentPrice", "regularMarketPrice", "previousClose"]: if field in info and info[field]: price = Decimal(str(info[field])) # Success - reset error tracking self._consecutive_errors = 0 self._rate_limit_delay = max(0.5, self._rate_limit_delay * 0.9) # Gradually decrease delay logger.debug(f"Fetched {symbol} = ${price}") return price # No price found in response logger.warning(f"No price data in Yahoo response for {symbol}") return None except Exception as e: error_str = str(e).lower() if "429" in error_str or "too many requests" in error_str: # Rate limit hit - back off exponentially self._consecutive_errors += 1 self._rate_limit_delay = min(10.0, self._rate_limit_delay * 2) # Double delay, max 10s logger.warning( f"Rate limit hit for {symbol} (attempt {attempt + 1}/{self._max_retries}), " f"backing off to {self._rate_limit_delay}s delay" ) if attempt < self._max_retries - 1: time.sleep(self._rate_limit_delay * (attempt + 1)) # Longer wait for retries continue else: # Other error logger.error(f"Error fetching {symbol}: {e}") return None logger.error(f"Failed to fetch {symbol} after {self._max_retries} attempts") return None def clear_cache(self, older_than_days: int = 30) -> int: """ Clear old cached prices. Args: older_than_days: Delete prices older than this many days Returns: Number of records deleted """ cutoff = datetime.utcnow() - timedelta(days=older_than_days) deleted = ( self.db.query(MarketPrice) .filter(MarketPrice.fetched_at < cutoff) .delete() ) self.db.commit() logger.info(f"Cleared {deleted} cached prices older than {older_than_days} days") return deleted