"""Fidelity brokerage CSV parser.""" from pathlib import Path from typing import List, Dict, Any import pandas as pd from datetime import datetime import re from app.parsers.base_parser import BaseParser, ParseResult class FidelityParser(BaseParser): """ Parser for Fidelity brokerage account history CSV files. Expected CSV columns: - Run Date - Action - Symbol - Description - Type - Exchange Quantity - Exchange Currency - Currency - Price - Quantity - Exchange Rate - Commission - Fees - Accrued Interest - Amount - Cash Balance - Settlement Date """ # Expected column names in Fidelity CSV EXPECTED_COLUMNS = [ "Run Date", "Action", "Symbol", "Description", "Type", "Exchange Quantity", "Exchange Currency", "Currency", "Price", "Quantity", "Exchange Rate", "Commission", "Fees", "Accrued Interest", "Amount", "Cash Balance", "Settlement Date", ] def parse(self, file_path: Path) -> ParseResult: """ Parse a Fidelity CSV file into standardized transaction dictionaries. Args: file_path: Path to the Fidelity CSV file Returns: ParseResult containing parsed transactions, errors, and row count Raises: FileNotFoundError: If the file does not exist ValueError: If the CSV format is invalid """ errors = [] transactions = [] try: # Read CSV, skipping empty rows at the beginning df = self._read_csv(file_path, skiprows=self._find_header_row(file_path)) # Validate columns missing_cols = set(self.EXPECTED_COLUMNS) - set(df.columns) if missing_cols: raise ValueError(f"Missing required columns: {missing_cols}") # Parse each row for idx, row in df.iterrows(): try: transaction = self._parse_row(row) if transaction: transactions.append(transaction) except Exception as e: errors.append(f"Row {idx + 1}: {str(e)}") return ParseResult( transactions=transactions, errors=errors, row_count=len(df) ) except FileNotFoundError as e: raise e except Exception as e: raise ValueError(f"Failed to parse Fidelity CSV: {str(e)}") def _find_header_row(self, file_path: Path) -> int: """ Find the row number where the header starts in Fidelity CSV. Fidelity CSVs may have empty rows or metadata at the beginning. Args: file_path: Path to CSV file Returns: Row number (0-indexed) where the header is located """ with open(file_path, "r", encoding="utf-8-sig") as f: for i, line in enumerate(f): if "Run Date" in line: return i return 0 # Default to first row if not found def _extract_real_ticker(self, symbol: str, description: str, action: str) -> str: """ Extract the real underlying ticker from option descriptions. Fidelity uses internal reference numbers (like 6736999MM) in the Symbol column for options, but the real ticker is in the Description/Action in parentheses. Examples: - Description: "CALL (OPEN) OPENDOOR JAN 16 26 (100 SHS)" - Action: "YOU SOLD CLOSING TRANSACTION CALL (OPEN) OPENDOOR..." Args: symbol: Symbol from CSV (might be Fidelity internal reference) description: Description field action: Action field Returns: Real ticker symbol, or original symbol if not found """ # If symbol looks normal (letters only, not Fidelity's numeric codes), return it if symbol and re.match(r'^[A-Z]{1,5}$', symbol): return symbol # Try to extract from description first (more reliable) # Pattern: (TICKER) or CALL (TICKER) or PUT (TICKER) if description: # Look for pattern like "CALL (OPEN)" or "PUT (AAPL)" match = re.search(r'(?:CALL|PUT)\s*\(([A-Z]+)\)', description, re.IGNORECASE) if match: return match.group(1) # Look for standalone (TICKER) pattern match = re.search(r'\(([A-Z]{1,5})\)', description) if match: ticker = match.group(1) # Make sure it's not something like (100 or (Margin) if not ticker.isdigit() and ticker not in ['MARGIN', 'CASH', 'SHS']: return ticker # Fall back to action field if action: match = re.search(r'(?:CALL|PUT)\s*\(([A-Z]+)\)', action, re.IGNORECASE) if match: return match.group(1) # Return original symbol if we couldn't extract anything better return symbol if symbol else None def _parse_row(self, row: pd.Series) -> Dict[str, Any]: """ Parse a single row from Fidelity CSV into a transaction dictionary. Args: row: Pandas Series representing one CSV row Returns: Dictionary with transaction data, or None if row should be skipped Raises: ValueError: If required fields are missing or invalid """ # Parse dates run_date = self._parse_date(row["Run Date"]) settlement_date = self._parse_date(row["Settlement Date"]) # Extract raw values raw_symbol = self._safe_string(row["Symbol"]) description = self._safe_string(row["Description"]) action = str(row["Action"]).strip() if pd.notna(row["Action"]) else "" # Extract the real ticker (especially important for options) actual_symbol = self._extract_real_ticker(raw_symbol, description, action) # Extract and clean values transaction = { "run_date": run_date, "action": action, "symbol": actual_symbol, "description": description, "transaction_type": self._safe_string(row["Type"]), "exchange_quantity": self._safe_decimal(row["Exchange Quantity"]), "exchange_currency": self._safe_string(row["Exchange Currency"]), "currency": self._safe_string(row["Currency"]), "price": self._safe_decimal(row["Price"]), "quantity": self._safe_decimal(row["Quantity"]), "exchange_rate": self._safe_decimal(row["Exchange Rate"]), "commission": self._safe_decimal(row["Commission"]), "fees": self._safe_decimal(row["Fees"]), "accrued_interest": self._safe_decimal(row["Accrued Interest"]), "amount": self._safe_decimal(row["Amount"]), "cash_balance": self._safe_decimal(row["Cash Balance"]), "settlement_date": settlement_date, } return transaction def _parse_date(self, date_value: Any) -> Any: """ Parse date value from CSV, handling various formats. Args: date_value: Date value from CSV (string or datetime) Returns: datetime.date object or None if empty/invalid """ if pd.isna(date_value) or date_value == "": return None # If already a datetime object if isinstance(date_value, datetime): return date_value.date() # Try parsing common date formats date_str = str(date_value).strip() if not date_str: return None # Try common formats for fmt in ["%m/%d/%Y", "%Y-%m-%d", "%m-%d-%Y"]: try: return datetime.strptime(date_str, fmt).date() except ValueError: continue return None def _safe_string(self, value: Any) -> str: """ Safely convert value to string, handling NaN and empty values. Args: value: Value to convert Returns: String value or None if empty """ if pd.isna(value) or value == "": return None return str(value).strip()