"""Base parser interface for brokerage CSV files.""" from abc import ABC, abstractmethod from typing import List, Dict, Any, NamedTuple from pathlib import Path import pandas as pd class ParseResult(NamedTuple): """ Result of parsing a brokerage CSV file. Attributes: transactions: List of parsed transaction dictionaries errors: List of error messages encountered during parsing row_count: Total number of rows processed """ transactions: List[Dict[str, Any]] errors: List[str] row_count: int class BaseParser(ABC): """ Abstract base class for brokerage CSV parsers. Provides a standard interface for parsing CSV files from different brokerages. Subclasses must implement the parse() method for their specific format. """ @abstractmethod def parse(self, file_path: Path) -> ParseResult: """ Parse a brokerage CSV file into standardized transaction dictionaries. Args: file_path: Path to the CSV file to parse Returns: ParseResult containing transactions, errors, and row count Raises: FileNotFoundError: If the file does not exist ValueError: If the file format is invalid """ pass def _read_csv(self, file_path: Path, **kwargs) -> pd.DataFrame: """ Read CSV file into a pandas DataFrame with error handling. Args: file_path: Path to CSV file **kwargs: Additional arguments passed to pd.read_csv() Returns: DataFrame containing CSV data Raises: FileNotFoundError: If file does not exist pd.errors.EmptyDataError: If file is empty """ if not file_path.exists(): raise FileNotFoundError(f"CSV file not found: {file_path}") return pd.read_csv(file_path, **kwargs) @staticmethod def _safe_decimal(value: Any) -> Any: """ Safely convert value to decimal-compatible format, handling NaN and None. Args: value: Value to convert Returns: Converted value or None if invalid """ if pd.isna(value): return None if value == "": return None return value @staticmethod def _safe_date(value: Any) -> Any: """ Safely convert value to date, handling NaN and None. Args: value: Value to convert Returns: Converted date or None if invalid """ if pd.isna(value): return None if value == "": return None return value