"""Fidelity brokerage CSV parser."""
from pathlib import Path
from typing import List, Dict, Any
import pandas as pd
from datetime import datetime
import re

from app.parsers.base_parser import BaseParser, ParseResult


class FidelityParser(BaseParser):
    """
    Parser for Fidelity brokerage account history CSV files.

    Expected CSV columns:
    - Run Date
    - Action
    - Symbol
    - Description
    - Type
    - Exchange Quantity
    - Exchange Currency
    - Currency
    - Price
    - Quantity
    - Exchange Rate
    - Commission
    - Fees
    - Accrued Interest
    - Amount
    - Cash Balance
    - Settlement Date
    """

    # Expected column names in Fidelity CSV
    EXPECTED_COLUMNS = [
        "Run Date",
        "Action",
        "Symbol",
        "Description",
        "Type",
        "Exchange Quantity",
        "Exchange Currency",
        "Currency",
        "Price",
        "Quantity",
        "Exchange Rate",
        "Commission",
        "Fees",
        "Accrued Interest",
        "Amount",
        "Cash Balance",
        "Settlement Date",
    ]

    def parse(self, file_path: Path) -> ParseResult:
        """
        Parse a Fidelity CSV file into standardized transaction dictionaries.

        Args:
            file_path: Path to the Fidelity CSV file

        Returns:
            ParseResult containing parsed transactions, errors, and row count

        Raises:
            FileNotFoundError: If the file does not exist
            ValueError: If the CSV format is invalid
        """
        errors = []
        transactions = []

        try:
            # Read CSV, skipping empty rows at the beginning
            df = self._read_csv(file_path, skiprows=self._find_header_row(file_path))

            # Validate columns
            missing_cols = set(self.EXPECTED_COLUMNS) - set(df.columns)
            if missing_cols:
                raise ValueError(f"Missing required columns: {missing_cols}")

            # Parse each row
            for idx, row in df.iterrows():
                try:
                    transaction = self._parse_row(row)
                    if transaction:
                        transactions.append(transaction)
                except Exception as e:
                    errors.append(f"Row {idx + 1}: {str(e)}")

            return ParseResult(
                transactions=transactions, errors=errors, row_count=len(df)
            )

        except FileNotFoundError as e:
            raise e
        except Exception as e:
            raise ValueError(f"Failed to parse Fidelity CSV: {str(e)}")

    def _find_header_row(self, file_path: Path) -> int:
        """
        Find the row number where the header starts in Fidelity CSV.

        Fidelity CSVs may have empty rows or metadata at the beginning.

        Args:
            file_path: Path to CSV file

        Returns:
            Row number (0-indexed) where the header is located
        """
        with open(file_path, "r", encoding="utf-8-sig") as f:
            for i, line in enumerate(f):
                if "Run Date" in line:
                    return i
        return 0  # Default to first row if not found

    def _extract_real_ticker(self, symbol: str, description: str, action: str) -> str:
        """
        Extract the real underlying ticker from option descriptions.
        
        Fidelity uses internal reference numbers (like 6736999MM) in the Symbol column
        for options, but the real ticker is in the Description/Action in parentheses.
        
        Examples:
        - Description: "CALL (OPEN) OPENDOOR JAN 16 26  (100 SHS)"
        - Action: "YOU SOLD CLOSING TRANSACTION CALL (OPEN) OPENDOOR..."
        
        Args:
            symbol: Symbol from CSV (might be Fidelity internal reference)
            description: Description field
            action: Action field
            
        Returns:
            Real ticker symbol, or original symbol if not found
        """
        # If symbol looks normal (letters only, not Fidelity's numeric codes), return it
        if symbol and re.match(r'^[A-Z]{1,5}$', symbol):
            return symbol
        
        # Try to extract from description first (more reliable)
        # Pattern: (TICKER) or CALL (TICKER) or PUT (TICKER)
        if description:
            # Look for pattern like "CALL (OPEN)" or "PUT (AAPL)"
            match = re.search(r'(?:CALL|PUT)\s*\(([A-Z]+)\)', description, re.IGNORECASE)
            if match:
                return match.group(1)
            
            # Look for standalone (TICKER) pattern
            match = re.search(r'\(([A-Z]{1,5})\)', description)
            if match:
                ticker = match.group(1)
                # Make sure it's not something like (100 or (Margin)
                if not ticker.isdigit() and ticker not in ['MARGIN', 'CASH', 'SHS']:
                    return ticker
        
        # Fall back to action field
        if action:
            match = re.search(r'(?:CALL|PUT)\s*\(([A-Z]+)\)', action, re.IGNORECASE)
            if match:
                return match.group(1)
        
        # Return original symbol if we couldn't extract anything better
        return symbol if symbol else None

    def _parse_row(self, row: pd.Series) -> Dict[str, Any]:
        """
        Parse a single row from Fidelity CSV into a transaction dictionary.

        Args:
            row: Pandas Series representing one CSV row

        Returns:
            Dictionary with transaction data, or None if row should be skipped

        Raises:
            ValueError: If required fields are missing or invalid
        """
        # Parse dates
        run_date = self._parse_date(row["Run Date"])
        settlement_date = self._parse_date(row["Settlement Date"])

        # Extract raw values
        raw_symbol = self._safe_string(row["Symbol"])
        description = self._safe_string(row["Description"])
        action = str(row["Action"]).strip() if pd.notna(row["Action"]) else ""
        
        # Extract the real ticker (especially important for options)
        actual_symbol = self._extract_real_ticker(raw_symbol, description, action)

        # Extract and clean values
        transaction = {
            "run_date": run_date,
            "action": action,
            "symbol": actual_symbol,
            "description": description,
            "transaction_type": self._safe_string(row["Type"]),
            "exchange_quantity": self._safe_decimal(row["Exchange Quantity"]),
            "exchange_currency": self._safe_string(row["Exchange Currency"]),
            "currency": self._safe_string(row["Currency"]),
            "price": self._safe_decimal(row["Price"]),
            "quantity": self._safe_decimal(row["Quantity"]),
            "exchange_rate": self._safe_decimal(row["Exchange Rate"]),
            "commission": self._safe_decimal(row["Commission"]),
            "fees": self._safe_decimal(row["Fees"]),
            "accrued_interest": self._safe_decimal(row["Accrued Interest"]),
            "amount": self._safe_decimal(row["Amount"]),
            "cash_balance": self._safe_decimal(row["Cash Balance"]),
            "settlement_date": settlement_date,
        }

        return transaction

    def _parse_date(self, date_value: Any) -> Any:
        """
        Parse date value from CSV, handling various formats.

        Args:
            date_value: Date value from CSV (string or datetime)

        Returns:
            datetime.date object or None if empty/invalid
        """
        if pd.isna(date_value) or date_value == "":
            return None

        # If already a datetime object
        if isinstance(date_value, datetime):
            return date_value.date()

        # Try parsing common date formats
        date_str = str(date_value).strip()
        if not date_str:
            return None

        # Try common formats
        for fmt in ["%m/%d/%Y", "%Y-%m-%d", "%m-%d-%Y"]:
            try:
                return datetime.strptime(date_str, fmt).date()
            except ValueError:
                continue

        return None

    def _safe_string(self, value: Any) -> str:
        """
        Safely convert value to string, handling NaN and empty values.

        Args:
            value: Value to convert

        Returns:
            String value or None if empty
        """
        if pd.isna(value) or value == "":
            return None
        return str(value).strip()