Source code for wraquant.backtest.tearsheet

"""Enhanced tearsheet / reporting utilities for backtests.

Generates comprehensive performance summaries, monthly return tables,
drawdown analysis, rolling metrics, and trade-level analytics.
"""

from __future__ import annotations

from typing import Any

import numpy as np
import pandas as pd

# Public API of this module: every top-level reporting function defined below.
__all__ = [
    "generate_tearsheet",
    "monthly_returns_table",
    "drawdown_table",
    "rolling_metrics_table",
    "trade_analysis",
    "comprehensive_tearsheet",
    "strategy_comparison",
]



[docs]
def generate_tearsheet(
    returns: pd.Series,
    benchmark: pd.Series | None = None,
    risk_free: float = 0.0,
    periods_per_year: int = 252,
) -> dict[str, Any]:
    """Generate a comprehensive performance tearsheet dictionary.

    Parameters
    ----------
    returns : pd.Series
        Portfolio return series (simple, not log).  NaNs are dropped
        before any metric is computed.
    benchmark : pd.Series, optional
        Benchmark return series for relative metrics.  It is aligned to
        ``returns`` on the intersection of the two indexes.
    risk_free : float
        Annualised risk-free rate.
    periods_per_year : int
        Trading periods per year (252 for daily).

    Returns
    -------
    dict[str, Any]
        Absolute metrics: ``total_return``, ``annualized_return``,
        ``annualized_volatility``, ``sharpe_ratio``, ``sortino_ratio``,
        ``max_drawdown``, ``calmar_ratio``, ``skewness``, ``kurtosis``,
        ``var_95``, ``cvar_95``, ``win_rate``, ``profit_factor`` and
        ``n_periods``.

        When ``benchmark`` is given and ``returns`` is non-empty, the
        dictionary additionally holds ``benchmark_total_return``,
        ``tracking_error``, ``information_ratio``, ``beta``, ``alpha``,
        ``up_capture`` and ``down_capture``.  If the two series share no
        index labels these relative metrics are all reported as ``0.0``.

    Notes
    -----
    * An empty (or all-NaN) ``returns`` series yields the absolute
      metrics only, all set to zero.
    * ``sortino_ratio`` uses the standard deviation of the strictly
      negative returns as its downside-risk estimate.
    * ``var_95`` is the 5th percentile of the return distribution, i.e.
      a signed return rather than a positive loss figure; ``cvar_95`` is
      the mean of the returns at or below it.
    * ``profit_factor`` is ``inf`` when there are no negative returns.
    """
    returns = returns.dropna()
    n = len(returns)

    if n == 0:
        return {
            "total_return": 0.0,
            "annualized_return": 0.0,
            "annualized_volatility": 0.0,
            "sharpe_ratio": 0.0,
            "sortino_ratio": 0.0,
            "max_drawdown": 0.0,
            "calmar_ratio": 0.0,
            "skewness": 0.0,
            "kurtosis": 0.0,
            "var_95": 0.0,
            "cvar_95": 0.0,
            "win_rate": 0.0,
            "profit_factor": 0.0,
            "n_periods": 0,
        }

    # --- Absolute metrics ---
    total_return = float((1 + returns).prod() - 1)
    # Geometric annualisation: scale the compounded growth to one year.
    ann_factor = periods_per_year / n
    ann_return = float((1 + total_return) ** ann_factor - 1)
    ann_vol = float(returns.std() * np.sqrt(periods_per_year))
    # A NaN volatility (single observation) fails the ``> 0`` test as well.
    sharpe = (ann_return - risk_free) / ann_vol if ann_vol > 0 else 0.0

    # Sortino: downside risk is the std of the strictly negative returns.
    downside = returns[returns < 0]
    down_std = (
        float(downside.std() * np.sqrt(periods_per_year)) if len(downside) > 0 else 0.0
    )
    sortino = (ann_return - risk_free) / down_std if down_std > 0 else 0.0

    # Drawdown — delegate to canonical implementation, fed the wealth curve.
    from wraquant.risk.metrics import max_drawdown as _max_drawdown

    cum = (1 + returns).cumprod()
    max_dd = _max_drawdown(cum)
    # abs() keeps Calmar independent of the sign convention of ``max_dd``.
    calmar = ann_return / abs(max_dd) if max_dd != 0 else 0.0

    # Skew / Kurtosis need a minimum sample size to be defined.
    skew = float(returns.skew()) if n > 2 else 0.0
    kurt = float(returns.kurtosis()) if n > 3 else 0.0

    # VaR / CVaR at 95 %
    var_95 = float(np.percentile(returns, 5))
    tail = returns[returns <= var_95]
    cvar_95 = float(tail.mean()) if len(tail) > 0 else var_95

    # Win rate
    win_rate = float((returns > 0).sum()) / n

    # Profit factor
    gains = float(returns[returns > 0].sum())
    losses = float(abs(returns[returns < 0].sum()))
    profit_factor = gains / losses if losses > 0 else float("inf")

    result: dict[str, Any] = {
        "total_return": total_return,
        "annualized_return": ann_return,
        "annualized_volatility": ann_vol,
        "sharpe_ratio": sharpe,
        "sortino_ratio": sortino,
        "max_drawdown": max_dd,
        "calmar_ratio": calmar,
        "skewness": skew,
        "kurtosis": kurt,
        "var_95": var_95,
        "cvar_95": cvar_95,
        "win_rate": win_rate,
        "profit_factor": profit_factor,
        "n_periods": n,
    }

    # --- Benchmark-relative metrics ---
    if benchmark is not None:
        benchmark = benchmark.dropna()
        # Align on common index
        common = returns.index.intersection(benchmark.index)

        if len(common) == 0:
            # Nothing to compare against: annualising the benchmark would
            # divide by zero, so report neutral values instead of raising.
            result.update(
                {
                    "benchmark_total_return": 0.0,
                    "tracking_error": 0.0,
                    "information_ratio": 0.0,
                    "beta": 0.0,
                    "alpha": 0.0,
                    "up_capture": 0.0,
                    "down_capture": 0.0,
                }
            )
            return result

        r = returns.loc[common]
        b = benchmark.loc[common]
        excess = r - b

        # Tracking error
        te = float(excess.std() * np.sqrt(periods_per_year))

        # Information ratio (arithmetic annualisation of the mean excess)
        ann_excess = float(excess.mean() * periods_per_year)
        ir = ann_excess / te if te > 0 else 0.0

        # Beta / Alpha (CAPM)
        cov_rb = float(np.cov(r, b)[0, 1])
        var_b = float(b.var())
        beta = cov_rb / var_b if var_b > 0 else 0.0
        bm_ann_return = float((1 + b).prod() ** (periods_per_year / len(b)) - 1)
        # Alpha uses the portfolio's annualised return over its full sample.
        alpha = ann_return - (risk_free + beta * (bm_ann_return - risk_free))

        # Up/down capture: ratio of mean returns on benchmark up / down periods.
        up_mask = b > 0
        down_mask = b < 0
        up_capture = (
            float(r[up_mask].mean() / b[up_mask].mean()) if up_mask.any() else 0.0
        )
        down_capture = (
            float(r[down_mask].mean() / b[down_mask].mean()) if down_mask.any() else 0.0
        )

        result.update(
            {
                "benchmark_total_return": float((1 + b).prod() - 1),
                "tracking_error": te,
                "information_ratio": ir,
                "beta": beta,
                "alpha": alpha,
                "up_capture": up_capture,
                "down_capture": down_capture,
            }
        )

    return result




[docs]
def monthly_returns_table(returns: pd.Series) -> pd.DataFrame:
    """Build a year-by-month grid of compounded returns for heatmaps.

    Parameters
    ----------
    returns : pd.Series
        Daily (or intraday) return series.  Its index must expose
        ``year`` and ``month`` attributes, i.e. be a DatetimeIndex.

    Returns
    -------
    pd.DataFrame
        One row per calendar year (index named ``year``) and one column
        per calendar month present in the data (columns named
        ``month``).  Each cell is the compounded return of that month;
        year/month combinations without data are NaN.  An empty frame is
        returned when ``returns`` holds no non-NaN values.
    """
    clean = returns.dropna()
    if clean.empty:
        return pd.DataFrame()

    stamps = clean.index
    growth_factors = 1 + clean

    # Chain the per-period growth factors inside each (year, month) bucket.
    by_month = growth_factors.groupby([stamps.year, stamps.month]).prod() - 1
    by_month.index.names = ["year", "month"]

    # Pivot the month level out into columns.
    grid = by_month.unstack(level="month")
    columns_are_nested = isinstance(grid.columns, pd.MultiIndex)
    if columns_are_nested:
        # Defensive: keep only the month level if unstacking nested the columns.
        grid.columns = grid.columns.droplevel(0)
    grid.columns.name = "month"
    return grid




[docs]
def drawdown_table(
    returns: pd.Series,
    top_n: int = 5,
) -> pd.DataFrame:
    """Return the top *N* drawdown periods with metadata.

    The compounded wealth curve is compared with its running maximum and
    every contiguous run of below-peak observations is one drawdown.

    Parameters
    ----------
    returns : pd.Series
        Portfolio return series.  NaNs are dropped first.
    top_n : int
        Number of worst drawdowns to report.

    Returns
    -------
    pd.DataFrame
        Columns: ``peak_date``, ``trough_date``, ``recovery_date``,
        ``depth``, ``duration``, sorted from deepest to shallowest.

        * ``peak_date`` is the index label of the first observation
          below the running peak.
        * ``trough_date`` is the label of the deepest observation.
        * ``recovery_date`` is the label of the first observation back
          at (or above) the running peak; it is missing for a drawdown
          still open at the end of the series.
        * ``depth`` is the (negative) fractional decline at the trough.
        * ``duration`` is the number of periods from ``peak_date`` to
          ``recovery_date``, or to the last observation when the
          drawdown has not recovered.

        An empty frame with these columns is returned when there is no
        data or no drawdown.
    """
    columns = ["peak_date", "trough_date", "recovery_date", "depth", "duration"]

    returns = returns.dropna()
    if returns.empty:
        return pd.DataFrame(columns=columns)

    cum = (1 + returns).cumprod()
    peak = cum.cummax()
    dd = (cum - peak) / peak
    last_pos = len(dd) - 1

    # Segment drawdown periods.  Positions are tracked alongside labels so
    # durations need no label lookup afterwards; this also keeps them
    # correct when the index contains repeated labels.
    events: list[dict[str, Any]] = []
    start_pos: int | None = None
    start_idx = None
    trough_idx = None
    trough_val = 0.0

    for pos, (idx, val) in enumerate(dd.items()):
        if val < 0:
            if start_pos is None:
                # First below-peak observation opens a new drawdown.
                start_pos, start_idx = pos, idx
                trough_val, trough_idx = val, idx
            elif val < trough_val:
                trough_val, trough_idx = val, idx
        elif start_pos is not None:
            # Back at the running peak: close the current drawdown.
            events.append(
                {
                    "peak_date": start_idx,
                    "trough_date": trough_idx,
                    "recovery_date": idx,
                    "depth": float(trough_val),
                    "duration": pos - start_pos,
                }
            )
            start_pos = None

    # Open drawdown (no recovery): measure up to the last observation.
    if start_pos is not None:
        events.append(
            {
                "peak_date": start_idx,
                "trough_date": trough_idx,
                "recovery_date": None,
                "depth": float(trough_val),
                "duration": last_pos - start_pos,
            }
        )

    if not events:
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(events)
    # Most negative depth first, i.e. worst drawdown on top.
    return df.sort_values("depth").head(top_n).reset_index(drop=True)




[docs]
def rolling_metrics_table(
    returns: pd.Series,
    windows: list[int] | None = None,
    periods_per_year: int = 252,
) -> pd.DataFrame:
    """Tabulate rolling return, volatility and Sharpe for several windows.

    Parameters
    ----------
    returns : pd.Series
        Portfolio return series.  NaNs are dropped before rolling.
    windows : list[int], optional
        Rolling window sizes in periods.  Default ``[21, 63, 126, 252]``.
    periods_per_year : int
        Trading periods per year, used to annualise volatility and the
        mean return.

    Returns
    -------
    pd.DataFrame
        MultiIndex columns ``(window, metric)`` with the metrics
        ``rolling_return`` (compounded return over the window),
        ``rolling_vol`` (annualised standard deviation) and
        ``rolling_sharpe`` (annualised mean return divided by
        ``rolling_vol``; NaN where the volatility is zero).
    """
    window_sizes = [21, 63, 126, 252] if windows is None else windows
    clean = returns.dropna()

    def _compound(chunk: pd.Series) -> float:
        # Total return earned by chaining every period in the window.
        return float((1 + chunk).prod() - 1)

    columns: dict[tuple[int, str], pd.Series] = {}
    for size in window_sizes:
        compounded = clean.rolling(size).apply(_compound, raw=False)
        volatility = clean.rolling(size).std() * np.sqrt(periods_per_year)
        annual_mean = clean.rolling(size).mean() * periods_per_year
        # Zero volatility becomes NaN so the ratio is NaN rather than +/-inf.
        sharpe = annual_mean / volatility.replace(0, np.nan)

        columns[(size, "rolling_return")] = compounded
        columns[(size, "rolling_vol")] = volatility
        columns[(size, "rolling_sharpe")] = sharpe

    table = pd.DataFrame(columns)
    table.columns = pd.MultiIndex.from_tuples(
        table.columns, names=["window", "metric"]
    )
    return table




[docs]
def trade_analysis(trades_df: pd.DataFrame) -> dict[str, float]:
    """Analyse trade-level performance.

    Parameters
    ----------
    trades_df : pd.DataFrame
        Must contain a ``pnl`` column with per-trade profit/loss values.
        Optionally includes ``entry_price``, ``exit_price``, ``side``, etc.
        Rows whose ``pnl`` is NaN are ignored.

    Returns
    -------
    dict[str, float]
        Dictionary with ``win_rate``, ``avg_pnl``, ``avg_win``,
        ``avg_loss``, ``profit_factor``, ``expectancy``,
        ``max_win``, ``max_loss``, ``n_trades``.

        * A win is ``pnl > 0`` and a loss is ``pnl < 0``; breakeven
          trades count towards ``n_trades`` only.
        * ``profit_factor`` is ``inf`` when there are no losing trades.
        * ``expectancy`` weights ``avg_win`` by the win rate and
          ``avg_loss`` by the loss rate.
        * Every value is ``0.0`` when there are no trades.

    Raises
    ------
    ValueError
        If ``trades_df`` has no ``pnl`` column.
    """
    if "pnl" not in trades_df.columns:
        raise ValueError("trades_df must contain a 'pnl' column")

    pnl = trades_df["pnl"].dropna()
    n = len(pnl)
    if n == 0:
        return {
            "win_rate": 0.0,
            "avg_pnl": 0.0,
            "avg_win": 0.0,
            "avg_loss": 0.0,
            "profit_factor": 0.0,
            "expectancy": 0.0,
            "max_win": 0.0,
            "max_loss": 0.0,
            "n_trades": 0.0,
        }

    wins = pnl[pnl > 0]
    losses = pnl[pnl < 0]

    win_rate = len(wins) / n
    loss_rate = len(losses) / n
    avg_pnl = float(pnl.mean())
    avg_win = float(wins.mean()) if len(wins) > 0 else 0.0
    avg_loss = float(losses.mean()) if len(losses) > 0 else 0.0
    total_wins = float(wins.sum()) if len(wins) > 0 else 0.0
    total_losses = float(abs(losses.sum())) if len(losses) > 0 else 0.0
    profit_factor = total_wins / total_losses if total_losses > 0 else float("inf")

    # Expectancy = win_rate * avg_win + loss_rate * avg_loss.
    # The loss rate is used instead of (1 - win_rate) so that breakeven
    # trades (pnl == 0) are not weighted as if they were losers.
    expectancy = win_rate * avg_win + loss_rate * avg_loss

    return {
        "win_rate": win_rate,
        "avg_pnl": avg_pnl,
        "avg_win": avg_win,
        "avg_loss": avg_loss,
        "profit_factor": profit_factor,
        "expectancy": expectancy,
        "max_win": float(pnl.max()),
        "max_loss": float(pnl.min()),
        "n_trades": float(n),
    }




[docs]
def comprehensive_tearsheet(
    returns: pd.Series,
    benchmark: pd.Series | None = None,
    risk_free: float = 0.0,
    periods_per_year: int = 252,
    trades_df: pd.DataFrame | None = None,
    regime_states: pd.Series | None = None,
) -> dict[str, Any]:
    """Generate a complete performance report combining all available metrics.

    This is the "kitchen sink" tearsheet — it computes every metric in
    the backtesting module and organises them into a nested dictionary
    that can feed directly into dashboards and visualisation tools.

    The returned dictionary contains the following top-level keys:

    - ``summary``: Core risk/return metrics from ``generate_tearsheet``,
      plus ``var_95_risk`` and ``var_99_risk`` from the risk module's
      historical value-at-risk.
    - ``extended_metrics``: All metrics from ``backtest.metrics``
      (Omega, Burke, UPI, Kappa, tail, Rachev, SQN, etc.).
    - ``monthly_returns``: Monthly return table (years x months).
    - ``yearly_returns``: Annual compounded returns keyed by year.
    - ``drawdown_analysis``: Top 5 drawdowns with dates and durations.
    - ``rolling_metrics``: Rolling Sharpe, vol, and return over windows
      of 63, 126 and 252 periods (3m/6m/12m for daily data).
    - ``trade_analysis``: Per-trade statistics (if ``trades_df`` is
      provided and has a ``pnl`` column).  Also holds
      ``worst_5_trades`` / ``best_5_trades``, ``win_rate_by_month`` when
      trade timestamps are available, and ``avg_holding_period_days``
      when ``entry_time`` and ``exit_time`` columns are present.
    - ``regime_performance``: Performance broken out by regime state
      (if ``regime_states`` is provided).  Regimes with fewer than two
      observations are omitted.

    Parameters:
        returns: Simple return series with a DatetimeIndex.
        benchmark: Benchmark return series for relative metrics.
        risk_free: Annual risk-free rate.
        periods_per_year: Trading periods per year (252 for daily).
        trades_df: Trade log DataFrame with a ``pnl`` column.
            If provided, trade-level analysis is included.  Trade
            timestamps are taken from a DatetimeIndex when the frame has
            one, otherwise from a ``timestamp`` column.
        regime_states: Series of regime labels (e.g., "bull", "bear",
            "normal") aligned with ``returns``.  If provided, the
            tearsheet includes per-regime performance breakdowns.

    Returns:
        Nested dictionary with all metrics and analysis tables.

    Example:
        >>> import pandas as pd, numpy as np
        >>> rng = np.random.default_rng(42)
        >>> rets = pd.Series(
        ...     rng.normal(0.0004, 0.01, 504),
        ...     index=pd.bdate_range("2022-01-03", periods=504),
        ... )
        >>> ts = comprehensive_tearsheet(rets)
        >>> "summary" in ts and "extended_metrics" in ts
        True

    See Also:
        generate_tearsheet: Core performance metrics only.
        strategy_comparison: Side-by-side comparison of multiple strategies.
    """
    from wraquant.backtest.metrics import (
        burke_ratio,
        common_sense_ratio,
        gain_to_pain_ratio,
        kappa_ratio,
        omega_ratio,
        payoff_ratio as _payoff_ratio,
        profit_factor as _profit_factor,
        rachev_ratio,
        recovery_factor,
        system_quality_number,
        tail_ratio,
        ulcer_performance_index,
    )

    # --- Core summary ---
    summary = generate_tearsheet(
        returns,
        benchmark=benchmark,
        risk_free=risk_free,
        periods_per_year=periods_per_year,
    )

    # --- VaR / CVaR from canonical risk module ---
    from wraquant.risk.var import value_at_risk as _var

    var_95_risk = _var(returns, confidence=0.95, method="historical")
    var_99_risk = _var(returns, confidence=0.99, method="historical")
    summary["var_95_risk"] = var_95_risk
    summary["var_99_risk"] = var_99_risk

    # --- Extended metrics ---
    extended: dict[str, float] = {
        "omega_ratio": omega_ratio(returns),
        "burke_ratio": burke_ratio(returns, periods_per_year=periods_per_year),
        "ulcer_performance_index": ulcer_performance_index(
            returns, periods_per_year=periods_per_year
        ),
        "kappa_2": kappa_ratio(returns, order=2, periods_per_year=periods_per_year),
        "kappa_3": kappa_ratio(returns, order=3, periods_per_year=periods_per_year),
        "tail_ratio": tail_ratio(returns),
        "common_sense_ratio": common_sense_ratio(
            returns, risk_free=risk_free, periods_per_year=periods_per_year
        ),
        "rachev_ratio": rachev_ratio(returns),
        "gain_to_pain_ratio": gain_to_pain_ratio(returns),
        "profit_factor": _profit_factor(returns),
        "payoff_ratio": _payoff_ratio(returns),
        "recovery_factor": recovery_factor(returns),
        "system_quality_number": system_quality_number(returns),
    }

    # --- Monthly and yearly returns ---
    monthly = monthly_returns_table(returns)

    yearly_returns: dict[int, float] = {}
    if not returns.empty and hasattr(returns.index, "year"):
        for year in sorted(returns.index.year.unique()):
            yr_rets = returns[returns.index.year == year]
            yearly_returns[int(year)] = float((1 + yr_rets).prod() - 1)

    # --- Drawdown analysis ---
    dd_table = drawdown_table(returns, top_n=5)

    # --- Rolling metrics ---
    rolling = rolling_metrics_table(
        returns,
        windows=[63, 126, 252],
        periods_per_year=periods_per_year,
    )

    result: dict[str, Any] = {
        "summary": summary,
        "extended_metrics": extended,
        "monthly_returns": monthly,
        "yearly_returns": yearly_returns,
        "drawdown_analysis": dd_table,
        "rolling_metrics": rolling,
    }

    # --- Trade analysis ---
    if trades_df is not None and "pnl" in trades_df.columns:
        ta: dict[str, Any] = trade_analysis(trades_df)

        # Win rate by month if trades have timestamps.  The index takes
        # precedence over a ``timestamp`` column.
        trade_times: pd.DatetimeIndex | None = None
        if isinstance(trades_df.index, pd.DatetimeIndex):
            trade_times = trades_df.index
        elif "timestamp" in trades_df.columns:
            # Wrap in a DatetimeIndex: ``Series.to_period`` would convert the
            # Series' *index* (and fail on a non-datetime index) instead of
            # the timestamp values themselves.
            trade_times = pd.DatetimeIndex(pd.to_datetime(trades_df["timestamp"]))

        if trade_times is not None:
            monthly_wr: dict[str, float] = {}
            # The PeriodIndex is a positional grouping key, one per trade row.
            for month_key, grp in trades_df.groupby(trade_times.to_period("M")):
                pnl_grp = grp["pnl"].dropna()
                if len(pnl_grp) > 0:
                    monthly_wr[str(month_key)] = float(
                        (pnl_grp > 0).sum() / len(pnl_grp)
                    )
            ta["win_rate_by_month"] = monthly_wr

        # Holding period if entry/exit times available
        if "entry_time" in trades_df.columns and "exit_time" in trades_df.columns:
            durations = pd.to_datetime(trades_df["exit_time"]) - pd.to_datetime(
                trades_df["entry_time"]
            )
            # 86400 seconds per day.
            ta["avg_holding_period_days"] = float(
                durations.mean().total_seconds() / 86400
            )

        # Best / worst trades (ascending sort: losers first, winners last)
        pnl_sorted = trades_df["pnl"].dropna().sort_values()
        ta["worst_5_trades"] = pnl_sorted.head(5).tolist()
        ta["best_5_trades"] = pnl_sorted.tail(5).tolist()

        result["trade_analysis"] = ta

    # --- Regime-conditional performance ---
    if regime_states is not None:
        common_idx = returns.index.intersection(regime_states.index)
        r_aligned = returns.loc[common_idx]
        s_aligned = regime_states.loc[common_idx]

        regime_perf: dict[str, dict[str, float]] = {}
        for regime in s_aligned.unique():
            mask = s_aligned == regime
            regime_rets = r_aligned[mask]
            # A standard deviation needs at least two observations.
            if len(regime_rets) > 1:
                ann_vol = float(regime_rets.std() * np.sqrt(periods_per_year))
                # Arithmetic annualisation of the mean per-period return.
                ann_ret = float(regime_rets.mean() * periods_per_year)
                regime_perf[str(regime)] = {
                    "count": int(mask.sum()),
                    "mean_return": float(regime_rets.mean()),
                    "annualized_return": ann_ret,
                    "annualized_volatility": ann_vol,
                    "sharpe": float(ann_ret / ann_vol) if ann_vol > 0 else 0.0,
                    "total_return": float((1 + regime_rets).prod() - 1),
                }

        result["regime_performance"] = regime_perf

    return result




[docs]
def strategy_comparison(
    strategies: dict[str, pd.Series],
    risk_free: float = 0.0,
    periods_per_year: int = 252,
) -> pd.DataFrame:
    """Compare multiple strategies side-by-side on all metrics.

    Computes a comprehensive set of performance metrics for each
    strategy and returns them in a single DataFrame where columns
    are strategy names and rows are metric names.  The best and worst
    values for each metric are easy to spot by sorting.

    Each column holds the core metrics from ``generate_tearsheet``
    (without ``n_periods``) followed by the extended metrics from
    ``backtest.metrics``: ``omega_ratio``, ``burke_ratio``,
    ``ulcer_performance_index``, ``kappa_2``, ``kappa_3``,
    ``tail_ratio``, ``common_sense_ratio``, ``rachev_ratio``,
    ``gain_to_pain_ratio``, ``payoff_ratio``, ``recovery_factor`` and
    ``system_quality_number``.

    Parameters:
        strategies: Mapping of ``{strategy_name: returns_series}``.
        risk_free: Annual risk-free rate.
        periods_per_year: Trading periods per year.

    Returns:
        DataFrame with metric names as the index and strategy names as
        columns.  Contains all core and extended metrics.

    Example:
        >>> import pandas as pd, numpy as np
        >>> rng = np.random.default_rng(42)
        >>> strats = {
        ...     "momentum": pd.Series(rng.normal(0.0005, 0.012, 252)),
        ...     "mean_rev": pd.Series(rng.normal(0.0003, 0.008, 252)),
        ... }
        >>> comp = strategy_comparison(strats)
        >>> "momentum" in comp.columns and "mean_rev" in comp.columns
        True

    See Also:
        comprehensive_tearsheet: Full report for a single strategy.
        generate_tearsheet: Core metrics for a single strategy.
    """
    # ``profit_factor`` is not imported here: generate_tearsheet already
    # reports it among the core metrics.
    from wraquant.backtest.metrics import (
        burke_ratio,
        common_sense_ratio,
        gain_to_pain_ratio,
        kappa_ratio,
        omega_ratio,
        payoff_ratio as _payoff_ratio,
        rachev_ratio,
        recovery_factor,
        system_quality_number,
        tail_ratio,
        ulcer_performance_index,
    )

    records: dict[str, dict[str, float]] = {}
    for name, rets in strategies.items():
        ts = generate_tearsheet(
            rets, risk_free=risk_free, periods_per_year=periods_per_year
        )
        # n_periods is a sample count, not a performance metric to compare.
        ts.pop("n_periods", None)
        # Add extended metrics
        ts["omega_ratio"] = omega_ratio(rets)
        ts["burke_ratio"] = burke_ratio(rets, periods_per_year=periods_per_year)
        ts["ulcer_performance_index"] = ulcer_performance_index(
            rets, periods_per_year=periods_per_year
        )
        ts["kappa_2"] = kappa_ratio(rets, order=2, periods_per_year=periods_per_year)
        # Kept in step with the extended metrics of comprehensive_tearsheet.
        ts["kappa_3"] = kappa_ratio(rets, order=3, periods_per_year=periods_per_year)
        ts["tail_ratio"] = tail_ratio(rets)
        ts["common_sense_ratio"] = common_sense_ratio(
            rets, risk_free=risk_free, periods_per_year=periods_per_year
        )
        ts["rachev_ratio"] = rachev_ratio(rets)
        ts["gain_to_pain_ratio"] = gain_to_pain_ratio(rets)
        ts["payoff_ratio"] = _payoff_ratio(rets)
        ts["recovery_factor"] = recovery_factor(rets)
        ts["system_quality_number"] = system_quality_number(rets)
        records[name] = ts

    # Outer keys become columns (strategies), inner keys become rows (metrics).
    return pd.DataFrame(records)