# Source code for wraquant.execution.cost

"""Transaction cost analysis.

Provides slippage calculation, commission accounting, total execution
cost breakdown, and market impact models.
"""

from __future__ import annotations

import numpy as np
import pandas as pd
from numpy.typing import NDArray



[docs]
def slippage(
    execution_price: float | NDArray[np.floating],
    benchmark_price: float | NDArray[np.floating],
    side: str = "buy",
) -> float | NDArray[np.floating]:
    """Signed per-trade slippage against a reference price.

    The sign convention is trader-centric: a positive number always
    means the fill was worse than the benchmark (a cost), a negative
    number means price improvement.

    - Buy:  ``execution_price - benchmark_price``
    - Sell: ``benchmark_price - execution_price``

    Parameters:
        execution_price: Actual fill price(s); scalar or array-like.
        benchmark_price: Reference price(s) such as mid-quote, arrival
            price, or VWAP; scalar or array-like broadcastable against
            *execution_price*.
        side: ``'buy'`` or ``'sell'``; a single value applied to every
            element.

    Returns:
        A Python ``float`` when the result is zero-dimensional,
        otherwise a ``float64`` NumPy array.

    Raises:
        ValueError: If *side* is neither ``'buy'`` nor ``'sell'``.

    Example:
        >>> slippage(100.5, 100.0, side='buy')
        0.5
        >>> slippage(99.5, 100.0, side='sell')
        0.5

    See Also:
        total_cost: Aggregate cost breakdown across multiple trades.
    """
    fills = np.asarray(execution_price, dtype=np.float64)
    reference = np.asarray(benchmark_price, dtype=np.float64)

    # Guard clause: reject unknown sides before doing any arithmetic.
    if side not in ("buy", "sell"):
        raise ValueError(f"side must be 'buy' or 'sell', got {side!r}")

    signed_diff = fills - reference if side == "buy" else reference - fills

    # Scalars in -> plain float out; arrays are returned as-is.
    if np.ndim(signed_diff) == 0:
        return float(signed_diff)
    return signed_diff




[docs]
def commission_cost(
    qty: float | NDArray[np.floating],
    price: float | NDArray[np.floating],
    rate: float = 0.001,
) -> float | NDArray[np.floating]:
    """Explicit broker commission charged on traded notional.

    The commission is ``|qty| * price * rate``; the direction of the
    trade (sign of *qty*) does not matter.

    Parameters:
        qty: Shares traded per fill; scalar or array-like.  Only the
            magnitude is used.
        price: Price per share; scalar or array-like broadcastable
            against *qty*.
        rate: Commission as a fraction of notional (default 0.001,
            i.e. 10 bps).

    Returns:
        A Python ``float`` when the result is zero-dimensional,
        otherwise a ``float64`` NumPy array of per-fill commissions.

    Example:
        >>> commission_cost(1000, 50.0, rate=0.001)
        50.0
        >>> commission_cost(1000, 50.0, rate=0.0005)  # 5 bps
        25.0

    See Also:
        slippage: Implicit execution cost.
        total_cost: Combined slippage + commission.
    """
    shares = np.abs(np.asarray(qty, dtype=np.float64))
    unit_price = np.asarray(price, dtype=np.float64)
    fee = shares * unit_price * rate

    # Arrays pass through untouched; 0-d results collapse to a float.
    if np.ndim(fee) != 0:
        return fee
    return float(fee)




[docs]
def total_cost(
    trades_df: pd.DataFrame,
    commission_rate: float = 0.001,
) -> dict[str, float]:
    """Total execution cost breakdown from a trades DataFrame.

    Use ``total_cost`` to get a complete picture of execution quality
    across a batch of trades, decomposed into slippage (implicit market
    cost) and commissions (explicit broker cost).

    The computation is vectorised over the whole frame rather than
    iterating row by row, so it scales to large fill blotters.

    Parameters:
        trades_df: DataFrame with one row per fill.  Required columns:
            ``'execution_price'``, ``'benchmark_price'``, ``'qty'``,
            ``'side'`` (``'buy'`` or ``'sell'``).  The sign of ``'qty'``
            is ignored; direction comes from ``'side'``.
        commission_rate: Commission rate as a fraction of notional
            value (default 0.001 = 10 bps).

    Returns:
        Dictionary with:

        - ``'total_slippage'``: Sum of per-share slippage * ``|qty|``
          across all trades (positive = cost).
        - ``'total_commission'``: Sum of ``|qty| * execution_price *
          commission_rate``.
        - ``'total_cost'``: Slippage + commission.
        - ``'cost_bps'``: Total cost in basis points of total executed
          notional (``0.0`` when notional is not positive, e.g. for an
          empty frame).
        - ``'n_trades'``: Number of fills.

    Raises:
        ValueError: If a required column is missing, or if any value in
            ``'side'`` is neither ``'buy'`` nor ``'sell'``.

    Example:
        >>> import pandas as pd
        >>> trades = pd.DataFrame({
        ...     'execution_price': [100.05, 50.10],
        ...     'benchmark_price': [100.00, 50.00],
        ...     'qty': [1000, 2000],
        ...     'side': ['buy', 'buy'],
        ... })
        >>> result = total_cost(trades, commission_rate=0.001)
        >>> result['n_trades']
        2
        >>> result['cost_bps'] > 0
        True

    See Also:
        slippage: Per-trade slippage calculation.
        commission_cost: Per-trade commission calculation.
    """
    required = {"execution_price", "benchmark_price", "qty", "side"}
    missing = required - set(trades_df.columns)
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    # Validate sides up front; report the first offending value using the
    # same message that ``slippage`` raises for a single trade.
    sides = trades_df["side"].to_numpy(dtype=object)
    is_buy = sides == "buy"
    invalid = ~(is_buy | (sides == "sell"))
    if invalid.any():
        bad_side = sides[invalid][0]
        raise ValueError(f"side must be 'buy' or 'sell', got {bad_side!r}")

    exec_px = trades_df["execution_price"].to_numpy(dtype=np.float64)
    bench_px = trades_df["benchmark_price"].to_numpy(dtype=np.float64)
    shares = np.abs(trades_df["qty"].to_numpy(dtype=np.float64))

    # Positive slippage is a cost: paid above benchmark on buys,
    # received below benchmark on sells.
    per_share_slippage = np.where(is_buy, exec_px - bench_px, bench_px - exec_px)
    fill_notional = shares * exec_px

    total_slippage = float(np.sum(per_share_slippage * shares))
    total_commission = float(np.sum(fill_notional * commission_rate))
    notional = float(np.sum(fill_notional))

    cost = total_slippage + total_commission
    cost_bps = (cost / notional) * 10_000 if notional > 0 else 0.0

    return {
        "total_slippage": float(total_slippage),
        "total_commission": float(total_commission),
        "total_cost": float(cost),
        "cost_bps": float(cost_bps),
        "n_trades": int(len(trades_df)),
    }




[docs]
def market_impact_model(
    qty: float,
    avg_daily_volume: float,
    volatility: float,
    model: str = "sqrt",
) -> float:
    """Pre-trade estimate of price impact from a parametric model.

    Impact is expressed as a fraction of price and is driven by the
    participation rate ``|qty| / avg_daily_volume``:

    - ``'sqrt'``:   ``volatility * sqrt(participation)``
    - ``'linear'``: ``volatility * participation``

    Parameters:
        qty: Order quantity in shares; only the magnitude is used.
        avg_daily_volume: Average daily volume.  When it is not
            positive, participation is taken to be ``0.0`` and the
            estimated impact is therefore zero.
        volatility: Daily price volatility (standard deviation of
            returns).
        model: ``'sqrt'`` (default) or ``'linear'``.

    Returns:
        Estimated impact as a fraction of price, on the same scale as
        *volatility*.  Multiply by price for a per-share dollar figure.

    Raises:
        ValueError: If *model* is neither ``'sqrt'`` nor ``'linear'``.

    Example:
        >>> # 10,000 shares, ADV 1M, 2% daily vol
        >>> impact = market_impact_model(10_000, 1_000_000, 0.02, model='sqrt')
        >>> 0 < impact < 0.02  # less than full daily vol
        True

    Notes:
        The square-root form is the one usually cited for equities
        (cf. Kyle 1985; Almgren et al. 2005).

    See Also:
        slippage: Measure actual post-trade slippage.
        wraquant.execution.optimal.almgren_chriss: Optimal execution trajectory.
    """
    if avg_daily_volume > 0:
        participation = abs(qty) / avg_daily_volume
    else:
        participation = 0.0

    # Pick the participation scaling for the requested model.
    if model == "linear":
        scaling = participation
    elif model == "sqrt":
        scaling = np.sqrt(participation)
    else:
        raise ValueError(f"model must be 'sqrt' or 'linear', got {model!r}")

    return float(volatility * scaling)




[docs]
def liquidity_adjusted_cost(
    price: float,
    quantity: float,
    bid: float | pd.Series,
    ask: float | pd.Series,
    volume: float | pd.Series,
    avg_daily_volume: float | None = None,
) -> dict[str, float]:
    """Estimate execution cost using microstructure liquidity measures.

    Combines spread cost from ``microstructure.liquidity`` with market
    impact from ``execution.cost`` for a complete cost estimate.  This
    bridges the microstructure and execution modules, giving a single
    function that incorporates both implicit (spread) and impact costs.

    The total cost has three components:

    1. **Spread cost**: half the mean spread times ``|quantity|``. This
       is the immediate cost of crossing the bid-ask spread.
    2. **Market impact**: estimated price impact from the square-root
       model, scaled by ``|quantity|`` and *price*.
    3. **Total cost**: sum of spread cost and market impact cost.

    When to use:
        Use this for pre-trade cost estimation when you have both
        quote data (bid/ask) and volume data.  It gives a more
        realistic cost estimate than spread or impact alone because
        real execution costs include both components.

    Parameters:
        price: Current mid-price or last trade price.
        quantity: Order quantity (shares); only the magnitude is used.
        bid: Best bid price(s).
        ask: Best ask price(s).
        volume: Recent trading volume.  When a Series is given and
            *bid*/*ask* are also series, they are expected to have the
            same length as *volume*.
        avg_daily_volume: Average daily volume for impact estimation.
            If *None*, uses the mean of *volume* (when *volume* is a
            Series) or *volume* itself (when scalar).

    Returns:
        Dictionary containing:

        - ``'spread_cost'`` (*float*) -- Cost from crossing the bid-ask
          spread (half-spread times quantity).
        - ``'market_impact_cost'`` (*float*) -- Estimated market impact
          cost in dollar terms (``0.0`` when ADV is not positive).
        - ``'total_cost'`` (*float*) -- Sum of spread and impact costs.
        - ``'cost_bps'`` (*float*) -- Total cost in basis points of
          notional value (``0.0`` when notional is not positive).
        - ``'effective_spread'`` (*float*) -- Mean spread used in the
          calculation.
        - ``'amihud_illiquidity'`` (*float*) -- Amihud illiquidity ratio
          (higher = less liquid).  Only computed when *volume* is a
          Series; ``0.0`` for scalar *volume*.

    Notes:
        No return history is passed in, so the volatility fed to the
        impact model is approximated by ``mean spread / price`` (or
        ``0.01`` when *price* is not positive).  Likewise the returns
        used for the Amihud ratio are log changes of the quote
        midpoint, with the first observation set to zero.

    Example:
        >>> result = liquidity_adjusted_cost(
        ...     price=100.0, quantity=5000,
        ...     bid=99.98, ask=100.02, volume=1_000_000,
        ... )
        >>> result['spread_cost'] > 0
        True
        >>> result['total_cost'] >= result['spread_cost']
        True

    See Also:
        wraquant.microstructure.liquidity.effective_spread: Raw spread.
        wraquant.microstructure.liquidity.amihud_illiquidity: Illiquidity.
        market_impact_model: Parametric impact estimation.
    """
    # Imported lazily so the execution module does not require the
    # microstructure package at import time.
    from wraquant.microstructure.liquidity import (
        amihud_illiquidity,
        effective_spread,
    )

    # Compute effective spread from bid/ask
    bid_arr = np.asarray(bid, dtype=np.float64)
    ask_arr = np.asarray(ask, dtype=np.float64)
    mid = (bid_arr + ask_arr) / 2.0

    # effective_spread returns 2 * |trade - mid|; for a pre-trade estimate
    # there is no fill yet, so the ask stands in for the trade price, which
    # makes the result equal to the quoted spread (ask - bid).
    eff_spread_raw = effective_spread(
        trade_prices=np.atleast_1d(ask_arr),
        midpoints=np.atleast_1d(mid),
    )
    mean_eff_spread = float(np.nanmean(eff_spread_raw))

    # Spread cost = half-spread * quantity (you pay half the spread on entry)
    half_spread = mean_eff_spread / 2.0
    spread_cost = half_spread * abs(quantity)

    # Amihud illiquidity (scalar estimate)
    if isinstance(volume, pd.Series):
        vol_series = volume
        # Build the returns proxy on the SAME index as the volume series.
        # With a default RangeIndex, any index-aligned arithmetic between
        # the two (e.g. |r| / volume on a date-indexed volume series) would
        # pair nothing up and yield NaN.
        returns_proxy = pd.Series(np.zeros(len(volume)), index=volume.index)
        # Log mid-price changes; the floor guards against log(0).  The first
        # observation has no predecessor and stays at zero.
        returns_proxy.iloc[1:] = np.diff(np.log(np.maximum(mid, 1e-10))) if np.ndim(mid) > 0 else 0.0
        adv = float(vol_series.mean()) if avg_daily_volume is None else avg_daily_volume
        amihud = float(amihud_illiquidity(returns_proxy, vol_series))
    else:
        adv = float(volume) if avg_daily_volume is None else avg_daily_volume
        amihud = 0.0

    # Market impact via square-root model
    if adv > 0:
        # Estimate daily volatility from spread as a proxy if we don't have returns
        vol_proxy = mean_eff_spread / price if price > 0 else 0.01
        impact_frac = market_impact_model(
            qty=abs(quantity),
            avg_daily_volume=adv,
            volatility=vol_proxy,
            model="sqrt",
        )
        # Fractional impact -> dollars on the full order notional.
        impact_cost = impact_frac * price * abs(quantity)
    else:
        impact_cost = 0.0

    total = spread_cost + impact_cost
    notional = abs(quantity) * price
    cost_bps = (total / notional) * 10_000 if notional > 0 else 0.0

    return {
        "spread_cost": float(spread_cost),
        "market_impact_cost": float(impact_cost),
        "total_cost": float(total),
        "cost_bps": float(cost_bps),
        "effective_spread": float(mean_eff_spread),
        "amihud_illiquidity": float(amihud),
    }




[docs]
def expected_cost_model(
    quantity: float,
    price: float,
    adv: float,
    volatility: float,
    spread: float,
    *,
    participation_rate: float = 0.1,
) -> dict[str, float]:
    """Comprehensive pre-trade expected cost model.

    Use this before executing an order to estimate total expected cost,
    broken down into three components: spread crossing cost, market
    impact, and timing risk.  This helps decide whether to execute
    aggressively (high urgency) or passively (low urgency).

    Components (with ``Q = |quantity|``):
        1. **Spread cost**: immediate cost of crossing the bid-ask
           spread = ``0.5 * spread * Q``.
        2. **Market impact**: square-root model =
           ``volatility * sqrt(Q / adv) * price * Q``.
        3. **Timing risk**: opportunity cost of slow execution =
           ``0.5 * volatility * sqrt(T) * price * Q``, where the
           execution duration in days is
           ``T = (Q / adv) / participation_rate``.

    Parameters:
        quantity: Order quantity (shares or units); only the magnitude
            is used.
        price: Current market price.
        adv: Average daily volume (shares).  When not positive, the
            order is treated as 100% of daily volume
            (``Q / adv`` is taken as ``1.0``).
        volatility: Daily price volatility (standard deviation of
            returns, e.g., 0.02 = 2%).
        spread: Bid-ask spread in price terms (e.g., 0.02 for a
            $0.02 spread).
        participation_rate: Keyword-only.  Assumed fraction of market
            volume the order takes while it is working; must satisfy
            ``0 < participation_rate <= 1``.  Lower values imply a
            longer execution window and therefore more timing risk.
            Defaults to ``0.1`` (10% of volume).

    Returns:
        Dictionary containing:

        - **spread_cost** (*float*) -- Half-spread cost in dollar terms.
        - **impact_cost** (*float*) -- Estimated market impact cost.
        - **timing_risk** (*float*) -- Estimated timing/opportunity risk.
        - **total_cost** (*float*) -- Sum of all components.
        - **cost_bps** (*float*) -- Total cost in basis points of
          notional value (``0.0`` when notional is not positive).

    Raises:
        ValueError: If *participation_rate* is outside ``(0, 1]``.

    Example:
        >>> result = expected_cost_model(
        ...     quantity=5000, price=100.0, adv=1_000_000,
        ...     volatility=0.02, spread=0.02,
        ... )
        >>> result['spread_cost']
        50.0
        >>> result['total_cost'] > result['spread_cost']
        True

    See Also:
        market_impact_model: Parametric impact estimation.
        liquidity_adjusted_cost: Spread + impact using microstructure data.
    """
    if not 0 < participation_rate <= 1:
        raise ValueError(
            f"participation_rate must be in (0, 1], got {participation_rate!r}"
        )

    shares = abs(quantity)
    notional = shares * price
    # Order size as a fraction of a day's volume; unknown/zero ADV is
    # treated conservatively as a full day's volume.
    volume_fraction = shares / adv if adv > 0 else 1.0

    # 1. Spread cost: half the spread per share
    s_cost = 0.5 * spread * shares

    # 2. Market impact (square-root model)
    impact_frac = volatility * np.sqrt(volume_fraction)
    i_cost = impact_frac * notional

    # 3. Timing risk: grows with the square root of execution duration.
    # Execution duration ~ Q / (participation_rate * ADV), in days.
    exec_duration = volume_fraction / participation_rate
    t_risk = volatility * np.sqrt(max(exec_duration, 0.0)) * notional * 0.5

    total = s_cost + i_cost + t_risk
    cost_bps = (total / notional) * 10_000 if notional > 0 else 0.0

    return {
        "spread_cost": float(s_cost),
        "impact_cost": float(i_cost),
        "timing_risk": float(t_risk),
        "total_cost": float(total),
        "cost_bps": float(cost_bps),
    }




[docs]
def _cost_to_bps(
    cost: NDArray[np.floating],
    benchmark: NDArray[np.floating],
) -> NDArray[np.floating]:
    """Express *cost* in basis points of *benchmark* (0.0 where benchmark is 0)."""
    bps = np.zeros(cost.shape, dtype=np.float64)
    # Masked divide: zero benchmarks are skipped entirely instead of being
    # divided and then discarded, which would emit RuntimeWarnings.
    np.divide(cost, benchmark, out=bps, where=benchmark != 0)
    return bps * 10_000


def transaction_cost_analysis(
    trades_df: pd.DataFrame,
    market_data_df: pd.DataFrame,
) -> pd.DataFrame:
    """Post-trade Transaction Cost Analysis (TCA).

    Use this after execution to evaluate the quality of each fill
    relative to multiple benchmarks.  TCA is the standard way
    institutional investors assess broker/algorithm performance.

    Compares each trade's execution price against:

    - **Arrival price**: mid-price when the order was submitted.
    - **VWAP**: volume-weighted average price during execution.
    - **Close**: closing price of the day.

    Every cost is signed so that a positive value is a cost to the
    trader: ``execution - benchmark`` for buys and
    ``benchmark - execution`` for sells.

    Parameters:
        trades_df: DataFrame with one row per fill.  Columns read by
            this function:

            - ``'execution_price'`` -- actual fill price.
            - ``'side'`` -- ``'buy'`` or ``'sell'``.

            Any other columns (e.g. ``'qty'``, ``'timestamp'``) are
            carried through to the result unchanged.

        market_data_df: DataFrame with market reference data.  Required
            columns:

            - ``'arrival_price'`` -- mid-price at order arrival.
            - ``'vwap'`` -- VWAP during execution window.
            - ``'close'`` -- closing price.

            If *market_data_df* has a single row, the same benchmarks
            are applied to all trades.  Otherwise it must have exactly
            one row per trade, and rows are matched to trades **by
            position** (row order), not by index label.

    Returns:
        A copy of *trades_df* (the input is not modified) with
        additional columns:

        - ``'arrival_cost'`` -- signed cost vs arrival price.
        - ``'arrival_cost_bps'`` -- arrival cost in basis points of the
          arrival price.
        - ``'vwap_cost'`` -- signed cost vs VWAP.
        - ``'vwap_cost_bps'`` -- VWAP cost in basis points of VWAP.
        - ``'close_cost'`` -- signed cost vs close.
        - ``'close_cost_bps'`` -- close cost in basis points of close.

        A ``*_bps`` value is ``0.0`` wherever its benchmark is zero.

    Raises:
        KeyError: If a required column is missing from either frame.
        ValueError: If *market_data_df* has neither one row nor one row
            per trade, or if any ``'side'`` value is neither ``'buy'``
            nor ``'sell'``.

    Example:
        >>> import pandas as pd
        >>> trades = pd.DataFrame({
        ...     'execution_price': [100.05, 100.10, 100.08],
        ...     'qty': [1000, 2000, 1500],
        ...     'side': ['buy', 'buy', 'buy'],
        ... })
        >>> market = pd.DataFrame({
        ...     'arrival_price': [100.00],
        ...     'vwap': [100.06],
        ...     'close': [100.12],
        ... })
        >>> tca = transaction_cost_analysis(trades, market)
        >>> 'arrival_cost_bps' in tca.columns
        True

    See Also:
        total_cost: Aggregate cost breakdown.
        arrival_price_benchmark: Simpler arrival-price analysis.
    """
    result = trades_df.copy()
    n_trades = len(trades_df)
    n_market = len(market_data_df)

    if n_market != 1 and n_market != n_trades:
        raise ValueError(
            "market_data_df must have a single row or one row per trade; "
            f"got {n_market} rows for {n_trades} trades"
        )

    exec_prices = np.asarray(trades_df["execution_price"], dtype=np.float64)

    # An unrecognised side must not silently be scored as a sell (which
    # would flip the sign of every cost for e.g. 'BUY'); fail loudly with
    # the same message ``slippage`` uses.
    sides = trades_df["side"].to_numpy(dtype=object)
    is_buy = sides == "buy"
    invalid = ~(is_buy | (sides == "sell"))
    if invalid.any():
        bad_side = sides[invalid][0]
        raise ValueError(f"side must be 'buy' or 'sell', got {bad_side!r}")

    # +1 for buys, -1 for sells, so positive always means a cost.
    sign = np.where(is_buy, 1.0, -1.0)

    for prefix, column in (
        ("arrival", "arrival_price"),
        ("vwap", "vwap"),
        ("close", "close"),
    ):
        benchmark = np.asarray(market_data_df[column], dtype=np.float64)
        if n_market == 1:
            # Single reference row: apply the same benchmark to every fill.
            benchmark = np.full(n_trades, benchmark[0])

        cost = sign * (exec_prices - benchmark)
        result[f"{prefix}_cost"] = cost
        result[f"{prefix}_cost_bps"] = _cost_to_bps(cost, benchmark)

    return result