Source code for wraquant.execution.cost

"""Transaction cost analysis.

Provides slippage calculation, commission accounting, total execution
cost breakdown, and market impact models.
"""

from __future__ import annotations

import numpy as np
import pandas as pd
from numpy.typing import NDArray


def slippage(
    execution_price: float | NDArray[np.floating],
    benchmark_price: float | NDArray[np.floating],
    side: str = "buy",
) -> float | NDArray[np.floating]:
    """Price slippage of one or more fills against a reference price.

    The sign convention makes a positive number a cost to the trader:
    buys are charged ``execution - benchmark`` and sells are charged
    ``benchmark - execution``.

    Parameters:
        execution_price: Actual fill price(s); scalar or array-like.
        benchmark_price: Reference price(s) to compare against (the
            caller chooses the benchmark, e.g. arrival price or VWAP).
        side: ``'buy'`` or ``'sell'``; applied to every element.

    Returns:
        A Python ``float`` when the result is zero-dimensional, otherwise
        a ``float64`` array. Positive = cost, negative = price improvement.

    Raises:
        ValueError: If *side* is neither ``'buy'`` nor ``'sell'``.

    Example:
        >>> slippage(100.5, 100.0, side='buy')
        0.5
        >>> slippage(99.5, 100.0, side='sell')
        0.5

    See Also:
        total_cost: Aggregate cost breakdown across multiple trades.
    """
    fills = np.asarray(execution_price, dtype=np.float64)
    reference = np.asarray(benchmark_price, dtype=np.float64)

    if side not in ("buy", "sell"):
        raise ValueError(f"side must be 'buy' or 'sell', got {side!r}")

    shortfall = fills - reference if side == "buy" else reference - fills

    # Scalars in -> plain float out; anything with a dimension stays an array.
    if np.ndim(shortfall) == 0:
        return float(shortfall)
    return shortfall
def commission_cost(
    qty: float | NDArray[np.floating],
    price: float | NDArray[np.floating],
    rate: float = 0.001,
) -> float | NDArray[np.floating]:
    """Explicit broker commission charged as a fraction of notional.

    The fee is ``|qty| * price * rate``; the sign of *qty* is discarded,
    so buys and sells of the same size cost the same.

    Parameters:
        qty: Shares traded per fill (scalar or array-like; sign ignored).
        price: Price per share (scalar or array-like).
        rate: Commission as a fraction of notional value
            (default 0.001 = 10 bps).

    Returns:
        A Python ``float`` when the result is zero-dimensional, otherwise
        a ``float64`` array. Non-negative whenever *price* and *rate* are
        non-negative.

    Example:
        >>> commission_cost(1000, 50.0, rate=0.5)
        25000.0
        >>> commission_cost(-100, 10.0, rate=0.25)
        250.0

    See Also:
        slippage: Implicit execution cost.
        total_cost: Combined slippage + commission.
    """
    shares = np.abs(np.asarray(qty, dtype=np.float64))
    unit_price = np.asarray(price, dtype=np.float64)
    fee = shares * unit_price * rate

    if np.ndim(fee) != 0:
        return fee
    return float(fee)
def total_cost(
    trades_df: pd.DataFrame,
    commission_rate: float = 0.001,
) -> dict[str, float]:
    """Total execution cost breakdown from a trades DataFrame.

    Decomposes the cost of a batch of fills into slippage (implicit cost
    versus the benchmark price) and commission (explicit cost charged on
    notional). The whole batch is processed in one vectorized pass rather
    than row by row.

    Parameters:
        trades_df: DataFrame with one row per fill. Required columns:
            ``'execution_price'``, ``'benchmark_price'``, ``'qty'``
            (sign ignored), and ``'side'`` (``'buy'`` or ``'sell'``).
        commission_rate: Commission rate as a fraction of notional value
            (default 0.001 = 10 bps).

    Returns:
        Dictionary with:

        - ``'total_slippage'``: Sum of per-share slippage * ``|qty|``.
        - ``'total_commission'``: Sum of ``|qty| * execution_price * rate``.
        - ``'total_cost'``: Slippage + commission.
        - ``'cost_bps'``: Total cost in basis points of total notional
          (``|qty| * execution_price``); 0.0 when notional is not positive.
        - ``'n_trades'``: Number of rows in *trades_df*.

    Raises:
        ValueError: If a required column is missing, or if any ``'side'``
            value is neither ``'buy'`` nor ``'sell'``.

    Example:
        >>> import pandas as pd
        >>> trades = pd.DataFrame({
        ...     'execution_price': [101.0, 49.0],
        ...     'benchmark_price': [100.0, 50.0],
        ...     'qty': [10, 20],
        ...     'side': ['buy', 'sell'],
        ... })
        >>> result = total_cost(trades, commission_rate=0.5)
        >>> result['total_slippage']
        30.0
        >>> result['n_trades']
        2

    See Also:
        slippage: Per-trade slippage calculation.
        commission_cost: Per-trade commission calculation.
    """
    required = {"execution_price", "benchmark_price", "qty", "side"}
    missing = required - set(trades_df.columns)
    if missing:
        raise ValueError(f"Missing columns: {missing}")

    exec_px = np.asarray(trades_df["execution_price"], dtype=np.float64)
    bench_px = np.asarray(trades_df["benchmark_price"], dtype=np.float64)
    shares = np.abs(np.asarray(trades_df["qty"], dtype=np.float64))
    # Object dtype keeps the string comparison element-wise even when the
    # frame is empty or the column was stored with a non-string dtype.
    sides = np.asarray(trades_df["side"], dtype=object)

    is_buy = sides == "buy"
    is_sell = sides == "sell"
    invalid = ~(is_buy | is_sell)
    if invalid.any():
        # Same message the per-trade ``slippage`` helper uses.
        raise ValueError(
            f"side must be 'buy' or 'sell', got {sides[invalid][0]!r}"
        )

    # Positive slippage is always a cost: buys pay above the benchmark,
    # sells receive below it.
    per_share_slippage = np.where(is_buy, exec_px - bench_px, bench_px - exec_px)
    trade_notional = shares * exec_px

    total_slippage = float(np.sum(per_share_slippage * shares))
    total_commission = float(np.sum(trade_notional * commission_rate))
    notional = float(np.sum(trade_notional))

    cost = total_slippage + total_commission
    cost_bps = (cost / notional) * 10_000 if notional > 0 else 0.0

    return {
        "total_slippage": float(total_slippage),
        "total_commission": float(total_commission),
        "total_cost": float(cost),
        "cost_bps": float(cost_bps),
        "n_trades": int(len(trades_df)),
    }
def market_impact_model(
    qty: float,
    avg_daily_volume: float,
    volatility: float,
    model: str = "sqrt",
) -> float:
    """Parametric pre-trade estimate of market impact.

    Impact is expressed as a fraction of price and is driven by the
    participation rate ``|qty| / avg_daily_volume``:

    - ``'sqrt'``: ``volatility * sqrt(participation)``
    - ``'linear'``: ``volatility * participation``

    Parameters:
        qty: Order quantity in shares (sign ignored).
        avg_daily_volume: Average daily volume. When not positive, the
            participation rate is taken to be 0 and the impact is 0.
        volatility: Daily price volatility (standard deviation of returns).
        model: ``'sqrt'`` (default) or ``'linear'``.

    Returns:
        Estimated impact as a fraction of price, on the same scale as
        *volatility*. Multiply by the price for a per-share dollar impact.

    Raises:
        ValueError: If *model* is neither ``'sqrt'`` nor ``'linear'``.

    Example:
        >>> market_impact_model(250_000, 1_000_000, 0.02, model='sqrt')
        0.01
        >>> market_impact_model(250_000, 1_000_000, 0.02, model='linear')
        0.005

    See Also:
        slippage: Measure actual post-trade slippage.
        wraquant.execution.optimal.almgren_chriss: Optimal execution
            trajectory.
    """
    if avg_daily_volume > 0:
        participation = abs(qty) / avg_daily_volume
    else:
        participation = 0.0

    if model not in ("sqrt", "linear"):
        raise ValueError(f"model must be 'sqrt' or 'linear', got {model!r}")

    # Square-root law scales with sqrt(Q / ADV); the linear model with Q / ADV.
    scale = np.sqrt(participation) if model == "sqrt" else participation
    return float(volatility * scale)
def liquidity_adjusted_cost(
    price: float,
    quantity: float,
    bid: float | pd.Series,
    ask: float | pd.Series,
    volume: float | pd.Series,
    avg_daily_volume: float | None = None,
) -> dict[str, float]:
    """Estimate execution cost using microstructure liquidity measures.

    Combines a spread-crossing cost (via
    ``wraquant.microstructure.liquidity.effective_spread``) with a
    square-root market impact estimate (via ``market_impact_model``).

    Components:

    1. **Spread cost**: half of the mean spread times ``|quantity|``.
    2. **Market impact**: square-root model impact fraction times
       ``price * |quantity|``. The volatility input is proxied by
       ``mean spread / price`` because no return series is supplied.
    3. **Total cost**: sum of the two.

    Parameters:
        price: Current mid-price or last trade price.
        quantity: Order quantity in shares (sign ignored).
        bid: Best bid price(s).
        ask: Best ask price(s).
        volume: Recent trading volume (scalar or Series).
        avg_daily_volume: Average daily volume for impact estimation. If
            *None*, uses the mean of *volume* (when *volume* is a Series)
            or *volume* itself (when scalar).

    Returns:
        Dictionary containing:

        - ``'spread_cost'`` (*float*) -- half-spread times ``|quantity|``.
        - ``'market_impact_cost'`` (*float*) -- impact cost in price units;
          0.0 when the ADV used is not positive.
        - ``'total_cost'`` (*float*) -- spread cost + impact cost.
        - ``'cost_bps'`` (*float*) -- total cost in basis points of
          ``|quantity| * price``; 0.0 when that notional is not positive.
        - ``'effective_spread'`` (*float*) -- NaN-ignoring mean of the
          spread used in the calculation.
        - ``'amihud_illiquidity'`` (*float*) -- value returned by
          ``amihud_illiquidity`` when *volume* is a Series; 0.0 otherwise.

    Example:
        >>> result = liquidity_adjusted_cost(
        ...     price=100.0, quantity=5000,
        ...     bid=99.98, ask=100.02, volume=1_000_000,
        ... )
        >>> result['spread_cost'] > 0
        True
        >>> result['total_cost'] >= result['spread_cost']
        True

    See Also:
        wraquant.microstructure.liquidity.effective_spread: Raw spread.
        wraquant.microstructure.liquidity.amihud_illiquidity: Illiquidity.
        market_impact_model: Parametric impact estimation.
    """
    # Imported lazily, as in the original, so this module does not depend on
    # the microstructure package at import time.
    from wraquant.microstructure.liquidity import (
        amihud_illiquidity,
        effective_spread,
    )

    bid_arr = np.asarray(bid, dtype=np.float64)
    ask_arr = np.asarray(ask, dtype=np.float64)
    mid = (bid_arr + ask_arr) / 2.0

    # Per the original author's note, effective_spread returns
    # 2 * |trade - mid|; feeding it the ask as the "trade" price therefore
    # yields the quoted spread, which is what a pre-trade estimate needs.
    eff_spread_raw = effective_spread(
        trade_prices=np.atleast_1d(ask_arr),
        midpoints=np.atleast_1d(mid),
    )
    mean_eff_spread = float(np.nanmean(eff_spread_raw))

    # You pay half the spread when crossing on entry.
    half_spread = mean_eff_spread / 2.0
    spread_cost = half_spread * abs(quantity)

    if isinstance(volume, pd.Series):
        vol_series = volume
        # Build the return proxy on the SAME index as the volume series.
        # NOTE(review): the previous default RangeIndex would mis-pair the
        # two series under pandas label alignment whenever `volume` carries
        # a non-default index (e.g. timestamps) -- presumably what
        # amihud_illiquidity does internally; confirm against that helper.
        returns_proxy = pd.Series(np.zeros(len(volume)), index=volume.index)
        if np.ndim(mid) > 0:
            # Log returns of the mid; floor at 1e-10 to keep log() finite.
            returns_proxy.iloc[1:] = np.diff(np.log(np.maximum(mid, 1e-10)))
        else:
            # A single quote carries no return information.
            returns_proxy.iloc[1:] = 0.0
        adv = float(vol_series.mean()) if avg_daily_volume is None else avg_daily_volume
        amihud = float(amihud_illiquidity(returns_proxy, vol_series))
    else:
        adv = float(volume) if avg_daily_volume is None else avg_daily_volume
        amihud = 0.0

    if adv > 0:
        # No return history is available here, so the relative spread stands
        # in for daily volatility (falling back to 1% if price is unusable).
        vol_proxy = mean_eff_spread / price if price > 0 else 0.01
        impact_frac = market_impact_model(
            qty=abs(quantity),
            avg_daily_volume=adv,
            volatility=vol_proxy,
            model="sqrt",
        )
        impact_cost = impact_frac * price * abs(quantity)
    else:
        impact_cost = 0.0

    total = spread_cost + impact_cost
    notional = abs(quantity) * price
    cost_bps = (total / notional) * 10_000 if notional > 0 else 0.0

    return {
        "spread_cost": float(spread_cost),
        "market_impact_cost": float(impact_cost),
        "total_cost": float(total),
        "cost_bps": float(cost_bps),
        "effective_spread": float(mean_eff_spread),
        "amihud_illiquidity": float(amihud),
    }
def expected_cost_model(
    quantity: float,
    price: float,
    adv: float,
    volatility: float,
    spread: float,
    *,
    pov: float = 0.1,
) -> dict[str, float]:
    """Comprehensive pre-trade expected cost model.

    Estimates the total expected cost of an order before execution,
    broken down into spread crossing cost, market impact, and timing risk.

    Components (``notional = |quantity| * price``,
    ``participation = |quantity| / adv``):

    1. **Spread cost**: ``0.5 * spread * |quantity|``.
    2. **Market impact**: ``volatility * sqrt(participation) * notional``
       (square-root model).
    3. **Timing risk**: ``0.5 * volatility * sqrt(T) * notional`` where
       ``T = participation / pov`` is the estimated execution duration in
       days when trading at a fraction *pov* of daily volume.

    Parameters:
        quantity: Order quantity (shares or units; sign ignored).
        price: Current market price.
        adv: Average daily volume (shares). When not positive, the
            participation rate falls back to 1.0 (a full day's volume).
        volatility: Daily price volatility (standard deviation of returns,
            e.g., 0.02 = 2%).
        spread: Bid-ask spread in price terms (e.g., 0.02 for a $0.02
            spread).
        pov: Assumed participation-of-volume rate used to convert order
            size into execution duration (keyword-only, default 0.1 =
            10% of volume, the value previously hard-coded). Must be
            positive.

    Returns:
        Dictionary containing:

        - **spread_cost** (*float*) -- Half-spread cost in dollar terms.
        - **impact_cost** (*float*) -- Estimated market impact cost.
        - **timing_risk** (*float*) -- Estimated timing/opportunity risk.
        - **total_cost** (*float*) -- Sum of all components.
        - **cost_bps** (*float*) -- Total cost in basis points of notional
          value; 0.0 when notional is not positive.

    Raises:
        ValueError: If *pov* is not positive.

    Example:
        >>> result = expected_cost_model(
        ...     quantity=5000, price=100.0, adv=1_000_000,
        ...     volatility=0.02, spread=0.02,
        ... )
        >>> result['spread_cost']
        50.0
        >>> result['total_cost'] > result['spread_cost']
        True

    See Also:
        market_impact_model: Parametric impact estimation.
        liquidity_adjusted_cost: Spread + impact using microstructure data.
    """
    if not pov > 0:
        raise ValueError(f"pov must be positive, got {pov!r}")

    shares = abs(quantity)
    notional = shares * price
    participation = shares / adv if adv > 0 else 1.0

    # 1. Spread cost: half the spread per share.
    s_cost = 0.5 * spread * shares

    # 2. Market impact (square-root model).
    impact_frac = volatility * np.sqrt(participation)
    i_cost = impact_frac * notional

    # 3. Timing risk: grows with the square root of execution duration,
    #    where duration (in days) ~ Q / (pov * ADV).
    exec_duration = participation / pov
    t_risk = volatility * np.sqrt(max(exec_duration, 0.0)) * notional * 0.5

    total = s_cost + i_cost + t_risk
    cost_bps = (total / notional) * 10_000 if notional > 0 else 0.0

    return {
        "spread_cost": float(s_cost),
        "impact_cost": float(i_cost),
        "timing_risk": float(t_risk),
        "total_cost": float(total),
        "cost_bps": float(cost_bps),
    }
def _cost_in_bps(
    cost: NDArray[np.floating],
    benchmark: NDArray[np.floating],
) -> NDArray[np.floating]:
    """Express *cost* in basis points of *benchmark*; 0.0 where benchmark is 0."""
    bps = np.zeros_like(cost, dtype=np.float64)
    # Masked division: zero benchmarks are skipped entirely, so no
    # divide-by-zero / invalid-value warning is ever triggered.
    np.divide(cost, benchmark, out=bps, where=benchmark != 0)
    return bps * 10_000


def transaction_cost_analysis(
    trades_df: pd.DataFrame,
    market_data_df: pd.DataFrame,
) -> pd.DataFrame:
    """Post-trade Transaction Cost Analysis (TCA).

    Compares each fill's execution price against three benchmarks taken
    from *market_data_df*: arrival price, VWAP, and close. Costs are signed
    so that a positive value is a cost to the trader.

    Parameters:
        trades_df: DataFrame with one row per fill. Columns read:

            - ``'execution_price'`` -- actual fill price.
            - ``'side'`` -- ``'buy'`` or ``'sell'``. Any value other than
              ``'buy'`` is treated as a sell.

            All other columns are carried through to the result untouched.
        market_data_df: DataFrame with market reference data. Required
            columns: ``'arrival_price'``, ``'vwap'``, ``'close'``.

            If it has a single row, the same benchmarks are applied to all
            trades. Otherwise it must have exactly one row per trade and
            rows are matched **by position** (row *i* of *market_data_df*
            to row *i* of *trades_df*), not by index label.

    Returns:
        A copy of *trades_df* with additional columns:

        - ``'arrival_cost'`` / ``'arrival_cost_bps'`` -- signed price
          difference versus arrival price, and the same in basis points of
          the arrival price.
        - ``'vwap_cost'`` / ``'vwap_cost_bps'`` -- same versus VWAP.
        - ``'close_cost'`` / ``'close_cost_bps'`` -- same versus close.

        A ``*_bps`` value is 0.0 wherever its benchmark is exactly 0.

    Raises:
        ValueError: If *market_data_df* has neither one row nor the same
            number of rows as *trades_df*.
        KeyError: If a required column is missing from either frame.

    Example:
        >>> import pandas as pd
        >>> trades = pd.DataFrame({
        ...     'execution_price': [100.05, 100.10, 100.08],
        ...     'qty': [1000, 2000, 1500],
        ...     'side': ['buy', 'buy', 'buy'],
        ... })
        >>> market = pd.DataFrame({
        ...     'arrival_price': [100.00],
        ...     'vwap': [100.06],
        ...     'close': [100.12],
        ... })
        >>> tca = transaction_cost_analysis(trades, market)
        >>> 'arrival_cost_bps' in tca.columns
        True

    See Also:
        total_cost: Aggregate cost breakdown.
        arrival_price_benchmark: Simpler arrival-price analysis.
    """
    result = trades_df.copy()
    n_trades = len(trades_df)
    n_market = len(market_data_df)

    if n_market == 1:
        # Broadcast the single benchmark row across every trade.
        arrivals = np.full(n_trades, float(market_data_df["arrival_price"].iloc[0]))
        vwaps = np.full(n_trades, float(market_data_df["vwap"].iloc[0]))
        closes = np.full(n_trades, float(market_data_df["close"].iloc[0]))
    elif n_market != n_trades:
        # Fail early with a clear message instead of an opaque NumPy
        # broadcasting / pandas length error further down.
        raise ValueError(
            "market_data_df must have a single row or one row per trade "
            f"({n_trades}), got {n_market} rows"
        )
    else:
        arrivals = np.asarray(market_data_df["arrival_price"], dtype=np.float64)
        vwaps = np.asarray(market_data_df["vwap"], dtype=np.float64)
        closes = np.asarray(market_data_df["close"], dtype=np.float64)

    exec_prices = np.asarray(trades_df["execution_price"], dtype=np.float64)
    sides = trades_df["side"].values

    # +1 for buys, -1 for everything else, so paying above the benchmark on
    # a buy (or receiving below it on a sell) shows up as a positive cost.
    sign = np.where(sides == "buy", 1.0, -1.0)

    arrival_cost = sign * (exec_prices - arrivals)
    vwap_cost = sign * (exec_prices - vwaps)
    close_cost = sign * (exec_prices - closes)

    result["arrival_cost"] = arrival_cost
    result["arrival_cost_bps"] = _cost_in_bps(arrival_cost, arrivals)
    result["vwap_cost"] = vwap_cost
    result["vwap_cost_bps"] = _cost_in_bps(vwap_cost, vwaps)
    result["close_cost"] = close_cost
    result["close_cost_bps"] = _cost_in_bps(close_cost, closes)

    return result