"""Liquidity analytics for market microstructure.
Liquidity measures how easily an asset can be traded without
significantly moving its price. Illiquid assets carry a liquidity risk
premium and pose execution challenges. This module provides the
standard toolkit for measuring liquidity from trade and quote data.
Measures provided:
**Illiquidity / price impact**:
- ``amihud_illiquidity``: the Amihud (2002) ratio -- average daily
|return| / volume. Higher values indicate less liquid assets.
The most widely used cross-sectional liquidity proxy because it
only requires daily data.
- ``kyle_lambda``: Kyle's lambda -- the permanent price impact
coefficient estimated via rolling OLS of price changes on signed
order flow. Higher lambda = more price impact per unit of volume.
- ``price_impact``: per-trade permanent price impact.
**Spread estimators**:
- ``roll_spread``: Roll (1984) implied spread from serial
autocovariance of price changes. Requires only trade prices
(no quote data needed).
- ``effective_spread``: 2 * |trade_price - midpoint|. The
standard measure of execution cost.
- ``realized_spread``: spread earned by the liquidity provider
after a delay, capturing adverse selection.
**Activity**:
- ``turnover_ratio``: daily volume / shares outstanding. Measures
trading activity relative to the total number of shares outstanding.
How to choose:
- **Cross-sectional liquidity ranking** (daily data only): use
``amihud_illiquidity``.
- **Execution cost analysis** (trade + quote data): use
``effective_spread`` and ``realized_spread``.
- **Price impact modeling**: use ``kyle_lambda`` for permanent
impact; ``price_impact`` for per-trade measurement.
- **No quote data available**: use ``roll_spread`` as a proxy
for the bid-ask spread.
References:
- Amihud (2002), "Illiquidity and Stock Returns"
- Kyle (1985), "Continuous Auctions and Insider Trading"
- Roll (1984), "A Simple Implicit Measure of the Effective
Bid-Ask Spread"
"""
from __future__ import annotations
import numpy as np
import pandas as pd
from numpy.typing import NDArray
from wraquant.core._coerce import coerce_series
[docs]
def amihud_illiquidity(
    returns: pd.Series,
    volume: pd.Series,
    window: int | None = None,
) -> pd.Series | float:
    """Compute the Amihud (2002) illiquidity ratio: mean(|return| / dollar volume).

    The larger the value, the bigger the absolute return associated with a
    unit of traded value, i.e. the less liquid the asset.

    Parameters:
        returns: Asset return series.
        volume: Dollar volume series (price * shares traded).
        window: Rolling window length. With *None* the per-observation
            ratios are collapsed into one NaN-skipping full-sample mean.

    Returns:
        A single float when *window* is *None*; otherwise the rolling mean
        of the per-observation ratio.

    Example:
        >>> import pandas as pd, numpy as np
        >>> np.random.seed(42)
        >>> returns = pd.Series(np.random.randn(252) * 0.01)
        >>> volume = pd.Series(np.random.uniform(1e6, 5e6, 252))
        >>> illiq = amihud_illiquidity(returns, volume)
        >>> illiq > 0
        True

    See Also:
        kyle_lambda: Price impact coefficient (regression-based alternative).
        amihud_rolling: Rolling version with normalization.
    """
    ret = coerce_series(returns, "returns")
    dollar_volume = coerce_series(volume, "volume")

    per_obs = np.abs(ret) / dollar_volume
    # Zero-volume observations divide to +/-inf; treat them as missing so
    # they do not dominate the average.
    per_obs = per_obs.replace([np.inf, -np.inf], np.nan)

    if window is not None:
        return per_obs.rolling(window).mean()
    return float(np.nanmean(per_obs))
[docs]
def kyle_lambda(
    prices: pd.Series,
    volume: pd.Series,
    window: int = 20,
) -> pd.Series:
    """Estimate Kyle's lambda as a rolling OLS slope of price change on flow.

    For each window the slope of the regression of price changes on signed
    volume is obtained in closed form as ``cov(dp, v) / var(v)``.

    Parameters:
        prices: Price series.
        volume: Signed volume series (positive for buys, negative for sells).
        window: Rolling regression window.

    Returns:
        Series named ``"kyle_lambda"``. Larger values mean more price
        movement per unit of signed volume (less liquid). Infinite slopes
        (zero flow variance) are reported as NaN.

    Example:
        >>> import pandas as pd, numpy as np
        >>> np.random.seed(42)
        >>> prices = pd.Series(100 + np.cumsum(np.random.randn(100) * 0.5))
        >>> volume = pd.Series(np.random.randn(100) * 1000)
        >>> lam = kyle_lambda(prices, volume, window=20)
        >>> len(lam) == 100
        True

    See Also:
        amihud_illiquidity: Simpler illiquidity proxy (no signed volume needed).
        lambda_kyle_rolling: Kyle's lambda with confidence intervals.
    """
    px = coerce_series(prices, "prices")
    flow = coerce_series(volume, "volume")

    price_change = px.diff()
    # Univariate OLS slope: covariance with the regressor over its variance.
    covariance = price_change.rolling(window).cov(flow)
    flow_variance = flow.rolling(window).var()

    slope = (covariance / flow_variance).replace([np.inf, -np.inf], np.nan)
    return slope.rename("kyle_lambda")
[docs]
def roll_spread(prices: pd.Series) -> float:
    """Roll (1984) implied bid-ask spread from serial covariance.

    Estimates the spread from the first-order autocovariance of price
    changes: ``spread = 2 * sqrt(-cov)``.

    Parameters:
        prices: Price series. Missing price changes are dropped before the
            autocovariance is computed.

    Returns:
        Estimated implied spread as a plain ``float``. *NaN* is returned when
        the serial covariance is non-negative or undefined (model assumption
        violated), or when fewer than three price changes are available so
        that the lagged covariance cannot be estimated.

    Example:
        >>> import pandas as pd, numpy as np
        >>> np.random.seed(42)
        >>> # Simulate trade prices with bid-ask bounce
        >>> mid = 100 + np.cumsum(np.random.randn(500) * 0.01)
        >>> bounce = np.random.choice([-0.05, 0.05], size=500)
        >>> prices = pd.Series(mid + bounce)
        >>> spread = roll_spread(prices)
        >>> spread > 0 or np.isnan(spread)  # positive spread or NaN
        True

    See Also:
        effective_spread: Direct spread from trade and quote data.
        corwin_schultz_spread: High-low spread estimator (OHLC data).
    """
    prices = coerce_series(prices, "prices")
    changes = prices.diff().dropna().to_numpy(dtype=np.float64)

    # The lagged covariance pairs changes[:-1] with changes[1:]; a sample
    # covariance needs at least two such pairs, i.e. three price changes.
    # Guarding here avoids numpy's degrees-of-freedom RuntimeWarnings.
    if changes.size < 3:
        return float("nan")

    serial_cov = np.cov(changes[:-1], changes[1:])[0, 1]

    # "not < 0" also routes a NaN covariance to the NaN return.
    if not serial_cov < 0:
        return float("nan")
    return float(2.0 * np.sqrt(-serial_cov))
[docs]
def effective_spread(
    trade_prices: pd.Series | NDArray[np.floating],
    midpoints: pd.Series | NDArray[np.floating],
) -> pd.Series | NDArray[np.floating]:
    """Effective bid-ask spread: 2 * |trade_price - midpoint|.

    Parameters:
        trade_prices: Executed trade prices.
        midpoints: Prevailing bid-ask midpoints at time of each trade.

    Returns:
        Per-trade effective spread. When both inputs are NumPy arrays the
        result is returned as a NumPy array, so the output type matches the
        inputs; in every other case the result of the Series arithmetic is
        returned unchanged.

    Example:
        >>> import pandas as pd, numpy as np
        >>> trades = pd.Series([100.05, 99.95, 100.03])
        >>> mids = pd.Series([100.0, 100.0, 100.0])
        >>> spreads = effective_spread(trades, mids)
        >>> round(float(spreads.iloc[0]), 6)  # rounded: binary floats are inexact
        0.1

    See Also:
        realized_spread: Post-trade spread (adverse selection component).
        roll_spread: Implied spread from price autocovariance.
    """
    # Remember the caller's container type before coercion so array callers
    # get an array back (same convention as depth_imbalance).
    arrays_in = isinstance(trade_prices, np.ndarray) and isinstance(
        midpoints, np.ndarray
    )

    trade_prices = coerce_series(trade_prices, "trade_prices")
    midpoints = coerce_series(midpoints, "midpoints")

    spread = 2.0 * np.abs(trade_prices - midpoints)
    if arrays_in:
        return np.asarray(spread)
    return spread
[docs]
def realized_spread(
    trade_prices: pd.Series,
    midpoints: pd.Series,
    delay: int = 5,
) -> pd.Series:
    """Compute the realized spread against a later midpoint.

    For each trade the result is
    ``2 * direction * (trade_price - midpoint_{t+delay})``, where the
    direction is inferred as the sign of ``trade_price - midpoint_t``
    (a trade exactly at the midpoint therefore gets direction 0).

    Parameters:
        trade_prices: Executed trade prices.
        midpoints: Mid-quote series aligned to trades.
        delay: Number of observations the reference midpoint lies ahead
            of the trade.

    Returns:
        Per-trade realized spread series (NaN for the last *delay* rows,
        which have no future midpoint).

    Example:
        >>> import pandas as pd, numpy as np
        >>> trades = pd.Series([100.05, 99.95, 100.03, 100.01, 99.98])
        >>> mids = pd.Series([100.0, 100.0, 100.0, 100.0, 100.0])
        >>> rs = realized_spread(trades, mids, delay=2)
        >>> len(rs) == 5
        True

    See Also:
        effective_spread: Total execution cost (before adverse selection).
        spread_decomposition: Full Huang-Stoll decomposition.
    """
    px = coerce_series(trade_prices, "trade_prices")
    mid = coerce_series(midpoints, "midpoints")

    # +1 when the trade printed above the contemporaneous mid, -1 below.
    side = np.sign(px - mid)
    # Midpoint observed `delay` rows after each trade.
    later_mid = mid.shift(-delay)
    markout = px - later_mid
    return 2.0 * side * markout
[docs]
def price_impact(
    trade_prices: pd.Series,
    volume: pd.Series,
    direction: pd.Series,
) -> pd.Series:
    """Compute the signed next-trade price move per unit of volume.

    The value for trade *t* is
    ``direction_t * (price_{t+1} - price_t) / volume_t``; successive trade
    prices stand in for successive midpoints.

    Parameters:
        trade_prices: Executed trade prices.
        volume: Volume for each trade.
        direction: Trade direction indicator (+1 buy, -1 sell).

    Returns:
        Series named ``"price_impact"``. The last row is NaN (no following
        trade) and infinite values (zero volume) are reported as NaN.

    Example:
        >>> import pandas as pd, numpy as np
        >>> trades = pd.Series([100.0, 100.05, 100.10, 100.08])
        >>> vol = pd.Series([1000, 2000, 1500, 1800])
        >>> direction = pd.Series([1, 1, -1, 1])
        >>> impact = price_impact(trades, vol, direction)
        >>> len(impact) == 4
        True

    See Also:
        kyle_lambda: Aggregate price impact coefficient.
        wraquant.microstructure.market_quality.price_impact_regression:
            Permanent vs. temporary impact decomposition.
    """
    px = coerce_series(trade_prices, "trade_prices")
    size = coerce_series(volume, "volume")
    side = coerce_series(direction, "direction")

    # Price change from this trade to the next one, aligned on this trade.
    next_move = px.diff().shift(-1)
    per_unit = side * next_move / size
    per_unit = per_unit.replace([np.inf, -np.inf], np.nan)
    return per_unit.rename("price_impact")
[docs]
def turnover_ratio(
    volume: pd.Series,
    shares_outstanding: pd.Series | float,
) -> pd.Series:
    """Compute share turnover as volume divided by shares outstanding.

    Parameters:
        volume: Daily trading volume.
        shares_outstanding: Total shares outstanding (scalar or series).

    Returns:
        Series named ``"turnover_ratio"``. Higher values indicate more
        active trading.

    Example:
        >>> import pandas as pd
        >>> volume = pd.Series([1e6, 1.5e6, 0.8e6])
        >>> ratio = turnover_ratio(volume, shares_outstanding=100e6)
        >>> float(ratio.iloc[0])
        0.01

    See Also:
        amihud_illiquidity: Price-impact-based liquidity measure.
    """
    traded = coerce_series(volume, "volume")
    turnover = traded / shares_outstanding
    return turnover.rename("turnover_ratio")
# ---------------------------------------------------------------------------
# Enhanced liquidity analytics
# ---------------------------------------------------------------------------
[docs]
def corwin_schultz_spread(
    high: pd.Series,
    low: pd.Series,
    window: int = 1,
) -> pd.Series:
    """Corwin & Schultz (2012) high-low spread estimator.

    Estimates the bid-ask spread from consecutive daily high and low prices.
    The key insight is that daily high prices are almost always
    buyer-initiated (at the ask) while daily lows are seller-initiated (at
    the bid). The high-to-low ratio therefore captures both volatility *and*
    the spread. By comparing single-day and two-day high-low ranges the
    method disentangles the two components.

    **When to use**: When only daily OHLC data is available and you need a
    spread estimate. Unlike the Roll (1984) estimator it does not require
    negative serial covariance of price changes.

    **Interpretation**: The output is a *proportional* spread (a
    dimensionless fraction of price), because it is built entirely from log
    high/low ratios; multiply by a price level to express it in price units.
    Negative raw estimates are floored at zero (model assumption violated,
    usually when volatility overwhelms the spread).

    Parameters:
        high: Daily high prices.
        low: Daily low prices.
        window: Averaging window for the spread estimate. ``window=1``
            (or anything not greater than 1) returns the raw two-day
            estimate; larger values return its rolling mean.

    Returns:
        Series named ``"corwin_schultz_spread"``, floored at zero. The first
        row is always NaN because the estimate needs the previous day's
        high and low; with ``window > 1`` further leading rows are NaN.

    Example:
        >>> import pandas as pd, numpy as np
        >>> np.random.seed(42)
        >>> close = pd.Series(100 + np.cumsum(np.random.randn(100) * 0.5))
        >>> high = close + np.abs(np.random.randn(100)) * 0.3
        >>> low = close - np.abs(np.random.randn(100)) * 0.3
        >>> spread = corwin_schultz_spread(high, low)
        >>> bool((spread.dropna() >= 0).all())  # first row is NaN
        True

    References:
        Corwin, S. A. & Schultz, P. (2012). "A Simple Way to Estimate
        Bid-Ask Spreads from Daily High and Low Prices." *Journal of
        Finance*, 67(2), 719-760.

    See Also:
        roll_spread: Implied spread from trade prices only.
        effective_spread: Direct spread from trade and quote data.
    """
    high = coerce_series(high, "high")
    low = coerce_series(low, "low")

    # beta: squared single-day log ranges, summed over days t-1 and t.
    sq_log_range = np.log(high / low) ** 2
    beta = sq_log_range + sq_log_range.shift(1)

    # gamma: squared log range of the combined two-day window (highest high
    # over lowest low of days t-1 and t).
    high_2d = pd.concat([high, high.shift(1)], axis=1).max(axis=1)
    low_2d = pd.concat([low, low.shift(1)], axis=1).min(axis=1)
    gamma = np.log(high_2d / low_2d) ** 2

    # alpha = (sqrt(2*beta) - sqrt(beta)) / (3 - 2*sqrt(2))
    #         - sqrt(gamma / (3 - 2*sqrt(2)))
    k = 3.0 - 2.0 * np.sqrt(2.0)
    alpha = (np.sqrt(2.0 * beta) - np.sqrt(beta)) / k - np.sqrt(gamma / k)

    # S = 2 * (e^alpha - 1) / (1 + e^alpha)
    exp_alpha = np.exp(alpha)
    spread = 2.0 * (exp_alpha - 1.0) / (1.0 + exp_alpha)

    # Floor at zero -- negative estimates are artefacts. NaN rows stay NaN.
    spread = spread.clip(lower=0.0)

    if window > 1:
        spread = spread.rolling(window).mean()
    spread.name = "corwin_schultz_spread"
    return spread
[docs]
def closing_quoted_spread(
    bid_close: pd.Series,
    ask_close: pd.Series,
) -> pd.Series:
    """Compute the quoted bid-ask spread at the market close (ask - bid).

    The closing spread matters to anyone trading at or near the close
    (e.g., mutual fund NAV calculations, index rebalancing, MOC orders) and
    doubles as a simple daily liquidity proxy when intraday data is
    unavailable.

    **When to use**: Analyzing execution costs for daily-frequency traders,
    evaluating end-of-day liquidity conditions, or building a daily spread
    time series from closing quotes.

    **Interpretation**: Narrower spreads indicate better end-of-day
    liquidity.

    Parameters:
        bid_close: Best bid price at market close.
        ask_close: Best ask price at market close.

    Returns:
        Series named ``"closing_quoted_spread"`` holding ``ask - bid`` in
        price units.

    Example:
        >>> import pandas as pd
        >>> bid = pd.Series([99.90, 99.85, 99.95])
        >>> ask = pd.Series([100.10, 100.15, 100.05])
        >>> spread = closing_quoted_spread(bid, ask)
        >>> round(float(spread.iloc[0]), 6)  # rounded: binary floats are inexact
        0.2

    References:
        Chordia, T., Roll, R. & Subrahmanyam, A. (2001). "Market Liquidity
        and Trading Activity." *Journal of Finance*, 56(2), 501-530.

    See Also:
        effective_spread: Execution-weighted spread measure.
        relative_spread: Spread normalized by midpoint.
    """
    bids = coerce_series(bid_close, "bid_close")
    asks = coerce_series(ask_close, "ask_close")
    quoted = asks - bids
    return quoted.rename("closing_quoted_spread")
[docs]
def depth_imbalance(
    bid_depth: pd.Series | NDArray[np.floating],
    ask_depth: pd.Series | NDArray[np.floating],
) -> pd.Series | NDArray[np.floating]:
    """Order book depth imbalance.

    Computes ``(bid_depth - ask_depth) / (bid_depth + ask_depth)`` to
    measure the directional imbalance in resting limit order volume.

    **When to use**: For real-time assessment of supply-demand imbalance in
    the limit order book.

    **Interpretation**:
        - **+1**: All depth is on the bid side.
        - **-1**: All depth is on the ask side.
        - **0**: Balanced book. Rows whose total depth is not strictly
          positive (empty book, or missing depth) are also reported as 0.

    Parameters:
        bid_depth: Total volume at the best bid (or top-N bid levels).
        ask_depth: Total volume at the best ask (or top-N ask levels).

    Returns:
        Depth imbalance, in [-1, 1] for non-negative depths. A Series named
        ``"depth_imbalance"`` (indexed like the bid input) when *bid_depth*
        is a Series, otherwise a float64 NumPy array.

    Example:
        >>> import pandas as pd
        >>> bid_depth = pd.Series([5000, 3000, 4000])
        >>> ask_depth = pd.Series([3000, 5000, 4000])
        >>> imb = depth_imbalance(bid_depth, ask_depth)
        >>> float(imb.iloc[0])  # more bids than asks -> positive
        0.25

    References:
        Cao, C., Hansch, O. & Wang, X. (2009). "The Information Content
        of an Open Limit-Order Book." *Journal of Futures Markets*, 29(1),
        16-41.

    See Also:
        wraquant.microstructure.toxicity.order_flow_imbalance:
            Volume-based imbalance measure.
        wraquant.microstructure.market_quality.depth: Total market depth.
    """
    # Record the caller's container type before coercion.
    is_series = isinstance(bid_depth, pd.Series)
    bid_series = coerce_series(bid_depth, "bid_depth")
    ask_series = coerce_series(ask_depth, "ask_depth")

    bid_arr = bid_series.to_numpy(dtype=np.float64)
    ask_arr = ask_series.to_numpy(dtype=np.float64)
    total = bid_arr + ask_arr

    # Divide only where the book is non-empty. A masked ufunc call leaves the
    # pre-filled zeros elsewhere and, unlike np.where, never evaluates x / 0,
    # so no divide/invalid RuntimeWarning is raised for empty-book rows.
    imbalance = np.zeros_like(total)
    np.divide(bid_arr - ask_arr, total, out=imbalance, where=total > 0)

    if is_series:
        return pd.Series(imbalance, index=bid_series.index, name="depth_imbalance")
    return imbalance
[docs]
def lambda_kyle_rolling(
    prices: pd.Series,
    volume: pd.Series,
    window: int = 20,
) -> pd.DataFrame:
    """Estimate rolling Kyle's lambda together with its standard error.

    For every window an OLS regression (with intercept) of price changes on
    signed volume is fitted in closed form. The slope is lambda; its
    standard error comes from the residual variance, and a normal
    approximation (+/- 1.96 standard errors) gives the 95% band.

    **When to use**: When you need the *precision* of the price impact
    estimate as well as its level.

    **Interpretation**: A positive lambda means buy-signed volume is
    associated with rising prices. A band that spans zero suggests no
    statistically distinguishable price impact in that window.

    Parameters:
        prices: Price series.
        volume: Signed volume series (positive for buys, negative for
            sells).
        window: Rolling regression window. Windows holding fewer than five
            valid (non-NaN) pairs are skipped, so ``window < 5`` yields an
            all-NaN result.

    Returns:
        DataFrame indexed like *prices* with columns ``'lambda'``,
        ``'std_err'``, ``'ci_lower'``, ``'ci_upper'``. The first *window*
        rows, and any window that is skipped, are NaN.

    Example:
        >>> import pandas as pd, numpy as np
        >>> np.random.seed(42)
        >>> prices = pd.Series(100 + np.cumsum(np.random.randn(50) * 0.1))
        >>> volume = pd.Series(np.random.randn(50) * 1000)
        >>> result = lambda_kyle_rolling(prices, volume, window=20)
        >>> list(result.columns)
        ['lambda', 'std_err', 'ci_lower', 'ci_upper']

    References:
        Kyle, A. S. (1985). "Continuous Auctions and Insider Trading."
        *Econometrica*, 53(6), 1315-1335.

    See Also:
        kyle_lambda: Simple point estimate without confidence intervals.
        amihud_rolling: Rolling Amihud illiquidity ratio.
    """
    px = coerce_series(prices, "prices")
    flow = coerce_series(volume, "volume")
    price_change = px.diff()

    n_rows = len(px)
    slopes = np.full(n_rows, np.nan)
    slope_errors = np.full(n_rows, np.nan)

    # Start at `window` (not window - 1): row 0 of the differenced series is
    # NaN, so the first complete window of price changes ends at that row.
    for end in range(window, n_rows):
        start = end - window + 1
        y_win = price_change.iloc[start : end + 1].values
        x_win = flow.iloc[start : end + 1].values

        # Keep only pairs where both observations are present.
        keep = ~np.isnan(y_win) & ~np.isnan(x_win)
        if keep.sum() < 5:
            continue
        y_obs = y_win[keep]
        x_obs = x_win[keep]
        n_used = len(y_obs)

        x_mean = np.mean(x_obs)
        ss_x = np.sum((x_obs - x_mean) ** 2)
        if ss_x < 1e-15:
            # Regressor is (numerically) constant: slope undefined.
            continue

        y_mean = np.mean(y_obs)
        slope = np.sum((x_obs - x_mean) * (y_obs - y_mean)) / ss_x
        fitted = y_mean - slope * x_mean + slope * x_obs
        resid = y_obs - fitted
        # Residual variance with n - 2 degrees of freedom (floored at 1).
        resid_var = np.sum(resid ** 2) / max(n_used - 2, 1)

        slopes[end] = slope
        slope_errors[end] = np.sqrt(resid_var / ss_x)

    half_width = 1.96 * slope_errors
    return pd.DataFrame(
        {
            "lambda": slopes,
            "std_err": slope_errors,
            "ci_lower": slopes - half_width,
            "ci_upper": slopes + half_width,
        },
        index=px.index,
    )
[docs]
def amihud_rolling(
    returns: pd.Series,
    volume: pd.Series,
    window: int = 21,
    normalize: bool = True,
) -> pd.Series:
    """Compute a rolling Amihud (2002) illiquidity ratio, optionally rescaled.

    The per-observation ratio ``|return| / dollar volume`` is averaged over
    a rolling window. With *normalize* the rolling series is divided by its
    own NaN-skipping time-series mean, which makes levels comparable across
    assets with different price levels and trading volumes.

    **When to use**: For tracking how an individual asset's liquidity
    evolves over time.

    **Interpretation**: Higher values indicate less liquidity (more price
    movement per unit of traded value). Sudden spikes often correspond to
    liquidity crises or market stress events.

    Parameters:
        returns: Asset return series.
        volume: Dollar volume series (price * shares traded).
        window: Rolling window size (default 21 for ~1 month of trading
            days).
        normalize: If *True*, divide each rolling value by the mean of the
            whole rolling series so its time-series average is 1.0. The
            division is skipped when that mean is not strictly positive.

    Returns:
        Series named ``"amihud_rolling"``.

    Example:
        >>> import pandas as pd, numpy as np
        >>> np.random.seed(42)
        >>> returns = pd.Series(np.random.randn(100) * 0.01)
        >>> volume = pd.Series(np.random.uniform(1e6, 5e6, 100))
        >>> illiq = amihud_rolling(returns, volume, window=21)
        >>> illiq.name
        'amihud_rolling'

    References:
        Amihud, Y. (2002). "Illiquidity and Stock Returns: Cross-Section
        and Time-Series Effects." *Journal of Financial Markets*, 5(1),
        31-56.

    See Also:
        amihud_illiquidity: Static (full-sample) Amihud ratio.
        liquidity_commonality: How much liquidity co-moves with the market.
    """
    ret = coerce_series(returns, "returns")
    dollar_volume = coerce_series(volume, "volume")

    per_obs = np.abs(ret) / dollar_volume
    # Zero-volume rows divide to +/-inf; treat them as missing.
    per_obs = per_obs.replace([np.inf, -np.inf], np.nan)
    smoothed = per_obs.rolling(window).mean()

    if normalize:
        scale = np.nanmean(smoothed)
        if scale > 0:
            smoothed = smoothed / scale

    smoothed.name = "amihud_rolling"
    return smoothed
[docs]
def liquidity_commonality(
    asset_illiquidity: pd.Series,
    market_illiquidity: pd.Series,
    window: int = 60,
) -> pd.Series:
    """Commonality in liquidity (Chordia, Roll & Subrahmanyam, 2000).

    Measures how much an individual asset's liquidity co-moves with
    market-wide liquidity. For each rolling window, changes in the asset's
    illiquidity measure are regressed on changes in the market-wide measure
    and the regression R-squared is reported.

    **When to use**: For assessing systematic liquidity risk. Assets with
    high commonality become illiquid precisely when the entire market
    becomes illiquid.

    **Interpretation**: Values closer to 1 indicate stronger co-movement
    with market liquidity. R-squared does not depend on the units of either
    input, so raw (very small) Amihud ratios and rescaled series give the
    same result.

    Parameters:
        asset_illiquidity: Individual asset's illiquidity measure (e.g.,
            Amihud ratio, effective spread) as a time series.
        market_illiquidity: Market-wide illiquidity aggregate (e.g.,
            equal-weighted average Amihud ratio across all stocks).
        window: Rolling regression window (default 60 for ~3 months).

    Returns:
        Series named ``"liquidity_commonality"`` holding the rolling
        R-squared. The first *window* rows, and windows with fewer than five
        valid pairs, are NaN. Windows in which either differenced series is
        effectively constant are reported as 0.0.

    Example:
        >>> import pandas as pd, numpy as np
        >>> np.random.seed(42)
        >>> asset = pd.Series(np.random.randn(200).cumsum())
        >>> market = pd.Series(np.random.randn(200).cumsum())
        >>> r2 = liquidity_commonality(asset, market, window=60)
        >>> r2.name
        'liquidity_commonality'

    References:
        Chordia, T., Roll, R. & Subrahmanyam, A. (2000). "Commonality in
        Liquidity." *Journal of Financial Economics*, 56(1), 3-28.

    See Also:
        amihud_rolling: Generate the illiquidity input for this function.
    """
    asset_illiquidity = coerce_series(asset_illiquidity, "asset_illiquidity")
    market_illiquidity = coerce_series(market_illiquidity, "market_illiquidity")
    d_asset = asset_illiquidity.diff()
    d_market = market_illiquidity.diff()

    # A window counts as degenerate when its variation is negligible
    # *relative to its own magnitude*. An absolute cut-off would depend on
    # the units of the input and would wrongly zero out every window for
    # small-valued measures such as unnormalized Amihud ratios.
    rel_tol = np.finfo(np.float64).eps

    n_rows = len(d_asset)
    r_squared = np.full(n_rows, np.nan)

    for end in range(window, n_rows):
        start = end - window + 1
        y = d_asset.iloc[start : end + 1].values
        x = d_market.iloc[start : end + 1].values

        mask = ~(np.isnan(y) | np.isnan(x))
        if mask.sum() < 5:
            continue
        y_c = y[mask]
        x_c = x[mask]

        x_dev = x_c - np.mean(x_c)
        y_dev = y_c - np.mean(y_c)
        ss_xx = np.sum(x_dev ** 2)
        ss_yy = np.sum(y_dev ** 2)

        if ss_xx <= rel_tol * np.sum(x_c ** 2) or ss_yy <= rel_tol * np.sum(y_c ** 2):
            # No usable variation on one side: nothing can be explained.
            r_squared[end] = 0.0
            continue

        ss_xy = np.sum(x_dev * y_dev)
        # Two separate ratios avoid forming the (possibly tiny) product
        # ss_xx * ss_yy; the cap guards against rounding just above 1.
        r_squared[end] = min((ss_xy / ss_xx) * (ss_xy / ss_yy), 1.0)

    return pd.Series(
        r_squared, index=asset_illiquidity.index, name="liquidity_commonality"
    )
[docs]
def spread_decomposition(
    trade_prices: pd.Series,
    bid: pd.Series,
    ask: pd.Series,
    direction: pd.Series,
    delay: int = 5,
) -> dict[str, float]:
    """Three-way spread decomposition in the spirit of Huang-Stoll (1997).

    Splits the mean effective spread into three shares that sum to one:

    1. **Adverse selection**: the mean signed midpoint revision over the
       next *delay* observations, as a fraction of the mean effective
       half-spread (clipped to [0, 1]). This is the *permanent* part -- the
       midpoint moves against the liquidity provider after the trade.
    2. **Inventory holding**: the remaining (transitory) share multiplied
       by the absolute lag-1 autocorrelation of trade direction, which is
       used as a proxy for inventory management.
    3. **Order processing**: whatever is left of the transitory share.

    Note that this is a moment-based heuristic, not the regression
    estimator of the original paper.

    **When to use**: For understanding *why* spreads are wide. If adverse
    selection dominates, the market has significant information asymmetry.
    If order processing dominates, the market is structurally costly.

    Parameters:
        trade_prices: Executed trade prices.
        bid: Best bid prices at time of each trade.
        ask: Best ask prices at time of each trade.
        direction: Trade direction indicator (+1 buy, -1 sell).
        delay: Number of observations to look ahead for measuring the
            permanent price impact (default 5).

    Returns:
        Dictionary with keys:

        - ``'adverse_selection'``: fraction of the spread due to
          information asymmetry.
        - ``'order_processing'``: fraction due to order handling costs.
        - ``'inventory_holding'``: fraction due to inventory risk.
        - ``'effective_spread_mean'``: average effective spread (twice the
          mean signed half-spread).

        The three fractions are NaN when no trade has both an effective
        half-spread and a future midpoint (e.g. ``len <= delay``), or when
        the mean effective half-spread is not strictly positive.

    Example:
        >>> import pandas as pd, numpy as np
        >>> np.random.seed(42)
        >>> n = 200
        >>> mid = 100 + np.cumsum(np.random.randn(n) * 0.01)
        >>> spread_half = 0.05
        >>> bid = pd.Series(mid - spread_half)
        >>> ask = pd.Series(mid + spread_half)
        >>> direction = pd.Series(np.random.choice([1, -1], n))
        >>> trades = pd.Series(np.where(direction > 0, ask, bid))
        >>> result = spread_decomposition(trades, bid, ask, direction)
        >>> 0 <= result['adverse_selection'] <= 1
        True

    References:
        Huang, R. D. & Stoll, H. R. (1997). "The Components of the
        Bid-Ask Spread: A General Approach." *Review of Financial Studies*,
        10(4), 995-1034.

    See Also:
        effective_spread: Total execution cost measure.
        realized_spread: Liquidity provider's revenue component.
    """
    trade_prices = coerce_series(trade_prices, "trade_prices")
    bid = coerce_series(bid, "bid")
    ask = coerce_series(ask, "ask")
    direction = coerce_series(direction, "direction")

    nan = float("nan")
    mid = (bid + ask) / 2.0

    # Signed effective half-spread per trade.
    eff_half = direction * (trade_prices - mid)
    # Permanent component: midpoint revision in the direction of the trade.
    permanent = direction * (mid.shift(-delay) - mid)

    # Keep trades with both pieces available (drops the last `delay` rows).
    valid = ~(eff_half.isna() | permanent.isna())
    if not valid.any():
        # Nothing to average: report an undefined decomposition explicitly
        # instead of taking the mean of an empty sample.
        return {
            "adverse_selection": nan,
            "order_processing": nan,
            "inventory_holding": nan,
            "effective_spread_mean": nan,
        }

    mean_eff = float(np.nanmean(eff_half[valid]))
    mean_perm = float(np.nanmean(permanent[valid]))

    if not mean_eff > 0:
        # Degenerate case: non-positive (or undefined) mean spread, so the
        # fractions have no meaningful denominator.
        return {
            "adverse_selection": nan,
            "order_processing": nan,
            "inventory_holding": nan,
            "effective_spread_mean": float(mean_eff * 2.0),
        }

    adverse_frac = np.clip(mean_perm / mean_eff, 0.0, 1.0)
    # Realized spread = transitory component (order processing + inventory).
    transitory_frac = 1.0 - adverse_frac

    # Split the transitory share using |lag-1 autocorrelation| of direction.
    # The correlation is undefined for fewer than two lagged pairs or when
    # either side is constant; those cases get a zero inventory share.
    inventory_share = 0.0
    dir_arr = direction[valid].to_numpy(dtype=np.float64)
    if dir_arr.size > 2:
        lagged, current = dir_arr[:-1], dir_arr[1:]
        if lagged.min() != lagged.max() and current.min() != current.max():
            autocorr = np.corrcoef(lagged, current)[0, 1]
            if not np.isnan(autocorr):
                inventory_share = np.clip(abs(autocorr), 0.0, 1.0)

    inventory_frac = transitory_frac * inventory_share
    processing_frac = transitory_frac * (1.0 - inventory_share)

    return {
        "adverse_selection": float(adverse_frac),
        "order_processing": float(processing_frac),
        "inventory_holding": float(inventory_frac),
        "effective_spread_mean": float(mean_eff * 2.0),
    }