Source code for wraquant.math.information

"""Information-theoretic measures for financial analysis."""

from __future__ import annotations

from typing import Callable

import numpy as np
from numpy.typing import ArrayLike

from wraquant.core._coerce import coerce_array

__all__ = [
    "fisher_information",
    "mutual_information",
    "transfer_entropy",
    "entropy",
    "kl_divergence",
    "conditional_entropy",
]


[docs] def fisher_information( log_likelihood_fn: Callable[..., float], params: ArrayLike, dx: float = 1e-5, ) -> np.ndarray: """Numerical Fisher information matrix via second derivatives. Computes the negative of the Hessian of *log_likelihood_fn* evaluated at *params* using central finite differences. Parameters ---------- log_likelihood_fn : callable Function ``f(params) -> float`` returning the log-likelihood. params : array_like Parameter vector at which to evaluate the information matrix. dx : float, optional Finite-difference step size (default 1e-5). Returns ------- np.ndarray Fisher information matrix of shape ``(len(params), len(params))``. Larger diagonal entries indicate parameters that are more precisely estimable from the data. Example ------- >>> import numpy as np >>> from wraquant.math.information import fisher_information >>> # Log-likelihood of Normal(mu, sigma=1): -0.5 * sum((x - mu)^2) >>> data = np.array([1.0, 2.0, 3.0]) >>> ll_fn = lambda p: -0.5 * np.sum((data - p[0])**2) >>> fim = fisher_information(ll_fn, np.array([2.0])) >>> fim.shape (1, 1) >>> fim[0, 0] > 0 # positive definite True See Also -------- wraquant.math.numerical.finite_difference_hessian : General Hessian computation. """ params = coerce_array(params, name="params") n = len(params) fim = np.empty((n, n), dtype=float) for i in range(n): for j in range(i, n): # Central difference for second partial derivative p_pp = params.copy() p_pm = params.copy() p_mp = params.copy() p_mm = params.copy() p_pp[i] += dx p_pp[j] += dx p_pm[i] += dx p_pm[j] -= dx p_mp[i] -= dx p_mp[j] += dx p_mm[i] -= dx p_mm[j] -= dx d2 = ( log_likelihood_fn(p_pp) - log_likelihood_fn(p_pm) - log_likelihood_fn(p_mp) + log_likelihood_fn(p_mm) ) / (4.0 * dx * dx) # Fisher information = negative Hessian of log-likelihood fim[i, j] = -d2 fim[j, i] = -d2 return fim
[docs] def entropy( data: ArrayLike, bins: int = 20, method: str = "histogram", ) -> float: """Shannon entropy of a data series. Parameters ---------- data : array_like Input data (1-D). bins : int, optional Number of histogram bins (default 20). method : {'histogram'}, optional Discretisation method (default ``'histogram'``). Returns ------- float Shannon entropy in nats. Higher values indicate more dispersed (uncertain) distributions; lower values indicate concentrated distributions. Raises ------ ValueError If *method* is not recognised. Example ------- >>> import numpy as np >>> from wraquant.math.information import entropy >>> uniform = np.random.uniform(size=1000) >>> peaked = np.random.normal(0, 0.01, size=1000) >>> entropy(uniform) > entropy(peaked) True See Also -------- conditional_entropy : Entropy of X given Y. mutual_information : Shared information between two variables. wraquant.math.spectral.spectral_entropy : Entropy of the power spectrum. """ if method != "histogram": raise ValueError(f"Unknown method {method!r}; only 'histogram' is supported.") data = coerce_array(data, name="data") counts, _ = np.histogram(data, bins=bins) probs = counts / counts.sum() probs = probs[probs > 0] return float(-np.sum(probs * np.log(probs)))
[docs] def mutual_information( x: ArrayLike, y: ArrayLike, bins: int = 20, ) -> float: """Mutual information between two series (discretised). .. math:: I(X; Y) = H(X) + H(Y) - H(X, Y) Parameters ---------- x : array_like First data series. y : array_like Second data series. bins : int, optional Number of histogram bins per dimension (default 20). Returns ------- float Mutual information in nats (>= 0). Zero indicates independence; higher values indicate stronger dependence (linear or nonlinear). Example ------- >>> import numpy as np >>> from wraquant.math.information import mutual_information >>> rng = np.random.default_rng(42) >>> x = rng.standard_normal(1000) >>> y = x + rng.standard_normal(1000) * 0.1 # highly dependent >>> z = rng.standard_normal(1000) # independent >>> mutual_information(x, y) > mutual_information(x, z) True See Also -------- transfer_entropy : Directed (causal) information flow. conditional_entropy : H(X | Y) = H(X) - I(X; Y). """ x = coerce_array(x, name="x") y = coerce_array(y, name="y") # Joint histogram joint, _, _ = np.histogram2d(x, y, bins=bins) joint_prob = joint / joint.sum() # Marginals px = joint_prob.sum(axis=1) py = joint_prob.sum(axis=0) # MI = sum p(x,y) * log(p(x,y) / (p(x)*p(y))) mi = 0.0 for i in range(len(px)): for j in range(len(py)): if joint_prob[i, j] > 0 and px[i] > 0 and py[j] > 0: mi += joint_prob[i, j] * np.log(joint_prob[i, j] / (px[i] * py[j])) return float(max(mi, 0.0))
[docs] def transfer_entropy( source: ArrayLike, target: ArrayLike, lag: int = 1, bins: int = 10, ) -> float: r"""Transfer entropy from *source* to *target*. Measures the directional information flow from *source* to *target* beyond what *target*'s own past explains. .. math:: TE_{X \\to Y} = H(Y_t | Y_{t-k}) - H(Y_t | Y_{t-k}, X_{t-k}) Parameters ---------- source : array_like Source time series. target : array_like Target time series. lag : int, optional Lag order (default 1). bins : int, optional Number of histogram bins for discretisation (default 10). Returns ------- float Transfer entropy in nats (>= 0). Higher values indicate stronger directional information flow from *source* to *target*. Example ------- >>> import numpy as np >>> from wraquant.math.information import transfer_entropy >>> rng = np.random.default_rng(42) >>> x = rng.standard_normal(500) >>> y = np.concatenate([[0], x[:-1]]) + rng.standard_normal(500) * 0.1 >>> te_x_to_y = transfer_entropy(x, y, lag=1) >>> te_y_to_x = transfer_entropy(y, x, lag=1) >>> te_x_to_y > te_y_to_x # x drives y, not vice versa True Notes ----- Reference: Schreiber, T. (2000). "Measuring Information Transfer." *Physical Review Letters*, 85(2), 461-464. See Also -------- mutual_information : Symmetric (undirected) dependence measure. wraquant.math.network.granger_network : Linear Granger causality. """ source = coerce_array(source, name="source") target = coerce_array(target, name="target") n = min(len(source), len(target)) # Align: target_future, target_past, source_past target_future = target[lag:n] target_past = target[: n - lag] source_past = source[: n - lag] # Discretise def _digitize(arr: np.ndarray) -> np.ndarray: edges = np.linspace(arr.min() - 1e-12, arr.max() + 1e-12, bins + 1) return np.digitize(arr, edges[1:-1]) tf = _digitize(target_future) tp = _digitize(target_past) sp = _digitize(source_past) # H(target_future | target_past) - H(target_future | target_past, source_past) # = H(tf, tp) - H(tp) - H(tf, tp, sp) + H(tp, sp) def _h(*arrays: np.ndarray) -> float: """Joint entropy of integer-labelled arrays.""" combined = np.column_stack(arrays) _, counts = np.unique(combined, axis=0, return_counts=True) probs = counts / counts.sum() return float(-np.sum(probs * np.log(probs))) te = _h(tf, tp) - _h(tp) - _h(tf, tp, sp) + _h(tp, sp) return float(max(te, 0.0))
[docs] def kl_divergence( p: ArrayLike, q: ArrayLike, bins: int = 20, ) -> float: """KL divergence D_KL(P || Q) estimated from samples. Parameters ---------- p : array_like Samples from distribution P. q : array_like Samples from distribution Q. bins : int, optional Number of histogram bins (default 20). Returns ------- float KL divergence in nats (>= 0). Zero when P and Q are identical; larger values indicate greater distributional difference. Note: KL divergence is asymmetric -- D_KL(P||Q) != D_KL(Q||P). Example ------- >>> import numpy as np >>> from wraquant.math.information import kl_divergence >>> rng = np.random.default_rng(42) >>> p = rng.normal(0, 1, size=5000) >>> q = rng.normal(0.5, 1, size=5000) >>> kl_divergence(p, q) > 0 True See Also -------- entropy : Shannon entropy of a single distribution. mutual_information : Symmetric dependence measure. """ p_arr = coerce_array(p, name="p") q_arr = coerce_array(q, name="q") # Shared bin edges covering both distributions lo = min(p_arr.min(), q_arr.min()) hi = max(p_arr.max(), q_arr.max()) edges = np.linspace(lo, hi, bins + 1) p_counts, _ = np.histogram(p_arr, bins=edges) q_counts, _ = np.histogram(q_arr, bins=edges) # Convert to probabilities, add small epsilon for numerical stability eps = 1e-12 p_prob = p_counts / p_counts.sum() + eps q_prob = q_counts / q_counts.sum() + eps # Re-normalise after epsilon adjustment p_prob = p_prob / p_prob.sum() q_prob = q_prob / q_prob.sum() kl = float(np.sum(p_prob * np.log(p_prob / q_prob))) return max(kl, 0.0)
[docs] def conditional_entropy( x: ArrayLike, y: ArrayLike, bins: int = 20, ) -> float: """Conditional entropy H(X | Y). .. math:: H(X | Y) = H(X, Y) - H(Y) Parameters ---------- x : array_like First data series. y : array_like Second data series (the conditioning variable). bins : int, optional Number of histogram bins per dimension (default 20). Returns ------- float Conditional entropy in nats. Lower values mean *Y* is more informative about *X*. When H(X|Y) = 0, *X* is fully determined by *Y*. Example ------- >>> import numpy as np >>> from wraquant.math.information import conditional_entropy >>> rng = np.random.default_rng(42) >>> x = rng.standard_normal(1000) >>> y = x + rng.standard_normal(1000) * 0.1 # y almost determines x >>> h_x_given_y = conditional_entropy(x, y) >>> h_x_given_y < 1.0 # low because y is informative about x True See Also -------- entropy : Unconditional Shannon entropy H(X). mutual_information : I(X;Y) = H(X) - H(X|Y). """ x = coerce_array(x, name="x") y = coerce_array(y, name="y") # Joint entropy H(X, Y) joint, _, _ = np.histogram2d(x, y, bins=bins) joint_prob = joint / joint.sum() joint_prob = joint_prob[joint_prob > 0] h_xy = float(-np.sum(joint_prob * np.log(joint_prob))) # Marginal entropy H(Y) h_y = entropy(y, bins=bins) return float(max(h_xy - h_y, 0.0))