# Source code for wraquant.io.export

"""Export and reporting utilities.

Functions for converting financial data into various output formats
suitable for reporting, serialization, and display.
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

import numpy as np
import pandas as pd

from wraquant.core._coerce import coerce_series

# Names exported by a star-import of this module; each one is a
# function defined below.
__all__ = [
    "to_tearsheet",
    "to_json",
    "to_dict",
    "format_table",
]



# [docs]
def to_tearsheet(
    returns: pd.Series,
    benchmark: pd.Series | None = None,
    output_path: str | Path | None = None,
) -> dict[str, Any]:
    """Generate a performance tearsheet from a return series.

    Computes headline performance and risk metrics and returns them as
    a serialisable dictionary.  Optionally writes the result to a JSON
    file for reporting or downstream consumption.

    Metrics computed: total return, annualized return, annualized
    volatility, Sharpe ratio (annualized return divided by annualized
    volatility, i.e. with a zero risk-free rate), maximum drawdown, and
    Calmar ratio.  When a benchmark is provided, also computes
    correlation and information ratio.  Annualization assumes 252
    periods per year.

    Undefined statistics are reported as ``0.0`` rather than ``NaN`` so
    that the result is always valid JSON: this covers ratios with a
    zero denominator, volatility for fewer than two observations,
    drawdown for an empty series, and correlation when either series is
    constant.  If the cumulative growth factor is zero or negative
    (the portfolio was wiped out), the annualized return is reported as
    ``-1.0``.

    Parameters:
        returns (pd.Series): Series of simple (non-cumulative) period
            returns.  Missing values are dropped before any metric is
            computed.
        benchmark (pd.Series | None): Optional benchmark return series
            for relative metrics.  When provided, the two series are
            aligned on their index with an inner join.
        output_path (str | Path | None): If provided, write the
            tearsheet dict to this JSON file (parent directories are
            created as needed).

    Returns:
        dict[str, Any]: Dictionary with keys ``total_return``,
            ``annualized_return``, ``annualized_volatility``,
            ``sharpe_ratio``, ``max_drawdown``, ``calmar_ratio``,
            ``n_periods``, and -- when a benchmark with more than one
            overlapping observation is supplied --
            ``benchmark_correlation`` and ``information_ratio``.

    Example:
        >>> import pandas as pd, numpy as np
        >>> returns = pd.Series(np.random.randn(252) * 0.01)
        >>> sheet = to_tearsheet(returns)
        >>> "sharpe_ratio" in sheet
        True

    See Also:
        to_json: Serialize any data to JSON.
        format_table: Pretty-print a DataFrame.
    """
    returns = coerce_series(returns, name="returns").dropna()
    n_periods = len(returns)

    # Assume 252 trading days per year for annualization
    trading_days = 252

    total_return = float((1 + returns).prod() - 1)

    n_years = n_periods / trading_days
    growth = 1 + total_return
    if n_years <= 0:
        annualized_return = 0.0
    elif growth <= 0:
        # A non-positive base raised to a fractional power is complex in
        # Python, which float() cannot convert; report a total loss.
        annualized_return = -1.0
    else:
        annualized_return = float(growth ** (1 / n_years) - 1)

    # Series.std() is NaN for fewer than two observations; NaN would slip
    # past the ``!= 0`` guards below and poison the ratios.
    annualized_vol = _zero_if_nan(returns.std() * np.sqrt(trading_days))

    sharpe = annualized_return / annualized_vol if annualized_vol != 0 else 0.0

    # Max drawdown: largest peak-to-trough decline of the cumulative curve.
    cumulative = (1 + returns).cumprod()
    running_max = cumulative.cummax()
    drawdown = (cumulative - running_max) / running_max
    # min() of an empty series is NaN.
    max_drawdown = _zero_if_nan(drawdown.min())

    calmar = annualized_return / abs(max_drawdown) if max_drawdown != 0 else 0.0

    result: dict[str, Any] = {
        "total_return": total_return,
        "annualized_return": annualized_return,
        "annualized_volatility": annualized_vol,
        "sharpe_ratio": sharpe,
        "max_drawdown": max_drawdown,
        "calmar_ratio": calmar,
        "n_periods": n_periods,
    }

    if benchmark is not None:
        benchmark = coerce_series(benchmark, name="benchmark").dropna()
        # Keep only the observations present in both series.
        aligned_returns, aligned_bench = returns.align(benchmark, join="inner")
        if len(aligned_returns) > 1:
            # Correlation is NaN when either aligned series is constant.
            result["benchmark_correlation"] = _zero_if_nan(
                aligned_returns.corr(aligned_bench)
            )
            excess = aligned_returns - aligned_bench
            tracking_error = _zero_if_nan(excess.std() * np.sqrt(trading_days))
            info_ratio = (
                float(excess.mean() * trading_days) / tracking_error
                if tracking_error != 0
                else 0.0
            )
            result["information_ratio"] = info_ratio

    if output_path is not None:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(
            json.dumps(result, indent=2, default=str), encoding="utf-8"
        )

    return result


def _zero_if_nan(value: Any) -> float:
    """Return *value* as a float, substituting 0.0 when it is NaN."""
    number = float(value)
    return 0.0 if np.isnan(number) else number




# [docs]
def to_json(
    data: pd.DataFrame | pd.Series | dict[str, Any],
    path: str | Path | None = None,
    orient: str = "records",
) -> str | None:
    """Export data to JSON format.

    Handles DataFrames, Series, and plain dictionaries.  When a file
    path is provided, the JSON is written to disk; otherwise the JSON
    string is returned for further use (e.g., sending via an API).

    Parameters:
        data (pd.DataFrame | pd.Series | dict): Data to serialize.
            DataFrames and Series use the pandas JSON serializer (ISO
            date format); anything else goes through the stdlib
            ``json`` module.  In that path NumPy integer, floating and
            boolean scalars and numeric/boolean arrays are emitted as
            native JSON numbers, booleans and lists; any other
            non-serializable value falls back to ``str(value)``.
        path (str | Path | None): If provided, write the JSON string
            to this file (creating parent directories as needed) and
            return ``None``.  Otherwise, return the JSON string.
        orient (str): Orientation for :meth:`pandas.DataFrame.to_json`
            (e.g., ``'records'``, ``'index'``, ``'columns'``).  Ignored
            for non-pandas input.

    Returns:
        str | None: JSON string when *path* is ``None``; otherwise
            ``None``.

    Example:
        >>> json_str = to_json({"sharpe": 1.2, "max_dd": -0.15})
        >>> isinstance(json_str, str)
        True

    See Also:
        to_dict: Convert to a nested dictionary.
        to_tearsheet: Generate a full performance report.
    """
    if isinstance(data, (pd.DataFrame, pd.Series)):
        json_str = data.to_json(orient=orient, date_format="iso", indent=2)
    else:
        json_str = json.dumps(data, indent=2, default=_json_default)

    if path is None:
        return json_str

    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json_str, encoding="utf-8")
    return None


def _json_default(obj: Any) -> Any:
    """Convert values the stdlib encoder rejects into JSON-friendly ones.

    A blanket ``default=str`` would turn ``np.int64(5)`` into the string
    ``"5"``; numeric NumPy values are mapped to their Python equivalents
    instead, and only genuinely unknown objects are stringified.
    """
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    # Restrict to bool/int/uint/float arrays: tolist() on datetime-like
    # arrays can yield raw integers, where str() is the safer fallback.
    if isinstance(obj, np.ndarray) and obj.dtype.kind in "biuf":
        return obj.tolist()
    return str(obj)




# [docs]
def to_dict(
    data: pd.DataFrame | pd.Series,
) -> dict[str, Any]:
    """Turn a pandas object into a plain (possibly nested) dictionary.

    A Series becomes ``{index: value, ...}``; a DataFrame becomes
    ``{column: {index: value, ...}, ...}``.  Handy for serialization,
    API payloads, or handing data to code that does not use pandas.

    Parameters:
        data (pd.DataFrame | pd.Series): The object to convert.

    Returns:
        dict[str, Any]: Dictionary view of *data* as produced by its
            own ``to_dict()`` method with default arguments.

    Example:
        >>> import pandas as pd
        >>> s = pd.Series([1, 2, 3], index=["a", "b", "c"])
        >>> to_dict(s)
        {'a': 1, 'b': 2, 'c': 3}

    See Also:
        to_json: Serialize to JSON string.
    """
    # Series and DataFrame both expose ``to_dict``; the default call
    # already yields the layouts described above, so no type dispatch
    # is needed.
    return data.to_dict()




# [docs]
def format_table(
    data: pd.DataFrame,
    precision: int = 4,
    pct_columns: list[str] | None = None,
) -> str:
    """Render a DataFrame as a human-readable text table.

    Works on a copy, so *data* itself is left untouched.  Columns named
    in *pct_columns* are shown as percentages; every other column with
    a numeric dtype is shown with a fixed number of decimals.  Missing
    values in formatted columns become empty strings, and non-numeric
    columns pass through unchanged.

    Parameters:
        data (pd.DataFrame): DataFrame to format.
        precision (int): Number of decimal places used for both plain
            numeric and percentage columns (default 4).
        pct_columns (list[str] | None): Column names to display as
            percentages (values are multiplied by 100 and suffixed
            with ``%``).  Names not present in *data* are ignored.

    Returns:
        str: The formatted table, as produced by
            :meth:`pandas.DataFrame.to_string`.

    Example:
        >>> import pandas as pd
        >>> df = pd.DataFrame({"return": [0.05], "vol": [0.15]})
        >>> print(format_table(df, pct_columns=["return", "vol"]))
        ...
    """

    def _as_percent(value: Any) -> str:
        # Scale to percent, fix the decimals, blank out missing cells.
        if pd.notna(value):
            return f"{value * 100:.{precision}f}%"
        return ""

    def _as_fixed(value: Any) -> str:
        # Fixed-decimal rendering, blank for missing cells.
        if pd.notna(value):
            return f"{value:.{precision}f}"
        return ""

    table = data.copy()

    # Pass 1: percentage columns, in the order the caller listed them.
    for name in pct_columns if pct_columns is not None else []:
        if name in table.columns:
            table[name] = table[name].apply(_as_percent)

    # Pass 2: every remaining numeric column gets fixed-decimal output.
    already_handled = pct_columns or []
    for name in table.columns:
        if name not in already_handled and pd.api.types.is_numeric_dtype(
            table[name]
        ):
            table[name] = table[name].apply(_as_fixed)

    return table.to_string()