Source code for wraquant.news.sentiment

"""Sentiment analysis and news-based signal generation.

Provides functions for scoring news sentiment, building sentiment time series,
and generating discrete trading signals from textual data.  Uses FMP as the
primary news data source and includes a built-in keyword-based sentiment
scorer that requires no NLP dependencies.  When ``textblob`` or ``vaderSentiment``
are installed, those engines can be used for higher-quality scoring.

The sentiment pipeline is:

1. **Fetch** -- Pull headlines/articles via ``FMPClient``.
2. **Score** -- Assign each headline a score in [-1, +1].
3. **Aggregate** -- Combine scores with recency weighting.
4. **Signal** -- Convert the aggregate into a discrete trading signal.

References:
    - Tetlock (2007), "Giving Content to Investor Sentiment"
    - Loughran & McDonald (2011), "When Is a Liability Not a Liability?"
    - Hutto & Gilbert (2014), "VADER: A Parsimonious Rule-based Model for
      Sentiment Analysis of Social Media Text"
"""

from __future__ import annotations

import math
import re
from typing import Any, Sequence

import numpy as np
import pandas as pd

from wraquant._lazy import is_available
from wraquant.core.decorators import requires_extra

# ---------------------------------------------------------------------------
# Built-in keyword lexicon (Loughran-McDonald inspired)
# ---------------------------------------------------------------------------

_POSITIVE_WORDS: frozenset[str] = frozenset(
    {
        "beat",
        "beats",
        "exceeds",
        "exceeded",
        "surge",
        "surges",
        "surged",
        "rally",
        "rallies",
        "rallied",
        "gain",
        "gains",
        "gained",
        "profit",
        "profitable",
        "profitability",
        "upgrade",
        "upgrades",
        "upgraded",
        "outperform",
        "outperforms",
        "outperformed",
        "bullish",
        "optimistic",
        "strong",
        "stronger",
        "strongest",
        "record",
        "high",
        "higher",
        "highest",
        "growth",
        "growing",
        "grew",
        "expand",
        "expands",
        "expanded",
        "expansion",
        "positive",
        "upside",
        "breakout",
        "breakthrough",
        "boom",
        "booming",
        "recover",
        "recovers",
        "recovered",
        "recovery",
        "rebound",
        "rebounds",
        "robust",
        "solid",
        "impressive",
        "innovation",
        "innovative",
        "opportunity",
        "opportunities",
        "favorable",
        "success",
        "successful",
        "dividend",
        "buyback",
        "repurchase",
        "win",
        "wins",
        "won",
        "approval",
        "approved",
        "approves",
        "launch",
        "launches",
        "launched",
        "partnership",
        "collaboration",
        "acquisition",
        "momentum",
        "accelerate",
        "accelerated",
        "accelerating",
        "soar",
        "soars",
        "soared",
        "boost",
        "boosts",
        "boosted",
        "exceed",
        "top",
        "tops",
        "topped",
    }
)

_NEGATIVE_WORDS: frozenset[str] = frozenset(
    {
        "miss",
        "misses",
        "missed",
        "decline",
        "declines",
        "declined",
        "drop",
        "drops",
        "dropped",
        "fall",
        "falls",
        "fell",
        "loss",
        "losses",
        "losing",
        "downgrade",
        "downgrades",
        "downgraded",
        "underperform",
        "underperforms",
        "underperformed",
        "bearish",
        "pessimistic",
        "weak",
        "weaker",
        "weakest",
        "low",
        "lower",
        "lowest",
        "risk",
        "risks",
        "risky",
        "negative",
        "downside",
        "crash",
        "crashes",
        "crashed",
        "selloff",
        "sell-off",
        "recession",
        "recessionary",
        "contraction",
        "shrink",
        "shrinks",
        "shrunk",
        "bankruptcy",
        "bankrupt",
        "default",
        "defaults",
        "defaulted",
        "fraud",
        "fraudulent",
        "scandal",
        "investigation",
        "lawsuit",
        "litigation",
        "fine",
        "fined",
        "penalty",
        "warning",
        "warns",
        "warned",
        "cut",
        "cuts",
        "layoff",
        "layoffs",
        "restructuring",
        "impairment",
        "writedown",
        "write-down",
        "volatility",
        "volatile",
        "uncertainty",
        "uncertain",
        "concern",
        "concerns",
        "worried",
        "worry",
        "fear",
        "fears",
        "plunge",
        "plunges",
        "plunged",
        "slump",
        "slumps",
        "slumped",
        "tumble",
        "tumbles",
        "tumbled",
        "deficit",
        "debt",
        "overvalued",
        "bubble",
        "inflation",
        "inflationary",
        "tariff",
        "tariffs",
        "sanctions",
        "shutdown",
        "delay",
        "delays",
        "delayed",
        "disappointing",
        "disappointed",
        "disappoint",
    }
)

_NEGATION_WORDS: frozenset[str] = frozenset(
    {
        "not",
        "no",
        "never",
        "neither",
        "nor",
        "hardly",
        "barely",
        "scarcely",
        "doesn't",
        "don't",
        "didn't",
        "isn't",
        "aren't",
        "wasn't",
        "weren't",
        "won't",
        "wouldn't",
        "couldn't",
        "shouldn't",
    }
)

_INTENSIFIER_WORDS: frozenset[str] = frozenset(
    {
        "very",
        "extremely",
        "significantly",
        "substantially",
        "dramatically",
        "sharply",
        "strongly",
        "massively",
        "hugely",
        "remarkably",
    }
)

# Tokenizer: maximal runs of lowercase ASCII letters, apostrophes and hyphens.
# It is applied to lower-cased text; keeping "'" and "-" inside a token lets
# lexicon entries such as "doesn't" and "sell-off" match as single words.
_WORD_RE = re.compile(r"[a-z'\-]+")


def _keyword_score(text: str) -> float:
    """Score a text using the built-in keyword lexicon.

    Uses Loughran-McDonald-inspired word lists with negation handling
    and intensity modifiers to produce a score in [-1, +1].

    Each positive word contributes +1 and each negative word -1.  An
    immediately preceding intensifier scales the contribution by 1.5 and an
    immediately preceding negation multiplies it by -0.75.  The running total
    is divided by the square root of the token count and clamped.

    Parameters:
        text: Raw text to score.

    Returns:
        Sentiment score in [-1.0, +1.0]; 0.0 when the text has no word tokens.
    """
    # Headlines frequently use a typographic apostrophe ("doesn’t").  Fold it
    # to ASCII so the contraction stays one token and matches the negation
    # list instead of splitting into "doesn" + "t".
    cleaned = text.lower().replace("\u2019", "'")

    # Strip quote/hyphen characters that merely wrap a token, so a quoted
    # word like 'surge' still hits the lexicon and bare punctuation runs
    # ("--", "'") are not counted as words.
    trimmed = (token.strip("'-") for token in _WORD_RE.findall(cleaned))
    words = [token for token in trimmed if token]
    if not words:
        return 0.0

    score = 0.0
    negated = False
    intensified = False

    for word in words:
        if word in _NEGATION_WORDS:
            negated = True
            continue
        if word in _INTENSIFIER_WORDS:
            intensified = True
            continue

        base = 0.0
        if word in _POSITIVE_WORDS:
            base = 1.0
        elif word in _NEGATIVE_WORDS:
            base = -1.0

        if base != 0.0:
            if intensified:
                base *= 1.5
            if negated:
                base *= -0.75  # Negation partially flips, not full reversal
            score += base

        # Modifiers only reach the very next ordinary word: any token that is
        # not itself a negation or intensifier clears them, whether or not it
        # carried sentiment.
        negated = False
        intensified = False

    # Normalize: divide by sqrt(word count) so longer texts don't dominate
    # but still benefit from repeated sentiment words
    normalized = score / math.sqrt(len(words))
    # Clamp to [-1, 1]
    return float(max(-1.0, min(1.0, normalized)))


# Shared VADER analyzer, created lazily on the first call to `_vader_score`.
# Building a SentimentIntensityAnalyzer loads VADER's lexicon data, so one
# instance is reused instead of constructing a new one for every headline.
_VADER_ANALYZER: Any = None


def _vader_score(text: str) -> float:
    """Score text using VADER sentiment analyzer.

    The analyzer is instantiated once per process and reused on later calls.

    Parameters:
        text: Raw text to score.

    Returns:
        VADER compound score in [-1.0, +1.0].

    Raises:
        ImportError: If ``vaderSentiment`` is not installed.
    """
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        # Imported lazily so the module stays importable without VADER.
        from vaderSentiment.vaderSentiment import (
            SentimentIntensityAnalyzer,  # type: ignore[import-untyped]
        )

        _VADER_ANALYZER = SentimentIntensityAnalyzer()

    scores = _VADER_ANALYZER.polarity_scores(text)
    return float(scores["compound"])


def _textblob_score(text: str) -> float:
    """Return TextBlob's polarity estimate for a piece of text.

    Parameters:
        text: Raw text to score.

    Returns:
        TextBlob polarity score in [-1.0, +1.0].
    """
    # Imported lazily so the module stays importable without TextBlob.
    from textblob import TextBlob  # type: ignore[import-untyped]

    return float(TextBlob(text).sentiment.polarity)


def _get_scorer(engine: str = "auto") -> tuple[str, Any]:
    """Pick the scoring callable that matches an engine name.

    Parameters:
        engine: One of ``"auto"``, ``"keyword"``, ``"vader"``,
            ``"textblob"``.  ``"auto"`` prefers VADER, then TextBlob, and
            finally the built-in keyword scorer, depending on which
            packages are importable.

    Returns:
        Tuple of (engine_name, scorer_function).

    Raises:
        ValueError: If the requested engine is not recognized or its
            backing package is not installed.
    """
    # (engine name, package to probe, scorer) in "auto" preference order.
    optional_engines = (
        ("vader", "vaderSentiment", _vader_score),
        ("textblob", "textblob", _textblob_score),
    )

    if engine == "auto":
        for name, package, scorer in optional_engines:
            if is_available(package):
                return name, scorer
        return "keyword", _keyword_score

    if engine == "keyword":
        return "keyword", _keyword_score

    for name, package, scorer in optional_engines:
        if engine == name:
            if is_available(package):
                return name, scorer
            raise ValueError(
                f"{package} is not installed.  Install it with: pip install {package}"
            )

    raise ValueError(
        f"Unknown sentiment engine: {engine!r}.  Use 'auto', 'keyword', 'vader', or 'textblob'."
    )


# ---------------------------------------------------------------------------
# Recency weighting
# ---------------------------------------------------------------------------


def _recency_weights(dates: pd.Series, half_life_days: float = 7.0) -> np.ndarray:
    """Compute exponential decay weights based on recency.

    Parameters:
        dates: Series of datetime values.
        half_life_days: Half-life for the exponential decay in days.

    Returns:
        Array of weights in (0, 1], most recent = 1.0.
    """
    if dates.empty:
        return np.array([])

    dates_dt = pd.to_datetime(dates, utc=True)
    most_recent = dates_dt.max()
    days_ago = (most_recent - dates_dt).dt.total_seconds() / 86400.0
    decay = np.log(2) / half_life_days
    weights = np.exp(-decay * days_ago.values)
    return weights.astype(float)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


@requires_extra("market-data")
def news_sentiment(
    symbol: str,
    limit: int = 50,
    *,
    engine: str = "auto",
    half_life_days: float = 7.0,
    include_press_releases: bool = True,
) -> dict[str, Any]:
    """Analyze sentiment of recent news for a stock.

    Fetches recent news headlines (and optionally press releases) from FMP,
    scores each headline using the specified sentiment engine, and computes
    aggregate statistics including recency-weighted sentiment and trend
    direction.

    The recency weighting uses exponential decay so that recent articles
    contribute more to the aggregate than older ones.  The trend compares
    the mean score of the older half of the articles with the newer half,
    after ordering them by publication date, to detect whether coverage is
    improving or deteriorating.  Articles without a date are left out of the
    trend; when no dates are available at all, the fetch order is used.

    Parameters:
        symbol: Ticker symbol (e.g., ``"AAPL"``).
        limit: Maximum number of news articles to fetch.  Higher values
            give a more robust sentiment estimate but include older news.
        engine: Sentiment scoring engine.  ``"auto"`` tries VADER, then
            TextBlob, then falls back to the built-in keyword scorer.
            Options: ``"auto"``, ``"keyword"``, ``"vader"``, ``"textblob"``.
        half_life_days: Half-life for recency weighting in days.  Default
            of 7 means a one-week-old article gets half the weight of
            today's article.
        include_press_releases: If True, also fetch press releases and
            include them in the analysis.  A failure to fetch them is
            logged at debug level and otherwise ignored.

    Returns:
        Dictionary containing:

        - **symbol** (*str*) -- Ticker symbol.
        - **engine** (*str*) -- Sentiment engine used.
        - **article_count** (*int*) -- Total number of articles scored.
        - **articles** (*list[dict]*) -- One dict per article, in fetch
          order, with keys ``title`` and ``sentiment`` plus ``date``,
          ``source`` and ``url`` when a matching column is present.
        - **aggregate** (*dict*) -- Aggregate statistics:

          - **mean** (*float*) -- Simple mean sentiment.
          - **weighted_mean** (*float*) -- Recency-weighted mean.
          - **median** (*float*) -- Median sentiment.
          - **std** (*float*) -- Standard deviation of scores.
          - **bullish_pct** (*float*) -- Fraction of positive articles.
          - **bearish_pct** (*float*) -- Fraction of negative articles.
          - **neutral_pct** (*float*) -- Fraction of neutral articles.

        - **trend** (*str*) -- ``"improving"``, ``"deteriorating"``, or
          ``"stable"`` based on older-half vs. newer-half comparison.
        - **trend_delta** (*float*) -- Newer-half mean minus older-half
          mean.
        - **news_volume** (*str*) -- ``"high"``, ``"medium"``, or ``"low"``
          based on article count relative to limit.

    Raises:
        ValueError: If the engine is unknown or unavailable, or if the
            fetched data has no recognizable title column.

    Example:
        >>> from wraquant.news.sentiment import news_sentiment
        >>> result = news_sentiment("AAPL", limit=30)  # doctest: +SKIP
        >>> print(f"Weighted sentiment: {result['aggregate']['weighted_mean']:.3f}")  # doctest: +SKIP
        >>> print(f"Trend: {result['trend']}")  # doctest: +SKIP

    Notes:
        Reference: Tetlock (2007). "Giving Content to Investor Sentiment."
        *The Journal of Finance*, 62(3), 1139-1168.

    See Also:
        sentiment_timeseries: Build a daily time series of sentiment.
        sentiment_signal: Convert sentiment to a trading signal.
    """
    import logging

    from wraquant.data.providers.fmp import FMPClient

    client = FMPClient()
    engine_name, scorer = _get_scorer(engine)

    # Fetch news data
    news_df = client.stock_news(symbol, limit=limit)
    frames = [news_df]
    if include_press_releases:
        try:
            pr_df = client.press_releases(symbol, limit=max(10, limit // 3))
        except Exception:  # noqa: BLE001
            # Best effort: press releases may not be available for all
            # symbols, so carry on with news only but leave a trace.
            logging.getLogger(__name__).debug(
                "Press releases unavailable for %s; using news only.",
                symbol,
                exc_info=True,
            )
        else:
            frames.append(pr_df)

    combined = pd.concat(frames, ignore_index=True) if len(frames) > 1 else news_df

    if combined.empty:
        return {
            "symbol": symbol,
            "engine": engine_name,
            "article_count": 0,
            "articles": [],
            "aggregate": {
                "mean": 0.0,
                "weighted_mean": 0.0,
                "median": 0.0,
                "std": 0.0,
                "bullish_pct": 0.0,
                "bearish_pct": 0.0,
                "neutral_pct": 0.0,
            },
            "trend": "stable",
            "trend_delta": 0.0,
            "news_volume": "low",
        }

    # Identify column names (FMP may vary)
    title_col = _resolve_col(combined, ["title", "headline", "text"])
    date_col = _resolve_col(combined, ["publishedDate", "date", "published_date"])
    source_col = _resolve_col(combined, ["site", "source", "publisher"])
    url_col = _resolve_col(combined, ["url", "link"])

    if not title_col:
        # Without headlines there is nothing to score; fail with a clear
        # message rather than an opaque KeyError on a None column name.
        msg = "News DataFrame missing required title column."
        raise ValueError(msg)

    # Score each article
    titles = combined[title_col].fillna("").astype(str)
    scores = np.array([scorer(t) for t in titles], dtype=float)

    # Build articles list; optional fields appear only when FMP supplied them.
    optional_cols = {"date": date_col, "source": source_col, "url": url_col}
    articles: list[dict[str, Any]] = []
    for pos, (title, score) in enumerate(zip(titles, scores)):
        article: dict[str, Any] = {"title": title, "sentiment": float(score)}
        for key, col in optional_cols.items():
            if col:
                article[key] = str(combined[col].iloc[pos])
        articles.append(article)

    # Aggregate statistics
    mean_score = float(np.mean(scores))
    median_score = float(np.median(scores))
    std_score = float(np.std(scores, ddof=1)) if len(scores) > 1 else 0.0

    # Recency-weighted mean (falls back to the plain mean without usable dates)
    weighted_mean = mean_score
    if date_col:
        weights = _recency_weights(combined[date_col], half_life_days)
        if len(weights) > 0 and weights.sum() > 0:
            weighted_mean = float(np.average(scores, weights=weights))

    bullish_pct = float(np.mean(scores > 0.05))
    bearish_pct = float(np.mean(scores < -0.05))
    # Computed directly rather than as 1 - bullish - bearish, which can
    # drift a hair below zero through floating-point rounding.
    neutral_pct = float(np.mean(np.abs(scores) <= 0.05))

    # Trend detection needs oldest-to-newest order.  The combined frame is
    # news followed by press releases, so its row order is not chronological.
    trend_scores = scores
    if date_col:
        published = pd.to_datetime(combined[date_col], utc=True).reset_index(drop=True)
        # Positional labels let the sorted index select from `scores` directly.
        dated = published.dropna().sort_values(kind="mergesort")
        if not dated.empty:
            trend_scores = scores[dated.index.to_numpy()]

    n = len(trend_scores)
    mid = n // 2
    if mid > 0:
        older_mean = float(np.mean(trend_scores[:mid]))
        newer_mean = float(np.mean(trend_scores[mid:]))
        trend_delta = newer_mean - older_mean
        if trend_delta > 0.1:
            trend = "improving"
        elif trend_delta < -0.1:
            trend = "deteriorating"
        else:
            trend = "stable"
    else:
        # A single article cannot show a direction.
        trend = "stable"
        trend_delta = 0.0

    # News volume assessment
    if len(combined) >= limit * 0.8:
        news_volume = "high"
    elif len(combined) >= limit * 0.3:
        news_volume = "medium"
    else:
        news_volume = "low"

    return {
        "symbol": symbol,
        "engine": engine_name,
        "article_count": len(combined),
        "articles": articles,
        "aggregate": {
            "mean": mean_score,
            "weighted_mean": weighted_mean,
            "median": median_score,
            "std": std_score,
            "bullish_pct": bullish_pct,
            "bearish_pct": bearish_pct,
            "neutral_pct": neutral_pct,
        },
        "trend": trend,
        "trend_delta": trend_delta,
        "news_volume": news_volume,
    }
def _neutral_sentiment_series(days: int, resample: str) -> pd.Series:
    """Build the all-zero fallback series covering the last *days* days."""
    idx = pd.date_range(
        end=pd.Timestamp.now(tz="UTC").normalize(),
        periods=days,
        freq="D",
        name="date",
    )
    flat = pd.Series(0.0, index=idx, name="sentiment")
    if resample != "D" and not flat.empty:
        # Honor the requested frequency; periods created by upsampling
        # have no observations and are filled with the neutral value.
        flat = flat.resample(resample).mean().fillna(0.0)
        flat.index.name = "date"
    return flat


@requires_extra("market-data")
def sentiment_timeseries(
    symbol: str,
    days: int = 90,
    *,
    engine: str = "auto",
    resample: str = "D",
) -> pd.Series:
    """Build a daily (or custom frequency) sentiment time series.

    Fetches up to ``days`` worth of news for a symbol, scores each article,
    and resamples into a regular time series by averaging sentiment within
    each period.  Missing periods are forward-filled (leading gaps are
    back-filled) so the series can be used directly alongside price data.

    When no news is available for the requested window, an all-zero
    (neutral) series spanning the last ``days`` days is returned instead.

    Parameters:
        symbol: Ticker symbol (e.g., ``"MSFT"``).
        days: Number of calendar days of history to request.  FMP may
            return fewer articles than this span covers.
        engine: Sentiment scoring engine (see ``news_sentiment``).
        resample: Pandas resample frequency string.  ``"D"`` for daily,
            ``"W"`` for weekly, ``"B"`` for business days.

    Returns:
        pd.Series with a UTC DatetimeIndex and sentiment scores averaged
        per period.  Index name is ``"date"``, series name is
        ``"sentiment"``.

    Raises:
        ValueError: If the engine is unknown or unavailable, or if the
            fetched data lacks a recognizable date or title column.

    Example:
        >>> from wraquant.news.sentiment import sentiment_timeseries
        >>> ts = sentiment_timeseries("TSLA", days=30)  # doctest: +SKIP
        >>> print(ts.tail())  # doctest: +SKIP

    See Also:
        news_sentiment: Detailed sentiment analysis for a single snapshot.
        sentiment_signal: Convert the time series to a signal.
    """
    from wraquant.data.providers.fmp import FMPClient

    client = FMPClient()
    _, scorer = _get_scorer(engine)

    # Request budget: about three articles per day, never fewer than 50.
    estimated_limit = max(50, days * 3)
    news_df = client.stock_news(symbol, limit=estimated_limit)

    if news_df.empty:
        return _neutral_sentiment_series(days, resample)

    date_col = _resolve_col(news_df, ["publishedDate", "date", "published_date"])
    title_col = _resolve_col(news_df, ["title", "headline", "text"])

    if not date_col or not title_col:
        msg = "News DataFrame missing required date or title columns."
        raise ValueError(msg)

    df = news_df[[date_col, title_col]].copy()
    df["date"] = pd.to_datetime(df[date_col], utc=True)
    df["sentiment"] = df[title_col].fillna("").astype(str).apply(scorer)

    # Filter to requested date range
    cutoff = pd.Timestamp.now(tz="UTC") - pd.Timedelta(days=days)
    df = df.loc[df["date"] >= cutoff]

    if df.empty:
        return _neutral_sentiment_series(days, resample)

    # Resample and fill
    df = df.set_index("date")
    resampled = df["sentiment"].resample(resample).mean()

    # Forward-fill gaps, then back-fill any leading NaNs
    resampled = resampled.ffill().bfill()
    resampled.index.name = "date"
    resampled.name = "sentiment"

    return resampled
@requires_extra("market-data")
def sentiment_signal(
    symbol: str,
    threshold: float = 0.3,
    *,
    engine: str = "auto",
    half_life_days: float = 7.0,
    limit: int = 50,
) -> str:
    """Generate a discrete sentiment-based trading signal for a stock.

    Fetches recent news, computes the recency-weighted aggregate sentiment,
    and classifies it as bullish, bearish, or neutral based on the threshold.

    The signal logic is:

    - ``weighted_mean > threshold`` => ``"bullish"``
    - ``weighted_mean < -threshold`` => ``"bearish"``
    - Otherwise => ``"neutral"``

    Parameters:
        symbol: Ticker symbol (e.g., ``"GOOG"``).
        threshold: Absolute threshold for signal classification.  Lower
            values produce more signals (more sensitive); higher values
            filter out weak sentiment.  Default of 0.3 is moderately
            conservative.
        engine: Sentiment scoring engine (see ``news_sentiment``).
        half_life_days: Half-life for recency weighting (see
            ``news_sentiment``).
        limit: Maximum number of news articles to fetch (see
            ``news_sentiment``).  Defaults to 50.

    Returns:
        One of ``"bullish"``, ``"bearish"``, or ``"neutral"``.

    Example:
        >>> from wraquant.news.sentiment import sentiment_signal
        >>> signal = sentiment_signal("NVDA", threshold=0.2)  # doctest: +SKIP
        >>> print(f"Sentiment signal: {signal}")  # doctest: +SKIP

    See Also:
        news_sentiment: Full sentiment analysis with article-level detail.
        sentiment_timeseries: Historical sentiment time series.
    """
    result = news_sentiment(
        symbol,
        limit=limit,
        engine=engine,
        half_life_days=half_life_days,
    )

    weighted_mean = result["aggregate"]["weighted_mean"]

    if weighted_mean > threshold:
        return "bullish"
    if weighted_mean < -threshold:
        return "bearish"
    return "neutral"
# ---------------------------------------------------------------------------
# Legacy API (kept for backward compatibility)
# ---------------------------------------------------------------------------
def sentiment_score(
    texts: str | Sequence[str],
    *,
    engine: str = "auto",
) -> dict[str, Any]:
    """Score one or more text passages on a numeric sentiment scale.

    This is the low-level scoring entry point: it applies the chosen engine
    to arbitrary text and does no data fetching.  For news-specific analysis
    with fetching and aggregation, use ``news_sentiment`` instead.

    Parameters:
        texts: A single text string or a sequence of text strings to score.
            A lone string is treated as a one-element batch.
        engine: Sentiment engine.  ``"auto"`` tries VADER, then TextBlob,
            then the built-in keyword scorer.  Options: ``"auto"``,
            ``"keyword"``, ``"vader"``, ``"textblob"``.

    Returns:
        Dictionary containing:

        - **scores** (*list[float]*) -- Sentiment score for each text, in
          the range [-1.0, 1.0].
        - **mean_score** (*float*) -- Mean sentiment across all texts
          (0.0 when no texts were given).
        - **engine** (*str*) -- Name of the engine used.

    Example:
        >>> result = sentiment_score("Stock rallied on strong earnings")
        >>> print(f"Score: {result['scores'][0]:.3f}")  # doctest: +SKIP
        >>> print(f"Engine: {result['engine']}")  # doctest: +SKIP

    See Also:
        news_sentiment: Full news sentiment pipeline with data fetching.
        sentiment_signal: Discrete signal from sentiment.
    """
    batch = [texts] if isinstance(texts, str) else texts
    engine_name, scorer = _get_scorer(engine)

    per_text = list(map(scorer, batch))
    average = float(np.mean(per_text)) if per_text else 0.0

    return {
        "scores": per_text,
        "mean_score": average,
        "engine": engine_name,
    }
def news_impact(
    returns: pd.Series,
    event_dates: list | pd.DatetimeIndex,
    window: int = 5,
) -> dict[str, Any]:
    """Measure the impact of news events on returns using event study.

    Thin wrapper that hands the work to
    ``wraquant.causal.treatment.event_study`` and surfaces its ``effect``
    attribute as the cumulative abnormal return (CAR).

    When to use:
        Use news impact analysis to quantify whether specific news events
        (earnings releases, FDA approvals, geopolitical shocks) have a
        statistically significant effect on returns.

    Parameters:
        returns: Return series with a DatetimeIndex.
        event_dates: List of event dates to study.
        window: Number of periods before and after each event to include
            in the analysis window.

    Returns:
        Dictionary containing:

        - **car** (*float*) -- The event study's ``effect`` as a float, or
          0.0 when the result has no ``effect`` attribute or it is None.
        - **event_results** -- Detailed event study output from
          ``wraquant.causal.treatment.event_study``.

    Example:
        >>> import pandas as pd, numpy as np
        >>> rng = np.random.default_rng(42)
        >>> dates = pd.bdate_range("2023-01-01", periods=252)
        >>> rets = pd.Series(rng.normal(0.0005, 0.01, 252), index=dates)
        >>> events = [dates[50], dates[150]]
        >>> result = news_impact(rets, events, window=5)  # doctest: +SKIP

    See Also:
        wraquant.causal.treatment.event_study: Underlying event study.
        earnings_surprise: Earnings-specific impact metric.
    """
    from wraquant.causal.treatment import event_study

    study = event_study(returns, event_dates, window=window)

    effect = getattr(study, "effect", None)
    car = 0.0 if effect is None else float(effect)

    return {
        "car": car,
        "event_results": study,
    }
def earnings_surprise(
    actual: float,
    estimate: float,
) -> float:
    """Compute the standardized earnings surprise.

    Earnings surprise is one of the most widely used signals in
    fundamental-driven quant strategies.  A positive surprise (actual
    exceeds estimate) typically triggers positive abnormal returns in the
    short term (post-earnings announcement drift, or PEAD).

    Mathematical formulation:
        surprise = (actual - estimate) / |estimate|

    When to use:
        Use earnings surprise as an input to event-driven strategies.
        Combine with ``news_impact`` to quantify the return effect.

    Parameters:
        actual: Actual reported earnings per share.
        estimate: Consensus analyst estimate of earnings per share.

    Returns:
        Standardized earnings surprise as a float.  Positive values
        indicate a beat; negative values indicate a miss.  When the
        estimate is effectively zero (``|estimate| < 1e-12``) the ratio is
        undefined and 0.0 is returned.

    Example:
        >>> round(earnings_surprise(actual=2.50, estimate=2.30), 4)
        0.087
        >>> round(earnings_surprise(actual=1.80, estimate=2.00), 4)
        -0.1

    See Also:
        news_impact: Measure the return impact of events.
        sentiment_score: Score textual sentiment.
    """
    scale = abs(estimate)
    # Guard the division: a (near-)zero consensus has no meaningful ratio.
    if scale < 1e-12:
        return 0.0
    return float((actual - estimate) / scale)
def sentiment_aggregate(
    scores: Sequence[float],
    method: str = "mean",
) -> float:
    """Aggregate multiple sentiment scores into a single composite.

    When to use:
        Use after collecting sentiment scores from multiple sources
        (multiple news articles, analyst reports, social media posts) to
        produce a single consensus sentiment for a given asset or time
        period.

    Parameters:
        scores: Sequence of sentiment scores (each in [-1, 1]).
        method: Aggregation method.  ``"mean"`` (default) computes the
            arithmetic mean.  ``"median"`` computes the median.

    Returns:
        Aggregated sentiment score as a float; 0.0 when *scores* is empty.

    Raises:
        ValueError: If *method* is not ``"mean"`` or ``"median"``.  The
            method is checked before the scores, so this holds for empty
            input as well.

    Example:
        >>> round(sentiment_aggregate([0.5, 0.3, -0.1, 0.7]), 4)
        0.35
        >>> round(sentiment_aggregate([0.5, 0.3, -0.1, 0.7], method="median"), 4)
        0.4

    See Also:
        sentiment_score: Generate individual scores.
        news_sentiment: Full sentiment pipeline.
    """
    # Validate first so a typo in `method` is reported even when there is
    # nothing to aggregate.
    if method not in ("mean", "median"):
        msg = f"Unknown aggregation method: {method!r}. Use 'mean' or 'median'."
        raise ValueError(msg)

    arr = np.asarray(scores, dtype=float)
    if arr.size == 0:
        return 0.0

    reducer = np.mean if method == "mean" else np.median
    return float(reducer(arr))
def news_signal(
    sentiment_series: pd.Series | Sequence[float],
    threshold: float = 0.5,
) -> pd.Series:
    """Convert a continuous sentiment series into discrete trading signals.

    Each score is classified against a symmetric band: +1 (bullish) when it
    is strictly above ``threshold``, -1 (bearish) when strictly below
    ``-threshold``, and 0 (neutral) otherwise.

    When to use:
        Use as the final step in a sentiment pipeline, after scoring and
        aggregation, to generate position signals for a trading strategy.

    Parameters:
        sentiment_series: Series or sequence of sentiment scores.  A
            Series keeps its index; any other sequence gets a 0..n-1 index.
        threshold: Absolute threshold for signal generation.  Scores above
            ``+threshold`` produce +1; below ``-threshold`` produce -1;
            values in between produce 0.

    Returns:
        pd.Series of integer signals (-1, 0, or +1).

    Example:
        >>> import pandas as pd
        >>> sent = pd.Series([0.8, 0.3, -0.6, 0.1, -0.9])
        >>> news_signal(sent, threshold=0.5).tolist()
        [1, 0, -1, 0, -1]

    See Also:
        sentiment_score: Generate sentiment scores.
        sentiment_aggregate: Combine multiple scores.
    """
    if isinstance(sentiment_series, pd.Series):
        values = sentiment_series.values.astype(float)
        labels = sentiment_series.index
    else:
        values = np.asarray(sentiment_series, dtype=float)
        labels = range(len(values))

    # Conditions are tried in order, so the bullish test takes precedence.
    coded = np.select(
        [values > threshold, values < -threshold],
        [1, -1],
        default=0,
    )
    return pd.Series(coded.astype(int), index=labels)
# --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _resolve_col( df: pd.DataFrame, candidates: list[str], ) -> str | None: """Find the first matching column name from a list of candidates. Parameters: df: DataFrame to search. candidates: Ordered list of possible column names. Returns: The first matching column name, or None if none match. """ for col in candidates: if col in df.columns: return col return None