Source code for wraquant.data.cache
"""Caching infrastructure for data fetches.
Provides both in-memory TTL cache and optional disk caching via diskcache.
"""
from __future__ import annotations
import hashlib
import time
from pathlib import Path
from typing import Any
import pandas as pd
from wraquant.core.config import get_config
from wraquant.core.logging import get_logger
# Module-level logger, named after this module; used below to report that
# disk caching is unavailable.
log = get_logger(__name__)
[docs]
class MemoryCache:
    """In-process key/value cache whose entries expire after a TTL.

    Each entry is stored with the ``time.monotonic()`` reading taken when it
    was written. Expiry is evaluated on read, and an entry found to be stale
    is removed at that point.
    """

    def __init__(self, ttl: int | None = None) -> None:
        """Create an empty cache.

        Args:
            ttl: Entry lifetime in seconds. ``None`` defers to the
                configured ``cache_ttl_seconds`` each time ``ttl`` is read.
        """
        self._ttl = ttl
        self._store: dict[str, tuple[float, Any]] = {}

    @property
    def ttl(self) -> int:
        """Effective lifetime in seconds: the explicit value, else the config's."""
        return get_config().cache_ttl_seconds if self._ttl is None else self._ttl

    def _make_key(self, *args: Any, **kwargs: Any) -> str:
        """Return a SHA-256 hex digest derived from the call arguments.

        Keyword arguments are sorted by name first, so their order at the
        call site does not affect the resulting key.
        """
        ordered_kwargs = sorted(kwargs.items())
        payload = f"{args}:{ordered_kwargs}"
        return hashlib.sha256(payload.encode()).hexdigest()

    def get(self, key: str) -> Any | None:
        """Return the value stored under ``key``, or ``None`` if absent or expired."""
        try:
            stored_at, value = self._store[key]
        except KeyError:
            return None
        age = time.monotonic() - stored_at
        if age < self.ttl:
            return value
        # Stale entry: drop it so it does not linger in the store.
        del self._store[key]
        return None

    def set(self, key: str, value: Any) -> None:
        """Store ``value`` under ``key``, stamped with the current monotonic time."""
        stamped = (time.monotonic(), value)
        self._store[key] = stamped

    def clear(self) -> None:
        """Remove every entry from the cache."""
        self._store.clear()
[docs]
class DiskCache:
    """Disk-based cache for persisting fetched data across sessions.

    The backing ``diskcache.Cache`` is opened lazily, on the first call to
    ``get``, ``set`` or ``clear``. When caching is disabled in the
    configuration, or ``diskcache`` is not installed, every operation is a
    no-op and ``get`` returns ``None``.
    """

    def __init__(self, cache_dir: Path | None = None) -> None:
        """Create the cache handle without touching the disk.

        Args:
            cache_dir: Directory for the cache files. When ``None``, the
                configured ``cache_dir`` is used at initialisation time.
        """
        self._cache_dir = cache_dir
        self._disk_cache: Any = None
        self._initialized = False

    def _init(self) -> None:
        """Open the backing store on first use; later calls return at once."""
        if self._initialized:
            return
        # Set the flag before attempting anything: initialisation is tried
        # exactly once, so a disabled or unavailable cache stays a no-op.
        self._initialized = True
        config = get_config()
        if not config.cache_enabled:
            return
        # Keep the try body down to the one statement expected to fail.
        try:
            import diskcache
        except ImportError:
            log.debug("diskcache not installed, disk caching disabled")
            return
        # Path(...) leaves a Path unchanged and lets a plain string work too.
        cache_dir = Path(self._cache_dir or config.cache_dir)
        cache_dir.mkdir(parents=True, exist_ok=True)
        self._disk_cache = diskcache.Cache(str(cache_dir))

    def get(self, key: str) -> pd.Series | pd.DataFrame | None:
        """Retrieve cached data from disk.

        Returns:
            Whatever the backing store returns for ``key``, or ``None`` when
            disk caching is unavailable.
        """
        self._init()
        if self._disk_cache is None:
            return None
        return self._disk_cache.get(key)

    def set(
        self, key: str, value: pd.Series | pd.DataFrame, ttl: int | None = None
    ) -> None:
        """Store data to disk cache.

        Args:
            key: Cache key.
            value: Data to persist.
            ttl: Lifetime in seconds, passed to the backing store as
                ``expire``. ``None`` selects the configured
                ``cache_ttl_seconds``; an explicit ``0`` is passed through
                as given rather than replaced by the default.
        """
        self._init()
        if self._disk_cache is None:
            return
        # Compare against None, not truthiness, so ttl=0 is not mistaken for
        # "no ttl given" (matches how MemoryCache resolves its ttl).
        expire = ttl if ttl is not None else get_config().cache_ttl_seconds
        self._disk_cache.set(key, value, expire=expire)

    def clear(self) -> None:
        """Clear the disk cache; does nothing when disk caching is unavailable."""
        self._init()
        if self._disk_cache is not None:
            self._disk_cache.clear()
# Shared instances, created at import time with no explicit settings:
# memory_cache takes its TTL from get_config() whenever the TTL is read, and
# disk_cache resolves its directory from get_config() when it is first used
# (its backing store is not opened until then).
memory_cache = MemoryCache()
disk_cache = DiskCache()