"""Calm Filters — the canonical preference model and pure matching engine.

Everything (localStorage today, query params on the API, a user_preferences
row later) speaks this one shape, so the surfaces never drift. The functions
here are deliberately pure and side-effect-free so they are easy to test and
reuse from both the API and the CLI.

The humane surface ("Not today" / "Less like this" / "Always hide this") maps
onto this machinery: a pause is a topic/flavor muted *until* a timestamp; a
mute is a standing exclusion; avoid-terms drop anything mentioning a phrase
the reader would rather not see.
"""

from __future__ import annotations

import json
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone

# Split on any run of non-alphanumerics so matching is punctuation- and
# case-insensitive, and anchored to whole words/phrases (no substring surprises:
# "pan" must not match "pandemic", and "stock market" matches as a phrase).
#
# `\W` on a str pattern is Unicode-aware, so letters and digits of every script
# count as word characters. An ASCII-only class would turn "война" into nothing
# (the term would silently never match) and would let "caf" match "café".
# `_` is a word character for `\W`, so it is listed explicitly to stay a
# separator.
_NONWORD = re.compile(r"[\W_]+")


def _normalize(text: str) -> str:
    """Case-fold, collapse non-alphanumeric runs to single spaces, pad with spaces.

    The surrounding spaces let callers test whole-word containment with a plain
    substring check (`" pan " in haystack`).
    """
    collapsed = _NONWORD.sub(" ", text.casefold()).strip()
    return f" {collapsed} "


def text_matches_avoid_terms(text: str | None, terms: list[str]) -> bool:
    """Return True if *text* contains any avoid term as a whole word or phrase.

    Matching ignores case and punctuation. An empty/None text, an empty term
    list, and terms that normalize to nothing (e.g. "!!!") never match.
    """
    if not text or not terms:
        return False
    haystack = _normalize(text)
    for term in terms:
        needle = _normalize(term)  # already space-padded on both sides
        # A term made only of separators normalizes to bare padding; skip it so
        # it cannot match the padding of every haystack.
        if needle.strip() and needle in haystack:
            return True
    return False


@dataclass
class Pause:
    """A temporary mute of one topic or flavor, in force until `until`."""

    kind: str  # "topic" or "flavor"
    value: str
    until: str  # ISO 8601 UTC timestamp

    def active(self, now: datetime) -> bool:
        """Return True while `until` is strictly later than *now*.

        An unparseable or non-string `until` makes the pause inactive rather
        than raising.
        """
        try:
            # A trailing "Z" is rewritten to an explicit offset before parsing.
            until = datetime.fromisoformat(self.until.replace("Z", "+00:00"))
        except (ValueError, AttributeError, TypeError):
            return False
        # Defensive: never crash on an aware-vs-naive comparison. Treat a naive
        # `now` (and a naive `until`) as UTC.
        if now.tzinfo is None:
            now = now.replace(tzinfo=timezone.utc)
        if until.tzinfo is None:
            until = until.replace(tzinfo=timezone.utc)
        return until > now


def _opt_int(value: object) -> int | None:
    """Coerce *value* to int, or None when it is missing or not convertible."""
    if value is None:
        return None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: json.loads accepts Infinity / 1e999 as float('inf'),
        # and int(inf) overflows instead of raising ValueError.
        return None


def _str_list(value: object) -> list[str]:
    """Return the string items of *value* if it is a list, else an empty list."""
    if not isinstance(value, list):
        return []
    return [str(item) for item in value if isinstance(item, str)]


def _parse_pauses(raw: object) -> list[Pause]:
    """Build Pause objects from a decoded `pauses` value, skipping bad entries.

    Be forgiving: a malformed pause is skipped, never raised — a bad
    localStorage/API blob must not break the feed. Anything that is not a
    list/tuple of dicts with string `kind`, `value` and `until` is dropped, so
    a non-iterable `pauses` cannot raise here and an unhashable `value` cannot
    later blow up the muted-set construction.
    """
    if not isinstance(raw, (list, tuple)):
        return []
    pauses: list[Pause] = []
    for entry in raw:
        if not isinstance(entry, dict):
            continue
        kind = entry.get("kind")
        value = entry.get("value")
        until = entry.get("until")
        if isinstance(kind, str) and isinstance(value, str) and isinstance(until, str):
            pauses.append(Pause(kind=kind, value=value, until=until))
    return pauses


@dataclass
class FilterPrefs:
    """The reader's filter preferences; every field defaults to "no filter"."""

    include_topics: list[str] = field(default_factory=list)
    include_flavors: list[str] = field(default_factory=list)
    mute_topics: list[str] = field(default_factory=list)
    mute_flavors: list[str] = field(default_factory=list)
    avoid_terms: list[str] = field(default_factory=list)
    pauses: list[Pause] = field(default_factory=list)
    max_cortisol: int | None = None
    max_ragebait: int | None = None

    @classmethod
    def from_dict(cls, data: dict | None) -> "FilterPrefs":
        """Build prefs from a decoded dict, tolerating any malformed content.

        A non-dict *data* yields empty prefs. List fields keep only their
        string items, score caps that cannot be converted to int become None,
        and malformed pauses are skipped.
        """
        if not isinstance(data, dict):
            return cls()
        return cls(
            include_topics=_str_list(data.get("include_topics")),
            include_flavors=_str_list(data.get("include_flavors")),
            mute_topics=_str_list(data.get("mute_topics")),
            mute_flavors=_str_list(data.get("mute_flavors")),
            avoid_terms=_str_list(data.get("avoid_terms")),
            pauses=_parse_pauses(data.get("pauses")),
            max_cortisol=_opt_int(data.get("max_cortisol")),
            max_ragebait=_opt_int(data.get("max_ragebait")),
        )

    def _muted(self, kind: str, standing: list[str], now: datetime) -> set[str]:
        """Union of *standing* mutes and values of active pauses of *kind*."""
        paused = {p.value for p in self.pauses if p.kind == kind and p.active(now)}
        return set(standing) | paused

    def muted_topics(self, now: datetime) -> set[str]:
        """Standing mutes plus any topic currently paused."""
        return self._muted("topic", self.mute_topics, now)

    def muted_flavors(self, now: datetime) -> set[str]:
        """Standing mutes plus any flavor currently paused."""
        return self._muted("flavor", self.mute_flavors, now)

    def is_empty(self) -> bool:
        """Return True when no field is set, i.e. nothing would be filtered.

        Pauses count as "set" even when they have expired.
        """
        has_lists = any(
            (
                self.include_topics,
                self.include_flavors,
                self.mute_topics,
                self.mute_flavors,
                self.avoid_terms,
                self.pauses,
            )
        )
        has_caps = self.max_cortisol is not None or self.max_ragebait is not None
        return not (has_lists or has_caps)


def prefs_from_json(raw: str | None) -> FilterPrefs:
    """Parse a JSON prefs string (from a query param) into FilterPrefs.

    Never raises on bad input — a malformed blob yields empty prefs so the
    feed keeps working.
    """
    if not raw:
        return FilterPrefs()
    try:
        data = json.loads(raw)
    except (ValueError, TypeError, RecursionError):
        # RecursionError: pathologically nested input ("[[[[...") exhausts the
        # decoder's recursion budget and is not reported as a ValueError.
        return FilterPrefs()
    return FilterPrefs.from_dict(data)


def _passes(
    article: dict,
    prefs: FilterPrefs,
    muted_topics: set[str],
    muted_flavors: set[str],
) -> bool:
    """Apply every preference check to one article using precomputed mutes."""
    topic = article.get("topic")
    flavor = article.get("flavor")

    # Include lists are allow-lists: when set, anything outside them is dropped.
    if prefs.include_topics and topic not in prefs.include_topics:
        return False
    if prefs.include_flavors and flavor not in prefs.include_flavors:
        return False

    if topic in muted_topics or flavor in muted_flavors:
        return False

    # A missing/None score is treated as 0, so it always passes a cap.
    if prefs.max_cortisol is not None:
        if (article.get("cortisol_score") or 0) > prefs.max_cortisol:
            return False
    if prefs.max_ragebait is not None:
        if (article.get("ragebait_score") or 0) > prefs.max_ragebait:
            return False

    blob = f"{article.get('title') or ''} {article.get('description') or ''}"
    return not text_matches_avoid_terms(blob, prefs.avoid_terms)


def allows(article: dict, prefs: FilterPrefs, now: datetime) -> bool:
    """True if an article (a feed/brief row dict) survives the preferences.

    Reads the `topic`, `flavor`, `cortisol_score`, `ragebait_score`, `title`
    and `description` keys; all of them may be absent.
    """
    return _passes(article, prefs, prefs.muted_topics(now), prefs.muted_flavors(now))


def filter_articles(articles: list[dict], prefs: FilterPrefs, now: datetime) -> list[dict]:
    """Apply preferences to a list of article rows, preserving order.

    With empty prefs the input list itself is returned (not a copy).
    """
    if prefs.is_empty():
        return articles
    # The muted sets depend only on prefs and `now`, so build them once rather
    # than re-parsing every pause timestamp for each article.
    muted_topics = prefs.muted_topics(now)
    muted_flavors = prefs.muted_flavors(now)
    return [a for a in articles if _passes(a, prefs, muted_topics, muted_flavors)]