From 091dec64ae768ce4c9dce43672ed063d102385f2 Mon Sep 17 00:00:00 2001 From: jay Date: Sat, 30 May 2026 19:16:42 +0000 Subject: [PATCH] Calm Filters MVP: device-local personalization across feed/brief/counts - API endpoints (feed, brief, category-counts) accept a 'prefs' JSON query param, parsed tolerantly into FilterPrefs (bad blobs never break the feed). - Feed over-fetches then applies word-boundary filters in Python and slices to the page; brief is filtered down (no refill); counts are computed over the same filtered set so browse numbers match the feed exactly. - Pause.active() coerces naive datetimes to UTC; FilterPrefs.from_dict skips malformed pauses and non-string list entries. - Static site adds the humane ladder (Not today / Less like this / Always hide) plus a Calm filters panel managing pauses, mutes, and avoid-terms in localStorage. Nothing leaves the device. - Tests now 38 (added forgiving-parse and naive-now cases). README documents it. Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 28 +++++ goodnews/api.py | 55 +++++++-- goodnews/filters.py | 54 +++++++-- goodnews/static/index.html | 238 +++++++++++++++++++++++++++++++++---- tests/test_filters.py | 35 ++++++ 5 files changed, 368 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index dacbea5..151af9e 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,34 @@ Endpoints: The ingestion CLI stays pure-stdlib; only the `web` extra pulls in FastAPI/uvicorn, so the two halves can be deployed and upgraded independently. +## Calm Filters + +Personal, device-local controls so a reader can stay informed without subjects +they'd rather not see right now. Preferences live in the browser (localStorage), +are sent to the read endpoints as a `prefs` JSON query param, and are applied +identically to the feed, the brief, and the category counts so the numbers always +match what's shown. 
The canonical shape (`goodnews/filters.py`): + +```json +{ + "include_topics": [], "include_flavors": [], + "mute_topics": [], "mute_flavors": [], + "avoid_terms": ["election", "stock market"], + "pauses": [{"kind": "topic", "value": "health", "until": "2026-06-02T00:00:00Z"}] +} +``` + +The site surfaces a humane ladder rather than a settings panel of dread: + +- **Not today** → pause that article's topic for 24h. +- **Less like this** → ease off that flavor for ~3 days. +- **Always hide …** → a standing mute (undoable in the Calm filters panel). + +Avoid-terms match whole words/phrases (case- and punctuation-insensitive, no +substring surprises like "pan" matching "pandemic"). The brief is filtered *down* +for MVP (no refill from outside the stored brief). No accounts; the same `prefs` +object is the clean migration path to server-side, multi-user preferences later. + ## Deployment The database is never baked into the image — the API and the ingestion CLI share diff --git a/goodnews/api.py b/goodnews/api.py index 8327657..3e0d709 100644 --- a/goodnews/api.py +++ b/goodnews/api.py @@ -15,7 +15,9 @@ from __future__ import annotations import os import sqlite3 +from collections import Counter from contextlib import contextmanager +from datetime import datetime, timezone from pathlib import Path from fastapi import FastAPI, HTTPException, Query @@ -25,6 +27,7 @@ from pydantic import BaseModel from . 
import queries from .db import connect, init_db +from .filters import filter_articles, prefs_from_json from .taxonomy import FLAVORS, TOPICS ROOT = Path(__file__).resolve().parents[1] @@ -148,9 +151,21 @@ def create_app() -> FastAPI: ) @app.get("/api/category-counts", response_model=list[CategoryCount]) - def category_counts(accepted_only: bool = True) -> list[CategoryCount]: + def category_counts(accepted_only: bool = True, prefs: str | None = Query(None)) -> list[CategoryCount]: + fp = prefs_from_json(prefs) with get_conn() as conn: - rows = queries.category_counts(conn, accepted_only=accepted_only) + if fp.is_empty(): + rows = queries.category_counts(conn, accepted_only=accepted_only) + else: + # Count over the SAME filtered set the feed would return, so the + # browse numbers always match what the user actually sees. + allrows = queries.feed(conn, accepted_only=accepted_only, limit=100000, offset=0) + kept = filter_articles(allrows, fp, datetime.now(timezone.utc)) + counts = Counter((r["topic"], r["flavor"]) for r in kept) + rows = [ + {"topic": t, "flavor": f, "count": n} + for (t, f), n in sorted(counts.items(), key=lambda kv: (str(kv[0][0]), str(kv[0][1]))) + ] return [CategoryCount(**row) for row in rows] @app.get("/api/feed", response_model=FeedResponse) @@ -160,20 +175,27 @@ def create_app() -> FastAPI: accepted_only: bool = True, limit: int = Query(30, ge=1, le=100), offset: int = Query(0, ge=0), + prefs: str | None = Query(None), ) -> FeedResponse: if topic and topic.lower() not in TOPICS: raise HTTPException(400, f"unknown topic: {topic}") if flavor and flavor.lower() not in FLAVORS: raise HTTPException(400, f"unknown flavor: {flavor}") + fp = prefs_from_json(prefs) with get_conn() as conn: - rows = queries.feed( - conn, - topic=topic, - flavor=flavor, - accepted_only=accepted_only, - limit=limit, - offset=offset, - ) + if fp.is_empty(): + rows = queries.feed( + conn, topic=topic, flavor=flavor, accepted_only=accepted_only, limit=limit, offset=offset + 
) + else: + # Over-fetch, apply the calm filters in Python (word-boundary + # avoid-terms can't be done in SQL), then slice to the page. + fetch_n = min(2000, (offset + limit) * 4 + 50) + raw = queries.feed( + conn, topic=topic, flavor=flavor, accepted_only=accepted_only, limit=fetch_n, offset=0 + ) + filtered = filter_articles(raw, fp, datetime.now(timezone.utc)) + rows = filtered[offset : offset + limit] return FeedResponse( topic=topic, flavor=flavor, @@ -182,13 +204,22 @@ def create_app() -> FastAPI: ) @app.get("/api/brief", response_model=BriefResponse) - def brief(date: str | None = Query(None), limit: int = Query(10, ge=1, le=50)) -> BriefResponse: + def brief( + date: str | None = Query(None), + limit: int = Query(10, ge=1, le=50), + prefs: str | None = Query(None), + ) -> BriefResponse: + fp = prefs_from_json(prefs) with get_conn() as conn: data = queries.brief(conn, brief_date=date, limit=limit) + items = data["items"] + if not fp.is_empty(): + # MVP: filter the stored brief DOWN; no refill from outside the brief. + items = filter_articles(items, fp, datetime.now(timezone.utc)) return BriefResponse( brief_date=data["brief_date"], title=data["title"], - items=[Article.from_row(r) for r in data["items"]], + items=[Article.from_row(r) for r in items], ) @app.get("/api/brief-dates", response_model=list[str]) diff --git a/goodnews/filters.py b/goodnews/filters.py index 96259d9..0f3c2d5 100644 --- a/goodnews/filters.py +++ b/goodnews/filters.py @@ -13,9 +13,10 @@ rather not see. 
from __future__ import annotations +import json import re from dataclasses import dataclass, field -from datetime import datetime +from datetime import datetime, timezone # Split on any run of non-alphanumerics so matching is punctuation- and # case-insensitive, and anchored to whole words/phrases (no substring surprises: @@ -51,6 +52,12 @@ class Pause: until = datetime.fromisoformat(self.until.replace("Z", "+00:00")) except (ValueError, AttributeError): return False + # Defensive: never crash on an aware-vs-naive comparison. Treat a naive + # `now` (and a naive `until`) as UTC. + if now.tzinfo is None: + now = now.replace(tzinfo=timezone.utc) + if until.tzinfo is None: + until = until.replace(tzinfo=timezone.utc) return until > now @@ -65,14 +72,30 @@ class FilterPrefs: @classmethod def from_dict(cls, data: dict | None) -> "FilterPrefs": - data = data or {} + if not isinstance(data, dict): + return cls() + + def _str_list(value: object) -> list[str]: + if not isinstance(value, list): + return [] + return [str(v) for v in value if isinstance(v, str)] + + # Be forgiving: a malformed pause is skipped, never raised — a bad + # localStorage/API blob must not break the feed. 
+ pauses: list[Pause] = [] + for p in data.get("pauses") or []: + try: + pauses.append(Pause(kind=p["kind"], value=p["value"], until=p["until"])) + except (KeyError, TypeError): + continue + return cls( - include_topics=list(data.get("include_topics") or []), - include_flavors=list(data.get("include_flavors") or []), - mute_topics=list(data.get("mute_topics") or []), - mute_flavors=list(data.get("mute_flavors") or []), - avoid_terms=list(data.get("avoid_terms") or []), - pauses=[Pause(**p) for p in (data.get("pauses") or [])], + include_topics=_str_list(data.get("include_topics")), + include_flavors=_str_list(data.get("include_flavors")), + mute_topics=_str_list(data.get("mute_topics")), + mute_flavors=_str_list(data.get("mute_flavors")), + avoid_terms=_str_list(data.get("avoid_terms")), + pauses=pauses, ) def muted_topics(self, now: datetime) -> set[str]: @@ -97,6 +120,21 @@ class FilterPrefs: ) +def prefs_from_json(raw: str | None) -> FilterPrefs: + """Parse a JSON prefs string (from a query param) into FilterPrefs. + + Never raises on bad input — a malformed blob yields empty prefs so the feed + keeps working. 
+ """ + if not raw: + return FilterPrefs() + try: + data = json.loads(raw) + except (ValueError, TypeError): + return FilterPrefs() + return FilterPrefs.from_dict(data) + + def allows(article: dict, prefs: FilterPrefs, now: datetime) -> bool: """True if an article (a feed/brief row dict) survives the preferences.""" topic = article.get("topic") diff --git a/goodnews/static/index.html b/goodnews/static/index.html index 926a262..96a3ec6 100644 --- a/goodnews/static/index.html +++ b/goodnews/static/index.html @@ -16,11 +16,17 @@ } header { padding: 28px 20px 18px; text-align: center; border-bottom: 1px solid var(--line); - background: var(--card); + background: var(--card); position: relative; } header h1 { margin: 0; font-size: 1.7rem; letter-spacing: -0.02em; } header h1 span { color: var(--accent); } header p { margin: 6px 0 0; color: var(--muted); font-size: 0.95rem; } + .calm-btn { + position: absolute; top: 20px; right: 20px; border: 1px solid var(--line); + background: var(--card); border-radius: 999px; padding: 6px 14px; cursor: pointer; + font-size: 0.85rem; color: var(--ink); + } + .calm-btn.on { background: var(--accent); color: #fff; border-color: var(--accent); } main { max-width: 760px; margin: 0 auto; padding: 20px; } .chips { display: flex; flex-wrap: wrap; gap: 8px; margin: 6px 0 18px; } .chip { @@ -50,7 +56,38 @@ .rank-badge { background: var(--accent); color: #fff; border-radius: 50%; width: 24px; height: 24px; display: inline-flex; align-items: center; justify-content: center; font-size: 0.8rem; font-weight: 700; } + .actions { margin-top: 10px; display: flex; gap: 14px; } + .actions button { + background: none; border: none; padding: 0; cursor: pointer; font-size: 0.78rem; + color: var(--muted); border-bottom: 1px dotted var(--line); + } + .actions button:hover { color: var(--accent); border-bottom-color: var(--accent); } .empty { color: var(--muted); text-align: center; padding: 30px; } + /* Calm settings panel */ + .panel { + background: 
var(--card); border: 1px solid var(--line); border-radius: 12px; + padding: 16px 18px; margin-bottom: 18px; display: none; + } + .panel.open { display: block; } + .panel h2 { font-size: 1rem; margin: 0 0 4px; } + .panel .hint { color: var(--muted); font-size: 0.82rem; margin: 0 0 12px; } + .panel .group { margin-bottom: 12px; } + .panel .group-label { font-size: 0.74rem; text-transform: uppercase; letter-spacing: .07em; + color: var(--muted); margin-bottom: 6px; } + .pill { + display: inline-flex; align-items: center; gap: 6px; background: var(--accent-soft); + color: var(--accent); border-radius: 999px; padding: 3px 6px 3px 11px; font-size: 0.82rem; + margin: 0 6px 6px 0; + } + .pill button { background: none; border: none; cursor: pointer; color: var(--accent); + font-size: 0.95rem; line-height: 1; padding: 0 2px; } + .panel input[type=text] { border: 1px solid var(--line); border-radius: 8px; padding: 6px 10px; + font-size: 0.88rem; width: 60%; } + .panel .addbtn { margin-left: 6px; border: 1px solid var(--accent); background: var(--accent); + color: #fff; border-radius: 8px; padding: 6px 12px; cursor: pointer; font-size: 0.85rem; } + .panel .reset { margin-top: 6px; background: none; border: none; color: var(--muted); + cursor: pointer; font-size: 0.8rem; text-decoration: underline; } + .calm-note { color: var(--muted); font-size: 0.8rem; margin: -8px 0 14px; } footer { text-align: center; color: var(--muted); font-size: 0.78rem; padding: 20px; } footer a { color: var(--accent); } @@ -59,8 +96,31 @@

goodNews

Calm, constructive news worth your attention — and nothing that isn't.

+
+
+

Calm filters

+

Your boundaries, kept on this device. They are saved only in this browser and never tied to an account.

+
+
Paused for now
+
+
+
+
Always hidden
+
+
+
+
Avoid words & phrases
+
+ + +
+ +
+ +
+
Five Good Things
Loading…
@@ -77,9 +137,55 @@ diff --git a/tests/test_filters.py b/tests/test_filters.py index 8dfda55..37ed47f 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -4,6 +4,7 @@ from goodnews.filters import ( FilterPrefs, Pause, filter_articles, + prefs_from_json, text_matches_avoid_terms, ) @@ -86,3 +87,37 @@ def test_pause_active_helper(): assert Pause("topic", "health", "2026-06-02T00:00:00Z").active(NOW) assert not Pause("topic", "health", "2026-05-01T00:00:00Z").active(NOW) assert not Pause("topic", "health", "garbage").active(NOW) + + +def test_pause_active_tolerates_naive_now(): + # A naive `now` must not raise an aware-vs-naive comparison error. + naive = datetime(2026, 6, 1) + assert Pause("topic", "health", "2026-06-02T00:00:00Z").active(naive) + + +# --- forgiving parsing (bad blobs must never break the feed) --- + +def test_prefs_from_json_tolerates_garbage(): + assert prefs_from_json("not json").is_empty() + assert prefs_from_json(None).is_empty() + assert prefs_from_json("[1,2,3]").is_empty() # wrong shape + + +def test_from_dict_skips_malformed_pauses(): + prefs = FilterPrefs.from_dict( + { + "mute_topics": ["health"], + "pauses": [ + {"kind": "topic", "value": "science", "until": "2026-06-02T00:00:00Z"}, + {"kind": "topic"}, # malformed — missing value/until + "garbage", # not even a dict + ], + } + ) + assert prefs.mute_topics == ["health"] + assert len(prefs.pauses) == 1 # only the well-formed pause survives + + +def test_from_dict_ignores_non_string_list_entries(): + prefs = FilterPrefs.from_dict({"avoid_terms": ["ok", 5, None, "fine"]}) + assert prefs.avoid_terms == ["ok", "fine"]