diff --git a/goodnews/api.py b/goodnews/api.py index 16b3af8..fd76447 100644 --- a/goodnews/api.py +++ b/goodnews/api.py @@ -1909,13 +1909,9 @@ def create_app() -> FastAPI: else: rows = _fetch(None, limit, offset) next_offset = offset + len(rows) if len(rows) == limit else None - # Paywalled below readable WITHIN each section (so tiers stay grouped); non-home - # rows all share section rank 0, preserving the original global behavior. + # Section grouping only; paywalled sources are dropped upstream (TODO confirm: by source URL, not canonical_url). _SEC = {"near": 0, "country": 1, "world": 2} - rows = sorted(rows, key=lambda r: ( - _SEC.get(r.get("__section"), 0), - is_paywalled_for_source(r["canonical_url"], r["paywall_override"]), - )) + rows = sorted(rows, key=lambda r: _SEC.get(r.get("__section"), 0)) return FeedResponse( topic=topic, flavor=flavor, @@ -2235,7 +2231,8 @@ def create_app() -> FastAPI: pool = data["items"] # Drop dismissed (replaced-away) items and anything the reader's # boundaries hide; avoid-terms take precedence over curation. - items = [a for a in pool if a["id"] not in excl] + items = [a for a in pool if a["id"] not in excl + and not is_paywalled_for_source(a.get("canonical_url"), a.get("paywall_override"))] if not fp.is_empty(): items = filter_articles(items, fp, now) items = items[:limit] # home mode over-fetches to survive filtering; cap here diff --git a/goodnews/queries.py b/goodnews/queries.py index 4664b5c..6819ad1 100644 --- a/goodnews/queries.py +++ b/goodnews/queries.py @@ -61,6 +61,19 @@ _ARTICLE_COLUMNS = f""" """ +def paywalled_source_ids(conn: sqlite3.Connection) -> list[int]: + """Return ids of sources whose stories are paywalled — the domain rule (PAYWALL_DOMAINS), + overridable per source in admin; judged on homepage_url, else feed_url. Computed live (reads every + sources row 
per call) so an admin flag takes effect at once. Result is expected to be a handful of ids.""" + rows = conn.execute( + "SELECT id, homepage_url, feed_url, paywall_override FROM sources" + ).fetchall() + return [ + r["id"] for r in rows + if is_paywalled_for_source((r["homepage_url"] or r["feed_url"]), r["paywall_override"]) + ] + + def feed( conn: sqlite3.Connection, topic: str | None = None, @@ -84,6 +97,7 @@ def feed( home_country: str | None = None, home_state: str | None = None, geo_scope: str | None = None, # 'near' | 'country' | 'world' relative to the reader's home + include_paywalled: bool = False, # default: hide paywalled-source stories (no unreadable news in the feed) ) -> list[dict]: """Return articles with categorical filters applied in SQL. @@ -106,6 +120,13 @@ def feed( fts_join = "JOIN article_search ON article_search.article_id = a.id" clauses.append("article_search MATCH ?") params.append(match) + # Hard-exclude paywalled sources (admin-overridable). Appended after MATCH so the FTS + # bound param stays ahead of these ids in params; the NOT IN list is expected to be a handful of ids. + if not include_paywalled: + pwx = paywalled_source_ids(conn) + if pwx: + clauses.append("a.source_id NOT IN (%s)" % ",".join("?" 
* len(pwx))) + params.extend(pwx) if accepted_only: clauses.append("s.accepted = 1") if topic: diff --git a/tests/test_admin.py b/tests/test_admin.py index b1d27f2..253415c 100644 --- a/tests/test_admin.py +++ b/tests/test_admin.py @@ -532,22 +532,25 @@ def test_source_paywall_override(tmp_path, monkeypatch): c.commit(); c.close() tc = _signin(app, api, "boss@x.com") - def feed_badge(): - return next(a for a in tc.get("/api/feed?source_id=2").json()["items"] if a["id"] == 2)["paywalled"] + def in_feed(): + return any(a["id"] == 2 for a in tc.get("/api/feed?source_id=2").json()["items"]) - # domain rule: nytimes.com → paywalled in table, inspector, AND feed badge (all agree) + # domain rule: nytimes.com → paywalled in the source table + inspector, and HARD-EXCLUDED + # from the public feed (we don't surface stories you can't read for free) assert _src(tc, 2)["paywalled"] is True assert tc.get("/api/admin/sources/2/articles").json()["summary"]["paywalled"] is True - assert feed_badge() is True - # override 'free' (the NYT Learning fix) → effective OFF everywhere + assert in_feed() is False + # override 'free' (the NYT Learning fix) → effective OFF: it returns to the feed, no badge assert tc.post("/api/admin/sources/2/paywall", json={"override": "free"}).json()["override"] == "free" assert _src(tc, 2)["paywalled"] is False summ = tc.get("/api/admin/sources/2/articles").json()["summary"] assert summ["paywalled"] is False and summ["paywall_domain"] is True and summ["paywall_override"] == "free" - assert feed_badge() is False # ranking/badge now agree it's free - # back to domain rule, and the 'paywalled' override + assert in_feed() is True + assert next(a for a in tc.get("/api/feed?source_id=2").json()["items"] if a["id"] == 2)["paywalled"] is False + # back to domain rule → excluded again assert tc.post("/api/admin/sources/2/paywall", json={"override": None}).json()["override"] is None assert _src(tc, 2)["paywalled"] is True + assert in_feed() is False # validation + 
404 assert tc.post("/api/admin/sources/2/paywall", json={"override": "bogus"}).status_code == 422 assert tc.post("/api/admin/sources/999/paywall", json={"override": "free"}).status_code == 404