news: hard-exclude paywalled sources from the feed + brief (no unreadable news)

Per Jay: don't surface stories people can't read without paying — it's off-brand
("no paywalls") and pointless. Paywall status is decided per source (domain rule,
admin-overridable): just 3 sources today (Nature, New Scientist, MIT Tech Review),
accounting for ~5.4% of accepted articles.

- queries.paywalled_source_ids(conn): live source set (admin override wins).
- queries.feed gains include_paywalled=False (default) → adds `a.source_id NOT IN
  (…)`. One chokepoint covers Latest/tags/sources/moods/topics/search/since AND
  the brief top-up. Source-level + SQL → paging stays exact, no frontend change.
- brief(): filter the cached/home pool by the same rule; replacement already
  avoids paywalled and now rides the feed exclusion too.
- Dropped the now-moot "paywalled below readable" demotion sort.
- Saved/history keep showing items you saved (their own queries, not excluded).
- test_source_paywall_override updated: paywalled source → excluded from the feed
  (was: shown with a badge); 'free' override → returns, no badge. 418 tests green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-28 17:10:00 -04:00
parent 54761f5083
commit 0d21231597
3 changed files with 35 additions and 14 deletions
+4 -7
View File
@@ -1909,13 +1909,9 @@ def create_app() -> FastAPI:
else:
rows = _fetch(None, limit, offset)
next_offset = offset + len(rows) if len(rows) == limit else None
# Paywalled below readable WITHIN each section (so tiers stay grouped); non-home
# rows all share section rank 0, preserving the original global behavior.
# Section grouping only — paywalled-source stories are excluded upstream now.
_SEC = {"near": 0, "country": 1, "world": 2}
rows = sorted(rows, key=lambda r: (
_SEC.get(r.get("__section"), 0),
is_paywalled_for_source(r["canonical_url"], r["paywall_override"]),
))
rows = sorted(rows, key=lambda r: _SEC.get(r.get("__section"), 0))
return FeedResponse(
topic=topic,
flavor=flavor,
@@ -2235,7 +2231,8 @@ def create_app() -> FastAPI:
pool = data["items"]
# Drop dismissed (replaced-away) items and anything the reader's
# boundaries hide; avoid-terms take precedence over curation.
items = [a for a in pool if a["id"] not in excl]
items = [a for a in pool if a["id"] not in excl
and not is_paywalled_for_source(a.get("canonical_url"), a.get("paywall_override"))]
if not fp.is_empty():
items = filter_articles(items, fp, now)
items = items[:limit] # home mode over-fetches to survive filtering; cap here
+21
View File
@@ -61,6 +61,19 @@ _ARTICLE_COLUMNS = f"""
"""
def paywalled_source_ids(conn: sqlite3.Connection) -> list[int]:
    """Return the ids of all sources whose stories count as paywalled.

    Every row of ``sources`` is judged by ``is_paywalled_for_source``, which is
    given the source's homepage URL (or its feed URL when the homepage is
    empty) together with the row's ``paywall_override`` value — i.e. the
    domain rule with the per-source admin override applied.

    The table is re-read on each call rather than cached, so a changed
    override is picked up by the very next call.
    """
    query = "SELECT id, homepage_url, feed_url, paywall_override FROM sources"
    blocked: list[int] = []
    for source in conn.execute(query).fetchall():
        # Prefer the homepage; fall back to the feed URL when it is missing/empty.
        url = source["homepage_url"] or source["feed_url"]
        if is_paywalled_for_source(url, source["paywall_override"]):
            blocked.append(source["id"])
    return blocked
def feed(
conn: sqlite3.Connection,
topic: str | None = None,
@@ -84,6 +97,7 @@ def feed(
home_country: str | None = None,
home_state: str | None = None,
geo_scope: str | None = None, # 'near' | 'country' | 'world' relative to the reader's home
include_paywalled: bool = False, # default: hide paywalled-source stories (no unreadable news in the feed)
) -> list[dict]:
"""Return articles with categorical filters applied in SQL.
@@ -106,6 +120,13 @@ def feed(
fts_join = "JOIN article_search ON article_search.article_id = a.id"
clauses.append("article_search MATCH ?")
params.append(match)
# Hard-exclude paywalled sources (admin-overridable). Added after MATCH so the FTS
# bound param keeps leading; the NOT IN list (a handful of ids) follows.
if not include_paywalled:
pwx = paywalled_source_ids(conn)
if pwx:
clauses.append("a.source_id NOT IN (%s)" % ",".join("?" * len(pwx)))
params.extend(pwx)
if accepted_only:
clauses.append("s.accepted = 1")
if topic:
+10 -7
View File
@@ -532,22 +532,25 @@ def test_source_paywall_override(tmp_path, monkeypatch):
c.commit(); c.close()
tc = _signin(app, api, "boss@x.com")
def feed_badge():
return next(a for a in tc.get("/api/feed?source_id=2").json()["items"] if a["id"] == 2)["paywalled"]
def in_feed():
return any(a["id"] == 2 for a in tc.get("/api/feed?source_id=2").json()["items"])
# domain rule: nytimes.com → paywalled in table, inspector, AND feed badge (all agree)
# domain rule: nytimes.com → paywalled in the source table + inspector, and HARD-EXCLUDED
# from the public feed (we don't surface stories you can't read for free)
assert _src(tc, 2)["paywalled"] is True
assert tc.get("/api/admin/sources/2/articles").json()["summary"]["paywalled"] is True
assert feed_badge() is True
# override 'free' (the NYT Learning fix) → effective OFF everywhere
assert in_feed() is False
# override 'free' (the NYT Learning fix) → effective OFF: it returns to the feed, no badge
assert tc.post("/api/admin/sources/2/paywall", json={"override": "free"}).json()["override"] == "free"
assert _src(tc, 2)["paywalled"] is False
summ = tc.get("/api/admin/sources/2/articles").json()["summary"]
assert summ["paywalled"] is False and summ["paywall_domain"] is True and summ["paywall_override"] == "free"
assert feed_badge() is False # ranking/badge now agree it's free
# back to domain rule, and the 'paywalled' override
assert in_feed() is True
assert next(a for a in tc.get("/api/feed?source_id=2").json()["items"] if a["id"] == 2)["paywalled"] is False
# back to domain rule → excluded again
assert tc.post("/api/admin/sources/2/paywall", json={"override": None}).json()["override"] is None
assert _src(tc, 2)["paywalled"] is True
assert in_feed() is False
# validation + 404
assert tc.post("/api/admin/sources/2/paywall", json={"override": "bogus"}).status_code == 422
assert tc.post("/api/admin/sources/999/paywall", json={"override": "free"}).status_code == 404