news: hard-exclude paywalled sources from the feed + brief (no unreadable news)
Per Jay: don't surface stories people can't read without paying — it's off-brand
("no paywalls") and pointless. Paywalled is source-level (domain rule, admin-
overridable): just 3 sources today (Nature, New Scientist, MIT Tech Review),
~5.4% of accepted articles.
- queries.paywalled_source_ids(conn): live source set (admin override wins).
- queries.feed gains include_paywalled=False (default) → adds `a.source_id NOT IN
(…)`. One chokepoint covers Latest/tags/sources/moods/topics/search/since AND
the brief top-up. Source-level + SQL → paging stays exact, no frontend change.
- brief(): filter the cached/home pool by the same rule; the replacement path
already avoided paywalled sources and now also inherits the feed exclusion.
- Dropped the now-moot "paywalled below readable" demotion sort.
- Saved/history keep showing items you saved (their own queries, not excluded).
- test_source_paywall_override updated: paywalled source → excluded from the feed
(was: shown with a badge); 'free' override → returns, no badge. 418 tests green.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+4
-7
@@ -1909,13 +1909,9 @@ def create_app() -> FastAPI:
|
||||
else:
|
||||
rows = _fetch(None, limit, offset)
|
||||
next_offset = offset + len(rows) if len(rows) == limit else None
|
||||
# Paywalled below readable WITHIN each section (so tiers stay grouped); non-home
|
||||
# rows all share section rank 0, preserving the original global behavior.
|
||||
# Section grouping only — paywalled-source stories are excluded upstream now.
|
||||
_SEC = {"near": 0, "country": 1, "world": 2}
|
||||
rows = sorted(rows, key=lambda r: (
|
||||
_SEC.get(r.get("__section"), 0),
|
||||
is_paywalled_for_source(r["canonical_url"], r["paywall_override"]),
|
||||
))
|
||||
rows = sorted(rows, key=lambda r: _SEC.get(r.get("__section"), 0))
|
||||
return FeedResponse(
|
||||
topic=topic,
|
||||
flavor=flavor,
|
||||
@@ -2235,7 +2231,8 @@ def create_app() -> FastAPI:
|
||||
pool = data["items"]
|
||||
# Drop dismissed (replaced-away) items and anything the reader's
|
||||
# boundaries hide; avoid-terms take precedence over curation.
|
||||
items = [a for a in pool if a["id"] not in excl]
|
||||
items = [a for a in pool if a["id"] not in excl
|
||||
and not is_paywalled_for_source(a.get("canonical_url"), a.get("paywall_override"))]
|
||||
if not fp.is_empty():
|
||||
items = filter_articles(items, fp, now)
|
||||
items = items[:limit] # home mode over-fetches to survive filtering; cap here
|
||||
|
||||
@@ -61,6 +61,19 @@ _ARTICLE_COLUMNS = f"""
|
||||
"""
|
||||
|
||||
|
||||
def paywalled_source_ids(conn: sqlite3.Connection) -> list[int]:
    """Return the ids of all sources whose stories count as paywalled.

    Every row of ``sources`` is read and passed through
    ``is_paywalled_for_source`` together with its ``paywall_override``, so the
    answer reflects the domain rule (PAYWALL_DOMAINS) plus any per-source admin
    override. Nothing is cached: the table is re-queried on each call, which
    lets an admin flag take effect immediately. The paywalled set is expected
    to be a handful of sources, so the lookup stays cheap.

    The URL checked for each source is its ``homepage_url``, falling back to
    ``feed_url`` when the homepage is empty or NULL.
    """
    cursor = conn.execute(
        "SELECT id, homepage_url, feed_url, paywall_override FROM sources"
    )
    paywalled: list[int] = []
    for source in cursor.fetchall():
        # Prefer the homepage; use the feed URL only when no homepage is set.
        source_url = source["homepage_url"] or source["feed_url"]
        if is_paywalled_for_source(source_url, source["paywall_override"]):
            paywalled.append(source["id"])
    return paywalled
|
||||
|
||||
|
||||
def feed(
|
||||
conn: sqlite3.Connection,
|
||||
topic: str | None = None,
|
||||
@@ -84,6 +97,7 @@ def feed(
|
||||
home_country: str | None = None,
|
||||
home_state: str | None = None,
|
||||
geo_scope: str | None = None, # 'near' | 'country' | 'world' relative to the reader's home
|
||||
include_paywalled: bool = False, # default: hide paywalled-source stories (no unreadable news in the feed)
|
||||
) -> list[dict]:
|
||||
"""Return articles with categorical filters applied in SQL.
|
||||
|
||||
@@ -106,6 +120,13 @@ def feed(
|
||||
fts_join = "JOIN article_search ON article_search.article_id = a.id"
|
||||
clauses.append("article_search MATCH ?")
|
||||
params.append(match)
|
||||
# Hard-exclude paywalled sources (admin-overridable). Added after MATCH so the FTS
|
||||
# bound param keeps leading; the NOT IN list (a handful of ids) follows.
|
||||
if not include_paywalled:
|
||||
pwx = paywalled_source_ids(conn)
|
||||
if pwx:
|
||||
clauses.append("a.source_id NOT IN (%s)" % ",".join("?" * len(pwx)))
|
||||
params.extend(pwx)
|
||||
if accepted_only:
|
||||
clauses.append("s.accepted = 1")
|
||||
if topic:
|
||||
|
||||
+10
-7
@@ -532,22 +532,25 @@ def test_source_paywall_override(tmp_path, monkeypatch):
|
||||
c.commit(); c.close()
|
||||
tc = _signin(app, api, "boss@x.com")
|
||||
|
||||
def feed_badge():
|
||||
return next(a for a in tc.get("/api/feed?source_id=2").json()["items"] if a["id"] == 2)["paywalled"]
|
||||
def in_feed():
|
||||
return any(a["id"] == 2 for a in tc.get("/api/feed?source_id=2").json()["items"])
|
||||
|
||||
# domain rule: nytimes.com → paywalled in table, inspector, AND feed badge (all agree)
|
||||
# domain rule: nytimes.com → paywalled in the source table + inspector, and HARD-EXCLUDED
|
||||
# from the public feed (we don't surface stories you can't read for free)
|
||||
assert _src(tc, 2)["paywalled"] is True
|
||||
assert tc.get("/api/admin/sources/2/articles").json()["summary"]["paywalled"] is True
|
||||
assert feed_badge() is True
|
||||
# override 'free' (the NYT Learning fix) → effective OFF everywhere
|
||||
assert in_feed() is False
|
||||
# override 'free' (the NYT Learning fix) → effective OFF: it returns to the feed, no badge
|
||||
assert tc.post("/api/admin/sources/2/paywall", json={"override": "free"}).json()["override"] == "free"
|
||||
assert _src(tc, 2)["paywalled"] is False
|
||||
summ = tc.get("/api/admin/sources/2/articles").json()["summary"]
|
||||
assert summ["paywalled"] is False and summ["paywall_domain"] is True and summ["paywall_override"] == "free"
|
||||
assert feed_badge() is False # ranking/badge now agree it's free
|
||||
# back to domain rule, and the 'paywalled' override
|
||||
assert in_feed() is True
|
||||
assert next(a for a in tc.get("/api/feed?source_id=2").json()["items"] if a["id"] == 2)["paywalled"] is False
|
||||
# back to domain rule → excluded again
|
||||
assert tc.post("/api/admin/sources/2/paywall", json={"override": None}).json()["override"] is None
|
||||
assert _src(tc, 2)["paywalled"] is True
|
||||
assert in_feed() is False
|
||||
# validation + 404
|
||||
assert tc.post("/api/admin/sources/2/paywall", json={"override": "bogus"}).status_code == 422
|
||||
assert tc.post("/api/admin/sources/999/paywall", json={"override": "free"}).status_code == 404
|
||||
|
||||
Reference in New Issue
Block a user