news: hard-exclude paywalled sources from the feed + brief (no unreadable news)
Per Jay: don't surface stories people can't read without paying — it's off-brand
("no paywalls") and pointless. Paywall status is decided per source (a domain rule
that an admin can override); today that covers just 3 sources (Nature, New
Scientist, MIT Tech Review), ~5.4% of accepted articles.
- queries.paywalled_source_ids(conn): live source set (admin override wins).
- queries.feed gains include_paywalled=False (default) → adds `a.source_id NOT IN
(…)`. One chokepoint covers Latest/tags/sources/moods/topics/search/since AND
the brief top-up. Source-level + SQL → paging stays exact, no frontend change.
- brief(): filter the cached/home pool by the same rule; the replacement path already
avoided paywalled stories and now also goes through the feed exclusion.
- Dropped the now-moot "paywalled below readable" demotion sort.
- Saved/history keep showing items you saved (their own queries, not excluded).
- test_source_paywall_override updated: a paywalled source is now excluded from the feed
(was: shown with a badge); with a 'free' override its story returns to the feed with no badge. 418 tests green.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+4
-7
@@ -1909,13 +1909,9 @@ def create_app() -> FastAPI:
|
|||||||
else:
|
else:
|
||||||
rows = _fetch(None, limit, offset)
|
rows = _fetch(None, limit, offset)
|
||||||
next_offset = offset + len(rows) if len(rows) == limit else None
|
next_offset = offset + len(rows) if len(rows) == limit else None
|
||||||
# Paywalled below readable WITHIN each section (so tiers stay grouped); non-home
|
# Section grouping only — paywalled-source stories are excluded upstream now.
|
||||||
# rows all share section rank 0, preserving the original global behavior.
|
|
||||||
_SEC = {"near": 0, "country": 1, "world": 2}
|
_SEC = {"near": 0, "country": 1, "world": 2}
|
||||||
rows = sorted(rows, key=lambda r: (
|
rows = sorted(rows, key=lambda r: _SEC.get(r.get("__section"), 0))
|
||||||
_SEC.get(r.get("__section"), 0),
|
|
||||||
is_paywalled_for_source(r["canonical_url"], r["paywall_override"]),
|
|
||||||
))
|
|
||||||
return FeedResponse(
|
return FeedResponse(
|
||||||
topic=topic,
|
topic=topic,
|
||||||
flavor=flavor,
|
flavor=flavor,
|
||||||
@@ -2235,7 +2231,8 @@ def create_app() -> FastAPI:
|
|||||||
pool = data["items"]
|
pool = data["items"]
|
||||||
# Drop dismissed (replaced-away) items and anything the reader's
|
# Drop dismissed (replaced-away) items and anything the reader's
|
||||||
# boundaries hide; avoid-terms take precedence over curation.
|
# boundaries hide; avoid-terms take precedence over curation.
|
||||||
items = [a for a in pool if a["id"] not in excl]
|
items = [a for a in pool if a["id"] not in excl
|
||||||
|
and not is_paywalled_for_source(a.get("canonical_url"), a.get("paywall_override"))]
|
||||||
if not fp.is_empty():
|
if not fp.is_empty():
|
||||||
items = filter_articles(items, fp, now)
|
items = filter_articles(items, fp, now)
|
||||||
items = items[:limit] # home mode over-fetches to survive filtering; cap here
|
items = items[:limit] # home mode over-fetches to survive filtering; cap here
|
||||||
|
|||||||
@@ -61,6 +61,19 @@ _ARTICLE_COLUMNS = f"""
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def paywalled_source_ids(conn: sqlite3.Connection) -> list[int]:
    """Return the ids of the sources whose stories currently count as paywalled.

    Every row of ``sources`` is read on each call and judged by
    ``is_paywalled_for_source`` — the domain rule (PAYWALL_DOMAINS) combined
    with the source's ``paywall_override`` column — so nothing is cached here
    and a changed override is picked up by the next call.

    Args:
        conn: Open SQLite connection; rows must be addressable by column name.

    Returns:
        Ids of the paywalled sources, in the order the query yields them.
    """
    cursor = conn.execute(
        "SELECT id, homepage_url, feed_url, paywall_override FROM sources"
    )
    blocked: list[int] = []
    for source in cursor.fetchall():
        # The homepage is the preferred URL to judge; the feed URL is used only
        # when the homepage is missing or empty.
        site_url = source["homepage_url"] or source["feed_url"]
        if is_paywalled_for_source(site_url, source["paywall_override"]):
            blocked.append(source["id"])
    return blocked
|
||||||
|
|
||||||
|
|
||||||
def feed(
|
def feed(
|
||||||
conn: sqlite3.Connection,
|
conn: sqlite3.Connection,
|
||||||
topic: str | None = None,
|
topic: str | None = None,
|
||||||
@@ -84,6 +97,7 @@ def feed(
|
|||||||
home_country: str | None = None,
|
home_country: str | None = None,
|
||||||
home_state: str | None = None,
|
home_state: str | None = None,
|
||||||
geo_scope: str | None = None, # 'near' | 'country' | 'world' relative to the reader's home
|
geo_scope: str | None = None, # 'near' | 'country' | 'world' relative to the reader's home
|
||||||
|
include_paywalled: bool = False, # default: hide paywalled-source stories (no unreadable news in the feed)
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
"""Return articles with categorical filters applied in SQL.
|
"""Return articles with categorical filters applied in SQL.
|
||||||
|
|
||||||
@@ -106,6 +120,13 @@ def feed(
|
|||||||
fts_join = "JOIN article_search ON article_search.article_id = a.id"
|
fts_join = "JOIN article_search ON article_search.article_id = a.id"
|
||||||
clauses.append("article_search MATCH ?")
|
clauses.append("article_search MATCH ?")
|
||||||
params.append(match)
|
params.append(match)
|
||||||
|
# Hard-exclude paywalled sources (admin-overridable). Added after MATCH so the FTS
|
||||||
|
# bound param keeps leading; the NOT IN list (a handful of ids) follows.
|
||||||
|
if not include_paywalled:
|
||||||
|
pwx = paywalled_source_ids(conn)
|
||||||
|
if pwx:
|
||||||
|
clauses.append("a.source_id NOT IN (%s)" % ",".join("?" * len(pwx)))
|
||||||
|
params.extend(pwx)
|
||||||
if accepted_only:
|
if accepted_only:
|
||||||
clauses.append("s.accepted = 1")
|
clauses.append("s.accepted = 1")
|
||||||
if topic:
|
if topic:
|
||||||
|
|||||||
+10
-7
@@ -532,22 +532,25 @@ def test_source_paywall_override(tmp_path, monkeypatch):
|
|||||||
c.commit(); c.close()
|
c.commit(); c.close()
|
||||||
tc = _signin(app, api, "boss@x.com")
|
tc = _signin(app, api, "boss@x.com")
|
||||||
|
|
||||||
def feed_badge():
|
def in_feed():
|
||||||
return next(a for a in tc.get("/api/feed?source_id=2").json()["items"] if a["id"] == 2)["paywalled"]
|
return any(a["id"] == 2 for a in tc.get("/api/feed?source_id=2").json()["items"])
|
||||||
|
|
||||||
# domain rule: nytimes.com → paywalled in table, inspector, AND feed badge (all agree)
|
# domain rule: nytimes.com → paywalled in the source table + inspector, and HARD-EXCLUDED
|
||||||
|
# from the public feed (we don't surface stories you can't read for free)
|
||||||
assert _src(tc, 2)["paywalled"] is True
|
assert _src(tc, 2)["paywalled"] is True
|
||||||
assert tc.get("/api/admin/sources/2/articles").json()["summary"]["paywalled"] is True
|
assert tc.get("/api/admin/sources/2/articles").json()["summary"]["paywalled"] is True
|
||||||
assert feed_badge() is True
|
assert in_feed() is False
|
||||||
# override 'free' (the NYT Learning fix) → effective OFF everywhere
|
# override 'free' (the NYT Learning fix) → effective OFF: it returns to the feed, no badge
|
||||||
assert tc.post("/api/admin/sources/2/paywall", json={"override": "free"}).json()["override"] == "free"
|
assert tc.post("/api/admin/sources/2/paywall", json={"override": "free"}).json()["override"] == "free"
|
||||||
assert _src(tc, 2)["paywalled"] is False
|
assert _src(tc, 2)["paywalled"] is False
|
||||||
summ = tc.get("/api/admin/sources/2/articles").json()["summary"]
|
summ = tc.get("/api/admin/sources/2/articles").json()["summary"]
|
||||||
assert summ["paywalled"] is False and summ["paywall_domain"] is True and summ["paywall_override"] == "free"
|
assert summ["paywalled"] is False and summ["paywall_domain"] is True and summ["paywall_override"] == "free"
|
||||||
assert feed_badge() is False # ranking/badge now agree it's free
|
assert in_feed() is True
|
||||||
# back to domain rule, and the 'paywalled' override
|
assert next(a for a in tc.get("/api/feed?source_id=2").json()["items"] if a["id"] == 2)["paywalled"] is False
|
||||||
|
# back to domain rule → excluded again
|
||||||
assert tc.post("/api/admin/sources/2/paywall", json={"override": None}).json()["override"] is None
|
assert tc.post("/api/admin/sources/2/paywall", json={"override": None}).json()["override"] is None
|
||||||
assert _src(tc, 2)["paywalled"] is True
|
assert _src(tc, 2)["paywalled"] is True
|
||||||
|
assert in_feed() is False
|
||||||
# validation + 404
|
# validation + 404
|
||||||
assert tc.post("/api/admin/sources/2/paywall", json={"override": "bogus"}).status_code == 422
|
assert tc.post("/api/admin/sources/2/paywall", json={"override": "bogus"}).status_code == 422
|
||||||
assert tc.post("/api/admin/sources/999/paywall", json={"override": "free"}).status_code == 404
|
assert tc.post("/api/admin/sources/999/paywall", json={"override": "free"}).status_code == 404
|
||||||
|
|||||||
Reference in New Issue
Block a user