Files
upbeatBytes/tests/test_paywall_exclusion.py
T
thejayman77 c600145ba5 news: close the remaining no-paywall bypass paths (Codex audit)
queries.feed was the main chokepoint, but several discovery paths have their own
SQL. Apply the shared source exclusion to all of them so "no paywalls" is truly
site-wide:
- briefs.build_daily_brief: EXCLUDE paywalled candidates (was: demote) — never
  stored in a new brief.
- queries.brief: stored-brief retrieval (covers /today + /api/brief) filters the
  paywalled source.
- digest.digest_items + followed_digest_items: the morning email + "from what you
  follow" omit paywalled sources.
- sitemap(): paywalled article pages excluded from the sitemap.
All reuse queries.paywalled_source_ids (admin override still wins).

Regression tests (test_paywall_exclusion.py): never stored in a new brief; /today
+ digest omit it; followed-source email omits it; Saved retains it; 'free'
override restores eligibility. 423 backend tests green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-28 17:22:52 -04:00

77 lines
3.8 KiB
Python

"""The no-paywall promise across every public discovery path: paywalled sources are
excluded from brief generation, stored-brief retrieval (/today + /api/brief), and both
digest queries — while Saved keeps anything the reader saved, and a 'free' override
restores eligibility."""
from datetime import date
from goodnews.db import connect, init_db
from goodnews import briefs, digest, queries
def _setup(c, pay_override="paywalled"):
    """Seed two sources, each with one accepted and summarised article dated today.

    Source 1 ('Pay') gets ``paywall_override=pay_override`` — by default "paywalled",
    set via the override so the test doesn't depend on the domain list. Source 2
    ('Free') is inserted without an override. Article N belongs to source N and is
    stamped at 12:00 UTC on today's date.

    No brief is written here; tests that need a stored brief insert one separately.

    Args:
        c: open database connection (needs ``execute`` and ``commit``).
        pay_override: value stored in ``sources.paywall_override`` for source 1.

    Returns:
        Today's date as an ISO ``YYYY-MM-DD`` string.
    """
    today = date.today().isoformat()
    published_at = today + "T12:00:00+00:00"

    c.execute(
        "INSERT INTO sources (id,name,feed_url,content_visible,paywall_override) VALUES (1,'Pay','http://p/f',1,?)",
        (pay_override,),
    )
    c.execute("INSERT INTO sources (id,name,feed_url,content_visible) VALUES (2,'Free','http://f/f',1)")

    # (article id, source id): one article per source.
    for aid, sid in [(1, 1), (2, 2)]:
        c.execute(
            "INSERT INTO articles (id,source_id,canonical_url,title,published_at,url_hash) VALUES (?,?,?,?,?,?)",
            (aid, sid, f"http://x/{aid}", f"t{aid}", published_at, f"h{aid}"),
        )
        c.execute(
            "INSERT INTO article_scores (article_id,accepted,topic,flavor) VALUES (?,1,'science','discovery')",
            (aid,),
        )
        c.execute("INSERT INTO article_summaries (article_id,summary) VALUES (?,?)", (aid, f"s{aid}"))

    c.commit()
    return today
def _store_brief(c, today, ids=(1, 2)):
    """Insert a daily brief for ``today`` directly, bypassing brief generation.

    Writes one ``daily_briefs`` row (title 't') and one ``daily_brief_items`` row per
    article id, ranked 1..n in the order given, then commits.

    Args:
        c: open database connection (needs ``execute`` and ``commit``).
        today: value stored in ``daily_briefs.brief_date``.
        ids: article ids to attach to the brief, in rank order.
    """
    bid = c.execute("INSERT INTO daily_briefs (brief_date,title) VALUES (?,'t')", (today,)).lastrowid
    for rank, aid in enumerate(ids, start=1):
        c.execute(
            "INSERT INTO daily_brief_items (brief_id,article_id,rank) VALUES (?,?,?)",
            (bid, aid, rank),
        )
    c.commit()
def test_paywalled_never_stored_in_a_new_brief():
    """Building a brief over one paywalled and one free candidate stores only the free one."""
    c = connect(":memory:")
    init_db(c)
    today = _setup(c)

    briefs.build_daily_brief(c, brief_date=today, limit=5, replace=True)

    stored = [r["article_id"] for r in c.execute("SELECT article_id FROM daily_brief_items")]
    assert stored == [2]  # paywalled candidate excluded, never written
def test_stored_brief_retrieval_and_digest_omit_paywalled():
    """A brief that already contains a paywalled article is filtered when it is read back."""
    c = connect(":memory:")
    init_db(c)
    today = _setup(c)
    _store_brief(c, today)  # both stored directly → retrieval/digest must still filter

    assert [r["id"] for r in queries.brief(c)["items"]] == [2]  # /today + /api/brief
    assert [d["id"] for d in digest.digest_items(c, today)] == [2]  # morning email
def test_followed_source_email_omits_paywalled():
    """A user following both sources gets only the free source's article in followed items."""
    c = connect(":memory:")
    init_db(c)
    _setup(c)
    c.execute("INSERT INTO users (id,email) VALUES (1,'r@x.com')")
    # Follow the paywalled source (1) as well as the free one (2).
    c.execute("INSERT INTO user_follows (user_id,kind,value) VALUES (1,'source','1')")
    c.execute("INSERT INTO user_follows (user_id,kind,value) VALUES (1,'source','2')")
    c.commit()

    ids = [d["id"] for d in digest.followed_digest_items(c, 1, exclude_ids=[])]
    assert ids == [2]  # even a followed paywalled source is omitted from the email
def test_saved_retains_paywalled():
    """An article from the paywalled source still appears in the saved list of the user who saved it."""
    c = connect(":memory:")
    init_db(c)
    _setup(c)
    c.execute("INSERT INTO users (id,email) VALUES (1,'r@x.com')")
    c.execute("INSERT INTO saved_articles (user_id,article_id,saved_at) VALUES (1,1,'2026-06-28T00:00:00')")
    c.commit()

    saved_ids = [r["id"] for r in queries.saved(c, 1)]
    assert 1 in saved_ids  # you keep what you saved
def test_free_override_restores_eligibility():
    """With source 1's override set to 'free', its article is stored in and served from the brief."""
    c = connect(":memory:")
    init_db(c)
    today = _setup(c, pay_override="free")  # same domain-less source, but marked free

    briefs.build_daily_brief(c, brief_date=today, limit=5, replace=True)

    stored = [r["article_id"] for r in c.execute("SELECT article_id FROM daily_brief_items")]
    # Separate asserts so a failure reports which article is missing.
    assert 1 in stored
    assert 2 in stored
    assert {r["id"] for r in queries.brief(c)["items"]} == {1, 2}