Files
upbeatBytes/tests/test_paywall_exclusion.py
T
thejayman77 1c1ecefde8 news: harden paywall exclusion at the candidate query + add the missing regressions
Codex's two non-blocking hardening items, folded in before cutover:
- _candidate_articles() now excludes paywalled sources IN-QUERY (before LIMIT 50),
  so flagged stories can't consume candidate slots and leave a full brief thin.
  Dropped the now-redundant post-fetch filter in build_daily_brief.
- Regressions: history retains a viewed paywalled article; sitemap omits a
  paywalled source AND restores it under override="free".
- Aligned test_brief_paywall to the source-level model (paywalled sources carry a
  paywalled homepage, as in production) — it had relied on article-URL detection.

425 backend tests green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-28 18:54:53 -04:00

86 lines
4.1 KiB
Python

"""The no-paywall promise across every public discovery path: paywalled sources are
excluded from brief generation, stored-brief retrieval (/today + /api/brief), and both
digest queries — while Saved keeps anything the reader saved, and a 'free' override
restores eligibility."""
from datetime import date
from goodnews.db import connect, init_db
from goodnews import briefs, digest, queries
def _setup(c, pay_override="paywalled"):
    """Seed two sources, each with one accepted and summarised article.

    Source 1 ("Pay") is inserted with ``paywall_override`` set to *pay_override*,
    so the test doesn't depend on the domain list; source 2 ("Free") is inserted
    without an override. Article 1 belongs to source 1 and article 2 to source 2,
    both stamped at 12:00 (+00:00) on today's date. No brief is created here —
    use ``_store_brief`` or ``briefs.build_daily_brief`` for that.

    Commits, then returns today's date as an ISO string.
    """
    day = date.today().isoformat()
    published_at = day + "T12:00:00+00:00"

    # The override is the only per-test knob: "paywalled" by default, "free" to opt back in.
    c.execute(
        "INSERT INTO sources (id,name,feed_url,content_visible,paywall_override) VALUES (1,'Pay','http://p/f',1,?)",
        (pay_override,),
    )
    c.execute("INSERT INTO sources (id,name,feed_url,content_visible) VALUES (2,'Free','http://f/f',1)")

    # (article id, owning source id)
    article_sources = ((1, 1), (2, 2))
    for article_id, source_id in article_sources:
        article_row = (
            article_id,
            source_id,
            f"http://x/{article_id}",
            f"t{article_id}",
            published_at,
            f"h{article_id}",
        )
        c.execute(
            "INSERT INTO articles (id,source_id,canonical_url,title,published_at,url_hash) VALUES (?,?,?,?,?,?)",
            article_row,
        )
        c.execute(
            "INSERT INTO article_scores (article_id,accepted,topic,flavor) VALUES (?,1,'science','discovery')",
            (article_id,),
        )
        c.execute(
            "INSERT INTO article_summaries (article_id,summary) VALUES (?,?)",
            (article_id, f"s{article_id}"),
        )

    c.commit()
    return day
def _store_brief(c, today, ids=(1, 2)):
    """Write a brief dated *today* directly, bypassing brief generation.

    Inserts one ``daily_briefs`` row (title ``'t'``) and one ``daily_brief_items``
    row per entry of *ids*, ranked 1, 2, ... in the order given, then commits.
    """
    cursor = c.execute("INSERT INTO daily_briefs (brief_date,title) VALUES (?,'t')", (today,))
    brief_id = cursor.lastrowid
    for position, article_id in enumerate(ids):
        # Ranks are 1-based.
        item_row = (brief_id, article_id, position + 1)
        c.execute("INSERT INTO daily_brief_items (brief_id,article_id,rank) VALUES (?,?,?)", item_row)
    c.commit()
def test_paywalled_never_stored_in_a_new_brief():
    """Building a brief must not write the paywalled source's article into it."""
    conn = connect(":memory:")
    init_db(conn)
    brief_day = _setup(conn)

    briefs.build_daily_brief(conn, brief_date=brief_day, limit=5, replace=True)

    item_rows = conn.execute("SELECT article_id FROM daily_brief_items")
    written_ids = [row["article_id"] for row in item_rows]
    # Paywalled candidate excluded, never written: only article 2 (free source) remains.
    assert written_ids == [2]
def test_stored_brief_retrieval_and_digest_omit_paywalled():
    """A paywalled item already sitting in a stored brief is filtered on the way out."""
    conn = connect(":memory:")
    init_db(conn)
    brief_day = _setup(conn)
    # Both articles are stored directly, so retrieval and digest must do the filtering.
    _store_brief(conn, brief_day)

    served_items = queries.brief(conn)["items"]
    assert [item["id"] for item in served_items] == [2]  # /today + /api/brief

    emailed_items = digest.digest_items(conn, brief_day)
    assert [entry["id"] for entry in emailed_items] == [2]  # morning email
def test_followed_source_email_omits_paywalled():
    """Following a paywalled source does not bring its articles into the follow email."""
    conn = connect(":memory:")
    init_db(conn)
    _setup(conn)

    conn.execute("INSERT INTO users (id,email) VALUES (1,'r@x.com')")
    # The reader follows both sources, including the paywalled one (source 1).
    conn.execute("INSERT INTO user_follows (user_id,kind,value) VALUES (1,'source','1')")
    conn.execute("INSERT INTO user_follows (user_id,kind,value) VALUES (1,'source','2')")
    conn.commit()

    followed = digest.followed_digest_items(conn, 1, exclude_ids=[])
    followed_ids = [entry["id"] for entry in followed]
    # Even a followed paywalled source is omitted from the email.
    assert followed_ids == [2]
def test_saved_retains_paywalled():
    """An article the reader saved stays in Saved even though its source is paywalled."""
    conn = connect(":memory:")
    init_db(conn)
    _setup(conn)

    conn.execute("INSERT INTO users (id,email) VALUES (1,'r@x.com')")
    conn.execute("INSERT INTO saved_articles (user_id,article_id,saved_at) VALUES (1,1,'2026-06-28T00:00:00')")
    conn.commit()

    saved_ids = [row["id"] for row in queries.saved(conn, 1)]
    # You keep what you saved.
    assert 1 in saved_ids
def test_history_retains_paywalled():
    """An article the reader viewed stays in History even though its source is paywalled."""
    conn = connect(":memory:")
    init_db(conn)
    _setup(conn)

    conn.execute("INSERT INTO users (id,email) VALUES (1,'r@x.com')")
    conn.execute("INSERT INTO user_history (user_id,article_id,at) VALUES (1,1,'2026-06-28T00:00:00')")
    conn.commit()

    viewed_ids = [row["id"] for row in queries.history(conn, 1)]
    # A viewed paywalled article stays in history.
    assert 1 in viewed_ids
def test_free_override_restores_eligibility():
    """With the override set to 'free', source 1 is eligible for briefs again."""
    conn = connect(":memory:")
    init_db(conn)
    # Same domain-less source as the other tests, but marked free.
    brief_day = _setup(conn, pay_override="free")

    briefs.build_daily_brief(conn, brief_date=brief_day, limit=5, replace=True)

    item_rows = conn.execute("SELECT article_id FROM daily_brief_items")
    written_ids = [row["article_id"] for row in item_rows]
    assert 1 in written_ids
    assert 2 in written_ids

    served_ids = {item["id"] for item in queries.brief(conn)["items"]}
    assert served_ids == {1, 2}