1c1ecefde8
Codex's two non-blocking hardening items, folded in before cutover: - _candidate_articles() now excludes paywalled sources IN-QUERY (before LIMIT 50), so flagged stories can't consume candidate slots and leave a full brief thin. Dropped the now-redundant post-fetch filter in build_daily_brief. - Regressions: history retains a viewed paywalled article; sitemap omits a paywalled source AND restores it under override="free". - Aligned test_brief_paywall to the source-level model (paywalled sources carry a paywalled homepage, as in production) — it had relied on article-URL detection. 425 backend tests green. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
40 lines
2.0 KiB
Python
40 lines
2.0 KiB
Python
from datetime import date
|
|
|
|
from goodnews.db import connect, init_db
|
|
from goodnews.briefs import build_daily_brief, show_brief
|
|
from goodnews.paywall import is_paywalled
|
|
|
|
|
|
def test_brief_prefers_readable_over_higher_scored_paywalled():
    """Check that the daily brief skips paywalled sources even when they score higher.

    Seeds seven sources with one accepted article each: sources 1-2 carry a
    paywalled homepage and score 9, sources 3-7 have no homepage and score 4.
    The brief is built for today with ``limit=5``; it must contain exactly five
    URLs, none of which ``is_paywalled`` flags.
    """
    c = connect(":memory:")
    try:
        init_db(c)
        today = date.today().isoformat()

        # Sources 1-2 are paywalled by their homepage domain (matches production: a
        # paywalled source has a paywalled site); 3-7 are free.
        homes = {1: "https://www.newscientist.com/", 2: "https://www.nature.com/"}
        for sid in range(1, 8):
            c.execute(
                "INSERT INTO sources (id,name,feed_url,homepage_url,trust_score) VALUES (?,?,?,?,5)",
                (sid, f"S{sid}", f"http://s{sid}/f", homes.get(sid)),
            )

        def add(aid, sid, url, score):
            """Insert article ``aid`` for source ``sid`` plus its accepted score row.

            ``score`` is used for the constructive, agency and human-benefit
            columns alike; the article is stamped as published today at 12:00 UTC.
            """
            c.execute(
                "INSERT INTO articles (id,source_id,canonical_url,title,published_at,url_hash) "
                "VALUES (?,?,?,?,?,?)",
                (aid, sid, url, f"t{aid}", today + "T12:00:00+00:00", f"h{aid}"),
            )
            c.execute(
                "INSERT INTO article_scores (article_id,constructive_score,agency_score,human_benefit_score,"
                "cortisol_score,ragebait_score,pr_risk_score,accepted,topic,flavor) "
                "VALUES (?,?,?,?,0,0,2,1,'science','discovery')",
                (aid, score, score, score),
            )

        # Paywalled sources are scored HIGHER — they must still be EXCLUDED, leaving
        # the five readable.
        add(1, 1, "https://www.newscientist.com/a", 9)
        add(2, 2, "https://www.nature.com/b", 9)
        add(3, 3, "https://phys.org/c", 4)
        add(4, 4, "https://www.goodnewsnetwork.org/d", 4)
        add(5, 5, "https://e360.yale.edu/e", 4)
        add(6, 6, "https://news.mongabay.com/f", 4)
        add(7, 7, "https://theconversation.com/g", 4)
        c.commit()

        build_daily_brief(c, brief_date=today, limit=5, replace=True)
        # Ask for more rows than the brief limit so an over-filled brief would be caught.
        urls = [r["canonical_url"] for r in show_brief(c, brief_date=today, limit=10)]
    finally:
        # Close the connection even when seeding or brief building raises, so a
        # failing run does not leak it.
        c.close()

    assert len(urls) == 5
    assert not any(is_paywalled(u) for u in urls)  # paywalled sources excluded; five readable fill
|