Files
upbeatBytes/tests/test_dashboard.py
T
thejayman77 eacf91225a Sources table: Media column (image coverage % + paywall marker)
Per Codex — make the table more decision-ready from data we already have.
Paywall is a domain-level hint, so it's a per-source flag (not a meaningful
rate): show image-coverage % plus a 🔒 marker for subscription domains in one
compact "Media" column (tooltip spells it out). source_health gains a
`paywalled` flag (is_paywalled on homepage/feed); also added to sources.csv.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-09 14:58:19 -04:00

148 lines
7.5 KiB
Python

from goodnews.db import connect, init_db
from goodnews import queries, localtime
def test_site_tz_from_env_and_fallback(monkeypatch):
    """site_tz() honours GOODNEWS_TZ and degrades to UTC for an unknown zone."""
    env_var = "GOODNEWS_TZ"

    monkeypatch.setenv(env_var, "America/New_York")
    configured = localtime.site_tz()
    assert configured.key == "America/New_York"

    # local_now() has to be tz-aware, i.e. it must report a UTC offset.
    offset = localtime.local_now().utcoffset()
    assert offset is not None

    # A bogus zone name must not crash; UTC is the expected fallback.
    monkeypatch.setenv(env_var, "Not/AZone")
    fallback = localtime.site_tz()
    assert fallback.key == "UTC"
def _seed(c):
    """Insert the shared fixture rows through connection *c* and commit.

    Rows written:
      * source 1 ('Good') and source 2 ('Flaky', 5 consecutive failures,
        60-minute poll interval);
      * articles 1-3, all owned by source 1, with article 3 marked as a
        duplicate of article 1;
      * scores: articles 1 and 3 accepted, article 2 rejected.
    """
    source_statements = (
        "INSERT INTO sources (id,name,feed_url,active,default_category) VALUES (1,'Good','http://s/1',1,'science')",
        "INSERT INTO sources (id,name,feed_url,active,consecutive_failures,poll_interval_minutes) "
        "VALUES (2,'Flaky','http://s/2',1,5,60)",
    )
    for statement in source_statements:
        c.execute(statement)

    # source 1: two accepted (one duplicate → not served), one rejected
    article_sql = (
        "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,duplicate_of,published_at) "
        "VALUES (?,1,?,?,?,?,datetime('now'))"
    )
    for article_id, duplicate_of in ((1, None), (2, None), (3, 1)):
        row = (
            article_id,
            f"u{article_id}",
            f"T{article_id}",
            f"h{article_id}",
            duplicate_of,
        )
        c.execute(article_sql, row)

    score_statements = (
        "INSERT INTO article_scores (article_id,accepted) VALUES (1,1)",
        "INSERT INTO article_scores (article_id,accepted) VALUES (2,0)",
        "INSERT INTO article_scores (article_id,accepted) VALUES (3,1)",  # duplicate → excluded
    )
    for statement in score_statements:
        c.execute(statement)

    c.commit()
def test_content_stats_counts_served_excluding_duplicates(tmp_path):
    """content_stats() reports served/total/rejected/active-source counts for the seed data."""
    db_file = tmp_path / "t.db"
    c = connect(str(db_file))
    init_db(c)
    _seed(c)

    stats = queries.content_stats(c)

    expected = {
        "served": 1,  # only article 1 (2 is rejected, 3 is a duplicate)
        "total": 3,
        "rejected": 1,
        "active_sources": 2,
    }
    observed = {key: stats[key] for key in expected}
    assert observed == expected
def test_source_health_orders_failing_first_and_computes_next_due(tmp_path):
    """source_health() lists the failing source first and exposes per-source due/dup data."""
    c = connect(str(tmp_path / "t.db"))
    init_db(c)
    _seed(c)
    # Record a failed ingest attempt for source 2 that finished 30 minutes ago.
    c.execute(
        "INSERT INTO ingest_runs (source_id, finished_at, status) "
        "VALUES (2, datetime('now','-30 minutes'), 'failed')"
    )
    c.commit()

    rows = queries.source_health(c)

    def first_named(wanted):
        # First row whose name matches; raises StopIteration when absent.
        return next(row for row in rows if row["name"] == wanted)

    ordered_names = [row["name"] for row in rows]
    assert ordered_names[0] == "Flaky"  # failing source floats to top

    flaky = first_named("Flaky")
    assert flaky["failures"] == 5
    # next_due = last_attempt + 60*(1+5)=360 min; attempt was 30m ago → still resting
    assert flaky["next_due_at"] is not None

    good = first_named("Good")
    assert good["served"] == 1
    assert good["next_due_at"] is None  # never attempted → due now
    # source 1: 2 accepted (art 1, art 3) but art 3 is a duplicate → 50% of accepted were dupes
    assert good["accepted_dup_rate"] == 50
def test_attention_flags_resting_flagged_and_brief():
    """_attention() reports backing-off sources, flagged sources, a short brief and unread feedback."""
    from goodnews import queries

    site = {"served": 100, "with_image": 90, "latest_brief_size": 5}
    backing_off = {"active": 1, "failures": 3, "review_flag": 0}
    flagged = {"active": 1, "failures": 0, "review_flag": 1}
    healthy = {"active": 1, "failures": 0, "review_flag": 0}

    raised = queries._attention(site, [backing_off, flagged, healthy], feedback_unread=2)
    joined = " | ".join(entry["text"] for entry in raised)

    expected_fragments = (
        "1 source backing off",  # one resting
        "1 source flagged",  # one flagged
        "brief has only 5",  # brief < 7
        "2 unread feedback",  # unread feedback
    )
    for fragment in expected_fragments:
        assert fragment in joined

    # 90/100 = 90% image coverage → no coverage warning
    assert "Image coverage" not in joined
def test_attention_clear_when_healthy():
    """_attention() returns an empty list when nothing about the site or its sources is off."""
    from goodnews import queries

    site = {"served": 100, "with_image": 95, "latest_brief_size": 7}
    healthy_source = {"failures": 0, "review_flag": 0}

    raised = queries._attention(site, [healthy_source], feedback_unread=0)

    assert raised == []
def test_attention_richer_items_fire_and_are_quiet_below_threshold():
    """Each per-source attention rule fires for a source that is clearly over its threshold."""
    from datetime import UTC, datetime
    from goodnews import queries

    def active_visible(**extra):
        # Every source in this test is active and visible; only the extras differ.
        return {"status": "active", "content_visible": 1, **extra}

    now = datetime(2026, 6, 9, 12, 0, 0, tzinfo=UTC)
    content = {"served": 100, "with_image": 95, "latest_brief_size": 7}  # no site-wide items
    sources = [
        # stale: active, visible, last success 20 days ago
        active_visible(last_success_at="2026-05-20 00:00:00"),
        # high rejection: 40 ingested, 10% acceptance
        active_visible(total_articles=40, acceptance_rate=10),
        # high duplicate: 30 accepted, 70% accepted-dup
        active_visible(accepted_total=30, accepted_dup_rate=70),
        # thin images: 40 served, 5% coverage
        active_visible(served=40, image_coverage=5),
        # long rate-limit rest: 2 days out
        active_visible(retry_after_at="2026-06-11 12:00:00"),
    ]

    raised = queries._attention(content, sources, 0, now=now)
    texts = " | ".join(entry["text"] for entry in raised)

    expected_fragments = (
        "haven't updated in over 10 days",
        "accepting under 25%",
        "mostly duplicating",
        "thin image coverage",
        "rate-limited for 12h+",
    )
    for fragment in expected_fragments:
        assert fragment in texts
def test_attention_quiet_below_thresholds():
    """No attention items for fresh, low-volume or paused sources."""
    from datetime import UTC, datetime
    from goodnews import queries

    def visible(status, **extra):
        # Visible source with the given status plus rule-specific fields.
        return {"status": status, "content_visible": 1, **extra}

    now = datetime(2026, 6, 9, 12, 0, 0, tzinfo=UTC)
    content = {"served": 100, "with_image": 95, "latest_brief_size": 7}
    sources = [
        visible("active", last_success_at="2026-06-08 00:00:00"),  # 1 day — fresh
        visible("active", total_articles=5, acceptance_rate=10),  # too little volume
        visible("active", accepted_total=3, accepted_dup_rate=90),  # too little volume
        visible("active", served=5, image_coverage=0),  # too little volume
        visible("paused", last_success_at="2020-01-01 00:00:00"),  # paused → ignored
    ]

    raised = queries._attention(content, sources, 0, now=now)

    assert raised == []
def test_attention_ignores_rate_limit_on_paused_or_retired():
    """A future retry_after_at on a paused or retired source raises no attention item."""
    from datetime import UTC, datetime
    from goodnews import queries

    now = datetime(2026, 6, 9, 12, 0, 0, tzinfo=UTC)
    content = {"served": 100, "with_image": 95, "latest_brief_size": 7}
    rest_until = "2026-06-12 00:00:00"
    sources = [
        {"status": status, "retry_after_at": rest_until}
        for status in ("paused", "retired")  # paused: intentionally not polling
    ]

    raised = queries._attention(content, sources, 0, now=now)

    # neither should nag about a rate-limit rest
    assert raised == []
def test_source_health_paywall_and_image_coverage(tmp_path):
    """source_health() exposes a per-source `paywalled` flag and `image_coverage` percentage.

    Two active sources are created, one whose homepage is on nature.com and
    one on goodsite.org. Each gets a single accepted article; only the first
    article carries an image URL.
    """
    # Uses the module-level connect/init_db/queries imports, like the other
    # DB-backed tests in this file.
    c = connect(str(tmp_path / "t.db"))
    init_db(c)

    # a paywalled-domain source and a free one, each with a served article
    c.execute("INSERT INTO sources (id,name,feed_url,homepage_url,active) VALUES (1,'NS','http://x/f','https://www.nature.com',1)")
    c.execute("INSERT INTO sources (id,name,feed_url,homepage_url,active) VALUES (2,'Free','http://y/f','https://goodsite.org',1)")
    for aid, sid, img in [(1, 1, 'http://i/1.jpg'), (2, 2, None)]:
        c.execute(
            "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url) VALUES (?,?,?,?,?,?)",
            (aid, sid, f'http://u/{aid}', f't{aid}', f'h{aid}', img),
        )
        c.execute("INSERT INTO article_scores (article_id,accepted) VALUES (?,1)", (aid,))
    c.commit()

    sh = {s["id"]: s for s in queries.source_health(c)}

    # One assertion per expectation so a failure names the exact source/field.
    assert sh[1]["paywalled"] is True
    assert sh[2]["paywalled"] is False
    assert sh[1]["image_coverage"] == 100
    assert sh[2]["image_coverage"] == 0