"""Tests for ``goodnews.localtime`` and the dashboard helpers in ``goodnews.queries``.

Covers the site time-zone lookup, ``content_stats``, ``source_health`` and the
``_attention`` item builder.
"""

from datetime import UTC, datetime

from goodnews import localtime, queries
from goodnews.db import connect, init_db

# Fixed "current time" handed to queries._attention so the date-based checks
# below are deterministic.
_NOW = datetime(2026, 6, 9, 12, 0, 0, tzinfo=UTC)


def _healthy_content():
    """Return a fresh content-stats dict used by tests that expect no site-wide items."""
    return {"served": 100, "with_image": 95, "latest_brief_size": 7}


def _fresh_db(tmp_path):
    """Open a new database file under *tmp_path* and run ``init_db`` on it."""
    conn = connect(str(tmp_path / "t.db"))
    init_db(conn)
    return conn


def _seed(c):
    """Insert two sources plus three scored articles for source 1 and commit.

    Source 1 ("Good") owns articles 1-3: articles 1 and 3 are accepted, article 2
    is rejected, and article 3 is marked as a duplicate of article 1.
    Source 2 ("Flaky") has 5 consecutive failures and a 60-minute poll interval.
    """
    c.execute(
        "INSERT INTO sources (id,name,feed_url,active,default_category) "
        "VALUES (1,'Good','http://s/1',1,'science')"
    )
    c.execute(
        "INSERT INTO sources (id,name,feed_url,active,consecutive_failures,poll_interval_minutes) "
        "VALUES (2,'Flaky','http://s/2',1,5,60)"
    )
    # source 1: two accepted (one duplicate → not served), one rejected
    for aid, dup in [(1, None), (2, None), (3, 1)]:
        c.execute(
            "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,duplicate_of,published_at) "
            "VALUES (?,1,?,?,?,?,datetime('now'))",
            (aid, f"u{aid}", f"T{aid}", f"h{aid}", dup),
        )
    c.execute("INSERT INTO article_scores (article_id,accepted) VALUES (1,1)")
    c.execute("INSERT INTO article_scores (article_id,accepted) VALUES (2,0)")
    c.execute("INSERT INTO article_scores (article_id,accepted) VALUES (3,1)")  # duplicate → excluded
    c.commit()


def test_site_tz_from_env_and_fallback(monkeypatch):
    """GOODNEWS_TZ selects the site zone; an unknown zone name falls back to UTC."""
    monkeypatch.setenv("GOODNEWS_TZ", "America/New_York")
    assert localtime.site_tz().key == "America/New_York"
    assert localtime.local_now().utcoffset() is not None  # tz-aware
    # A bogus zone must not crash — fall back to UTC.
    monkeypatch.setenv("GOODNEWS_TZ", "Not/AZone")
    assert localtime.site_tz().key == "UTC"


def test_content_stats_counts_served_excluding_duplicates(tmp_path):
    """content_stats reports served/total/rejected/active_sources for the seeded data."""
    c = _fresh_db(tmp_path)
    _seed(c)
    cs = queries.content_stats(c)
    assert cs["served"] == 1  # only article 1 (2 is rejected, 3 is a duplicate)
    assert cs["total"] == 3
    assert cs["rejected"] == 1
    assert cs["active_sources"] == 2


def test_source_health_orders_failing_first_and_computes_next_due(tmp_path):
    """source_health lists the failing source first and fills in next_due_at after an attempt."""
    c = _fresh_db(tmp_path)
    _seed(c)
    c.execute(
        "INSERT INTO ingest_runs (source_id, finished_at, status) "
        "VALUES (2, datetime('now','-30 minutes'), 'failed')"
    )
    c.commit()
    sh = queries.source_health(c)
    names = [s["name"] for s in sh]
    assert names[0] == "Flaky"  # failing source floats to top
    flaky = next(s for s in sh if s["name"] == "Flaky")
    assert flaky["failures"] == 5
    # next_due = last_attempt + 60*(1+5)=360 min; attempt was 30m ago → still resting
    assert flaky["next_due_at"] is not None
    good = next(s for s in sh if s["name"] == "Good")
    assert good["served"] == 1
    assert good["next_due_at"] is None  # never attempted → due now
    # source 1: 2 accepted (art 1, art 3) but art 3 is a duplicate → 50% of accepted were dupes
    assert good["accepted_dup_rate"] == 50


def test_attention_flags_resting_flagged_and_brief():
    """_attention reports backing-off sources, flagged sources, a short brief and unread feedback."""
    content = {"served": 100, "with_image": 90, "latest_brief_size": 5}
    sources = [
        {"active": 1, "failures": 3, "review_flag": 0},
        {"active": 1, "failures": 0, "review_flag": 1},
        {"active": 1, "failures": 0, "review_flag": 0},
    ]
    items = queries._attention(content, sources, feedback_unread=2)
    texts = " | ".join(i["text"] for i in items)
    assert "1 source backing off" in texts  # one resting
    assert "1 source flagged" in texts  # one flagged
    assert "brief has only 5" in texts  # brief < 7
    assert "2 unread feedback" in texts  # unread feedback
    # 90/100 = 90% image coverage → no coverage warning
    assert "Image coverage" not in texts


def test_attention_clear_when_healthy():
    """_attention returns an empty list when nothing needs attention."""
    content = _healthy_content()
    sources = [{"failures": 0, "review_flag": 0}]
    assert queries._attention(content, sources, feedback_unread=0) == []


def test_attention_richer_items_fire_and_are_quiet_below_threshold():
    """Each per-source condition (stale, rejecting, duplicating, thin images, rate-limited) fires."""
    now = _NOW
    content = _healthy_content()  # no site-wide items
    sources = [
        # stale: active, visible, last success 20 days ago
        {"status": "active", "content_visible": 1, "last_success_at": "2026-05-20 00:00:00"},
        # high rejection: 40 ingested, 10% acceptance
        {"status": "active", "content_visible": 1, "total_articles": 40, "acceptance_rate": 10},
        # high duplicate: 30 accepted, 70% accepted-dup
        {"status": "active", "content_visible": 1, "accepted_total": 30, "accepted_dup_rate": 70},
        # thin images: 40 served, 5% coverage
        {"status": "active", "content_visible": 1, "served": 40, "image_coverage": 5},
        # long rate-limit rest: 2 days out
        {"status": "active", "content_visible": 1, "retry_after_at": "2026-06-11 12:00:00"},
    ]
    texts = " | ".join(i["text"] for i in queries._attention(content, sources, 0, now=now))
    assert "haven't updated in over 10 days" in texts
    assert "accepting under 25%" in texts
    assert "mostly duplicating" in texts
    assert "thin image coverage" in texts
    assert "rate-limited for 12h+" in texts


def test_attention_quiet_below_thresholds():
    """_attention stays silent for fresh, low-volume or paused sources."""
    now = _NOW
    content = _healthy_content()
    sources = [
        {"status": "active", "content_visible": 1, "last_success_at": "2026-06-08 00:00:00"},  # 1 day — fresh
        {"status": "active", "content_visible": 1, "total_articles": 5, "acceptance_rate": 10},  # too little volume
        {"status": "active", "content_visible": 1, "accepted_total": 3, "accepted_dup_rate": 90},  # too little volume
        {"status": "active", "content_visible": 1, "served": 5, "image_coverage": 0},  # too little volume
        {"status": "paused", "content_visible": 1, "last_success_at": "2020-01-01 00:00:00"},  # paused → ignored
    ]
    assert queries._attention(content, sources, 0, now=now) == []


def test_attention_ignores_rate_limit_on_paused_or_retired():
    """A pending retry_after_at on a paused or retired source produces no attention item."""
    now = _NOW
    content = _healthy_content()
    sources = [
        {"status": "paused", "retry_after_at": "2026-06-12 00:00:00"},  # intentionally not polling
        {"status": "retired", "retry_after_at": "2026-06-12 00:00:00"},
    ]
    # neither should nag about a rate-limit rest
    assert queries._attention(content, sources, 0, now=now) == []


def test_source_health_paywall_and_image_coverage(tmp_path):
    """source_health exposes per-source ``paywalled`` and ``image_coverage`` values."""
    c = _fresh_db(tmp_path)
    # a paywalled-domain source and a free one, each with a served article
    c.execute(
        "INSERT INTO sources (id,name,feed_url,homepage_url,active) "
        "VALUES (1,'NS','http://x/f','https://www.nature.com',1)"
    )
    c.execute(
        "INSERT INTO sources (id,name,feed_url,homepage_url,active) "
        "VALUES (2,'Free','http://y/f','https://goodsite.org',1)"
    )
    for aid, sid, img in [(1, 1, 'http://i/1.jpg'), (2, 2, None)]:
        c.execute(
            "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url) VALUES (?,?,?,?,?,?)",
            (aid, sid, f'http://u/{aid}', f't{aid}', f'h{aid}', img),
        )
        c.execute("INSERT INTO article_scores (article_id,accepted) VALUES (?,1)", (aid,))
    c.commit()
    sh = {s["id"]: s for s in queries.source_health(c)}
    assert sh[1]["paywalled"] is True
    assert sh[2]["paywalled"] is False
    assert sh[1]["image_coverage"] == 100
    assert sh[2]["image_coverage"] == 0