"""On This Day: harvest tone-filters + dedupes, deterministic daily pick (idempotent, respects blocked/featured), never-empty get_today, run_daily bootstraps.""" import pytest from goodnews import onthisday from goodnews.db import connect, init_db # md '06-22': three good/neutral + one clearly negative (dropped by the keyword floor). FAKE = [ {"md": "06-22", "year": 1611, "text": "The first public art museum opens its doors.", "summary": "A milestone for public culture.", "image_url": "i1", "page_url": "u1"}, {"md": "06-22", "year": 1990, "text": "A new species of butterfly is discovered.", "summary": None, "image_url": None, "page_url": "u2"}, {"md": "06-22", "year": 1941, "text": "An army invades a neighbour, killing thousands.", "summary": None, "image_url": None, "page_url": "u3"}, # 'invad' + 'kill' -> dropped {"md": "06-22", "year": 1868, "text": "An inventor patents a joyful new musical instrument.", "summary": "s", "image_url": None, "page_url": "u4"}, ] @pytest.fixture def conn(monkeypatch): monkeypatch.setattr(onthisday, "_fetch_events", lambda md: [dict(e) for e in FAKE if e["md"] == md]) c = connect(":memory:"); init_db(c) yield c c.close() def test_harvest_filters_negatives_and_dedupes(conn): r = onthisday.harvest(conn, "06-22") assert r["fetched"] == 4 and r["kept"] == 3 and r["added"] == 3 # the invasion dropped assert onthisday.harvest(conn, "06-22")["added"] == 0 # idempotent (content key) texts = [row[0] for row in conn.execute("SELECT text FROM onthisday_pool").fetchall()] assert not any("invades" in t for t in texts) def test_pick_caches_marks_shown_and_idempotent(conn): onthisday.harvest(conn, "06-22") a = onthisday.pick_daily(conn, feature_date="2026-06-22") assert a and a["md"] == "06-22" and a["year"] in (1611, 1990, 1868) shown = conn.execute("SELECT shown_at FROM onthisday_pool WHERE id=?", (a["pool_id"],)).fetchone()[0] assert shown == "2026-06-22" assert onthisday.pick_daily(conn, feature_date="2026-06-22")["pool_id"] == a["pool_id"] # unchanged def test_blocked_never_picked(conn): onthisday.harvest(conn, "06-22") conn.execute("UPDATE onthisday_pool SET blocked=1 WHERE year!=1990") # only the butterfly left conn.commit() a = onthisday.pick_daily(conn, feature_date="2026-06-22") assert a["year"] == 1990 def test_featured_is_pinned(conn): onthisday.harvest(conn, "06-22") conn.execute("UPDATE onthisday_pool SET featured=1 WHERE year=1868") conn.commit() # featured wins regardless of the date seed assert onthisday.pick_daily(conn, feature_date="2026-06-22", force=True)["year"] == 1868 def test_get_today_never_empty(conn): onthisday.harvest(conn, "06-22") a = onthisday.pick_daily(conn, feature_date="2026-06-22") assert onthisday.get_today(conn, "2099-01-01")["pool_id"] == a["pool_id"] # falls back to latest def test_pick_returns_none_when_pool_empty(conn): assert onthisday.pick_daily(conn, feature_date="2026-03-03") is None # nothing harvested def test_tone_filter_llm_narrows(conn): class FakeClient: def chat_text(self, messages): return 'sure: {"keep": [0]}' # keep only the first of whatever it's given kept = onthisday._tone_filter([dict(e) for e in FAKE], client=FakeClient()) # keyword floor drops the invasion (3 remain), then the LLM narrows to 1 assert len(kept) == 1 and kept[0]["year"] == 1611 def test_best_image_prefers_original_over_thumbnail(): # the 330px thumbnail upscales (blurry); originalimage is reliably sharp → prefer it page = { "thumbnail": {"source": "https://x/thumb/Foo.jpg/330px-Foo.jpg", "width": 330}, "originalimage": {"source": "https://x/thumb/Foo.jpg/3840px-Foo.jpg", "width": 7000}, } assert onthisday._best_image(page) == "https://x/thumb/Foo.jpg/3840px-Foo.jpg" # falls back to the thumbnail when there's no originalimage, and to None when neither exists assert onthisday._best_image({"thumbnail": {"source": "https://x/330px-Foo.jpg"}}) == "https://x/330px-Foo.jpg" assert onthisday._best_image({}) is None