Small joys backend: shared daily framework + On This Day engine

- goodnews/daily.py: shared helpers for the daily "small joys" (http_json, date-seeded
  deterministic pick, dedup key) so each joy is a small self-contained module.
- goodnews/onthisday.py: harvest today's MM-DD from Wikimedia's On-this-day feed →
  tone-filter to good/neutral (keyword floor + optional LLM refine) → pool → deterministic
  daily pick (idempotent, respects blocked/featured) → cached row. All network/LLM calls
  happen before any DB write. Multi-source ready (source column).
- db.py: onthisday_pool + daily_onthisday tables.
- api.py: GET /api/onthisday/today (edge-cacheable).
- cli.py: cycle step (run after Daily Art; --no-joys to skip), LLM client for tone refine.
- tests/test_onthisday.py: 7 tests (filter+dedup, pick idempotent, blocked/featured,
  never-empty, empty-pool, LLM-narrow). 382 backend tests green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-22 16:51:29 -04:00
parent 4739d87f4b
commit a7da8362ab
6 changed files with 351 additions and 2 deletions
+78
View File
@@ -0,0 +1,78 @@
"""On This Day: harvest tone-filters + dedupes, deterministic daily pick (idempotent,
respects blocked/featured), never-empty get_today, empty-pool pick returns None,
LLM refine narrows the tone filter."""
import pytest
from goodnews import onthisday
from goodnews.db import connect, init_db
# Canned feed for md '06-22': three good/neutral entries plus one clearly negative
# entry (1941) that the tests expect the keyword floor to drop.
FAKE = [
    {
        "md": "06-22",
        "year": 1611,
        "text": "The first public art museum opens its doors.",
        "summary": "A milestone for public culture.",
        "image_url": "i1",
        "page_url": "u1",
    },
    {
        "md": "06-22",
        "year": 1990,
        "text": "A new species of butterfly is discovered.",
        "summary": None,
        "image_url": None,
        "page_url": "u2",
    },
    {
        # 'invad' + 'kill' -> dropped
        "md": "06-22",
        "year": 1941,
        "text": "An army invades a neighbour, killing thousands.",
        "summary": None,
        "image_url": None,
        "page_url": "u3",
    },
    {
        "md": "06-22",
        "year": 1868,
        "text": "An inventor patents a joyful new musical instrument.",
        "summary": "s",
        "image_url": None,
        "page_url": "u4",
    },
]
@pytest.fixture
def conn(monkeypatch):
    """Yield an initialised ":memory:" connection with the event fetch stubbed.

    ``onthisday._fetch_events`` is patched to return fresh copies of the
    ``FAKE`` entries whose ``md`` equals the requested one. The connection is
    closed once the test is done with it.
    """

    def fake_fetch(md):
        # Copy each entry so a test can never mutate the shared FAKE data.
        return [dict(event) for event in FAKE if event["md"] == md]

    monkeypatch.setattr(onthisday, "_fetch_events", fake_fetch)

    db = connect(":memory:")
    init_db(db)
    yield db
    db.close()
def test_harvest_filters_negatives_and_dedupes(conn):
    """Harvest keeps 3 of the 4 fake events; harvesting again adds nothing."""
    first = onthisday.harvest(conn, "06-22")
    # The invasion entry is the one that gets dropped.
    assert first["fetched"] == 4
    assert first["kept"] == 3
    assert first["added"] == 3

    # Idempotent (content key): a repeat harvest inserts no new rows.
    second = onthisday.harvest(conn, "06-22")
    assert second["added"] == 0

    rows = conn.execute("SELECT text FROM onthisday_pool").fetchall()
    stored_texts = [row[0] for row in rows]
    assert all("invades" not in text for text in stored_texts)
def test_pick_caches_marks_shown_and_idempotent(conn):
    """A pick stamps ``shown_at`` on its pool row and repeats for the same date."""
    onthisday.harvest(conn, "06-22")

    picked = onthisday.pick_daily(conn, feature_date="2026-06-22")
    assert picked
    assert picked["md"] == "06-22"
    assert picked["year"] in (1611, 1990, 1868)

    shown_row = conn.execute(
        "SELECT shown_at FROM onthisday_pool WHERE id=?",
        (picked["pool_id"],),
    ).fetchone()
    assert shown_row[0] == "2026-06-22"

    # Picking again for the same date must leave the choice unchanged.
    repeat = onthisday.pick_daily(conn, feature_date="2026-06-22")
    assert repeat["pool_id"] == picked["pool_id"]
def test_blocked_never_picked(conn):
    """With every row but the 1990 one blocked, the pick must be 1990."""
    onthisday.harvest(conn, "06-22")

    # Only the butterfly entry is left unblocked.
    conn.execute("UPDATE onthisday_pool SET blocked=1 WHERE year!=1990")
    conn.commit()

    picked = onthisday.pick_daily(conn, feature_date="2026-06-22")
    assert picked["year"] == 1990
def test_featured_is_pinned(conn):
    """A row flagged ``featured`` is the one returned by a forced pick."""
    onthisday.harvest(conn, "06-22")

    conn.execute("UPDATE onthisday_pool SET featured=1 WHERE year=1868")
    conn.commit()

    # The featured row wins regardless of the date seed.
    picked = onthisday.pick_daily(conn, feature_date="2026-06-22", force=True)
    assert picked["year"] == 1868
def test_get_today_never_empty(conn):
    """Asking for a date that was never picked still returns the earlier pick."""
    onthisday.harvest(conn, "06-22")
    picked = onthisday.pick_daily(conn, feature_date="2026-06-22")

    # No pick exists for 2099-01-01, so get_today falls back to the latest one.
    fallback = onthisday.get_today(conn, "2099-01-01")
    assert fallback["pool_id"] == picked["pool_id"]
def test_pick_returns_none_when_pool_empty(conn):
    """Without a prior harvest there is nothing to pick, so the result is None."""
    result = onthisday.pick_daily(conn, feature_date="2026-03-03")
    assert result is None
def test_tone_filter_llm_narrows(conn):
    """A client answering ``{"keep": [0]}`` narrows the filtered list to one event."""

    class FakeClient:
        """Stand-in LLM client whose reply always keeps only index 0."""

        def chat_text(self, messages):
            # Keep only the first of whatever candidates it is handed.
            return 'sure: {"keep": [0]}'

    candidates = [dict(event) for event in FAKE]
    kept = onthisday._tone_filter(candidates, client=FakeClient())

    # The keyword floor drops the invasion (3 remain), then the LLM narrows to 1.
    assert len(kept) == 1
    assert kept[0]["year"] == 1611