"""Word of the Day: LLM-proposed words validated against the dictionary (mocked), harvest dedupes + drops unknowns, audio cached when present, deterministic pick.""" import json import pytest from goodnews import wotd from goodnews.db import connect, init_db FAKE_DICT = { "serene": {"word": "serene", "part_of_speech": "adjective", "phonetic": "/səˈriːn/", "audio_url": "https://a/serene.mp3", "definition": "calm, peaceful, and untroubled", "examples": ["a serene mountain lake"]}, "dawn": {"word": "dawn", "part_of_speech": "noun", "phonetic": "/dɔːn/", "audio_url": None, "definition": "the first appearance of light in the sky", "examples": []}, } class FakeClient: def chat_text(self, messages): return '{"words": ["serene", "dawn", "xyzzyq"]}' # xyzzyq isn't a real word @pytest.fixture def conn(tmp_path, monkeypatch): monkeypatch.setenv("GOODNEWS_WOTD_AUDIO", str(tmp_path / "audio")) monkeypatch.setattr(wotd, "_lookup", lambda w, prefer_pos=None: FAKE_DICT.get(w)) # no dictionary network monkeypatch.setattr(wotd, "_cache_audio", lambda url, word: f"{word}.mp3" if url else None) c = connect(":memory:"); init_db(c) yield c c.close() def test_harvest_validates_dedupes_and_caches_audio(conn): r = wotd.harvest(conn, FakeClient()) assert r["added"] == 2 # serene + dawn; the nonsense word dropped assert wotd.harvest(conn, FakeClient())["added"] == 0 # idempotent (word is UNIQUE) assert conn.execute("SELECT audio_file FROM wotd_pool WHERE word='serene'").fetchone()[0] == "serene.mp3" assert conn.execute("SELECT audio_file FROM wotd_pool WHERE word='dawn'").fetchone()[0] is None def test_pick_caches_marks_shown_idempotent(conn): wotd.harvest(conn, FakeClient()) a = wotd.pick_daily(conn, feature_date="2026-06-22") assert a and a["word"] in ("serene", "dawn") and a["definition"] assert json.loads(a["examples"]) == FAKE_DICT[a["word"]]["examples"] shown = conn.execute("SELECT shown_at FROM wotd_pool WHERE id=?", (a["pool_id"],)).fetchone()[0] assert shown == "2026-06-22" assert wotd.pick_daily(conn, feature_date="2026-06-22")["pool_id"] == a["pool_id"] def test_featured_pinned(conn): wotd.harvest(conn, FakeClient()) conn.execute("UPDATE wotd_pool SET featured=1 WHERE word='dawn'"); conn.commit() assert wotd.pick_daily(conn, feature_date="2026-06-22", force=True)["word"] == "dawn" def test_get_today_never_empty(conn): wotd.harvest(conn, FakeClient()) a = wotd.pick_daily(conn, feature_date="2026-06-22") assert wotd.get_today(conn, "2099-01-01")["pool_id"] == a["pool_id"] def test_run_daily_bootstraps(conn): r = wotd.run_daily(conn, client=FakeClient()) assert r["pool"] == 2 and r["picked"] in ("serene", "dawn") def test_lookup_prefers_intended_pos(monkeypatch): """When the LLM says 'serene' as an adjective, _lookup must pick the adjective sense, not an earlier archaic noun sense the dictionary lists first.""" entry = {"word": "serene", "phonetics": [], "meanings": [ {"partOfSpeech": "noun", "definitions": [{"definition": "Serenity; clearness; calmness."}]}, {"partOfSpeech": "adjective", "definitions": [{"definition": "Calm, peaceful, untroubled."}]}, ]} monkeypatch.setattr(wotd.daily, "http_json", lambda url, timeout=20: [entry]) assert wotd._lookup("serene", "adjective")["part_of_speech"] == "adjective" assert wotd._lookup("serene", "adjective")["definition"] == "Calm, peaceful, untroubled." assert wotd._lookup("serene")["part_of_speech"] == "noun" # no preference → first usable sense def test_propose_words_accepts_dicts_and_strings(): class C: def chat_text(self, m): return '{"words": [{"word": "Serene", "pos": "Adjective"}, "dawn", {"word": ""}]}' out = wotd._propose_words(C(), 3) assert out == [{"word": "serene", "pos": "adjective"}, {"word": "dawn", "pos": None}] def test_harvest_stores_polished_gloss_and_usage(conn, monkeypatch): monkeypatch.setattr(wotd, "_polish", lambda c, w, pos, d: {"gloss": f"{w}: plain.", "examples": [f"A {w} day."]}) wotd.harvest(conn, FakeClient()) row = conn.execute("SELECT gloss, usage FROM wotd_pool WHERE word='serene'").fetchone() assert row[0] == "serene: plain." and json.loads(row[1]) == ["A serene day."] def test_pick_lazy_polishes_older_words(conn, monkeypatch): monkeypatch.setattr(wotd, "_polish", lambda c, w, pos, d: None) # harvested before polish existed wotd.harvest(conn, FakeClient()) assert conn.execute("SELECT gloss FROM wotd_pool WHERE word='serene'").fetchone()[0] is None monkeypatch.setattr(wotd, "_polish", lambda c, w, pos, d: {"gloss": "calm and untroubled.", "examples": ["The lake was serene."]}) a = wotd.pick_daily(conn, feature_date="2026-06-22", client=FakeClient()) assert a["gloss"] == "calm and untroubled." and json.loads(a["usage"]) == ["The lake was serene."] # cached back to the pool so it's only generated once assert conn.execute("SELECT gloss FROM wotd_pool WHERE id=?", (a["pool_id"],)).fetchone()[0] == "calm and untroubled." def test_polish_trims_and_caps_two_examples(): class C: def chat_text(self, m): return ('sure: {"gloss": " Calm and peaceful. ", "examples": ' '["A serene lake.", "The serene night.", "A serene mood."]} done') out = wotd._polish(C(), "serene", "adjective", "x") assert out["gloss"] == "Calm and peaceful." and out["examples"] == ["A serene lake.", "The serene night."] def test_polish_returns_none_without_a_gloss(): class C: def chat_text(self, m): return '{"examples": ["A serene lake."]}' assert wotd._polish(C(), "serene", None, "x") is None def test_polish_drops_examples_that_dont_use_the_word(): # the word must appear (case-insensitive) — example here keeps only the matching one class C: def chat_text(self, m): return '{"gloss": "Calm.", "examples": ["It was quiet.", "A SERENE harbor."]}' out = wotd._polish(C(), "serene", "adjective", "x") assert out["examples"] == ["A SERENE harbor."] def test_polish_returns_none_when_no_example_uses_the_word(): class C: def chat_text(self, m): return '{"gloss": "A warm clear gloss.", "examples": ["Totally unrelated.", "Still nothing."]}' assert wotd._polish(C(), "serene", "adjective", "x") is None def test_polish_returns_none_with_empty_examples(): class C: def chat_text(self, m): return '{"gloss": "A warm clear gloss.", "examples": []}' assert wotd._polish(C(), "serene", "adjective", "x") is None def test_no_repeat_until_pool_exhausted(conn): """Same freshness guarantee as QOTD: every word featured once before any repeat, then the oldest-shown repeats first. (Regression for 'harmony' repeating after 3 days.)""" import datetime for w in ["alpha", "bravo", "charlie", "delta"]: conn.execute("INSERT INTO wotd_pool (word, definition) VALUES (?, 'a definition')", (w,)) conn.commit() n = 4 d0 = datetime.date(2026, 2, 1) picks = [wotd.pick_daily(conn, feature_date=(d0 + datetime.timedelta(days=i)).isoformat())["pool_id"] for i in range(n)] assert len(set(picks)) == n nxt = (d0 + datetime.timedelta(days=n)).isoformat() assert wotd.pick_daily(conn, feature_date=nxt)["pool_id"] == picks[0]