0ae789752e
Both selectors ordered candidates least-recently-shown, then daily.seeded_order() ROTATED the whole list and took [0] — an arbitrary date-hashed item, undoing the ordering. Result: repeats (quote id 2 on 6/28+6/29; word "harmony" on 6/25+6/28), no guarantee a pool item is shown before it recurs. Fix: daily.freshest(rows) returns the freshest cohort only — every NEVER-shown item while any remain, else the oldest-shown group. quote/wotd _candidates use it; seeded_order now picks deterministically WITHIN that cohort. So every pool item is featured once before any repeat, then cycles oldest-first. Dropped the unused _NO_REPEAT_POOL window. Tests: no-repeat-until-exhausted (quote + wotd) + a freshest() unit test. 428 backend tests green. (Separate follow-up: expand the QOTD pool from 16 → 90+ vetted public-domain quotes for a longer no-repeat window.) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
161 lines
7.4 KiB
Python
161 lines
7.4 KiB
Python
"""Word of the Day: LLM-proposed words validated against the dictionary (mocked),
|
||
harvest dedupes + drops unknowns, audio cached when present, deterministic pick."""
|
||
import json
|
||
|
||
import pytest
|
||
|
||
from goodnews import wotd
|
||
from goodnews.db import connect, init_db
|
||
|
||
# Canned dictionary entries keyed by word. The `conn` fixture serves these in
# place of `wotd._lookup`, so any word without a key here resolves to None.
FAKE_DICT = {
    # Entry with an audio URL and one example sentence.
    "serene": {
        "word": "serene",
        "part_of_speech": "adjective",
        "phonetic": "/səˈriːn/",
        "audio_url": "https://a/serene.mp3",
        "definition": "calm, peaceful, and untroubled",
        "examples": ["a serene mountain lake"],
    },
    # Entry with no audio URL and no examples.
    "dawn": {
        "word": "dawn",
        "part_of_speech": "noun",
        "phonetic": "/dɔːn/",
        "audio_url": None,
        "definition": "the first appearance of light in the sky",
        "examples": [],
    },
}
|
||
|
||
|
||
class FakeClient:
    """Stand-in LLM client that always answers with the same three proposed words."""

    def chat_text(self, messages):
        """Return a fixed JSON reply; ``messages`` is ignored."""
        # xyzzyq isn't a real word (and has no FAKE_DICT entry).
        canned_reply = '{"words": ["serene", "dawn", "xyzzyq"]}'
        return canned_reply
|
||
|
||
|
||
@pytest.fixture
def conn(tmp_path, monkeypatch):
    """Yield an initialised ``:memory:`` connection with wotd's lookup and audio helpers stubbed.

    The connection is closed once the test that requested it has finished.
    """
    audio_dir = tmp_path / "audio"
    monkeypatch.setenv("GOODNEWS_WOTD_AUDIO", str(audio_dir))

    def fake_lookup(w, prefer_pos=None):
        # no dictionary network: answer from the canned entries only
        return FAKE_DICT.get(w)

    def fake_cache_audio(url, word):
        # Pretend the download succeeded whenever a URL was supplied.
        if url:
            return f"{word}.mp3"
        return None

    monkeypatch.setattr(wotd, "_lookup", fake_lookup)
    monkeypatch.setattr(wotd, "_cache_audio", fake_cache_audio)

    db = connect(":memory:")
    init_db(db)
    yield db
    db.close()
|
||
|
||
|
||
def test_harvest_validates_dedupes_and_caches_audio(conn):
    """Harvest keeps only known words, adds nothing on a re-run, and records audio only when a URL exists."""
    first_run = wotd.harvest(conn, FakeClient())
    assert first_run["added"] == 2  # serene + dawn; the nonsense word dropped

    second_run = wotd.harvest(conn, FakeClient())
    assert second_run["added"] == 0  # idempotent (word is UNIQUE)

    serene_row = conn.execute("SELECT audio_file FROM wotd_pool WHERE word='serene'").fetchone()
    assert serene_row[0] == "serene.mp3"

    dawn_row = conn.execute("SELECT audio_file FROM wotd_pool WHERE word='dawn'").fetchone()
    assert dawn_row[0] is None
|
||
|
||
|
||
def test_pick_caches_marks_shown_idempotent(conn):
    """A daily pick carries the word's data, stamps shown_at, and is stable for the same date."""
    wotd.harvest(conn, FakeClient())
    day = "2026-06-22"

    picked = wotd.pick_daily(conn, feature_date=day)
    assert picked
    assert picked["word"] in ("serene", "dawn")
    assert picked["definition"]

    # Examples are stored JSON-encoded on the pick.
    stored_examples = json.loads(picked["examples"])
    assert stored_examples == FAKE_DICT[picked["word"]]["examples"]

    shown_row = conn.execute("SELECT shown_at FROM wotd_pool WHERE id=?", (picked["pool_id"],)).fetchone()
    assert shown_row[0] == "2026-06-22"

    # Asking again for the same date must return the same pool entry.
    repeat = wotd.pick_daily(conn, feature_date=day)
    assert repeat["pool_id"] == picked["pool_id"]
|
||
|
||
|
||
def test_featured_pinned(conn):
    """A word flagged featured=1 is the one a forced pick returns."""
    wotd.harvest(conn, FakeClient())

    conn.execute("UPDATE wotd_pool SET featured=1 WHERE word='dawn'")
    conn.commit()

    forced = wotd.pick_daily(conn, feature_date="2026-06-22", force=True)
    assert forced["word"] == "dawn"
|
||
|
||
|
||
def test_get_today_never_empty(conn):
    """get_today for a date that was never picked here still returns the earlier pick."""
    wotd.harvest(conn, FakeClient())
    earlier = wotd.pick_daily(conn, feature_date="2026-06-22")

    today = wotd.get_today(conn, "2099-01-01")
    assert today["pool_id"] == earlier["pool_id"]
|
||
|
||
|
||
def test_run_daily_bootstraps(conn):
    """run_daily on a freshly initialised database reports a pool of two and a picked word."""
    result = wotd.run_daily(conn, client=FakeClient())
    assert result["pool"] == 2
    assert result["picked"] in ("serene", "dawn")
|
||
|
||
|
||
def test_lookup_prefers_intended_pos(monkeypatch):
    """_lookup honours the requested part of speech.

    When the LLM says 'serene' as an adjective, the adjective sense must win over
    an earlier archaic noun sense that the dictionary lists first.
    """
    noun_sense = {"partOfSpeech": "noun", "definitions": [{"definition": "Serenity; clearness; calmness."}]}
    adjective_sense = {"partOfSpeech": "adjective", "definitions": [{"definition": "Calm, peaceful, untroubled."}]}
    entry = {"word": "serene", "phonetics": [], "meanings": [noun_sense, adjective_sense]}

    def fake_http_json(url, timeout=20):
        # Every request gets the same single-entry payload.
        return [entry]

    monkeypatch.setattr(wotd.daily, "http_json", fake_http_json)

    by_pos = wotd._lookup("serene", "adjective")
    assert by_pos["part_of_speech"] == "adjective"

    by_definition = wotd._lookup("serene", "adjective")
    assert by_definition["definition"] == "Calm, peaceful, untroubled."

    # no preference → first usable sense
    unconstrained = wotd._lookup("serene")
    assert unconstrained["part_of_speech"] == "noun"
|
||
|
||
|
||
def test_propose_words_accepts_dicts_and_strings():
    """_propose_words accepts dict and bare-string items, lower-cases them, and drops empty words."""

    class MixedReplyClient:
        def chat_text(self, messages):
            reply = '{"words": [{"word": "Serene", "pos": "Adjective"}, "dawn", {"word": ""}]}'
            return reply

    proposed = wotd._propose_words(MixedReplyClient(), 3)

    expected = [
        {"word": "serene", "pos": "adjective"},
        {"word": "dawn", "pos": None},
    ]
    assert proposed == expected
|
||
|
||
|
||
def test_harvest_stores_polished_gloss_and_usage(conn, monkeypatch):
    """Harvest writes the polished gloss and JSON-encoded usage examples to the pool row."""

    def fake_polish(c, w, pos, d):
        return {"gloss": f"{w}: plain.", "examples": [f"A {w} day."]}

    monkeypatch.setattr(wotd, "_polish", fake_polish)
    wotd.harvest(conn, FakeClient())

    gloss, usage = conn.execute("SELECT gloss, usage FROM wotd_pool WHERE word='serene'").fetchone()
    assert gloss == "serene: plain."
    assert json.loads(usage) == ["A serene day."]
|
||
|
||
|
||
def test_pick_lazy_polishes_older_words(conn, monkeypatch):
    """A word harvested without a gloss is polished at pick time and the result is written back."""

    def polish_unavailable(c, w, pos, d):
        # harvested before polish existed
        return None

    monkeypatch.setattr(wotd, "_polish", polish_unavailable)
    wotd.harvest(conn, FakeClient())
    unpolished = conn.execute("SELECT gloss FROM wotd_pool WHERE word='serene'").fetchone()
    assert unpolished[0] is None

    def polish_available(c, w, pos, d):
        return {"gloss": "calm and untroubled.", "examples": ["The lake was serene."]}

    monkeypatch.setattr(wotd, "_polish", polish_available)
    picked = wotd.pick_daily(conn, feature_date="2026-06-22", client=FakeClient())
    assert picked["gloss"] == "calm and untroubled."
    assert json.loads(picked["usage"]) == ["The lake was serene."]

    # cached back to the pool so it's only generated once
    cached = conn.execute("SELECT gloss FROM wotd_pool WHERE id=?", (picked["pool_id"],)).fetchone()
    assert cached[0] == "calm and untroubled."
|
||
|
||
|
||
def test_polish_trims_and_caps_two_examples():
    """_polish strips the gloss and keeps the first two of three examples, despite text around the JSON."""

    class ChattyClient:
        def chat_text(self, messages):
            # The JSON object is wrapped in extra words before and after it.
            reply = (
                'sure: {"gloss": " Calm and peaceful. ", "examples": '
                '["A serene lake.", "The serene night.", "A serene mood."]} done'
            )
            return reply

    polished = wotd._polish(ChattyClient(), "serene", "adjective", "x")
    assert polished["gloss"] == "Calm and peaceful."
    assert polished["examples"] == ["A serene lake.", "The serene night."]
|
||
|
||
|
||
def test_polish_returns_none_without_a_gloss():
    """_polish returns None when the reply has examples but no gloss key."""

    class GlosslessClient:
        def chat_text(self, messages):
            reply = '{"examples": ["A serene lake."]}'
            return reply

    polished = wotd._polish(GlosslessClient(), "serene", None, "x")
    assert polished is None
|
||
|
||
|
||
def test_polish_drops_examples_that_dont_use_the_word():
    """_polish keeps only the examples that contain the word, matched case-insensitively."""

    # Of the two examples below, only the second mentions the word (in upper case).
    class PartlyOffTopicClient:
        def chat_text(self, messages):
            reply = '{"gloss": "Calm.", "examples": ["It was quiet.", "A SERENE harbor."]}'
            return reply

    polished = wotd._polish(PartlyOffTopicClient(), "serene", "adjective", "x")
    assert polished["examples"] == ["A SERENE harbor."]
|
||
|
||
|
||
def test_polish_returns_none_when_no_example_uses_the_word():
    """_polish returns None when a gloss is present but no example contains the word."""

    class OffTopicClient:
        def chat_text(self, messages):
            reply = '{"gloss": "A warm clear gloss.", "examples": ["Totally unrelated.", "Still nothing."]}'
            return reply

    polished = wotd._polish(OffTopicClient(), "serene", "adjective", "x")
    assert polished is None
|
||
|
||
|
||
def test_polish_returns_none_with_empty_examples():
    """_polish returns None when a gloss is present but the examples list is empty."""

    class NoExamplesClient:
        def chat_text(self, messages):
            reply = '{"gloss": "A warm clear gloss.", "examples": []}'
            return reply

    polished = wotd._polish(NoExamplesClient(), "serene", "adjective", "x")
    assert polished is None
|
||
|
||
|
||
def test_no_repeat_until_pool_exhausted(conn):
    """Every pool word is featured once before any repeats; then the oldest-shown word comes back first.

    Same freshness guarantee as QOTD. (Regression for 'harmony' repeating after 3 days.)
    """
    import datetime

    words = ["alpha", "bravo", "charlie", "delta"]
    for w in words:
        conn.execute("INSERT INTO wotd_pool (word, definition) VALUES (?, 'a definition')", (w,))
    conn.commit()

    pool_size = len(words)
    first_day = datetime.date(2026, 2, 1)

    def iso_day(offset):
        # ISO date string `offset` days after the first feature date.
        return (first_day + datetime.timedelta(days=offset)).isoformat()

    # One pick per consecutive day, exactly as many days as there are words.
    picks = []
    for offset in range(pool_size):
        chosen = wotd.pick_daily(conn, feature_date=iso_day(offset))
        picks.append(chosen["pool_id"])
    assert len(set(picks)) == pool_size

    # The day after the pool is exhausted, the first-shown word is featured again.
    following_day = iso_day(pool_size)
    assert wotd.pick_daily(conn, feature_date=following_day)["pool_id"] == picks[0]
|