Files
thejayman77 0ae789752e fix: QOTD/WOTD freshness — pick within the freshest cohort, not the rotated pool
Both selectors ordered candidates least-recently-shown, then daily.seeded_order()
ROTATED the whole list and took [0] — an arbitrary date-hashed item, undoing the
ordering. Result: repeats (quote id 2 on 6/28+6/29; word "harmony" on 6/25+6/28),
no guarantee a pool item is shown before it recurs.

Fix: daily.freshest(rows) returns the freshest cohort only — every NEVER-shown
item while any remain, else the oldest-shown group. quote/wotd _candidates use it;
seeded_order now picks deterministically WITHIN that cohort. So every pool item is
featured once before any repeat, then cycles oldest-first. Dropped the unused
_NO_REPEAT_POOL window. Tests: no-repeat-until-exhausted (quote + wotd) + a
freshest() unit test. 428 backend tests green.

(Separate follow-up: expand the QOTD pool from 16 → 90+ vetted public-domain
quotes for a longer no-repeat window.)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 05:39:06 -04:00

161 lines
7.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Word of the Day: LLM-proposed words validated against the dictionary (mocked),
harvest dedupes + drops unknowns, audio cached when present, deterministic pick."""
import json
import pytest
from goodnews import wotd
from goodnews.db import connect, init_db
# Canned dictionary entries keyed by headword; the `conn` fixture serves these
# in place of a real dictionary lookup. "serene" has an audio URL and one
# example sentence, "dawn" has neither.
FAKE_DICT = {
    entry["word"]: entry
    for entry in (
        {
            "word": "serene",
            "part_of_speech": "adjective",
            "phonetic": "/səˈriːn/",
            "audio_url": "https://a/serene.mp3",
            "definition": "calm, peaceful, and untroubled",
            "examples": ["a serene mountain lake"],
        },
        {
            "word": "dawn",
            "part_of_speech": "noun",
            "phonetic": "/dɔːn/",
            "audio_url": None,
            "definition": "the first appearance of light in the sky",
            "examples": [],
        },
    )
}
class FakeClient:
    """Stand-in LLM client that always proposes the same three words."""

    def chat_text(self, messages):
        """Return a fixed JSON reply, ignoring `messages`."""
        # xyzzyq isn't a real word
        reply = '{"words": ["serene", "dawn", "xyzzyq"]}'
        return reply
@pytest.fixture
def conn(tmp_path, monkeypatch):
    """Yield a ``connect(":memory:")`` database with wotd's lookup and audio helpers stubbed.

    The audio directory env var points inside ``tmp_path``; ``wotd._lookup`` is
    answered from ``FAKE_DICT`` and ``wotd._cache_audio`` only fabricates a
    file name. The connection is closed once the test finishes.
    """
    audio_dir = tmp_path / "audio"
    monkeypatch.setenv("GOODNEWS_WOTD_AUDIO", str(audio_dir))

    def fake_lookup(w, prefer_pos=None):
        # no dictionary network
        return FAKE_DICT.get(w)

    def fake_cache_audio(url, word):
        # Pretend the download succeeded whenever a URL was supplied.
        if url:
            return f"{word}.mp3"
        return None

    monkeypatch.setattr(wotd, "_lookup", fake_lookup)
    monkeypatch.setattr(wotd, "_cache_audio", fake_cache_audio)

    db = connect(":memory:")
    init_db(db)
    yield db
    db.close()
def test_harvest_validates_dedupes_and_caches_audio(conn):
    """Harvest adds only dictionary-backed words, adds nothing on a rerun, and stores audio names."""
    first_run = wotd.harvest(conn, FakeClient())
    # serene + dawn; the nonsense word dropped
    assert first_run["added"] == 2

    # idempotent (word is UNIQUE)
    second_run = wotd.harvest(conn, FakeClient())
    assert second_run["added"] == 0

    serene_row = conn.execute("SELECT audio_file FROM wotd_pool WHERE word='serene'").fetchone()
    assert serene_row[0] == "serene.mp3"

    dawn_row = conn.execute("SELECT audio_file FROM wotd_pool WHERE word='dawn'").fetchone()
    assert dawn_row[0] is None
def test_pick_caches_marks_shown_idempotent(conn):
    """A daily pick is well-formed, stamps shown_at on its pool row, and is stable for that date."""
    wotd.harvest(conn, FakeClient())
    day = "2026-06-22"

    first = wotd.pick_daily(conn, feature_date=day)
    assert first
    assert first["word"] in ("serene", "dawn")
    assert first["definition"]

    expected_examples = FAKE_DICT[first["word"]]["examples"]
    assert json.loads(first["examples"]) == expected_examples

    pool_row = conn.execute("SELECT shown_at FROM wotd_pool WHERE id=?", (first["pool_id"],)).fetchone()
    shown = pool_row[0]
    assert shown == "2026-06-22"

    # Picking again for the same date must return the same pool entry.
    second = wotd.pick_daily(conn, feature_date=day)
    assert second["pool_id"] == first["pool_id"]
def test_featured_pinned(conn):
    """A pool row flagged featured=1 is the one returned by a forced pick."""
    wotd.harvest(conn, FakeClient())

    conn.execute("UPDATE wotd_pool SET featured=1 WHERE word='dawn'")
    conn.commit()

    forced = wotd.pick_daily(conn, feature_date="2026-06-22", force=True)
    assert forced["word"] == "dawn"
def test_get_today_never_empty(conn):
    """get_today for a date that was never picked returns the earlier pick's pool entry."""
    wotd.harvest(conn, FakeClient())
    picked = wotd.pick_daily(conn, feature_date="2026-06-22")

    fallback = wotd.get_today(conn, "2099-01-01")
    assert fallback["pool_id"] == picked["pool_id"]
def test_run_daily_bootstraps(conn):
    """run_daily on a fresh database reports a pool of two and a pick drawn from it."""
    result = wotd.run_daily(conn, client=FakeClient())

    assert result["pool"] == 2
    assert result["picked"] in ("serene", "dawn")
def test_lookup_prefers_intended_pos(monkeypatch):
    """When the LLM says 'serene' as an adjective, _lookup must pick the adjective sense,
    not an earlier archaic noun sense the dictionary lists first."""
    noun_sense = {
        "partOfSpeech": "noun",
        "definitions": [{"definition": "Serenity; clearness; calmness."}],
    }
    adjective_sense = {
        "partOfSpeech": "adjective",
        "definitions": [{"definition": "Calm, peaceful, untroubled."}],
    }
    entry = {"word": "serene", "phonetics": [], "meanings": [noun_sense, adjective_sense]}

    def fake_http_json(url, timeout=20):
        # Single-entry response, noun sense listed first.
        return [entry]

    monkeypatch.setattr(wotd.daily, "http_json", fake_http_json)

    expected_adjective = {
        "part_of_speech": "adjective",
        "definition": "Calm, peaceful, untroubled.",
    }
    for field, value in expected_adjective.items():
        assert wotd._lookup("serene", "adjective")[field] == value

    # no preference → first usable sense
    assert wotd._lookup("serene")["part_of_speech"] == "noun"
def test_propose_words_accepts_dicts_and_strings():
    """_propose_words lowercases dict and bare-string items and drops the empty-word item."""

    class MixedShapeClient:
        def chat_text(self, m):
            return '{"words": [{"word": "Serene", "pos": "Adjective"}, "dawn", {"word": ""}]}'

    proposed = wotd._propose_words(MixedShapeClient(), 3)

    expected = [
        {"word": "serene", "pos": "adjective"},
        {"word": "dawn", "pos": None},
    ]
    assert proposed == expected
def test_harvest_stores_polished_gloss_and_usage(conn, monkeypatch):
    """Harvest writes the polish gloss to `gloss` and its examples as JSON to `usage`."""

    def fake_polish(c, w, pos, d):
        return {"gloss": f"{w}: plain.", "examples": [f"A {w} day."]}

    monkeypatch.setattr(wotd, "_polish", fake_polish)
    wotd.harvest(conn, FakeClient())

    row = conn.execute("SELECT gloss, usage FROM wotd_pool WHERE word='serene'").fetchone()
    assert row[0] == "serene: plain."
    assert json.loads(row[1]) == ["A serene day."]
def test_pick_lazy_polishes_older_words(conn, monkeypatch):
    """A word harvested without a gloss is polished at pick time and the gloss is saved to the pool."""

    def polish_unavailable(c, w, pos, d):
        # harvested before polish existed
        return None

    monkeypatch.setattr(wotd, "_polish", polish_unavailable)
    wotd.harvest(conn, FakeClient())
    unpolished = conn.execute("SELECT gloss FROM wotd_pool WHERE word='serene'").fetchone()
    assert unpolished[0] is None

    def polish_available(c, w, pos, d):
        return {"gloss": "calm and untroubled.", "examples": ["The lake was serene."]}

    monkeypatch.setattr(wotd, "_polish", polish_available)
    picked = wotd.pick_daily(conn, feature_date="2026-06-22", client=FakeClient())
    assert picked["gloss"] == "calm and untroubled."
    assert json.loads(picked["usage"]) == ["The lake was serene."]

    # cached back to the pool so it's only generated once
    cached = conn.execute("SELECT gloss FROM wotd_pool WHERE id=?", (picked["pool_id"],)).fetchone()
    assert cached[0] == "calm and untroubled."
def test_polish_trims_and_caps_two_examples():
    """_polish pulls the JSON out of surrounding chatter, strips the gloss, and keeps two examples."""

    class ChattyClient:
        def chat_text(self, m):
            reply = (
                'sure: {"gloss": " Calm and peaceful. ", "examples": '
                '["A serene lake.", "The serene night.", "A serene mood."]} done'
            )
            return reply

    polished = wotd._polish(ChattyClient(), "serene", "adjective", "x")

    assert polished["gloss"] == "Calm and peaceful."
    assert polished["examples"] == ["A serene lake.", "The serene night."]
def test_polish_returns_none_without_a_gloss():
    """A reply that has examples but no gloss key makes _polish return None."""

    class GlosslessClient:
        def chat_text(self, m):
            return '{"examples": ["A serene lake."]}'

    polished = wotd._polish(GlosslessClient(), "serene", None, "x")
    assert polished is None
def test_polish_drops_examples_that_dont_use_the_word():
    """Only examples containing the word survive; the match ignores case."""

    # the word must appear (case-insensitive) — example here keeps only the matching one
    class HalfRelevantClient:
        def chat_text(self, m):
            return '{"gloss": "Calm.", "examples": ["It was quiet.", "A SERENE harbor."]}'

    polished = wotd._polish(HalfRelevantClient(), "serene", "adjective", "x")

    kept = polished["examples"]
    assert kept == ["A SERENE harbor."]
def test_polish_returns_none_when_no_example_uses_the_word():
    """A gloss whose examples never mention the word makes _polish return None."""

    class OffTopicClient:
        def chat_text(self, m):
            return '{"gloss": "A warm clear gloss.", "examples": ["Totally unrelated.", "Still nothing."]}'

    polished = wotd._polish(OffTopicClient(), "serene", "adjective", "x")
    assert polished is None
def test_polish_returns_none_with_empty_examples():
    """A gloss with an empty examples list makes _polish return None."""

    class NoExamplesClient:
        def chat_text(self, m):
            return '{"gloss": "A warm clear gloss.", "examples": []}'

    polished = wotd._polish(NoExamplesClient(), "serene", "adjective", "x")
    assert polished is None
def test_no_repeat_until_pool_exhausted(conn):
    """Same freshness guarantee as QOTD: every word featured once before any repeat,
    then the oldest-shown repeats first. (Regression for 'harmony' repeating after 3 days.)"""
    import datetime

    words = ["alpha", "bravo", "charlie", "delta"]
    for word in words:
        conn.execute("INSERT INTO wotd_pool (word, definition) VALUES (?, 'a definition')", (word,))
    conn.commit()

    pool_size = len(words)
    start = datetime.date(2026, 2, 1)

    def iso_day(offset):
        # ISO date string `offset` days after the start date.
        return (start + datetime.timedelta(days=offset)).isoformat()

    # One pick per consecutive day, for as many days as the pool has words.
    picks = []
    for offset in range(pool_size):
        chosen = wotd.pick_daily(conn, feature_date=iso_day(offset))
        picks.append(chosen["pool_id"])
    assert len(set(picks)) == pool_size

    # The day after the pool is exhausted must repeat the first-shown word.
    repeat = wotd.pick_daily(conn, feature_date=iso_day(pool_size))
    assert repeat["pool_id"] == picks[0]