Files
upbeatBytes/tests/test_wotd.py
T
thejayman77 cebbed58ab WOTD #4/#5 content quality + Editorial Asymmetric /word page (CD)
Content quality ("LLM polishes, dictionary anchors"):
- New wotd._polish: rewrites the real dictionary gloss into ONE warm plain
  sentence + two clear everyday example sentences, grounded in the real
  definition (no invented meanings). Stored in new wotd_pool/daily_wotd columns
  gloss + usage, alongside the raw definition/examples which stay the anchor.
- harvest() polishes each new word; pick_daily() lazily polishes + caches back
  any older pooled word that lacks a gloss (client threaded through run_daily).
- Admin word-add polishes on insert; re-pick passes an LLM client so quote
  meaning / word gloss fill on a forced fresh pick.
- /api/word/today now prefers gloss + usage, falling back to the raw dictionary
  def/examples when polish is absent (so it's always safe).
- db._migrate adds gloss/usage to wotd_pool + daily_wotd (idempotent ALTER).

Frontend — /word redesigned to CD's "Editorial Asymmetric": faded oversized
initial bleeding off the right, vertical part-of-speech rail, big Newsreader
word, airy definition, left-ruled italic example sentences, outline Listen
button + date. (Uses our self-hosted Newsreader/Hanken stack rather than the
mockup's Google fonts; the made-up syllable respelling is omitted since we only
have real IPA.)

Tests: _polish parse/trim/cap, harvest stores gloss/usage, pick lazy-polishes
older words, admin gloss flows through to /api/word/today. 403 backend + 27 frontend tests.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-23 06:08:14 -04:00

121 lines
5.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Word of the Day: LLM-proposed words validated against the dictionary (mocked),
harvest dedupes + drops unknowns, audio cached when present, deterministic pick."""
import json
import pytest
from goodnews import wotd
from goodnews.db import connect, init_db
# Canned dictionary entries keyed by headword. The `conn` fixture serves
# wotd._lookup from this mapping, so any word missing here looks unknown.
FAKE_DICT = {
    # Entry with an audio URL and one example sentence.
    "serene": {
        "word": "serene",
        "part_of_speech": "adjective",
        "phonetic": "/səˈriːn/",
        "audio_url": "https://a/serene.mp3",
        "definition": "calm, peaceful, and untroubled",
        "examples": ["a serene mountain lake"],
    },
    # Entry with no audio URL and no example sentences.
    "dawn": {
        "word": "dawn",
        "part_of_speech": "noun",
        "phonetic": "/dɔːn/",
        "audio_url": None,
        "definition": "the first appearance of light in the sky",
        "examples": [],
    },
}
class FakeClient:
    """Stand-in LLM client that always answers with the same word proposal."""

    def chat_text(self, messages):
        """Return a fixed JSON proposal; ``messages`` is ignored."""
        # "xyzzyq" isn't a real word, so it has no FAKE_DICT entry.
        canned_reply = '{"words": ["serene", "dawn", "xyzzyq"]}'
        return canned_reply
@pytest.fixture
def conn(tmp_path, monkeypatch):
    """Yield an initialised in-memory database with wotd's I/O helpers stubbed.

    Points GOODNEWS_WOTD_AUDIO at a per-test temp directory, replaces
    ``wotd._lookup`` with a FAKE_DICT lookup and ``wotd._cache_audio`` with a
    stub that only derives a file name, then opens ``:memory:`` and runs
    ``init_db`` on it. The connection is closed on teardown.
    """
    monkeypatch.setenv("GOODNEWS_WOTD_AUDIO", str(tmp_path / "audio"))
    # No dictionary network: lookups are answered from FAKE_DICT.
    monkeypatch.setattr(wotd, "_lookup", lambda w, prefer_pos=None: FAKE_DICT.get(w))
    # The stub returns "<word>.mp3" when a URL is given, otherwise None.
    monkeypatch.setattr(wotd, "_cache_audio", lambda url, word: f"{word}.mp3" if url else None)
    c = connect(":memory:")
    try:
        init_db(c)
        yield c
    finally:
        # Runs even when init_db raises before the fixture yields, so the
        # connection is never left open.
        c.close()
def test_harvest_validates_dedupes_and_caches_audio(conn):
    """Harvest adds the two known words once and records audio only for 'serene'."""
    first_run = wotd.harvest(conn, FakeClient())
    # serene + dawn are added; the nonsense word is dropped.
    assert first_run["added"] == 2

    # Harvesting the same proposal again adds nothing (word is UNIQUE).
    second_run = wotd.harvest(conn, FakeClient())
    assert second_run["added"] == 0

    serene_row = conn.execute("SELECT audio_file FROM wotd_pool WHERE word='serene'").fetchone()
    assert serene_row[0] == "serene.mp3"

    dawn_row = conn.execute("SELECT audio_file FROM wotd_pool WHERE word='dawn'").fetchone()
    assert dawn_row[0] is None
def test_pick_caches_marks_shown_idempotent(conn):
    """A daily pick carries dictionary data, stamps shown_at, and repeats for the same date."""
    wotd.harvest(conn, FakeClient())

    picked = wotd.pick_daily(conn, feature_date="2026-06-22")
    assert picked
    assert picked["word"] in ("serene", "dawn")
    assert picked["definition"]

    # The stored examples round-trip through JSON unchanged.
    expected_examples = FAKE_DICT[picked["word"]]["examples"]
    assert json.loads(picked["examples"]) == expected_examples

    shown_row = conn.execute("SELECT shown_at FROM wotd_pool WHERE id=?", (picked["pool_id"],)).fetchone()
    assert shown_row[0] == "2026-06-22"

    # Picking again for the same date returns the same pool entry.
    repeat = wotd.pick_daily(conn, feature_date="2026-06-22")
    assert repeat["pool_id"] == picked["pool_id"]
def test_featured_pinned(conn):
    """A forced pick returns the word flagged as featured."""
    wotd.harvest(conn, FakeClient())

    conn.execute("UPDATE wotd_pool SET featured=1 WHERE word='dawn'")
    conn.commit()

    forced_pick = wotd.pick_daily(conn, feature_date="2026-06-22", force=True)
    assert forced_pick["word"] == "dawn"
def test_get_today_never_empty(conn):
    """get_today for a date that was never picked still returns the existing pick."""
    wotd.harvest(conn, FakeClient())
    picked = wotd.pick_daily(conn, feature_date="2026-06-22")

    # 2099-01-01 was never picked in this test.
    fallback = wotd.get_today(conn, "2099-01-01")
    assert fallback["pool_id"] == picked["pool_id"]
def test_run_daily_bootstraps(conn):
    """run_daily on an empty database reports a pool of two and picks one of them."""
    summary = wotd.run_daily(conn, client=FakeClient())
    assert summary["pool"] == 2
    assert summary["picked"] in ("serene", "dawn")
def test_lookup_prefers_intended_pos(monkeypatch):
    """_lookup must honour the requested part of speech.

    When the LLM says 'serene' as an adjective, the adjective sense must be
    chosen rather than the archaic noun sense the dictionary lists first.
    """
    noun_sense = {"partOfSpeech": "noun", "definitions": [{"definition": "Serenity; clearness; calmness."}]}
    adjective_sense = {"partOfSpeech": "adjective", "definitions": [{"definition": "Calm, peaceful, untroubled."}]}
    api_entry = {"word": "serene", "phonetics": [], "meanings": [noun_sense, adjective_sense]}

    def fake_http_json(url, timeout=20):
        # Every request gets the same single-entry response.
        return [api_entry]

    monkeypatch.setattr(wotd.daily, "http_json", fake_http_json)

    assert wotd._lookup("serene", "adjective")["part_of_speech"] == "adjective"
    assert wotd._lookup("serene", "adjective")["definition"] == "Calm, peaceful, untroubled."
    # With no preference, the first usable sense is returned.
    assert wotd._lookup("serene")["part_of_speech"] == "noun"
def test_propose_words_accepts_dicts_and_strings():
    """_propose_words lower-cases dict and bare-string items and drops the empty word."""

    class MixedReplyClient:
        def chat_text(self, m):
            return '{"words": [{"word": "Serene", "pos": "Adjective"}, "dawn", {"word": ""}]}'

    proposals = wotd._propose_words(MixedReplyClient(), 3)

    expected = [
        {"word": "serene", "pos": "adjective"},
        {"word": "dawn", "pos": None},
    ]
    assert proposals == expected
def test_harvest_stores_polished_gloss_and_usage(conn, monkeypatch):
    """Harvest writes the polished gloss and JSON-encoded usage into wotd_pool."""

    def fake_polish(c, w, pos, d):
        # Polish output that embeds the word, so the stored row is recognisable.
        return {"gloss": f"{w}: plain.", "examples": [f"A {w} day."]}

    monkeypatch.setattr(wotd, "_polish", fake_polish)
    wotd.harvest(conn, FakeClient())

    row = conn.execute("SELECT gloss, usage FROM wotd_pool WHERE word='serene'").fetchone()
    stored_gloss = row[0]
    assert stored_gloss == "serene: plain."
    assert json.loads(row[1]) == ["A serene day."]
def test_pick_lazy_polishes_older_words(conn, monkeypatch):
    """pick_daily polishes a pooled word that has no gloss and saves the result to the pool."""

    def no_polish(c, w, pos, d):
        # Mimics words harvested before polish existed.
        return None

    monkeypatch.setattr(wotd, "_polish", no_polish)
    wotd.harvest(conn, FakeClient())
    unpolished = conn.execute("SELECT gloss FROM wotd_pool WHERE word='serene'").fetchone()
    assert unpolished[0] is None

    def late_polish(c, w, pos, d):
        return {"gloss": "calm and untroubled.", "examples": ["The lake was serene."]}

    monkeypatch.setattr(wotd, "_polish", late_polish)
    picked = wotd.pick_daily(conn, feature_date="2026-06-22", client=FakeClient())
    assert picked["gloss"] == "calm and untroubled."
    assert json.loads(picked["usage"]) == ["The lake was serene."]

    # The gloss is cached back to the pool so it is only generated once.
    pooled = conn.execute("SELECT gloss FROM wotd_pool WHERE id=?", (picked["pool_id"],)).fetchone()
    assert pooled[0] == "calm and untroubled."
def test_polish_trims_and_caps_two_examples():
    """_polish reads JSON wrapped in chatter, strips the gloss, and keeps two examples."""

    class ChattyClient:
        def chat_text(self, m):
            return 'sure: {"gloss": " Calm and peaceful. ", "examples": ["One.", "Two.", "Three."]} done'

    polished = wotd._polish(ChattyClient(), "serene", "adjective", "x")

    assert polished["gloss"] == "Calm and peaceful."
    assert polished["examples"] == ["One.", "Two."]
def test_polish_returns_none_without_a_gloss():
    """_polish returns None when the reply has examples but no gloss."""

    class GlosslessClient:
        def chat_text(self, m):
            return '{"examples": ["x"]}'

    polished = wotd._polish(GlosslessClient(), "serene", None, "x")
    assert polished is None