cebbed58ab
Content quality ("LLM polishes, dictionary anchors"):
- New wotd._polish: rewrites the real dictionary gloss into ONE warm plain
sentence + two clear everyday example sentences, grounded in the real
definition (no invented meanings). Stored in new wotd_pool/daily_wotd columns
gloss + usage, alongside the raw definition/examples which stay the anchor.
- harvest() polishes each new word; pick_daily() lazily polishes + caches back
any older pooled word that lacks a gloss (client threaded through run_daily).
- Admin word-add polishes on insert; re-pick passes an LLM client so the quote
meaning and word gloss are filled in on a forced fresh pick.
- /api/word/today now prefers gloss + usage, falling back to the raw dictionary
def/examples when polish is absent (so it's always safe).
- db._migrate adds gloss/usage to wotd_pool + daily_wotd (idempotent ALTER).
Frontend — /word redesigned to CD's "Editorial Asymmetric": faded oversized
initial bleeding off the right, vertical part-of-speech rail, big Newsreader
word, airy definition, left-ruled italic example sentences, outline Listen
button + date. (Uses our self-hosted Newsreader/Hanken stack rather than the
mockup's Google fonts; the made-up syllable respelling is omitted since we only
have real IPA.)
Tests: _polish parse/trim/cap, harvest stores gloss/usage, pick lazy-polishes
older words, admin gloss flows through to /api/word/today. 403 backend + 27 frontend tests.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
121 lines
5.6 KiB
Python
121 lines
5.6 KiB
Python
"""Word of the Day: LLM-proposed words validated against the dictionary (mocked),
|
||
harvest dedupes + drops unknowns, audio cached when present, deterministic pick."""
|
||
import json
|
||
|
||
import pytest
|
||
|
||
from goodnews import wotd
|
||
from goodnews.db import connect, init_db
|
||
|
||
# Canned dictionary entries keyed by word. "serene" carries an audio URL and one
# example sentence; "dawn" has no audio and no examples.
FAKE_DICT = {
    "serene": {
        "word": "serene",
        "part_of_speech": "adjective",
        "phonetic": "/səˈriːn/",
        "audio_url": "https://a/serene.mp3",
        "definition": "calm, peaceful, and untroubled",
        "examples": ["a serene mountain lake"],
    },
    "dawn": {
        "word": "dawn",
        "part_of_speech": "noun",
        "phonetic": "/dɔːn/",
        "audio_url": None,
        "definition": "the first appearance of light in the sky",
        "examples": [],
    },
}
|
||
|
||
|
||
class FakeClient:
    """Stub LLM client whose reply is always the same JSON word list."""

    def chat_text(self, messages):
        """Return the canned proposal payload; ``messages`` is ignored."""
        # "xyzzyq" is deliberately not a real word.
        canned_reply = '{"words": ["serene", "dawn", "xyzzyq"]}'
        return canned_reply
|
||
|
||
|
||
@pytest.fixture
def conn(tmp_path, monkeypatch):
    """Yield an initialised in-memory database with dictionary and audio stubbed.

    ``wotd._lookup`` is replaced by a ``FAKE_DICT`` lookup (no dictionary
    network) and ``wotd._cache_audio`` by a stub that returns ``"<word>.mp3"``
    when a URL is given and ``None`` otherwise. The connection is closed after
    the test.
    """
    audio_dir = tmp_path / "audio"
    monkeypatch.setenv("GOODNEWS_WOTD_AUDIO", str(audio_dir))

    def fake_lookup(w, prefer_pos=None):
        # Unknown words come back as None, exactly like dict.get.
        return FAKE_DICT.get(w)

    def fake_cache_audio(url, word):
        return f"{word}.mp3" if url else None

    monkeypatch.setattr(wotd, "_lookup", fake_lookup)
    monkeypatch.setattr(wotd, "_cache_audio", fake_cache_audio)

    db = connect(":memory:")
    init_db(db)
    yield db
    db.close()
|
||
|
||
|
||
def test_harvest_validates_dedupes_and_caches_audio(conn):
    """Harvest keeps only known words, adds nothing on a repeat, and stores audio only when a URL exists."""
    first_run = wotd.harvest(conn, FakeClient())
    # serene + dawn are kept; the nonsense word is dropped.
    assert first_run["added"] == 2

    second_run = wotd.harvest(conn, FakeClient())
    # A repeat harvest adds nothing (original note: word is UNIQUE).
    assert second_run["added"] == 0

    serene_row = conn.execute("SELECT audio_file FROM wotd_pool WHERE word='serene'").fetchone()
    assert serene_row[0] == "serene.mp3"

    dawn_row = conn.execute("SELECT audio_file FROM wotd_pool WHERE word='dawn'").fetchone()
    assert dawn_row[0] is None
|
||
|
||
|
||
def test_pick_caches_marks_shown_idempotent(conn):
    """A daily pick returns a harvested word, stamps ``shown_at``, and repeats for the same date."""
    wotd.harvest(conn, FakeClient())

    picked = wotd.pick_daily(conn, feature_date="2026-06-22")
    assert picked
    assert picked["word"] in ("serene", "dawn")
    assert picked["definition"]

    # The stored examples are JSON and must match the fake dictionary entry.
    expected_examples = FAKE_DICT[picked["word"]]["examples"]
    assert json.loads(picked["examples"]) == expected_examples

    shown_row = conn.execute(
        "SELECT shown_at FROM wotd_pool WHERE id=?", (picked["pool_id"],)
    ).fetchone()
    assert shown_row[0] == "2026-06-22"

    # Picking again for the same date yields the same pool row.
    repeat = wotd.pick_daily(conn, feature_date="2026-06-22")
    assert repeat["pool_id"] == picked["pool_id"]
|
||
|
||
|
||
def test_featured_pinned(conn):
    """A word flagged ``featured=1`` is the one returned by a forced pick."""
    wotd.harvest(conn, FakeClient())

    conn.execute("UPDATE wotd_pool SET featured=1 WHERE word='dawn'")
    conn.commit()

    forced = wotd.pick_daily(conn, feature_date="2026-06-22", force=True)
    assert forced["word"] == "dawn"
|
||
|
||
|
||
def test_get_today_never_empty(conn):
    """``get_today`` for a date with no pick of its own still returns the earlier pick."""
    wotd.harvest(conn, FakeClient())
    earlier = wotd.pick_daily(conn, feature_date="2026-06-22")

    later = wotd.get_today(conn, "2099-01-01")
    assert later["pool_id"] == earlier["pool_id"]
|
||
|
||
|
||
def test_run_daily_bootstraps(conn):
    """``run_daily`` on an empty database reports a pool of two and picks one of them."""
    result = wotd.run_daily(conn, client=FakeClient())

    assert result["pool"] == 2
    assert result["picked"] in ("serene", "dawn")
|
||
|
||
|
||
def test_lookup_prefers_intended_pos(monkeypatch):
    """When the LLM says 'serene' as an adjective, _lookup must pick the adjective sense,
    not an earlier archaic noun sense the dictionary lists first."""
    noun_sense = {
        "partOfSpeech": "noun",
        "definitions": [{"definition": "Serenity; clearness; calmness."}],
    }
    adjective_sense = {
        "partOfSpeech": "adjective",
        "definitions": [{"definition": "Calm, peaceful, untroubled."}],
    }
    # The noun sense is listed first on purpose.
    entry = {"word": "serene", "phonetics": [], "meanings": [noun_sense, adjective_sense]}

    def fake_http_json(url, timeout=20):
        return [entry]

    monkeypatch.setattr(wotd.daily, "http_json", fake_http_json)

    assert wotd._lookup("serene", "adjective")["part_of_speech"] == "adjective"
    assert wotd._lookup("serene", "adjective")["definition"] == "Calm, peaceful, untroubled."
    # With no preference the first usable sense wins.
    assert wotd._lookup("serene")["part_of_speech"] == "noun"
|
||
|
||
|
||
def test_propose_words_accepts_dicts_and_strings():
    """``_propose_words`` lower-cases dict and bare-string entries and drops the empty word."""

    class MixedReplyClient:
        def chat_text(self, m):
            return '{"words": [{"word": "Serene", "pos": "Adjective"}, "dawn", {"word": ""}]}'

    proposals = wotd._propose_words(MixedReplyClient(), 3)

    expected = [
        {"word": "serene", "pos": "adjective"},
        {"word": "dawn", "pos": None},
    ]
    assert proposals == expected
|
||
|
||
|
||
def test_harvest_stores_polished_gloss_and_usage(conn, monkeypatch):
    """Harvest writes the polished gloss and the JSON-encoded usage to ``wotd_pool``."""

    def fake_polish(c, w, pos, d):
        return {"gloss": f"{w}: plain.", "examples": [f"A {w} day."]}

    monkeypatch.setattr(wotd, "_polish", fake_polish)
    wotd.harvest(conn, FakeClient())

    gloss, usage = conn.execute(
        "SELECT gloss, usage FROM wotd_pool WHERE word='serene'"
    ).fetchone()
    assert gloss == "serene: plain."
    assert json.loads(usage) == ["A serene day."]
|
||
|
||
|
||
def test_pick_lazy_polishes_older_words(conn, monkeypatch):
    """A pooled word with no gloss is polished at pick time and the gloss is written back."""

    def polish_unavailable(c, w, pos, d):
        # Simulates words harvested before polish existed.
        return None

    monkeypatch.setattr(wotd, "_polish", polish_unavailable)
    wotd.harvest(conn, FakeClient())
    stored = conn.execute("SELECT gloss FROM wotd_pool WHERE word='serene'").fetchone()
    assert stored[0] is None

    def polish_available(c, w, pos, d):
        return {"gloss": "calm and untroubled.", "examples": ["The lake was serene."]}

    monkeypatch.setattr(wotd, "_polish", polish_available)
    picked = wotd.pick_daily(conn, feature_date="2026-06-22", client=FakeClient())
    assert picked["gloss"] == "calm and untroubled."
    assert json.loads(picked["usage"]) == ["The lake was serene."]

    # The gloss is cached back to the pool so it is only generated once.
    cached = conn.execute(
        "SELECT gloss FROM wotd_pool WHERE id=?", (picked["pool_id"],)
    ).fetchone()
    assert cached[0] == "calm and untroubled."
|
||
|
||
|
||
def test_polish_trims_and_caps_two_examples():
    """``_polish`` reads JSON wrapped in chatter, strips the gloss, and keeps two examples."""

    class ChattyClient:
        def chat_text(self, m):
            return 'sure: {"gloss": " Calm and peaceful. ", "examples": ["One.", "Two.", "Three."]} done'

    polished = wotd._polish(ChattyClient(), "serene", "adjective", "x")

    assert polished["gloss"] == "Calm and peaceful."
    assert polished["examples"] == ["One.", "Two."]
|
||
|
||
|
||
def test_polish_returns_none_without_a_gloss():
    """``_polish`` returns ``None`` when the reply has examples but no gloss."""

    class NoGlossClient:
        def chat_text(self, m):
            return '{"examples": ["x"]}'

    polished = wotd._polish(NoGlossClient(), "serene", None, "x")
    assert polished is None
|