From cebbed58aba982f3fcb4eedb4110591575637b40 Mon Sep 17 00:00:00 2001
From: jay
Date: Tue, 23 Jun 2026 06:08:14 -0400
Subject: [PATCH] WOTD #4/#5 content quality + Editorial Asymmetric /word page (CD)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Content quality ("LLM polishes, dictionary anchors"):
- New wotd._polish: rewrites the real dictionary gloss into ONE warm plain
  sentence + two clear everyday example sentences, grounded in the real
  definition (no invented meanings). Stored in new wotd_pool/daily_wotd
  columns gloss + usage, alongside the raw definition/examples which stay
  the anchor.
- harvest() polishes each new word; pick_daily() lazily polishes + caches
  back any older pooled word that lacks a gloss (client threaded through
  run_daily).
- Admin word-add polishes on insert; re-pick passes an LLM client so quote
  meaning / word gloss fill on a forced fresh pick.
- /api/word/today now prefers gloss + usage, falling back to the raw
  dictionary def/examples when polish is absent (so it's always safe).
- db._migrate adds gloss/usage to wotd_pool + daily_wotd (idempotent ALTER).

Frontend — /word redesigned to CD's "Editorial Asymmetric": faded oversized
initial bleeding off the right, vertical part-of-speech rail, big Newsreader
word, airy definition, left-ruled italic example sentences, outline Listen
button + date. (Uses our self-hosted Newsreader/Hanken stack rather than the
mockup's Google fonts; the made-up syllable respelling is omitted since we
only have real IPA.)

Tests: _polish parse/trim/cap, harvest stores gloss/usage, pick
lazy-polishes older words, admin gloss flows through to /api/word/today.
403 backend + 27 fe.

Co-Authored-By: Claude Opus 4.8 --- frontend/src/routes/word/+page.svelte | 129 ++++++++++++++++---------- goodnews/api.py | 20 ++-- goodnews/db.py | 16 +++- goodnews/wotd.py | 59 ++++++++++-- tests/test_joys_admin.py | 4 + tests/test_wotd.py | 33 +++++++ 6 files changed, 196 insertions(+), 65 deletions(-) diff --git a/frontend/src/routes/word/+page.svelte b/frontend/src/routes/word/+page.svelte index 963fa26..2f2bbb3 100644 --- a/frontend/src/routes/word/+page.svelte +++ b/frontend/src/routes/word/+page.svelte @@ -7,6 +7,12 @@ let state = $state('loading'); // loading | ready | empty const cap = (s) => (s ? s[0].toUpperCase() + s.slice(1) : ''); + const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']; + let dateLabel = $derived.by(() => { + if (!w?.date) return ''; + const [, m, d] = w.date.split('-').map(Number); + return MONTHS[m - 1] ? `${MONTHS[m - 1]} ${d}` : ''; + }); onMount(async () => { try { @@ -43,29 +49,41 @@
{#if state === 'ready'} -

Word of the day

-

{cap(w.word)}

-
- {#if w.part_of_speech}{w.part_of_speech}{/if} - {#if w.phonetic}{w.phonetic}{/if} - + +
+ + +
+ Word of the day + {#if dateLabel}{dateLabel}{/if} +
+ +
+ {#if w.part_of_speech}{w.part_of_speech}{/if} +
+

{cap(w.word)}

+
+ {#if w.phonetic}{w.phonetic}{/if} + +
+
+
+ +

{w.definition}

+ + {#if w.examples?.length} +
+
In a sentence
+ {#each w.examples as ex}

{ex}

{/each} +
+ {/if}
- -

{w.definition}

- - {#if w.examples?.length} -
-

In a sentence

-
    - {#each w.examples as ex}
  • {ex}
  • {/each} -
-
- {/if} {:else if state === 'empty'}

Today's word is on its way. Check back soon.

{:else} @@ -75,41 +93,58 @@ diff --git a/goodnews/api.py b/goodnews/api.py index 942cb6f..7833efb 100644 --- a/goodnews/api.py +++ b/goodnews/api.py @@ -2340,12 +2340,14 @@ def create_app() -> FastAPI: response.headers["Cache-Control"] = _PRIVATE raise HTTPException(status_code=404, detail="No word yet.") response.headers["Cache-Control"] = _EDGE_FEED + # Prefer the LLM-polished gloss + everyday sentences; fall back to the raw dictionary. + raw_examples = w.get("usage") or w.get("examples") try: - examples = json.loads(w["examples"]) if w["examples"] else [] + examples = json.loads(raw_examples) if raw_examples else [] except (ValueError, TypeError): examples = [] return {"date": w["feature_date"], "word": w["word"], "part_of_speech": w["part_of_speech"], - "phonetic": w["phonetic"], "definition": w["definition"], "examples": examples, + "phonetic": w["phonetic"], "definition": w.get("gloss") or w["definition"], "examples": examples, "audio_url": f"/api/word/audio/{w['word']}" if w["audio_file"] else None} @app.api_route("/api/word/audio/{word}", methods=["GET", "HEAD"]) @@ -2423,10 +2425,13 @@ def create_app() -> FastAPI: if not info: raise HTTPException(status_code=400, detail="Word not found in dictionary.") audio_file = wotd._cache_audio(info["audio_url"], info["word"]) - conn.execute("INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples) " - "VALUES ('admin',?,?,?,?,?,?,?)", + polished = wotd._polish(LocalModelClient.from_env(), info["word"], info["part_of_speech"], info["definition"]) + gloss = polished["gloss"] if polished else None + usage = json.dumps(polished["examples"]) if polished else None + conn.execute("INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples, gloss, usage) " + "VALUES ('admin',?,?,?,?,?,?,?,?,?)", (info["word"], info["part_of_speech"], info["phonetic"], audio_file, info["audio_url"], - info["definition"], 
json.dumps(info["examples"]))) + info["definition"], json.dumps(info["examples"]), gloss, usage)) else: raise HTTPException(status_code=404, detail="Unknown joy.") conn.commit() @@ -2443,7 +2448,10 @@ def create_app() -> FastAPI: f"SELECT pool_id FROM {_JOY_DAILY[kind]} WHERE feature_date=?", (local_today(),) ).fetchone() avoid = cur["pool_id"] if cur else None # force a DIFFERENT item, not the same one - picked = mod.pick_daily(conn, force=True, avoid=avoid) + kwargs = {"force": True, "avoid": avoid} + if kind in ("quote", "word"): # these polish lazily (gloss / meaning) + kwargs["client"] = LocalModelClient.from_env() + picked = mod.pick_daily(conn, **kwargs) return {"ok": True, "picked": bool(picked)} @app.get("/api/replacement", response_model=Article | None) diff --git a/goodnews/db.py b/goodnews/db.py index 33de3f9..ff1e6c7 100644 --- a/goodnews/db.py +++ b/goodnews/db.py @@ -338,8 +338,10 @@ CREATE TABLE IF NOT EXISTS wotd_pool ( phonetic TEXT, -- IPA audio_file TEXT, -- our cached pronunciation clip (or null → browser TTS) audio_url TEXT, -- source clip URL - definition TEXT NOT NULL, - examples TEXT, -- JSON array of example sentences + definition TEXT NOT NULL, -- raw dictionary gloss (anchor / ground truth) + examples TEXT, -- JSON array of raw dictionary example sentences (anchor) + gloss TEXT, -- LLM plain-language rewrite of the definition (for display) + usage TEXT, -- JSON array of LLM everyday example sentences (for display) shown_at TEXT, blocked INTEGER NOT NULL DEFAULT 0, featured INTEGER NOT NULL DEFAULT 0, @@ -349,7 +351,7 @@ CREATE TABLE IF NOT EXISTS daily_wotd ( feature_date TEXT PRIMARY KEY, pool_id INTEGER NOT NULL, word TEXT, part_of_speech TEXT, phonetic TEXT, audio_file TEXT, - definition TEXT, examples TEXT, + definition TEXT, examples TEXT, gloss TEXT, usage TEXT, created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP ); @@ -648,3 +650,11 @@ def _migrate(conn: sqlite3.Connection) -> None: for column in ("what_happened", "why_matters", 
"why_belongs"): if sum_cols and column not in sum_cols: conn.execute(f"ALTER TABLE article_summaries ADD COLUMN {column} TEXT") + + # WOTD display polish: LLM plain-language gloss + everyday example sentences, kept + # alongside the raw dictionary def/examples (which stay the anchor / ground truth). + for tbl in ("wotd_pool", "daily_wotd"): + cols = {row["name"] for row in conn.execute(f"PRAGMA table_info({tbl})")} + for column in ("gloss", "usage"): + if cols and column not in cols: + conn.execute(f"ALTER TABLE {tbl} ADD COLUMN {column} TEXT") diff --git a/goodnews/wotd.py b/goodnews/wotd.py index 9144dd6..3e0624d 100644 --- a/goodnews/wotd.py +++ b/goodnews/wotd.py @@ -68,6 +68,38 @@ def _propose_words(client, n: int) -> list[dict]: return out +def _polish(client, word: str, part_of_speech: str | None, definition: str) -> dict | None: + """LLM polish for display: rewrite the real dictionary gloss as ONE warm plain sentence, + and write two clear everyday example sentences. Grounded in the real definition (the + dictionary stays the anchor); returns None on any trouble so callers fall back to raw.""" + pos = f" ({part_of_speech})" if part_of_speech else "" + user = ( + f'The word is "{word}"{pos}. Its dictionary definition is: "{definition}".\n' + "1) Rewrite that definition as ONE warm, plain-language sentence that a general reader " + "instantly understands. Stay faithful to the meaning; do not invent extra facts.\n" + "2) Write TWO short, natural example sentences that clearly show the word used in " + "everyday life — concrete and easy to picture, not abstract, archaic, or a proper-noun " + f'title. 
Each must actually use the word "{word}".\n' + 'Reply with JSON only: {"gloss": "...", "examples": ["...", "..."]}' + ) + try: + txt = client.chat_text([{"role": "user", "content": user}]) + except Exception: # noqa: BLE001 — polish is best-effort; raw dictionary data stands + return None + m = re.search(r"\{.*\}", txt, re.S) + if not m: + return None + try: + data = json.loads(m.group(0)) + except ValueError: + return None + gloss = " ".join(str(data.get("gloss") or "").split()).strip() + examples = [" ".join(str(e).split()).strip() for e in (data.get("examples") or []) if str(e).strip()] + if not gloss: + return None + return {"gloss": gloss, "examples": examples[:2]} + + def _lookup(word: str, prefer_pos: str | None = None) -> dict | None: """Validate + enrich a word via the dictionary. Returns None if it's not a real word. When prefer_pos is given, picks the meaning of that part of speech (the sense the LLM meant).""" @@ -167,12 +199,15 @@ def harvest(conn: sqlite3.Connection, client, count: int = _HARVEST_BATCH) -> di if not info: continue audio_file = _cache_audio(info["audio_url"], info["word"]) + polished = _polish(client, info["word"], info["part_of_speech"], info["definition"]) + gloss = polished["gloss"] if polished else None + usage = json.dumps(polished["examples"]) if polished else None rows.append((info["word"], info["part_of_speech"], info["phonetic"], audio_file, - info["audio_url"], info["definition"], json.dumps(info["examples"]))) + info["audio_url"], info["definition"], json.dumps(info["examples"]), gloss, usage)) before = _pool_count(conn) conn.executemany( - "INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples) " - "VALUES ('llm', ?, ?, ?, ?, ?, ?, ?)", rows, + "INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples, gloss, usage) " + "VALUES ('llm', ?, ?, ?, ?, ?, ?, ?, ?, ?)", rows, ) conn.commit() after = 
_pool_count(conn) @@ -195,7 +230,7 @@ def _candidates(conn: sqlite3.Connection, avoid: int | None = None) -> list[int] def pick_daily(conn: sqlite3.Connection, feature_date: str | None = None, force: bool = False, - avoid: int | None = None) -> dict | None: + avoid: int | None = None, client=None) -> dict | None: feature_date = feature_date or local_today() existing = conn.execute("SELECT * FROM daily_wotd WHERE feature_date=?", (feature_date,)).fetchone() if existing and not force: @@ -205,14 +240,20 @@ def pick_daily(conn: sqlite3.Connection, feature_date: str | None = None, force: return None pick_id = daily.seeded_order(ids, feature_date)[0] row = conn.execute("SELECT * FROM wotd_pool WHERE id=?", (pick_id,)).fetchone() + gloss, usage = row["gloss"], row["usage"] + if not gloss and client: # lazy polish for older pool words; cached back + polished = _polish(client, row["word"], row["part_of_speech"], row["definition"]) + if polished: + gloss, usage = polished["gloss"], json.dumps(polished["examples"]) + conn.execute("UPDATE wotd_pool SET gloss=?, usage=? WHERE id=?", (gloss, usage, pick_id)) conn.execute( - "INSERT INTO daily_wotd (feature_date, pool_id, word, part_of_speech, phonetic, audio_file, definition, examples) " - "VALUES (?,?,?,?,?,?,?,?) " + "INSERT INTO daily_wotd (feature_date, pool_id, word, part_of_speech, phonetic, audio_file, definition, examples, gloss, usage) " + "VALUES (?,?,?,?,?,?,?,?,?,?) 
" "ON CONFLICT(feature_date) DO UPDATE SET pool_id=excluded.pool_id, word=excluded.word, " "part_of_speech=excluded.part_of_speech, phonetic=excluded.phonetic, audio_file=excluded.audio_file, " - "definition=excluded.definition, examples=excluded.examples", + "definition=excluded.definition, examples=excluded.examples, gloss=excluded.gloss, usage=excluded.usage", (feature_date, row["id"], row["word"], row["part_of_speech"], row["phonetic"], - row["audio_file"], row["definition"], row["examples"]), + row["audio_file"], row["definition"], row["examples"], gloss, usage), ) conn.execute("UPDATE wotd_pool SET shown_at=? WHERE id=?", (feature_date, pick_id)) conn.commit() @@ -233,5 +274,5 @@ def run_daily(conn: sqlite3.Connection, client=None) -> dict: harvested = None if client and _pool_count(conn) < _TARGET_POOL: harvested = harvest(conn, client) - picked = pick_daily(conn) + picked = pick_daily(conn, client=client) return {"pool": _pool_count(conn), "harvested": harvested, "picked": (picked or {}).get("word")} diff --git a/tests/test_joys_admin.py b/tests/test_joys_admin.py index 0ab947f..2fdecde 100644 --- a/tests/test_joys_admin.py +++ b/tests/test_joys_admin.py @@ -82,6 +82,7 @@ def test_word_add_and_repick(api_app, monkeypatch): "audio_url": None, "definition": "Giving off light; radiant.", "examples": []}} monkeypatch.setattr(wotd, "_lookup", lambda w, prefer_pos=None: fake.get(w)) monkeypatch.setattr(wotd, "_cache_audio", lambda url, word: None) + monkeypatch.setattr(wotd, "_polish", lambda c, w, pos, d: {"gloss": f"{w} means lovely.", "examples": [f"What a {w} morning."]}) tc = _admin(api_app) assert tc.post("/api/admin/joys/word/add", json={"word": "serene"}).json()["ok"] @@ -90,6 +91,9 @@ def test_word_add_and_repick(api_app, monkeypatch): assert {"serene", "luminous"} <= {it.get("word") for it in items} assert tc.post("/api/admin/joys/word/repick").json()["picked"] is True first = tc.get("/api/word/today").json() + # the LLM-polished gloss + sentence are 
what the page shows (not the raw dictionary def) + assert first["definition"] == f"{first['word']} means lovely." + assert first["examples"] == [f"What a {first['word']} morning."] assert tc.post("/api/admin/joys/word/repick").json()["picked"] is True second = tc.get("/api/word/today").json() assert second["word"] != first["word"] diff --git a/tests/test_wotd.py b/tests/test_wotd.py index 611a899..b834a87 100644 --- a/tests/test_wotd.py +++ b/tests/test_wotd.py @@ -85,3 +85,36 @@ def test_propose_words_accepts_dicts_and_strings(): return '{"words": [{"word": "Serene", "pos": "Adjective"}, "dawn", {"word": ""}]}' out = wotd._propose_words(C(), 3) assert out == [{"word": "serene", "pos": "adjective"}, {"word": "dawn", "pos": None}] + + +def test_harvest_stores_polished_gloss_and_usage(conn, monkeypatch): + monkeypatch.setattr(wotd, "_polish", lambda c, w, pos, d: {"gloss": f"{w}: plain.", "examples": [f"A {w} day."]}) + wotd.harvest(conn, FakeClient()) + row = conn.execute("SELECT gloss, usage FROM wotd_pool WHERE word='serene'").fetchone() + assert row[0] == "serene: plain." and json.loads(row[1]) == ["A serene day."] + + +def test_pick_lazy_polishes_older_words(conn, monkeypatch): + monkeypatch.setattr(wotd, "_polish", lambda c, w, pos, d: None) # harvested before polish existed + wotd.harvest(conn, FakeClient()) + assert conn.execute("SELECT gloss FROM wotd_pool WHERE word='serene'").fetchone()[0] is None + monkeypatch.setattr(wotd, "_polish", lambda c, w, pos, d: {"gloss": "calm and untroubled.", "examples": ["The lake was serene."]}) + a = wotd.pick_daily(conn, feature_date="2026-06-22", client=FakeClient()) + assert a["gloss"] == "calm and untroubled." and json.loads(a["usage"]) == ["The lake was serene."] + # cached back to the pool so it's only generated once + assert conn.execute("SELECT gloss FROM wotd_pool WHERE id=?", (a["pool_id"],)).fetchone()[0] == "calm and untroubled." 
+ + +def test_polish_trims_and_caps_two_examples(): + class C: + def chat_text(self, m): + return 'sure: {"gloss": " Calm and peaceful. ", "examples": ["One.", "Two.", "Three."]} done' + out = wotd._polish(C(), "serene", "adjective", "x") + assert out["gloss"] == "Calm and peaceful." and out["examples"] == ["One.", "Two."] + + +def test_polish_returns_none_without_a_gloss(): + class C: + def chat_text(self, m): + return '{"examples": ["x"]}' + assert wotd._polish(C(), "serene", None, "x") is None