WOTD #4/#5 content quality + Editorial Asymmetric /word page (CD)

Content quality ("LLM polishes, dictionary anchors"):
- New wotd._polish: rewrites the real dictionary gloss into ONE warm plain
  sentence + two clear everyday example sentences, grounded in the real
  definition (no invented meanings). Stored in new wotd_pool/daily_wotd columns
  gloss + usage, alongside the raw definition/examples which stay the anchor.
- harvest() polishes each new word; pick_daily() lazily polishes + caches back
  any older pooled word that lacks a gloss (client threaded through run_daily).
- Admin word-add polishes on insert; re-pick passes an LLM client so quote
  meaning / word gloss fill on a forced fresh pick.
- /api/word/today now prefers gloss + usage, falling back to the raw dictionary
  def/examples when polish is absent (so it's always safe).
- db._migrate adds gloss/usage to wotd_pool + daily_wotd (idempotent ALTER).

Frontend — /word redesigned to CD's "Editorial Asymmetric": faded oversized
initial bleeding off the right, vertical part-of-speech rail, big Newsreader
word, airy definition, left-ruled italic example sentences, outline Listen
button + date. (Uses our self-hosted Newsreader/Hanken stack rather than the
mockup's Google fonts; the made-up syllable respelling is omitted since we only
have real IPA.)

Tests: _polish parse/trim/cap, harvest stores gloss/usage, pick lazy-polishes
older words, admin gloss flows through to /api/word/today. 403 backend + 27 fe.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-23 06:08:14 -04:00
parent e5f3d942e2
commit cebbed58ab
6 changed files with 196 additions and 65 deletions
+14 -6
View File
@@ -2340,12 +2340,14 @@ def create_app() -> FastAPI:
response.headers["Cache-Control"] = _PRIVATE
raise HTTPException(status_code=404, detail="No word yet.")
response.headers["Cache-Control"] = _EDGE_FEED
# Prefer the LLM-polished gloss + everyday sentences; fall back to the raw dictionary.
raw_examples = w.get("usage") or w.get("examples")
try:
examples = json.loads(w["examples"]) if w["examples"] else []
examples = json.loads(raw_examples) if raw_examples else []
except (ValueError, TypeError):
examples = []
return {"date": w["feature_date"], "word": w["word"], "part_of_speech": w["part_of_speech"],
"phonetic": w["phonetic"], "definition": w["definition"], "examples": examples,
"phonetic": w["phonetic"], "definition": w.get("gloss") or w["definition"], "examples": examples,
"audio_url": f"/api/word/audio/{w['word']}" if w["audio_file"] else None}
@app.api_route("/api/word/audio/{word}", methods=["GET", "HEAD"])
@@ -2423,10 +2425,13 @@ def create_app() -> FastAPI:
if not info:
raise HTTPException(status_code=400, detail="Word not found in dictionary.")
audio_file = wotd._cache_audio(info["audio_url"], info["word"])
conn.execute("INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples) "
"VALUES ('admin',?,?,?,?,?,?,?)",
polished = wotd._polish(LocalModelClient.from_env(), info["word"], info["part_of_speech"], info["definition"])
gloss = polished["gloss"] if polished else None
usage = json.dumps(polished["examples"]) if polished else None
conn.execute("INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples, gloss, usage) "
"VALUES ('admin',?,?,?,?,?,?,?,?,?)",
(info["word"], info["part_of_speech"], info["phonetic"], audio_file, info["audio_url"],
info["definition"], json.dumps(info["examples"])))
info["definition"], json.dumps(info["examples"]), gloss, usage))
else:
raise HTTPException(status_code=404, detail="Unknown joy.")
conn.commit()
@@ -2443,7 +2448,10 @@ def create_app() -> FastAPI:
f"SELECT pool_id FROM {_JOY_DAILY[kind]} WHERE feature_date=?", (local_today(),)
).fetchone()
avoid = cur["pool_id"] if cur else None # force a DIFFERENT item, not the same one
picked = mod.pick_daily(conn, force=True, avoid=avoid)
kwargs = {"force": True, "avoid": avoid}
if kind in ("quote", "word"): # these polish lazily (gloss / meaning)
kwargs["client"] = LocalModelClient.from_env()
picked = mod.pick_daily(conn, **kwargs)
return {"ok": True, "picked": bool(picked)}
@app.get("/api/replacement", response_model=Article | None)
+13 -3
View File
@@ -338,8 +338,10 @@ CREATE TABLE IF NOT EXISTS wotd_pool (
phonetic TEXT, -- IPA
audio_file TEXT, -- our cached pronunciation clip (or null → browser TTS)
audio_url TEXT, -- source clip URL
definition TEXT NOT NULL,
examples TEXT, -- JSON array of example sentences
definition TEXT NOT NULL, -- raw dictionary gloss (anchor / ground truth)
examples TEXT, -- JSON array of raw dictionary example sentences (anchor)
gloss TEXT, -- LLM plain-language rewrite of the definition (for display)
usage TEXT, -- JSON array of LLM everyday example sentences (for display)
shown_at TEXT,
blocked INTEGER NOT NULL DEFAULT 0,
featured INTEGER NOT NULL DEFAULT 0,
@@ -349,7 +351,7 @@ CREATE TABLE IF NOT EXISTS daily_wotd (
feature_date TEXT PRIMARY KEY,
pool_id INTEGER NOT NULL,
word TEXT, part_of_speech TEXT, phonetic TEXT, audio_file TEXT,
definition TEXT, examples TEXT,
definition TEXT, examples TEXT, gloss TEXT, usage TEXT,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
@@ -648,3 +650,11 @@ def _migrate(conn: sqlite3.Connection) -> None:
for column in ("what_happened", "why_matters", "why_belongs"):
if sum_cols and column not in sum_cols:
conn.execute(f"ALTER TABLE article_summaries ADD COLUMN {column} TEXT")
# WOTD display polish: LLM plain-language gloss + everyday example sentences, kept
# alongside the raw dictionary def/examples (which stay the anchor / ground truth).
for tbl in ("wotd_pool", "daily_wotd"):
cols = {row["name"] for row in conn.execute(f"PRAGMA table_info({tbl})")}
for column in ("gloss", "usage"):
if cols and column not in cols:
conn.execute(f"ALTER TABLE {tbl} ADD COLUMN {column} TEXT")
+50 -9
View File
@@ -68,6 +68,38 @@ def _propose_words(client, n: int) -> list[dict]:
return out
def _polish(client, word: str, part_of_speech: str | None, definition: str) -> dict | None:
"""LLM polish for display: rewrite the real dictionary gloss as ONE warm plain sentence,
and write two clear everyday example sentences. Grounded in the real definition (the
dictionary stays the anchor); returns None on any trouble so callers fall back to raw."""
pos = f" ({part_of_speech})" if part_of_speech else ""
user = (
f'The word is "{word}"{pos}. Its dictionary definition is: "{definition}".\n'
"1) Rewrite that definition as ONE warm, plain-language sentence that a general reader "
"instantly understands. Stay faithful to the meaning; do not invent extra facts.\n"
"2) Write TWO short, natural example sentences that clearly show the word used in "
"everyday life — concrete and easy to picture, not abstract, archaic, or a proper-noun "
f'title. Each must actually use the word "{word}".\n'
'Reply with JSON only: {"gloss": "...", "examples": ["...", "..."]}'
)
try:
txt = client.chat_text([{"role": "user", "content": user}])
except Exception: # noqa: BLE001 — polish is best-effort; raw dictionary data stands
return None
m = re.search(r"\{.*\}", txt, re.S)
if not m:
return None
try:
data = json.loads(m.group(0))
except ValueError:
return None
gloss = " ".join(str(data.get("gloss") or "").split()).strip()
examples = [" ".join(str(e).split()).strip() for e in (data.get("examples") or []) if str(e).strip()]
if not gloss:
return None
return {"gloss": gloss, "examples": examples[:2]}
def _lookup(word: str, prefer_pos: str | None = None) -> dict | None:
"""Validate + enrich a word via the dictionary. Returns None if it's not a real word.
When prefer_pos is given, picks the meaning of that part of speech (the sense the LLM meant)."""
@@ -167,12 +199,15 @@ def harvest(conn: sqlite3.Connection, client, count: int = _HARVEST_BATCH) -> di
if not info:
continue
audio_file = _cache_audio(info["audio_url"], info["word"])
polished = _polish(client, info["word"], info["part_of_speech"], info["definition"])
gloss = polished["gloss"] if polished else None
usage = json.dumps(polished["examples"]) if polished else None
rows.append((info["word"], info["part_of_speech"], info["phonetic"], audio_file,
info["audio_url"], info["definition"], json.dumps(info["examples"])))
info["audio_url"], info["definition"], json.dumps(info["examples"]), gloss, usage))
before = _pool_count(conn)
conn.executemany(
"INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples) "
"VALUES ('llm', ?, ?, ?, ?, ?, ?, ?)", rows,
"INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples, gloss, usage) "
"VALUES ('llm', ?, ?, ?, ?, ?, ?, ?, ?, ?)", rows,
)
conn.commit()
after = _pool_count(conn)
@@ -195,7 +230,7 @@ def _candidates(conn: sqlite3.Connection, avoid: int | None = None) -> list[int]
def pick_daily(conn: sqlite3.Connection, feature_date: str | None = None, force: bool = False,
avoid: int | None = None) -> dict | None:
avoid: int | None = None, client=None) -> dict | None:
feature_date = feature_date or local_today()
existing = conn.execute("SELECT * FROM daily_wotd WHERE feature_date=?", (feature_date,)).fetchone()
if existing and not force:
@@ -205,14 +240,20 @@ def pick_daily(conn: sqlite3.Connection, feature_date: str | None = None, force:
return None
pick_id = daily.seeded_order(ids, feature_date)[0]
row = conn.execute("SELECT * FROM wotd_pool WHERE id=?", (pick_id,)).fetchone()
gloss, usage = row["gloss"], row["usage"]
if not gloss and client: # lazy polish for older pool words; cached back
polished = _polish(client, row["word"], row["part_of_speech"], row["definition"])
if polished:
gloss, usage = polished["gloss"], json.dumps(polished["examples"])
conn.execute("UPDATE wotd_pool SET gloss=?, usage=? WHERE id=?", (gloss, usage, pick_id))
conn.execute(
"INSERT INTO daily_wotd (feature_date, pool_id, word, part_of_speech, phonetic, audio_file, definition, examples) "
"VALUES (?,?,?,?,?,?,?,?) "
"INSERT INTO daily_wotd (feature_date, pool_id, word, part_of_speech, phonetic, audio_file, definition, examples, gloss, usage) "
"VALUES (?,?,?,?,?,?,?,?,?,?) "
"ON CONFLICT(feature_date) DO UPDATE SET pool_id=excluded.pool_id, word=excluded.word, "
"part_of_speech=excluded.part_of_speech, phonetic=excluded.phonetic, audio_file=excluded.audio_file, "
"definition=excluded.definition, examples=excluded.examples",
"definition=excluded.definition, examples=excluded.examples, gloss=excluded.gloss, usage=excluded.usage",
(feature_date, row["id"], row["word"], row["part_of_speech"], row["phonetic"],
row["audio_file"], row["definition"], row["examples"]),
row["audio_file"], row["definition"], row["examples"], gloss, usage),
)
conn.execute("UPDATE wotd_pool SET shown_at=? WHERE id=?", (feature_date, pick_id))
conn.commit()
@@ -233,5 +274,5 @@ def run_daily(conn: sqlite3.Connection, client=None) -> dict:
harvested = None
if client and _pool_count(conn) < _TARGET_POOL:
harvested = harvest(conn, client)
picked = pick_daily(conn)
picked = pick_daily(conn, client=client)
return {"pool": _pool_count(conn), "harvested": harvested, "picked": (picked or {}).get("word")}