WOTD #4/#5 content quality + Editorial Asymmetric /word page (CD)
Content quality ("LLM polishes, dictionary anchors"):
- New wotd._polish: rewrites the real dictionary gloss into ONE warm plain
sentence + two clear everyday example sentences, grounded in the real
definition (no invented meanings). Stored in new wotd_pool/daily_wotd columns
gloss + usage, alongside the raw definition/examples which stay the anchor.
- harvest() polishes each new word; pick_daily() lazily polishes + caches back
any older pooled word that lacks a gloss (client threaded through run_daily).
- Admin word-add polishes on insert; re-pick passes an LLM client so the quote
meaning / word gloss gets filled in on a forced fresh pick.
- /api/word/today now prefers gloss + usage, falling back to the raw dictionary
def/examples when polish is absent (so it's always safe).
- db._migrate adds gloss/usage to wotd_pool + daily_wotd (idempotent ALTER).
Frontend — /word redesigned to CD's "Editorial Asymmetric": faded oversized
initial bleeding off the right, vertical part-of-speech rail, big Newsreader
word, airy definition, left-ruled italic example sentences, outline Listen
button + date. (Uses our self-hosted Newsreader/Hanken stack rather than the
mockup's Google fonts; the made-up syllable respelling is omitted since we only
have real IPA.)
Tests: _polish parse/trim/cap, harvest stores gloss/usage, pick lazy-polishes
older words, admin gloss flows through to /api/word/today. 403 backend + 27 frontend tests.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+14
-6
@@ -2340,12 +2340,14 @@ def create_app() -> FastAPI:
|
||||
response.headers["Cache-Control"] = _PRIVATE
|
||||
raise HTTPException(status_code=404, detail="No word yet.")
|
||||
response.headers["Cache-Control"] = _EDGE_FEED
|
||||
# Prefer the LLM-polished gloss + everyday sentences; fall back to the raw dictionary.
|
||||
raw_examples = w.get("usage") or w.get("examples")
|
||||
try:
|
||||
examples = json.loads(w["examples"]) if w["examples"] else []
|
||||
examples = json.loads(raw_examples) if raw_examples else []
|
||||
except (ValueError, TypeError):
|
||||
examples = []
|
||||
return {"date": w["feature_date"], "word": w["word"], "part_of_speech": w["part_of_speech"],
|
||||
"phonetic": w["phonetic"], "definition": w["definition"], "examples": examples,
|
||||
"phonetic": w["phonetic"], "definition": w.get("gloss") or w["definition"], "examples": examples,
|
||||
"audio_url": f"/api/word/audio/{w['word']}" if w["audio_file"] else None}
|
||||
|
||||
@app.api_route("/api/word/audio/{word}", methods=["GET", "HEAD"])
|
||||
@@ -2423,10 +2425,13 @@ def create_app() -> FastAPI:
|
||||
if not info:
|
||||
raise HTTPException(status_code=400, detail="Word not found in dictionary.")
|
||||
audio_file = wotd._cache_audio(info["audio_url"], info["word"])
|
||||
conn.execute("INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples) "
|
||||
"VALUES ('admin',?,?,?,?,?,?,?)",
|
||||
polished = wotd._polish(LocalModelClient.from_env(), info["word"], info["part_of_speech"], info["definition"])
|
||||
gloss = polished["gloss"] if polished else None
|
||||
usage = json.dumps(polished["examples"]) if polished else None
|
||||
conn.execute("INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples, gloss, usage) "
|
||||
"VALUES ('admin',?,?,?,?,?,?,?,?,?)",
|
||||
(info["word"], info["part_of_speech"], info["phonetic"], audio_file, info["audio_url"],
|
||||
info["definition"], json.dumps(info["examples"])))
|
||||
info["definition"], json.dumps(info["examples"]), gloss, usage))
|
||||
else:
|
||||
raise HTTPException(status_code=404, detail="Unknown joy.")
|
||||
conn.commit()
|
||||
@@ -2443,7 +2448,10 @@ def create_app() -> FastAPI:
|
||||
f"SELECT pool_id FROM {_JOY_DAILY[kind]} WHERE feature_date=?", (local_today(),)
|
||||
).fetchone()
|
||||
avoid = cur["pool_id"] if cur else None # force a DIFFERENT item, not the same one
|
||||
picked = mod.pick_daily(conn, force=True, avoid=avoid)
|
||||
kwargs = {"force": True, "avoid": avoid}
|
||||
if kind in ("quote", "word"): # these polish lazily (gloss / meaning)
|
||||
kwargs["client"] = LocalModelClient.from_env()
|
||||
picked = mod.pick_daily(conn, **kwargs)
|
||||
return {"ok": True, "picked": bool(picked)}
|
||||
|
||||
@app.get("/api/replacement", response_model=Article | None)
|
||||
|
||||
+13
-3
@@ -338,8 +338,10 @@ CREATE TABLE IF NOT EXISTS wotd_pool (
|
||||
phonetic TEXT, -- IPA
|
||||
audio_file TEXT, -- our cached pronunciation clip (or null → browser TTS)
|
||||
audio_url TEXT, -- source clip URL
|
||||
definition TEXT NOT NULL,
|
||||
examples TEXT, -- JSON array of example sentences
|
||||
definition TEXT NOT NULL, -- raw dictionary gloss (anchor / ground truth)
|
||||
examples TEXT, -- JSON array of raw dictionary example sentences (anchor)
|
||||
gloss TEXT, -- LLM plain-language rewrite of the definition (for display)
|
||||
usage TEXT, -- JSON array of LLM everyday example sentences (for display)
|
||||
shown_at TEXT,
|
||||
blocked INTEGER NOT NULL DEFAULT 0,
|
||||
featured INTEGER NOT NULL DEFAULT 0,
|
||||
@@ -349,7 +351,7 @@ CREATE TABLE IF NOT EXISTS daily_wotd (
|
||||
feature_date TEXT PRIMARY KEY,
|
||||
pool_id INTEGER NOT NULL,
|
||||
word TEXT, part_of_speech TEXT, phonetic TEXT, audio_file TEXT,
|
||||
definition TEXT, examples TEXT,
|
||||
definition TEXT, examples TEXT, gloss TEXT, usage TEXT,
|
||||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
@@ -648,3 +650,11 @@ def _migrate(conn: sqlite3.Connection) -> None:
|
||||
for column in ("what_happened", "why_matters", "why_belongs"):
|
||||
if sum_cols and column not in sum_cols:
|
||||
conn.execute(f"ALTER TABLE article_summaries ADD COLUMN {column} TEXT")
|
||||
|
||||
# WOTD display polish: LLM plain-language gloss + everyday example sentences, kept
|
||||
# alongside the raw dictionary def/examples (which stay the anchor / ground truth).
|
||||
for tbl in ("wotd_pool", "daily_wotd"):
|
||||
cols = {row["name"] for row in conn.execute(f"PRAGMA table_info({tbl})")}
|
||||
for column in ("gloss", "usage"):
|
||||
if cols and column not in cols:
|
||||
conn.execute(f"ALTER TABLE {tbl} ADD COLUMN {column} TEXT")
|
||||
|
||||
+50
-9
@@ -68,6 +68,38 @@ def _propose_words(client, n: int) -> list[dict]:
|
||||
return out
|
||||
|
||||
|
||||
def _polish(client, word: str, part_of_speech: str | None, definition: str) -> dict | None:
|
||||
"""LLM polish for display: rewrite the real dictionary gloss as ONE warm plain sentence,
|
||||
and write two clear everyday example sentences. Grounded in the real definition (the
|
||||
dictionary stays the anchor); returns None on any trouble so callers fall back to raw."""
|
||||
pos = f" ({part_of_speech})" if part_of_speech else ""
|
||||
user = (
|
||||
f'The word is "{word}"{pos}. Its dictionary definition is: "{definition}".\n'
|
||||
"1) Rewrite that definition as ONE warm, plain-language sentence that a general reader "
|
||||
"instantly understands. Stay faithful to the meaning; do not invent extra facts.\n"
|
||||
"2) Write TWO short, natural example sentences that clearly show the word used in "
|
||||
"everyday life — concrete and easy to picture, not abstract, archaic, or a proper-noun "
|
||||
f'title. Each must actually use the word "{word}".\n'
|
||||
'Reply with JSON only: {"gloss": "...", "examples": ["...", "..."]}'
|
||||
)
|
||||
try:
|
||||
txt = client.chat_text([{"role": "user", "content": user}])
|
||||
except Exception: # noqa: BLE001 — polish is best-effort; raw dictionary data stands
|
||||
return None
|
||||
m = re.search(r"\{.*\}", txt, re.S)
|
||||
if not m:
|
||||
return None
|
||||
try:
|
||||
data = json.loads(m.group(0))
|
||||
except ValueError:
|
||||
return None
|
||||
gloss = " ".join(str(data.get("gloss") or "").split()).strip()
|
||||
examples = [" ".join(str(e).split()).strip() for e in (data.get("examples") or []) if str(e).strip()]
|
||||
if not gloss:
|
||||
return None
|
||||
return {"gloss": gloss, "examples": examples[:2]}
|
||||
|
||||
|
||||
def _lookup(word: str, prefer_pos: str | None = None) -> dict | None:
|
||||
"""Validate + enrich a word via the dictionary. Returns None if it's not a real word.
|
||||
When prefer_pos is given, picks the meaning of that part of speech (the sense the LLM meant)."""
|
||||
@@ -167,12 +199,15 @@ def harvest(conn: sqlite3.Connection, client, count: int = _HARVEST_BATCH) -> di
|
||||
if not info:
|
||||
continue
|
||||
audio_file = _cache_audio(info["audio_url"], info["word"])
|
||||
polished = _polish(client, info["word"], info["part_of_speech"], info["definition"])
|
||||
gloss = polished["gloss"] if polished else None
|
||||
usage = json.dumps(polished["examples"]) if polished else None
|
||||
rows.append((info["word"], info["part_of_speech"], info["phonetic"], audio_file,
|
||||
info["audio_url"], info["definition"], json.dumps(info["examples"])))
|
||||
info["audio_url"], info["definition"], json.dumps(info["examples"]), gloss, usage))
|
||||
before = _pool_count(conn)
|
||||
conn.executemany(
|
||||
"INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples) "
|
||||
"VALUES ('llm', ?, ?, ?, ?, ?, ?, ?)", rows,
|
||||
"INSERT OR IGNORE INTO wotd_pool (source, word, part_of_speech, phonetic, audio_file, audio_url, definition, examples, gloss, usage) "
|
||||
"VALUES ('llm', ?, ?, ?, ?, ?, ?, ?, ?, ?)", rows,
|
||||
)
|
||||
conn.commit()
|
||||
after = _pool_count(conn)
|
||||
@@ -195,7 +230,7 @@ def _candidates(conn: sqlite3.Connection, avoid: int | None = None) -> list[int]
|
||||
|
||||
|
||||
def pick_daily(conn: sqlite3.Connection, feature_date: str | None = None, force: bool = False,
|
||||
avoid: int | None = None) -> dict | None:
|
||||
avoid: int | None = None, client=None) -> dict | None:
|
||||
feature_date = feature_date or local_today()
|
||||
existing = conn.execute("SELECT * FROM daily_wotd WHERE feature_date=?", (feature_date,)).fetchone()
|
||||
if existing and not force:
|
||||
@@ -205,14 +240,20 @@ def pick_daily(conn: sqlite3.Connection, feature_date: str | None = None, force:
|
||||
return None
|
||||
pick_id = daily.seeded_order(ids, feature_date)[0]
|
||||
row = conn.execute("SELECT * FROM wotd_pool WHERE id=?", (pick_id,)).fetchone()
|
||||
gloss, usage = row["gloss"], row["usage"]
|
||||
if not gloss and client: # lazy polish for older pool words; cached back
|
||||
polished = _polish(client, row["word"], row["part_of_speech"], row["definition"])
|
||||
if polished:
|
||||
gloss, usage = polished["gloss"], json.dumps(polished["examples"])
|
||||
conn.execute("UPDATE wotd_pool SET gloss=?, usage=? WHERE id=?", (gloss, usage, pick_id))
|
||||
conn.execute(
|
||||
"INSERT INTO daily_wotd (feature_date, pool_id, word, part_of_speech, phonetic, audio_file, definition, examples) "
|
||||
"VALUES (?,?,?,?,?,?,?,?) "
|
||||
"INSERT INTO daily_wotd (feature_date, pool_id, word, part_of_speech, phonetic, audio_file, definition, examples, gloss, usage) "
|
||||
"VALUES (?,?,?,?,?,?,?,?,?,?) "
|
||||
"ON CONFLICT(feature_date) DO UPDATE SET pool_id=excluded.pool_id, word=excluded.word, "
|
||||
"part_of_speech=excluded.part_of_speech, phonetic=excluded.phonetic, audio_file=excluded.audio_file, "
|
||||
"definition=excluded.definition, examples=excluded.examples",
|
||||
"definition=excluded.definition, examples=excluded.examples, gloss=excluded.gloss, usage=excluded.usage",
|
||||
(feature_date, row["id"], row["word"], row["part_of_speech"], row["phonetic"],
|
||||
row["audio_file"], row["definition"], row["examples"]),
|
||||
row["audio_file"], row["definition"], row["examples"], gloss, usage),
|
||||
)
|
||||
conn.execute("UPDATE wotd_pool SET shown_at=? WHERE id=?", (feature_date, pick_id))
|
||||
conn.commit()
|
||||
@@ -233,5 +274,5 @@ def run_daily(conn: sqlite3.Connection, client=None) -> dict:
|
||||
harvested = None
|
||||
if client and _pool_count(conn) < _TARGET_POOL:
|
||||
harvested = harvest(conn, client)
|
||||
picked = pick_daily(conn)
|
||||
picked = pick_daily(conn, client=client)
|
||||
return {"pool": _pool_count(conn), "harvested": harvested, "picked": (picked or {}).get("word")}
|
||||
|
||||
Reference in New Issue
Block a user