Sync repo to deployed state: SEO recovery, Publishing Desk, Play games, emoji picker

The deploy pipeline runs from the working tree, so a wave of shipped features had never been committed. This commit snapshots git to what is actually running.

SEO impression recovery (live + verified):
- Duplicate /a/{id} pages now 301-redirect to their canonical twin instead of returning 404 (a hard 404 silently dropped already-indexed URLs and tanked impressions).
- Dedup representative selection reworked; priority is now accepted/serveable -> established rep (URL stability) -> quality score, so an accepted page never retires to a rejected rep and an indexed canonical doesn't churn when a newer twin arrives.
- HEAD /a/{id} returns the same status as GET (api_route GET+HEAD) instead of falling through to the static mount and 404ing.
- `dedup --force-recluster`: cycle-locked, model-free re-cluster that re-applies the policy to the existing corpus (shared cycle_lock context manager).
- CLI honors GOODNEWS_DB for its default --db (was silently ignored).

Publishing Desk (admin tool to post highlights to X via Web Intents):
- publishing.py queue/rank/handle-resolution; admin UI; full searchable emoji picker (bundled data, no CDN) for the blurb editor.

Play games + site:
- Bloom (word-wheel), Memory Match, daily ritual set, Zen Den (dev-gated).
- English-only language gate; source prospecting; paywall + dedup hardening.

Tests: full suite green (349). Ignores tightened (node_modules, data/*.db).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-18 11:32:27 -04:00
parent 2dbe73430c
commit 89c0fbe1f6
66 changed files with 6138 additions and 109 deletions
@@ -18,10 +18,13 @@ import hashlib
 import hmac
 import io
 import json
+import logging
 import os
 import re
 import secrets
 import sqlite3
+import threading
+import time
 from collections import Counter
 from contextlib import contextmanager
 from datetime import datetime, timezone
@@ -33,7 +36,7 @@ from fastapi.responses import HTMLResponse, RedirectResponse
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel

-from . import auth, email_send, feeds, games, oauth_google, queries, share, sources, summarize
+from . import auth, bloom, email_send, feeds, games, oauth_google, publishing, queries, share, sources, summarize
 from .localtime import local_today
 from .markup import reply_html_to_text, sanitize_reply_html
 from .db import connect
@@ -55,6 +58,8 @@ _EDGE_DERIVED = "public, max-age=0, s-maxage=120, stale-while-revalidate=120"
 _EDGE_FEED = "public, max-age=0, s-maxage=45, stale-while-revalidate=30"        # global feed (URL-keyed, shareable only)
 _PRIVATE = "private, no-store"                                                  # never share across users

+log = logging.getLogger("goodnews.api")
+
 ROOT = Path(__file__).resolve().parents[1]
 DEFAULT_DB = ROOT / "data" / "goodnews.sqlite3"
 # Prefer the built SvelteKit site; fall back to the legacy single-page harness.
@@ -147,6 +152,32 @@ def _user_out(user: sqlite3.Row) -> dict:
 # scrapers don't each kick off a duplicate LLM call.
 _summarizing: set[int] = set()

+# In-process cache of fully-rendered /a/{id} share pages. We're direct-origin (no
+# CDN), so Cache-Control alone can't shield the box from crawler bursts hitting the
+# sitemap's article URLs while the LAN LLM / cycle is loading it. Only COMPLETE
+# pages (summary + explanation present) are cached, so a "still generating" page is
+# never pinned; a short TTL still picks up edits. Per-process (fine across workers).
+# INVARIANT: the share page is PUBLIC/anonymous — the cache key is article_id alone.
+# If /a/{id} ever personalizes (per-viewer content), key by viewer or drop the cache,
+# or one visitor's variant would be served to another.
+_SHARE_CACHE: dict[int, tuple[float, str]] = {}
+_SHARE_TTL = 900.0       # 15 min
+_SHARE_CACHE_MAX = 512
+
+
+def _share_cache_get(aid: int) -> str | None:
+    """Return the cached share-page HTML for `aid`, or None when there is no
+    entry or the entry is at least _SHARE_TTL seconds old (monotonic clock)."""
+    entry = _SHARE_CACHE.get(aid)
+    if not entry:
+        return None
+    stored_at, page = entry
+    if time.monotonic() - stored_at >= _SHARE_TTL:
+        return None  # stale; the entry itself is left in place
+    return page
+
+
+def _share_cache_put(aid: int, html: str) -> None:
+    """Store rendered share-page HTML for `aid`, stamped with the monotonic clock.
+
+    When the cache already holds _SHARE_CACHE_MAX entries and `aid` is a NEW key,
+    the entry with the oldest timestamp is evicted first. Refreshing a key that is
+    already cached overwrites it in place and never costs another page its slot.
+    """
+    if aid not in _SHARE_CACHE and len(_SHARE_CACHE) >= _SHARE_CACHE_MAX:
+        # Scan a snapshot rather than the live dict. share_page is a sync endpoint,
+        # which FastAPI runs on a worker thread pool, so a concurrent put could
+        # otherwise resize the dict mid-scan ("dictionary changed size during
+        # iteration") or remove a key the scan is about to look up.
+        snapshot = list(_SHARE_CACHE.items())
+        if snapshot:
+            oldest = min(snapshot, key=lambda kv: kv[1][0])[0]
+            _SHARE_CACHE.pop(oldest, None)
+    _SHARE_CACHE[aid] = (time.monotonic(), html)
+

 def _run_summary(article_id: int) -> None:
    try:
@@ -158,6 +189,29 @@ def _run_summary(article_id: int) -> None:
        _summarizing.discard(article_id)


+# Publishing Desk: the "Build queue" job runs in the background (one bounded
+# comparative LLM call can be slow); the admin polls the queue endpoint. Mirrors the
+# summary-kick pattern — never holds an HTTP request open on the model. The lock makes
+# the check-and-set atomic so two rapid clicks can't launch two expensive jobs.
+_publish_build: dict = {"building": False, "result": None, "error": None}
+_publish_build_lock = threading.Lock()
+
+
+def _run_publish_build() -> None:
+    """Background job behind the Publishing Desk "Build queue" action.
+
+    Records the outcome in the module-level _publish_build dict: `result` on
+    success, a truncated `error` string (300 chars max) on failure. `building`
+    is always cleared at the end, whichever way the job finished.
+    """
+    try:
+        try:
+            model = LocalModelClient.from_env()
+        except Exception:  # noqa: BLE001 — model down → deterministic fallback inside build_queue
+            model = None
+        with get_conn() as db:
+            outcome = publishing.build_queue(db, PUBLIC_BASE_URL, client=model)
+    except Exception as err:  # noqa: BLE001 — surface, don't crash the worker
+        _publish_build.update(error=str(err)[:300])
+    else:
+        _publish_build.update(result=outcome, error=None)
+    finally:
+        _publish_build["building"] = False
+
+
 def _kick_summary(article_id: int, background_tasks: BackgroundTasks) -> None:
    if article_id in _summarizing:
        return
@@ -332,7 +386,7 @@ class SourcePreview(BaseModel):
    sampled: int
    classified: bool
    accepted: int
-    acceptance_rate: float
+    acceptance_rate: float | None   # None when there are no English items to judge (all held)
    avg_cortisol: float
    avg_ragebait: float
    avg_pr_risk: float
@@ -357,6 +411,54 @@ class GameStateBody(BaseModel):
    state: dict = {}


+class PublishStatusBody(BaseModel):
+    # Body of POST /api/admin/publishing/{sid}/status. Every field is handed to
+    # publishing.set_status, which decides whether the status/id is acceptable;
+    # the optional fields default to None when omitted.
+    status: str
+    draft_text: str | None = None
+    final_text: str | None = None
+    post_url: str | None = None
+    snooze_until: str | None = None
+
+
+class PublishDraftBody(BaseModel):
+    # Body of POST /api/admin/publishing/{sid}/draft; draft_text is passed to
+    # publishing.save_draft and defaults to the empty string.
+    draft_text: str = ""
+
+
+class EntityHandleBody(BaseModel):
+    # Body of POST /api/admin/publishing/handles; the three fields are forwarded
+    # to publishing.add_entity_handle (profile_url is optional).
+    entity_name: str
+    handle: str
+    profile_url: str | None = None
+
+
+class GameStateItem(BaseModel):
+    # One entry of GameStateBatchBody.items. The batch endpoint drops items whose
+    # (game, variant) fails _game_ok or whose JSON-encoded state exceeds 20000 chars.
+    game: str
+    variant: str
+    state: dict = {}
+
+
+class GameStateBatchBody(BaseModel):
+    # Body of PUT /api/games/state/batch: many game states for a single date.
+    # The endpoint rejects a date that is not plain YYYY-MM-DD and only considers
+    # the first 32 items.
+    date: str
+    items: list[GameStateItem] = []
+
+
+class BloomReportBody(BaseModel):
+    # Body of the public POST /api/bloom/report. Fields are forwarded, in this
+    # order, to bloom.add_report; everything except `word` is optional.
+    word: str = ""
+    date: str | None = None
+    mode: str | None = None
+    format: str | None = None
+    letters: str | None = None
+    reason: str | None = None
+
+
+class BloomOverrideBody(BaseModel):
+    # Body of POST /api/admin/bloom/overrides; passed to bloom.set_override,
+    # which validates the word/action pair (the endpoint answers 422 on refusal).
+    word: str = ""
+    action: str = "allow"        # 'allow' | 'block'
+    reason: str | None = None
+
+
+class BloomReportActionBody(BaseModel):
+    # Body of POST /api/admin/bloom/reports/{report_id}; `action` is passed to
+    # bloom.resolve_report, which rejects unknown values (the endpoint answers 400).
+    action: str = ""             # 'approve' | 'block' | 'dismiss'
+
+
 class WordPoolBody(BaseModel):
    word: str

@@ -495,6 +597,13 @@ _EVENT_KINDS = {
 }


+def _fts_query(q: str) -> str:
+    """Raw search box → safe FTS5 query: alnum terms only (no operator/quote
+    injection), each prefix-matched and AND'd together. '' when nothing usable.
+
+    At most the first 8 terms are kept. Each term is emitted as a quoted FTS5
+    string followed by `*` (e.g. `"heat"* "pump"*`). Quoting matters because a
+    bare, upper-case AND / OR / NOT is an FTS5 operator keyword: typed as a search
+    word it would otherwise become `NOT*`, which FTS5 rejects as a syntax error.
+    A quoted string is always a plain term, so any alnum input is a valid query.
+    """
+    terms = re.findall(r"[A-Za-z0-9]+", q or "")[:8]
+    return " ".join(f'"{t}"*' for t in terms)
+
+
 def _visitor_hash(token: str | None) -> str:
    token = (token or "").strip()[:200]
    if not token:
@@ -660,22 +769,38 @@ def create_app() -> FastAPI:
        state: str | None = None,
        error: str | None = None,
    ) -> RedirectResponse:
-        fail = RedirectResponse(f"{PUBLIC_BASE_URL}/auth/verify?error=google", status_code=302)
-        if error or not code or not state:
-            return fail
+        # The user always sees the same generic error=google (no detail leaked),
+        # but we log WHY internally so device/host-specific failures (e.g. a www
+        # vs apex cookie loss, a state mismatch, a token-exchange error) are
+        # diagnosable instead of all looking identical.
+        def fail(reason: str, exc: Exception | None = None) -> RedirectResponse:
+            host = request.headers.get("Host", "?")
+            if exc is not None:
+                log.warning("google callback failed: %s (host=%s): %s", reason, host, exc)
+            else:
+                log.warning("google callback failed: %s (host=%s)", reason, host)
+            return RedirectResponse(f"{PUBLIC_BASE_URL}/auth/verify?error=google", status_code=302)
+
+        if error:
+            return fail(f"provider_error:{error}")
+        if not code or not state:
+            return fail("missing_code_or_state")
        saved = _unsign(request.cookies.get(OAUTH_COOKIE))
        if not saved:
-            return fail
+            # Most likely the host-only ub_oauth cookie was set on a different
+            # host than this callback (www vs apex). Canonicalizing www→apex at
+            # the edge prevents this.
+            return fail("missing_oauth_cookie")
        saved_state, _, verifier = saved.partition(":")
        if not hmac.compare_digest(saved_state, state):
-            return fail
+            return fail("state_mismatch")
        try:
            tokens = oauth_google.exchange_code(code, _google_redirect_uri(), verifier)
            info = oauth_google.verify_id_token(tokens["id_token"])
            if not info.get("picture") and tokens.get("access_token"):
                info["picture"] = oauth_google.fetch_userinfo(tokens["access_token"]).get("picture")
-        except Exception:
-            return fail
+        except Exception as exc:  # noqa: BLE001 — log reason, show generic error
+            return fail("token_exchange_or_verify", exc)
        with get_conn() as conn:
            user_id = auth.find_or_create_user(
                conn, info["email"], "google", info["sub"],
@@ -925,13 +1050,19 @@ def create_app() -> FastAPI:

    # --- Public share/landing page for an article -------------------------

-    @app.get("/a/{article_id}", response_class=HTMLResponse)
+    # GET + HEAD: FastAPI's @app.get registers GET only (no auto-HEAD), so a HEAD would
+    # fall through to the catch-all StaticFiles mount at "/" and 404. Register both so
+    # HEAD returns the same status (200/301/404) as GET, sans body.
+    @app.api_route("/a/{article_id}", methods=["GET", "HEAD"], response_class=HTMLResponse)
    def share_page(article_id: str, background_tasks: BackgroundTasks) -> HTMLResponse:
        not_found = HTMLResponse(share.render_not_found(PUBLIC_BASE_URL), status_code=404)
        try:
            aid = int(article_id)
        except (TypeError, ValueError):
            return not_found  # malformed id → calm 404, no stack trace
+        cached = _share_cache_get(aid)
+        if cached is not None:  # serve a rendered page without touching SQLite/render
+            return HTMLResponse(cached, headers={"Cache-Control": "public, max-age=300"})
        with get_conn() as conn:
            row = conn.execute(
                "SELECT a.id, a.title, a.description, a.image_url, a.canonical_url, "
@@ -941,16 +1072,45 @@ def create_app() -> FastAPI:
                "LEFT JOIN article_scores s ON s.article_id = a.id WHERE a.id = ?",
                (aid,),
            ).fetchone()
-            # Only render real, accepted, non-duplicate stories.
-            if not row or row["duplicate_of"] is not None or not row["accepted"]:
+            if not row:
+                return not_found
+            # A duplicate's URL may already be indexed by Google. A hard 404 silently
+            # drops it (and any newer twin that arrives later retires the OLDER, already
+            # indexed URL) — that's what tanked impressions. So 301 to the canonical twin
+            # instead: Google consolidates the page onto the survivor. dedup stores a star
+            # (dup -> rep, rep.duplicate_of IS NULL); we still follow a short chain with a
+            # cycle guard as cheap insurance.
+            if row["duplicate_of"] is not None:
+                seen, cur, target = {aid}, row["duplicate_of"], None
+                for _ in range(8):
+                    if cur in seen:
+                        break
+                    seen.add(cur)
+                    r2 = conn.execute(
+                        "SELECT a.id, a.duplicate_of, s.accepted FROM articles a "
+                        "LEFT JOIN article_scores s ON s.article_id = a.id WHERE a.id = ?",
+                        (cur,),
+                    ).fetchone()
+                    if not r2:
+                        break
+                    if r2["duplicate_of"] is None:
+                        target = r2 if r2["accepted"] else None
+                        break
+                    cur = r2["duplicate_of"]
+                if target is not None:
+                    return RedirectResponse(f"/a/{target['id']}", status_code=301)
+                return not_found     # canonical itself is gone/rejected → genuinely 404
+            if not row["accepted"]:
                return not_found
            summary = summarize.get_summary(conn, aid)
            explanation = summarize.get_explanation(conn, aid)
-        if not summary or not explanation:
+        complete = bool(summary and explanation)
+        if not complete:
            _kick_summary(aid, background_tasks)  # generate/top-up for next time; page polls
-        return HTMLResponse(
-            share.render_share_page(dict(row), PUBLIC_BASE_URL, summary=summary, explanation=explanation)
-        )
+        html = share.render_share_page(dict(row), PUBLIC_BASE_URL, summary=summary, explanation=explanation)
+        if complete:
+            _share_cache_put(aid, html)   # cache only the finished page (never the "generating" state)
+        return HTMLResponse(html, headers={"Cache-Control": "public, max-age=300" if complete else "no-cache"})

    # --- Privacy-respecting first-party analytics -------------------------

@@ -1305,6 +1465,76 @@ def create_app() -> FastAPI:
            cand = conn.execute("SELECT * FROM source_candidates WHERE id = ?", (cid,)).fetchone()
        return _candidate_dict(cand)

+    @app.post("/api/admin/candidates/{cid}/restore")
+    def admin_candidate_restore(cid: int, request: Request) -> dict:
+        # Admin-only: put a rejected candidate back into staging, then return
+        # its current row. 404 when sources.restore_candidate reports no match.
+        with get_conn() as conn:
+            _require_admin(conn, request)
+            restored = sources.restore_candidate(conn, cid)
+            if not restored:
+                raise HTTPException(status_code=404, detail="no rejected candidate with that id")
+            row = conn.execute("SELECT * FROM source_candidates WHERE id = ?", (cid,)).fetchone()
+        return _candidate_dict(row)
+
+    # --- Publishing Desk (admin): outbound-share queue for X (platform-neutral) ---
+    @app.post("/api/admin/publishing/build")
+    def admin_publishing_build(request: Request, background_tasks: BackgroundTasks) -> dict:
+        # Admin-only. Schedules _run_publish_build as a background task and returns
+        # immediately; the client then polls /queue for progress. If a build is
+        # already marked as running, nothing new is scheduled.
+        with get_conn() as conn:
+            _require_admin(conn, request)
+        with _publish_build_lock:   # check and set under one lock: one job at a time
+            already_running = _publish_build["building"]
+            if not already_running:
+                _publish_build.update(building=True, result=None, error=None)
+                background_tasks.add_task(_run_publish_build)
+        return {"building": True}
+
+    @app.get("/api/admin/publishing/queue")
+    def admin_publishing_queue(request: Request, archived: bool = False) -> dict:
+        # Admin-only: the current queue plus the state of the background build
+        # (running flag, last result, last error) so the client can poll one URL.
+        with get_conn() as conn:
+            _require_admin(conn, request)
+            queue_items = publishing.list_queue(conn, include_archived=archived)
+        return {
+            "building": _publish_build["building"],
+            "last": _publish_build.get("result"),
+            "error": _publish_build.get("error"),
+            "items": queue_items,
+        }
+
+    @app.post("/api/admin/publishing/{sid}/status")
+    def admin_publishing_status(sid: int, body: PublishStatusBody, request: Request) -> dict:
+        # Admin-only: apply a status change (with its optional text/url/snooze
+        # fields) to share `sid`. 400 when publishing.set_status refuses it.
+        with get_conn() as conn:
+            _require_admin(conn, request)
+            updated = publishing.set_status(
+                conn, sid, body.status,
+                draft_text=body.draft_text,
+                final_text=body.final_text,
+                post_url=body.post_url,
+                snooze_until=body.snooze_until,
+            )
+        if updated:
+            return {"ok": True}
+        raise HTTPException(status_code=400, detail="bad status or id")
+
+    @app.post("/api/admin/publishing/{sid}/draft")
+    def admin_publishing_draft(sid: int, body: PublishDraftBody, request: Request) -> dict:
+        # Admin-only: save the draft text for share `sid`. 404 when
+        # publishing.save_draft reports that nothing was saved.
+        with get_conn() as conn:
+            _require_admin(conn, request)
+            saved = publishing.save_draft(conn, sid, body.draft_text)
+        if saved:
+            return {"ok": True}
+        raise HTTPException(status_code=404, detail="no such share")
+
+    @app.post("/api/admin/publishing/{sid}/restore")
+    def admin_publishing_restore(sid: int, request: Request) -> dict:
+        # Admin-only: bring share `sid` back via publishing.restore. 400 when
+        # the share is not in a restorable state.
+        with get_conn() as conn:
+            _require_admin(conn, request)
+            restored = publishing.restore(conn, sid)
+        if restored:
+            return {"ok": True}
+        raise HTTPException(status_code=400, detail="not a restorable (skipped/snoozed) share")
+
+    @app.post("/api/admin/publishing/handles")
+    def admin_publishing_add_handle(body: EntityHandleBody, request: Request) -> dict:
+        # Admin-only: store a verified handle for an entity (e.g. one confirmed
+        # through 'Find on X'). 400 when publishing.add_entity_handle rejects it.
+        with get_conn() as conn:
+            _require_admin(conn, request)
+            stored = publishing.add_entity_handle(conn, body.entity_name, body.handle, body.profile_url)
+        if stored:
+            return {"ok": True}
+        raise HTTPException(status_code=400, detail="bad entity or handle")
+
    # --- CSV exports (admin-gated, for inspection / archiving) ---------------

    def _csv_cell(v):
@@ -1593,6 +1823,32 @@ def create_app() -> FastAPI:
            items=[Article.from_row(r) for r in rows],
        )

+    @app.get("/api/search", response_model=FeedResponse)
+    def search(response: Response, q: str = Query("", max_length=120),
+               prefs: str | None = Query(None), limit: int = Query(30, ge=1, le=60),
+               offset: int = Query(0, ge=0)) -> FeedResponse:
+        # Public article search. Uses the feed query (accepted only, plus the
+        # reader's preference filters) with an FTS match instead of a lane scope.
+        # Results keep the order the query returns; the response depends on the
+        # reader's prefs, so it is marked private and never edge-cached.
+        response.headers["Cache-Control"] = _PRIVATE
+        match_expr = _fts_query(q)
+        if not match_expr:
+            return FeedResponse(topic=None, flavor=None, count=0, items=[])
+        reader = prefs_from_json(prefs)
+        now = datetime.now(timezone.utc)
+        sql_kw = _prefs_sql_kw(reader, now)
+        window_end = offset + limit
+        if reader.avoid_terms:
+            # Over-fetch (capped at 2000) because avoid-terms are filtered in
+            # Python afterwards and would otherwise leave the page short.
+            fetch_n = min(2000, window_end * 4 + 40)
+        else:
+            fetch_n = window_end
+        with get_conn() as conn:
+            index_has_rows = conn.execute("SELECT 1 FROM article_search LIMIT 1").fetchone()
+            if not index_has_rows:
+                queries.reindex_search(conn)  # lazy build (fresh deploy / before first cycle)
+            hits = queries.feed(conn, accepted_only=True, limit=fetch_n, offset=0, match=match_expr, **sql_kw)
+        if reader.avoid_terms:
+            hits = filter_articles(hits, reader, now)  # word-boundary avoid-terms
+        page = hits[offset:window_end]
+        # Keep relevance order (don't paywall-reorder); the badge still shows true status.
+        articles = [Article.from_row(r) for r in page]
+        return FeedResponse(topic=None, flavor=None, count=len(page), items=articles)
+
    @app.get("/api/puzzle/{game}")
    def daily_puzzle(game: str, variant: str = Query("5")) -> dict:
        with get_conn() as conn:
@@ -1600,8 +1856,29 @@ def create_app() -> FastAPI:
                return games.word_puzzle_response(conn, local_today(), variant)
            if game == "wordsearch":
                return games.wordsearch_response(conn, local_today(), variant)
+            if game == "bloom":
+                return bloom.bloom_response(conn, local_today())
        raise HTTPException(status_code=404, detail="no such puzzle")

+    @app.get("/api/puzzle/bloom/free")
+    def bloom_free(response: Response, format: str = "center", seed: str | None = None) -> dict:
+        # Free-play wheel. Any format other than "wild" is treated as "center".
+        # A well-formed seed (1-32 chars of A-Z a-z 0-9 _ -) is used as given so
+        # the client can resume; otherwise a fresh random seed is minted.
+        wheel_format = "center" if format != "wild" else "wild"
+        seed_ok = bool(seed) and re.fullmatch(r"[A-Za-z0-9_-]{1,32}", seed) is not None
+        wheel_seed = seed if seed_ok else secrets.token_urlsafe(6)
+        response.headers["Cache-Control"] = "no-store"
+        with get_conn() as conn:
+            return bloom.bloom_free_response(conn, wheel_seed, wheel_format)
+
+    @app.post("/api/bloom/report")
+    def bloom_report(body: BloomReportBody) -> dict:
+        # Public (no auth): a player flags a rejected word as "should count".
+        # The report is handed to bloom.add_report; "ok" mirrors its truthiness.
+        with get_conn() as conn:
+            recorded = bloom.add_report(
+                conn, body.word, body.date, body.mode, body.format, body.letters, body.reason,
+            )
+        return {"ok": bool(recorded)}
+
    @app.post("/api/puzzle/word/guess")
    def word_guess(body: WordGuessRequest) -> dict:
        if body.variant not in games.WORD_VARIANTS:
@@ -1615,7 +1892,9 @@ def create_app() -> FastAPI:
    # --- Cross-device game state sync (signed-in only; merged server-side) ---
    def _game_ok(game: str, variant: str) -> bool:
        return (game == "word" and variant in games.WORD_VARIANTS) or \
-               (game == "wordsearch" and variant in games.WS_TIERS)
+               (game == "wordsearch" and variant in games.WS_TIERS) or \
+               (game == "bloom" and variant == "") or \
+               (game == "match" and variant in games.MATCH_VARIANTS)  # "<tier>-<format>"

    def _valid_pdate(d: str) -> bool:
        return bool(re.match(r"^\d{4}-\d{2}-\d{2}$", d or ""))  # plain YYYY-MM-DD, no junk rows
@@ -1647,6 +1926,27 @@ def create_app() -> FastAPI:
            merged = games.save_game_state(conn, user["id"], body.game, body.variant, body.date, body.state or {})
            return {"state": merged}

+    @app.put("/api/games/state/batch")
+    def game_state_put_batch(body: GameStateBatchBody, request: Request) -> dict:
+        """Reconcile many (game, variant) states for one date in one request.
+
+        Only the first 32 items are looked at; items with an unknown game/variant
+        or a JSON-encoded state over 20000 characters are silently dropped. For a
+        signed-in user each remaining item is merged via games.save_game_state and
+        the merged state is returned; signed-out callers get their own states
+        echoed back unchanged (nothing is stored)."""
+        if not _valid_pdate(body.date):
+            raise HTTPException(status_code=400, detail="bad date")
+        accepted = []
+        for item in body.items[:32]:
+            if not _game_ok(item.game, item.variant):
+                continue
+            if len(json.dumps(item.state)) > 20000:
+                continue
+            accepted.append(item)
+        with get_conn() as conn:
+            user = _current_user(conn, request)
+            if user:
+                states = [
+                    {
+                        "game": item.game,
+                        "variant": item.variant,
+                        "state": games.save_game_state(
+                            conn, user["id"], item.game, item.variant, body.date, item.state or {},
+                        ),
+                    }
+                    for item in accepted
+                ]
+            else:
+                states = [
+                    {"game": item.game, "variant": item.variant, "state": item.state}
+                    for item in accepted
+                ]
+            return {"states": states}
+
    @app.get("/api/games/stats")
    def game_stats_get(game: str, variant: str, request: Request) -> dict:
        if not _game_ok(game, variant):
@@ -1656,6 +1956,41 @@ def create_app() -> FastAPI:
            return {"stats": games.game_stats(conn, user["id"], game, variant) if user else None}

    # --- Admin: Daily Word pool curation ---
+    # --- Admin: Bloom word curation (runtime, no deploy) ---
+    @app.get("/api/admin/bloom/reports")
+    def admin_bloom_reports(request: Request, status: str = "pending") -> dict:
+        # Admin-only: word reports in one status bucket plus all current
+        # overrides. An unrecognised status falls back to "pending".
+        known = ("pending", "approved", "blocked", "dismissed")
+        with get_conn() as conn:
+            _require_admin(conn, request)
+            chosen = status if status in known else "pending"
+            reports = bloom.list_reports(conn, chosen)
+            current_overrides = bloom.list_overrides(conn)
+            return {"status": chosen, "reports": reports, "overrides": current_overrides}
+
+    @app.post("/api/admin/bloom/reports/{report_id}")
+    def admin_bloom_resolve(report_id: int, body: BloomReportActionBody, request: Request) -> dict:
+        # Admin-only: resolve one report with the given action, attributed to the
+        # admin's email. 400 when bloom.resolve_report rejects the id or action.
+        with get_conn() as conn:
+            admin = _require_admin(conn, request)
+            resolved = bloom.resolve_report(conn, report_id, body.action, by=admin["email"])
+        if resolved:
+            return {"ok": True}
+        raise HTTPException(status_code=400, detail="bad report or action")
+
+    @app.post("/api/admin/bloom/overrides")
+    def admin_bloom_override(body: BloomOverrideBody, request: Request) -> dict:
+        # Admin-only: set an allow/block override for a word, attributed to the
+        # admin's email. 422 when bloom.set_override refuses the word/action.
+        with get_conn() as conn:
+            admin = _require_admin(conn, request)
+            applied = bloom.set_override(conn, body.word, body.action, reason=body.reason, by=admin["email"])
+        if applied:
+            return {"ok": True}
+        raise HTTPException(
+            status_code=422,
+            detail="allow needs a real ≥4-letter word with no 'S'; block accepts any word",
+        )
+
+    @app.delete("/api/admin/bloom/overrides/{word}")
+    def admin_bloom_override_clear(word: str, request: Request) -> dict:
+        # Admin-only: remove the override for `word`. Always answers ok; the
+        # result of bloom.clear_override is not inspected.
+        with get_conn() as db:
+            _require_admin(db, request)
+            bloom.clear_override(db, word)
+        return {"ok": True}
+
    @app.get("/api/admin/word/lookup")
    def admin_word_lookup(word: str, request: Request) -> dict:
        with get_conn() as conn:
@@ -0,0 +1,317 @@
+"""Bloom — the daily word wheel (Center Circle / Wild Bloom).
+
+DESIGN and ACCEPTANCE are decoupled:
+
+  • DESIGN (wheel selection, tiers, pangram, the Full-Bloom target) uses the small
+    COMMON list only — deterministic, stored in daily_puzzles, and unaffected by
+    curation. Tiers are scored on COMMON so "Flourishing" is always reachable with
+    everyday vocabulary, and "Full Bloom" = finding the whole *designed* puzzle
+    (the broad bonus words are extra credit beyond it, never required).
+
+  • ACCEPTANCE is BROAD and DYNAMIC — every valid dictionary word buildable from
+    the wheel, computed at RESPONSE TIME as: broad dict ∪ {allow} − {block}, where
+    allow/block are runtime admin overrides (bloom_word_overrides). So a missed
+    word can be allowed (or a junk word blocked) with NO deploy or regeneration.
+
+Accept words never sit in the network response: clients validate against salted
+hashes and compute their own score/tier/pangram from the 7 letters.
+"""
+from __future__ import annotations
+
+import hashlib
+import json
+import random
+import sqlite3
+from itertools import combinations
+from pathlib import Path
+
+# Bundled word data lives in the "data" directory next to this module and is
+# loaded once, at import time.
+_DATA = Path(__file__).parent / "data"
+_W = json.loads((_DATA / "bloom_words.json").read_text())
+ACCEPT: list[str] = _W["accept"]                 # broad: all valid dictionary words
+_COMMON: set[str] = set(_W["common"])            # tight: design / tiers / pangrams only
+# Each COMMON word paired with its distinct-letter set, precomputed so the design
+# functions can test "word fits this wheel" with a subset check.
+_COMMON_LS: list[tuple[str, frozenset]] = [(w, frozenset(w)) for w in _COMMON]
+# Words that are never chosen as the displayed pangram (they still count as words).
+_AVOID: set[str] = set(json.loads((_DATA / "bloom_avoid.json").read_text()))
+
+# Broad accept words bucketed by distinct-letter set, so the accepted set for a
+# 7-letter wheel is gathered by unioning its ≤127 letter-subsets (fast) — no scan
+# of the whole ~68k list per request.
+_BY_SET: dict[frozenset, list[str]] = {}
+for _w in ACCEPT:
+    _BY_SET.setdefault(frozenset(_w), []).append(_w)
+
+# Candidate wheels = letter-sets of 7-distinct-letter COMMON words (every wheel
+# has ≥1 recognizable pangram). Sorted for deterministic order.
+_PANGRAM_SETS: dict[frozenset, list[str]] = {}
+for _w in _COMMON:
+    _s = frozenset(_w)
+    if len(_s) == 7:
+        _PANGRAM_SETS.setdefault(_s, []).append(_w)
+_CANDIDATES: list[frozenset] = sorted(_PANGRAM_SETS, key=lambda s: "".join(sorted(s)))
+
+# Bounds on how many COMMON words a wheel must offer to be picked by _generate
+# (the wild format enforces only the minimum).
+MIN_COMMON_WORDS, MAX_COMMON_WORDS = 14, 45
+# Extra points for a word that uses every letter of the wheel.
+PANGRAM_BONUS = 7
+# 8 / 30 / 70 — Flourishing at 70% keeps Bloom from becoming a completionist
+# grind. Do NOT raise Flourishing above 0.70 (Codex).
+TIER_PCTS: tuple[tuple[str, float], ...] = (
+    ("Sprouting", 0.0), ("Budding", 0.08), ("Blooming", 0.30), ("Flourishing", 0.70),
+)
+# NOTE(review): not referenced in the visible part of this module; presumably it
+# mirrors the Flourishing fraction above — confirm against its users.
+TOP_TIER_PCT = 0.70
+
+
+def score_word(word: str) -> int:
+    """Base points for one word: a 4-letter word is worth 1, any other word is
+    worth its length. The pangram bonus is not included here."""
+    length = len(word)
+    if length == 4:
+        return 1
+    return length
+
+
+def score_words(payload: dict, words) -> int:
+    """Total score for `words` on the wheel described by `payload`.
+
+    The wheel's letters are payload["center"] plus payload["outer"]. Each word
+    earns score_word(word), and a word whose distinct letters equal the whole
+    wheel (a pangram) earns PANGRAM_BONUS on top. Used for the player's running
+    score AND the Full-Bloom check (vs the design's max_score)."""
+    wheel = frozenset(payload["center"]) | frozenset(payload["outer"])
+    return sum(
+        score_word(found) + (PANGRAM_BONUS if frozenset(found) == wheel else 0)
+        for found in words
+    )
+
+
+# --- DESIGN: common-only, deterministic, stored --------------------------------
+
+def tiers_for(common_max: int) -> list[dict]:
+    """Tier thresholds for a wheel whose designed (COMMON) maximum is `common_max`.
+
+    Returns one {"name", "score"} dict per TIER_PCTS entry, in order, where
+    score is the tier's fraction of `common_max`, rounded down.
+
+    The fraction is applied in integer basis points rather than as a float
+    product: 0.70 * 90 evaluates to 62.99999999999999 in binary floating point,
+    so int(p * common_max) yielded 62 where 70% of 90 is exactly 63.
+    """
+    return [
+        {"name": name, "score": round(pct * 10_000) * common_max // 10_000}
+        for name, pct in TIER_PCTS
+    ]
+
+
+def _design(letters: frozenset, center: str):
+    """Center-mode design, drawn from the COMMON list only.
+
+    Returns (commons, display, common_max): the COMMON words that contain
+    `center` and use only `letters`; the pangrams among them that are not in
+    _AVOID, shortest first then alphabetical; and the total score of `commons`
+    including PANGRAM_BONUS for every pangram (avoided ones included)."""
+    usable, pangrams, total = [], [], 0
+    for word, word_letters in _COMMON_LS:
+        if center not in word or not (word_letters <= letters):
+            continue
+        usable.append(word)
+        total += score_word(word)
+        if word_letters == letters:
+            pangrams.append(word)
+    total += PANGRAM_BONUS * len(pangrams)
+    shown = sorted((p for p in pangrams if p not in _AVOID), key=lambda p: (len(p), p))
+    return usable, shown, total
+
+
+def _design_wild(letters: frozenset):
+    """Wild design (no required center), drawn from the COMMON list only.
+
+    Returns (commons, display, common_max, center). The first three are as in
+    the center-mode design but without the center requirement; `center` is the
+    alphabetically first vowel among `letters`, or the alphabetically first
+    letter when the wheel has no vowel."""
+    usable, pangrams, total = [], [], 0
+    for word, word_letters in _COMMON_LS:
+        if not (word_letters <= letters):
+            continue
+        usable.append(word)
+        total += score_word(word)
+        if word_letters == letters:
+            pangrams.append(word)
+    total += PANGRAM_BONUS * len(pangrams)
+    shown = sorted((p for p in pangrams if p not in _AVOID), key=lambda p: (len(p), p))
+    alphabetical = sorted(letters)
+    hub = next((c for c in alphabetical if c in "aeiou"), None)
+    if hub is None:
+        hub = alphabetical[0]
+    return usable, shown, total, hub
+
+
+def _payload(letters: frozenset, center: str, display, common_max: int) -> dict:
+    """Assemble the stored wheel design.
+
+    `display` must be non-empty: its first entry becomes the shown pangram.
+    "max_score" is the designed (COMMON) maximum — the Full Bloom target; broad
+    bonus words can push a score past it but are never required."""
+    ring = sorted(letters - {center})
+    headline = display[0]
+    tiers = tiers_for(common_max)
+    return {
+        "center": center,
+        "outer": ring,
+        "pangram": headline,
+        "tiers": tiers,
+        "max_score": common_max,
+    }
+
+
+def _generate(seed_str: str, fmt: str) -> dict:
+    """Deterministically pick a wheel design for a seed + format.
+
+    The RNG is seeded from the SHA-256 of `seed_str`, so the same seed and
+    format always walk the candidates in the same order and return the same
+    payload. `fmt == "wild"` selects the wild design; any other value selects
+    the center design.
+
+    Raises RuntimeError if no candidate wheel qualifies.
+    """
+    rng = random.Random(int(hashlib.sha256(seed_str.encode()).hexdigest(), 16))
+    # Shuffle a copy: _CANDIDATES itself must keep its sorted order for other seeds.
+    order = _CANDIDATES[:]
+    rng.shuffle(order)
+    for letters in order:
+        if fmt == "wild":
+            commons, display, cmax, center = _design_wild(letters)
+            # Wild wheels only need enough words and a displayable pangram (no upper bound).
+            if len(commons) >= MIN_COMMON_WORDS and display:
+                return _payload(letters, center, display, cmax)
+        else:
+            # Try each letter as the center, in a seed-determined order; take the
+            # first whose word count falls inside the allowed band.
+            centers = sorted(letters)
+            rng.shuffle(centers)
+            for center in centers:
+                commons, display, cmax = _design(letters, center)
+                if MIN_COMMON_WORDS <= len(commons) <= MAX_COMMON_WORDS and display:
+                    return _payload(letters, center, display, cmax)
+    raise RuntimeError("bloom: no valid wheel found")  # impossible with the vendored dict
+
+
+def build_puzzle(date: str) -> dict:
+    """Build the shared daily Center Circle design for ``date``.
+
+    Deterministic: the design is seeded by ``"bloom:<date>"``.
+    """
+    puzzle = {"date": date}
+    puzzle.update(_generate(f"bloom:{date}", "center"))
+    return puzzle
+
+
+def build_free(seed: str, fmt: str = "center") -> dict:
+    """Build a free-play wheel design, deterministic for a given seed + format.
+
+    Any ``fmt`` other than ``"wild"`` is treated as ``"center"``.
+    """
+    if fmt != "wild":
+        fmt = "center"
+    wheel = {"seed": seed, "format": fmt}
+    wheel.update(_generate(f"free:{fmt}:{seed}", fmt))
+    return wheel
+
+
+# --- ACCEPTANCE: broad + runtime overrides, computed at response time ----------
+
+def overrides(conn: sqlite3.Connection) -> tuple[set, set]:
+    """Return the runtime word overrides as ``(allow, block)`` sets.
+
+    Rows whose action is exactly ``"allow"`` go to the first set; every other
+    row goes to the second. Rows are read by column name, so the connection
+    needs a name-addressable row factory.
+    """
+    buckets = {True: set(), False: set()}
+    for row in conn.execute("SELECT word, action FROM bloom_word_overrides"):
+        buckets[row["action"] == "allow"].add(row["word"])
+    return buckets[True], buckets[False]
+
+
+def _broad_words_for(letters: frozenset) -> list[str]:
+    """Every broad-dictionary word buildable from `letters` (distinct-set ⊆ letters).
+
+    Walks every non-empty subset of the letters, smallest subsets first, and
+    collects whatever ``_BY_SET`` holds for that exact subset.
+    """
+    ordered = sorted(letters)
+    return [
+        word
+        for size in range(1, len(ordered) + 1)
+        for subset in combinations(ordered, size)
+        for word in _BY_SET.get(frozenset(subset), ())
+    ]
+
+
+def accepted_words(conn: sqlite3.Connection, center: str, outer, require_center: bool) -> list[str]:
+    """Compute the wheel's accepted words as of this call, sorted.
+
+    The set is: broad-dictionary words buildable from the wheel letters, plus
+    allow-overrides that obey the hard rules (at least 4 letters, no 's',
+    buildable from the letters), minus block-overrides. When
+    ``require_center`` is true, every word must also contain ``center``.
+    """
+    letters = frozenset(outer) | {center}
+    allow, block = overrides(conn)
+
+    def has_center(word) -> bool:
+        # Center Circle needs the center letter; Wild does not.
+        return not require_center or center in word
+
+    chosen = {
+        word for word in _broad_words_for(letters)
+        if word not in block and has_center(word)
+    }
+    # Allow-overrides may add words that are not in the broad dictionary.
+    chosen |= {
+        word for word in allow
+        if word not in block
+        and len(word) >= 4
+        and "s" not in word
+        and frozenset(word) <= letters
+        and has_center(word)
+    }
+    return sorted(chosen)
+
+
+# --- daily_puzzles storage -----------------------------------------------------
+
+def generate_bloom_puzzle(conn: sqlite3.Connection, date: str) -> dict:
+    """Return the day's Bloom design, creating and storing it if it is missing.
+
+    Idempotent: an existing row is returned untouched. After inserting, the
+    row is read back rather than returning the freshly built payload, so the
+    caller always gets whatever is actually stored (the insert is
+    ``OR IGNORE``, so a row that appeared in the meantime wins).
+    """
+    def lookup():
+        return conn.execute(
+            "SELECT payload_json FROM daily_puzzles WHERE puzzle_date=? AND game='bloom' AND variant=''", (date,)
+        ).fetchone()
+
+    found = lookup()
+    if found:
+        return json.loads(found["payload_json"])
+    fresh = json.dumps(build_puzzle(date))
+    conn.execute(
+        "INSERT OR IGNORE INTO daily_puzzles (puzzle_date, game, variant, payload_json) VALUES (?, 'bloom', '', ?)",
+        (date, fresh),
+    )
+    conn.commit()
+    return json.loads(lookup()["payload_json"])
+
+
+def stored_payload(conn: sqlite3.Connection, date: str) -> dict | None:
+    """Return the day's stored design, or ``None`` when there is no row yet.
+
+    Read-only: this never generates a puzzle (the state sanitizer relies on
+    that and must not trigger generation).
+    """
+    found = conn.execute(
+        "SELECT payload_json FROM daily_puzzles WHERE puzzle_date=? AND game='bloom' AND variant=''", (date,)
+    ).fetchone()
+    if not found:
+        return None
+    return json.loads(found["payload_json"])
+
+
+def word_hash(salt: str, word: str) -> str:
+    """Return the hex SHA-256 of ``"<salt>:<word>"``."""
+    hasher = hashlib.sha256()
+    hasher.update(f"{salt}:{word}".encode())
+    return hasher.hexdigest()
+
+
+def _response(salt: str, p: dict, words: list[str], extra: dict) -> dict:
+    """Build the client-facing wheel response.
+
+    Accepted words are sent only as salted hashes, never as plaintext. The
+    entries of ``extra`` are merged in last.
+    """
+    body = {"game": "bloom", "center": p["center"], "outer": p["outer"]}
+    body["accepted"] = [word_hash(salt, word) for word in words]   # NO plaintext words leak
+    body["max_score"] = p["max_score"]                             # Full Bloom = designed puzzle
+    body["tiers"] = p["tiers"]
+    body.update(extra)
+    return body
+
+
+def bloom_response(conn: sqlite3.Connection, date: str) -> dict:
+    """Daily Center Circle response for ``date``.
+
+    The design comes from storage (created on first use); the accepted set is
+    computed live from the broad dictionary and the current overrides, always
+    requiring the center letter. The date doubles as the hash salt.
+    """
+    design = generate_bloom_puzzle(conn, date)
+    playable = accepted_words(conn, design["center"], design["outer"], require_center=True)
+    return _response(date, design, playable, {"date": date})
+
+
+def bloom_free_response(conn: sqlite3.Connection, seed: str, fmt: str) -> dict:
+    """Free-play wheel response keyed by ``seed`` (so a game can be resumed).
+
+    The accepted set is computed live; the center letter is required unless
+    the resolved format is ``"wild"``. The seed doubles as the hash salt.
+    """
+    design = build_free(seed, fmt)
+    center, outer = design["center"], design["outer"]
+    needs_center = design["format"] != "wild"
+    playable = accepted_words(conn, center, outer, require_center=needs_center)
+    meta = {"mode": "free", "format": design["format"], "seed": design["seed"]}
+    return _response(seed, design, playable, meta)
+
+
+# --- runtime curation: overrides + player reports ------------------------------
+
+def set_override(conn: sqlite3.Connection, word: str, action: str, reason: str | None = None,
+                 by: str | None = None) -> bool:
+    """Create or replace the runtime override for ``word``.
+
+    The word is trimmed and lowercased before validation. An existing row for
+    the same word is overwritten (action, reason, author, timestamp).
+
+    Args:
+        conn: open database connection; the change is committed here.
+        word: the word to override (``None`` is treated as empty).
+        action: ``"allow"`` or ``"block"``.
+        reason: optional free-text reason stored with the row.
+        by: optional identifier of who made the change.
+
+    Returns:
+        True when the override was stored, False when the input was rejected.
+    """
+    word = (word or "").strip().lower()
+    if not (word.isalpha() and action in ("allow", "block")):
+        return False
+    # An ALLOW that violates Bloom's hard rules (≥4 letters, no 'S') could never
+    # count — reject it rather than store an inert override. BLOCK stays permissive.
+    # str.isalpha() also accepts non-ASCII letters (e.g. "café"); presumably the
+    # wheels only carry a–z, so such an ALLOW would be just as inert — reject it too.
+    if action == "allow" and (len(word) < 4 or "s" in word or not word.isascii()):
+        return False
+    conn.execute(
+        "INSERT INTO bloom_word_overrides (word, action, reason, created_by) VALUES (?,?,?,?) "
+        "ON CONFLICT(word) DO UPDATE SET action=excluded.action, reason=excluded.reason, "
+        "created_by=excluded.created_by, created_at=CURRENT_TIMESTAMP",
+        (word, action, reason, by),
+    )
+    conn.commit()
+    return True
+
+
+def clear_override(conn: sqlite3.Connection, word: str) -> None:
+    """Delete the override for ``word`` (trimmed, lowercased), if any, and commit."""
+    normalized = (word or "").strip().lower()
+    conn.execute("DELETE FROM bloom_word_overrides WHERE word=?", (normalized,))
+    conn.commit()
+
+
+def list_overrides(conn: sqlite3.Connection) -> list[dict]:
+    """Return every override as a plain dict, most recently created first."""
+    rows = conn.execute(
+        "SELECT word, action, reason, created_by, created_at FROM bloom_word_overrides ORDER BY created_at DESC")
+    return list(map(dict, rows))
+
+
+def add_report(conn: sqlite3.Connection, word: str, puzzle_date, mode, fmt, letters, reason) -> bool:
+    """Queue a player's "this should count" report for admin review.
+
+    The word is trimmed and lowercased and must be 4–24 alphabetic characters.
+    The context fields are stringified and clipped to fixed widths before
+    being stored.
+
+    Returns:
+        False when the word is invalid; True when a report was stored, or when
+        a pending report for the same word already exists (nothing is added).
+    """
+    word = (word or "").strip().lower()
+    if not word.isalpha() or len(word) < 4 or len(word) > 24:
+        return False
+    # Don't pile up duplicate pending reports for the same word.
+    already_pending = conn.execute(
+        "SELECT 1 FROM bloom_word_reports WHERE word=? AND status='pending'", (word,)).fetchone()
+    if not already_pending:
+        def clip(value, width):
+            return str(value or "")[:width]
+
+        conn.execute(
+            "INSERT INTO bloom_word_reports (word, puzzle_date, mode, format, letters, reason) "
+            "VALUES (?,?,?,?,?,?)",
+            (word, clip(puzzle_date, 16), clip(mode, 8), clip(fmt, 8), clip(letters, 16), clip(reason, 60)),
+        )
+        conn.commit()
+    return True
+
+
+def list_reports(conn: sqlite3.Connection, status: str = "pending", limit: int = 100) -> list[dict]:
+    """Return up to ``limit`` reports with the given status, newest first.
+
+    Args:
+        conn: open database connection.
+        status: report status to filter on (default ``"pending"``).
+        limit: maximum number of rows to return.
+
+    Returns:
+        One plain dict per report.
+    """
+    # created_at only has one-second resolution, so break ties on id: reports
+    # filed within the same second still come back in a stable newest-first order.
+    return [dict(r) for r in conn.execute(
+        "SELECT id, word, puzzle_date, mode, format, letters, reason, status, created_at "
+        "FROM bloom_word_reports WHERE status=? ORDER BY created_at DESC, id DESC LIMIT ?", (status, limit))]
+
+
+def resolve_report(conn: sqlite3.Connection, report_id: int, action: str, by: str | None = None) -> bool:
+    """Resolve a player report.
+
+    ``action`` is one of 'approve' (→ allow override), 'block' (→ block
+    override) or 'dismiss' (status change only).
+
+    Returns:
+        False when the report does not exist, the action is unknown, or an
+        approval is refused by the override rules (the report's status is then
+        left as it was). True once the report's status has been updated.
+    """
+    outcome = {"approve": "approved", "block": "blocked", "dismiss": "dismissed"}.get(action)
+    report = conn.execute("SELECT word FROM bloom_word_reports WHERE id=?", (report_id,)).fetchone()
+    if not report or not outcome:
+        return False
+    if action != "dismiss":
+        override_action = "allow" if action == "approve" else "block"
+        stored = set_override(conn, report["word"], override_action, reason="report", by=by)
+        if action == "approve" and not stored:
+            return False   # can't allow (hard rule) — leave pending; dismiss instead
+    conn.execute("UPDATE bloom_word_reports SET status=? WHERE id=?", (outcome, report_id))
+    conn.commit()
+    return True
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import argparse
+import contextlib
 import os
 import sqlite3
 from pathlib import Path
@@ -10,7 +11,7 @@ from .db import connect, init_db
 from .digest import send_due_digests
 from .games import generate_daily_puzzles
 from .localtime import local_today
-from .dedup import DEFAULT_THRESHOLD, DEFAULT_WINDOW_DAYS, dedup as run_dedup
+from .dedup import DEFAULT_THRESHOLD, DEFAULT_WINDOW_DAYS, cluster_duplicates, dedup as run_dedup
 from .enrich import enrich_brief_images, enrich_recent_images, enrich_summarized_images
 from .summarize import generate_summary, get_summary
 from .feeds import (
@@ -39,9 +40,17 @@ DEFAULT_DB = ROOT / "data" / "goodnews.sqlite3"
 DEFAULT_SOURCES = ROOT / "config" / "sources.toml"


+def _default_db() -> Path:
+    """Pick the default database path for the CLI's ``--db`` option.
+
+    A non-empty ``$GOODNEWS_DB`` wins; otherwise ``DEFAULT_DB`` is used.
+    """
+    # Honor GOODNEWS_DB like the rest of the app (db.connect) does, so setting it
+    # actually targets that DB instead of being silently ignored — otherwise a
+    # copy-DB maintenance run (e.g. dedup --force-recluster) can land on
+    # production by surprise.
+    configured = os.environ.get("GOODNEWS_DB")
+    return Path(configured) if configured else Path(DEFAULT_DB)
+
+
 def main() -> None:
    parser = argparse.ArgumentParser(prog="goodnews")
-    parser.add_argument("--db", type=Path, default=DEFAULT_DB, help="SQLite database path")
+    parser.add_argument("--db", type=Path, default=_default_db(),
+                        help="SQLite database path (defaults to $GOODNEWS_DB, else the bundled data/ DB)")
    subparsers = parser.add_subparsers(dest="command", required=True)

    subparsers.add_parser("init-db", help="Create or update the SQLite schema")
@@ -144,6 +153,9 @@ def main() -> None:
    dedup_parser.add_argument("--embed-limit", type=int, help="Cap how many missing embeddings to compute")
    dedup_parser.add_argument("--base-url", help="OpenAI-compatible base URL")
    dedup_parser.add_argument("--model", help="Chat model name (unused for embeddings)")
+    dedup_parser.add_argument("--force-recluster", action="store_true",
+                              help="Re-cluster the EXISTING corpus even if no new embeddings "
+                                   "(re-applies representative policy; cycle-locked, no model needed)")

    check_llm_parser = subparsers.add_parser("check-llm", help="Check local OpenAI-compatible model endpoint")
    check_llm_parser.add_argument("--base-url", help="OpenAI-compatible base URL, e.g. http://127.0.0.1:1234/v1")
@@ -221,7 +233,9 @@ def main() -> None:
                import json as _json

                p = _json.loads(r["preview_json"])
-                line += f"  (accept {round(p.get('acceptance_rate', 0) * 100)}%, sampled {p.get('sampled', 0)})"
+                _rate = p.get("acceptance_rate")
+                _rate_str = f"{round(_rate * 100)}%" if _rate is not None else "—"
+                line += f"  (accept {_rate_str}, sampled {p.get('sampled', 0)})"
            print(line)
    elif args.command == "promote-candidate":
        init_db(conn)
@@ -286,15 +300,31 @@ def main() -> None:
        print(f"enrich-images: {found} new image(s) for summarized articles")
    elif args.command == "dedup":
        init_db(conn)
-        client = llm_client_from_args(args)
-        stats = run_dedup(
-            conn, client, threshold=args.threshold, window_days=args.window_days, embed_limit=args.embed_limit
-        )
-        print(
-            f"dedup: embedded={stats['embedded']} articles={stats['articles']} "
-            f"clusters={stats['clusters']} duplicate_clusters={stats['duplicate_clusters']} "
-            f"duplicates_hidden={stats['duplicates']}"
-        )
+        if args.force_recluster:
+            # Re-apply representative policy to the EXISTING corpus. The normal path
+            # fast-skips when no new embeddings exist, so it would NOT pick up a policy
+            # change. Cycle-locked so it can't overlap the scheduled timer; no model
+            # needed (pure re-cluster over stored embeddings).
+            with cycle_lock(args.db) as acquired:
+                if not acquired:
+                    print("dedup: a cycle is already running; re-run --force-recluster after it finishes")
+                    return
+                stats = cluster_duplicates(conn, threshold=args.threshold, window_days=args.window_days)
+            print(
+                f"dedup (forced recluster): articles={stats['articles']} "
+                f"clusters={stats['clusters']} duplicate_clusters={stats['duplicate_clusters']} "
+                f"duplicates_hidden={stats['duplicates']}"
+            )
+        else:
+            client = llm_client_from_args(args)
+            stats = run_dedup(
+                conn, client, threshold=args.threshold, window_days=args.window_days, embed_limit=args.embed_limit
+            )
+            print(
+                f"dedup: embedded={stats['embedded']} articles={stats['articles']} "
+                f"clusters={stats['clusters']} duplicate_clusters={stats['duplicate_clusters']} "
+                f"duplicates_hidden={stats['duplicates']}"
+            )
    elif args.command == "check-llm":
        client = llm_client_from_args(args)
        try:
@@ -368,7 +398,9 @@ def list_recent(conn: sqlite3.Connection, limit: int, accepted_only: bool) -> No
 def print_preview(p: dict) -> None:
    mode = "model" if p["classified"] else "heuristic"
    print(f"Preview of {p['url']}  ({mode})")
-    print(f"  sampled={p['sampled']} accepted={p['accepted']} ({p['acceptance_rate']*100:.0f}%)")
+    rate = p.get("acceptance_rate")
+    rate_str = f"{rate * 100:.0f}%" if rate is not None else "— (all held)"
+    print(f"  sampled={p['sampled']} accepted={p['accepted']} ({rate_str})")
    print(f"  freshness: newest={p['newest_published'] or 'unknown'} in_last_7d={p['recent_7d']}")
    print(f"  averages: cortisol={p['avg_cortisol']} ragebait={p['avg_ragebait']} pr_risk={p['avg_pr_risk']}")
    if p["topic_mix"]:
@@ -398,6 +430,28 @@ def check_feeds(conn: sqlite3.Connection, include_inactive: bool = False) -> Non
    print(f"--- {ok}/{len(rows)} feeds healthy ---")


+@contextlib.contextmanager
+def cycle_lock(db_path):
+    """Hold the exclusive, non-blocking cycle lock for the duration of a ``with`` body.
+
+    The lock file lives next to the database, so the scheduled cycle and any
+    manual job that mutates the corpus (e.g. a forced dedup re-cluster) share
+    it and can never overlap and contend on the database/model.
+
+    Yields:
+        True if the lock was acquired (it is released and the file closed on
+        exit), False if it could not be taken because it is already held.
+    """
+    import fcntl
+
+    handle = open(Path(db_path).parent / ".goodnews-cycle.lock", "w")
+    try:
+        fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
+    except OSError:
+        # Someone else holds it: nothing to release, just report failure.
+        handle.close()
+        yield False
+    else:
+        try:
+            yield True
+        finally:
+            fcntl.flock(handle, fcntl.LOCK_UN)
+            handle.close()
+
+
 def run_cycle(conn: sqlite3.Connection, args: argparse.Namespace) -> None:
    """One end-to-end pass for a scheduler: poll due sources, classify the new
    arrivals, dedup, rebuild today's brief. Each step is independent and
@@ -406,21 +460,11 @@ def run_cycle(conn: sqlite3.Connection, args: argparse.Namespace) -> None:
    Holds an exclusive lock so a manual run and the systemd timer (or two timer
    ticks) can never overlap and contend on the database and model.
    """
-    import fcntl
-
-    lock_path = Path(args.db).parent / ".goodnews-cycle.lock"
-    lock_file = open(lock_path, "w")
-    try:
-        fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
-    except OSError:
-        print("cycle: another cycle is already running; skipping")
-        lock_file.close()
-        return
-    try:
+    with cycle_lock(args.db) as acquired:
+        if not acquired:
+            print("cycle: another cycle is already running; skipping")
+            return
        _run_cycle_locked(conn, args)
-    finally:
-        fcntl.flock(lock_file, fcntl.LOCK_UN)
-        lock_file.close()


 def _run_cycle_locked(conn: sqlite3.Connection, args: argparse.Namespace) -> None:
@@ -505,6 +549,12 @@ def _run_cycle_locked(conn: sqlite3.Connection, args: argparse.Namespace) -> Non
        except Exception as exc:
            print(f"review: skipped ({exc})")

+    try:
+        from .queries import reindex_search
+        print(f"search: indexed {reindex_search(conn)} articles")
+    except Exception as exc:  # noqa: BLE001 — search index is non-critical
+        print(f"search: skipped ({exc})")
+
    if not args.no_digest:
        try:
            sent = send_due_digests(conn)  # morning-gated + deduped internally
@@ -0,0 +1 @@
+["vagina", "vulva", "nipple", "rectum", "anal", "fecal", "ejaculation", "eunuch", "nude", "nudity", "butt"]
@@ -0,0 +1 @@
+["death","dying","died","killed","killing","murder","murdered","corpse","coffin","funeral","grave","buried","burial","weapon","gunshot","warfare","violent","violence","deadly","lethal","poison","poisoned","suicide","slaughter","victim","bleeding","wound","wounded","vomit","vomiting","vomited","diarrhea","disease","diseased","cancer","tumor","illness","infection","infected","plague","disabled","lucifer","satan","demon","demonic","devil","damned","hatred","hateful","terror","terrorize","hostage","kidnap","kidnapped","abuse","abused","assault","trauma","traumatic","anxiety","depression","depressed","divorce","divorced","bankrupt","eviction","evicted","layoff","drowned","drowning","choking","suffocate","starving","famine","poverty","despair","misery","miserable","tragic","tragedy","horror","horrible","nightmare","panic","dread","grief","grieving","mourning","rotting","decay","decayed","maggot","vermin","filth","sewage","manure"]
@@ -28,6 +28,7 @@ CREATE TABLE IF NOT EXISTS sources (
    retry_after_at TEXT,
    review_flag INTEGER NOT NULL DEFAULT 0,
    review_reason TEXT,
+    x_handle TEXT,                                 -- the source's own verified X handle, if known
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
 );
@@ -69,6 +70,7 @@ CREATE TABLE IF NOT EXISTS article_scores (
    reason_text TEXT,
    topic TEXT,
    flavor TEXT,
+    language TEXT,
    model_name TEXT,
    scored_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
 );
@@ -300,6 +302,13 @@ CREATE TABLE IF NOT EXISTS daily_puzzles (
    UNIQUE (puzzle_date, game, variant)
 );

+-- Full-text search over the PUBLIC article corpus (title/description/source/tags).
+-- Standalone FTS5 (not external-content) since the searchable text spans tables;
+-- rebuilt from the accepted, non-duplicate set on each ingest cycle (+ lazily).
+CREATE VIRTUAL TABLE IF NOT EXISTS article_search USING fts5(
+    article_id UNINDEXED, title, body, source_name, tags
+);
+
 CREATE TABLE IF NOT EXISTS game_state (
    user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    game TEXT NOT NULL,            -- 'word' | 'wordsearch'
@@ -310,6 +319,30 @@ CREATE TABLE IF NOT EXISTS game_state (
    PRIMARY KEY (user_id, game, variant, puzzle_date)
 );

+-- Bloom runtime word curation (no deploy needed). The accepted set is computed
+-- live as: broad dictionary ∪ {allow} − {block}. Admin-managed; one row per word.
+CREATE TABLE IF NOT EXISTS bloom_word_overrides (
+    word TEXT PRIMARY KEY,         -- lowercase
+    action TEXT NOT NULL,          -- 'allow' | 'block'
+    reason TEXT,
+    created_by TEXT,
+    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Player "this should count" reports → admin queue (approve→allow / block / dismiss).
+CREATE TABLE IF NOT EXISTS bloom_word_reports (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    word TEXT NOT NULL,            -- lowercase
+    puzzle_date TEXT,
+    mode TEXT,                     -- 'daily' | 'free'
+    format TEXT,                   -- 'center' | 'wild'
+    letters TEXT,                  -- the wheel's 7 letters (for context)
+    reason TEXT,                   -- why it was rejected (e.g. 'not in the word list')
+    status TEXT NOT NULL DEFAULT 'pending',  -- 'pending' | 'approved' | 'blocked' | 'dismissed'
+    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
+);
+CREATE INDEX IF NOT EXISTS idx_bloom_reports_status ON bloom_word_reports(status, created_at);
+
 CREATE TABLE IF NOT EXISTS user_follows (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
@@ -327,6 +360,49 @@ CREATE TABLE IF NOT EXISTS digest_sends (
    sent_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (user_id, brief_date)
 );
+
+-- Publishing Desk: a platform-NEUTRAL outbound-share record (X first; Bluesky /
+-- Threads / newsletter later reuse this). One row per (article, platform); the
+-- queue tops up without ever overwriting saved text/handles. opened != posted —
+-- Web Intents can't confirm a post, so the human confirms the terminal state.
+CREATE TABLE IF NOT EXISTS outbound_shares (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
+    platform TEXT NOT NULL DEFAULT 'x',
+    status TEXT NOT NULL DEFAULT 'queued',   -- queued|drafting|opened|posted|skipped|snoozed
+    social_score INTEGER,                    -- LLM "stop-scrolling" interest (0-10)
+    rationale TEXT,                          -- why someone would stop scrolling
+    talking_points TEXT,                     -- JSON array of factual points
+    angle TEXT,                              -- a suggested conversational angle
+    entities TEXT,                           -- JSON array of raw named entities (LLM-extracted)
+    suggested_handles TEXT,                  -- JSON array of {handle, profile_url, via}
+    draft_text TEXT,                         -- autosaved in-progress blurb (the human writes it)
+    final_text TEXT,                         -- what was actually posted (teaches voice later)
+    share_url TEXT,                          -- the exact /a/{id}?utm... link used
+    post_url TEXT,                           -- the resulting tweet URL, if captured
+    snooze_until TEXT,                       -- 'not right now' (re-eligible after this)
+    opened_at TEXT,
+    posted_at TEXT,
+    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    UNIQUE (article_id, platform)
+);
+CREATE INDEX IF NOT EXISTS idx_outbound_shares_status ON outbound_shares(platform, status);
+
+-- Verified handle directory — the LLM only ever proposes NAMES; the @handle comes
+-- only from here (or a source's own x_handle). Aliases resolve consistently by each
+-- having its own row pointing at the same handle (e.g. "Johns Hopkins University"
+-- and "Johns Hopkins").
+CREATE TABLE IF NOT EXISTS entity_handles (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    entity_name TEXT NOT NULL,               -- display name as entered
+    normalized_name TEXT NOT NULL,           -- lowercased/stripped match key
+    platform TEXT NOT NULL DEFAULT 'x',
+    handle TEXT NOT NULL,                     -- e.g. @AnthropicAI
+    profile_url TEXT,
+    verified_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    UNIQUE (normalized_name, platform)
+);
 """


@@ -359,7 +435,7 @@ def _migrate(conn: sqlite3.Connection) -> None:
    need an explicit, idempotent ALTER guarded by the current column set.
    """
    score_cols = {row["name"] for row in conn.execute("PRAGMA table_info(article_scores)")}
-    for column in ("topic", "flavor"):
+    for column in ("topic", "flavor", "language"):
        if column not in score_cols:
            conn.execute(f"ALTER TABLE article_scores ADD COLUMN {column} TEXT")

@@ -397,6 +473,9 @@ def _migrate(conn: sqlite3.Connection) -> None:
    for column, decl in health_columns.items():
        if column not in source_cols:
            conn.execute(f"ALTER TABLE sources ADD COLUMN {column} {decl}")
+    # Publishing Desk: the source's own verified X handle (suggested when sharing).
+    if "x_handle" not in source_cols:
+        conn.execute("ALTER TABLE sources ADD COLUMN x_handle TEXT")

    # Lifecycle: status (active/paused/retired) + content_visible. `active` is
    # kept as a synced mirror so legacy code (scheduler/CLI) keeps working.
@@ -102,7 +102,8 @@ def cluster_duplicates(
            (COALESCE(s.constructive_score,0) + COALESCE(s.agency_score,0)
             + COALESCE(s.human_benefit_score,0) + src.trust_score
             - COALESCE(s.cortisol_score,0) - COALESCE(s.ragebait_score,0)
-             - COALESCE(s.pr_risk_score,0)) AS rank_score
+             - COALESCE(s.pr_risk_score,0)) AS rank_score,
+            COALESCE(s.accepted, 0) AS accepted
        FROM articles a
        JOIN article_embeddings e ON e.article_id = a.id
        JOIN sources src ON src.id = a.source_id
@@ -114,7 +115,8 @@ def cluster_duplicates(
    items = []
    for r in rows:
        vec = _unit(array("f", r["vector"]).tolist())
-        items.append({"id": r["id"], "ord": _day_ordinal(r["dt"]), "vec": vec, "score": r["rank_score"]})
+        items.append({"id": r["id"], "ord": _day_ordinal(r["dt"]), "vec": vec,
+                      "score": r["rank_score"], "accepted": bool(r["accepted"])})

    clusters: list[dict] = []  # {anchor_vec, anchor_ord, members:[item]}
    for it in items:
@@ -130,6 +132,14 @@ def cluster_duplicates(
        if not placed:
            clusters.append({"anchor_vec": it["vec"], "anchor_ord": it["ord"], "members": [it]})

+    # Which articles are CURRENTLY a representative (something points at them)? Captured
+    # BEFORE we reset, so we can keep an established canonical stable across runs.
+    prior_reps = {
+        row[0] for row in conn.execute(
+            "SELECT DISTINCT duplicate_of FROM articles WHERE duplicate_of IS NOT NULL"
+        )
+    }
+
    # Reset prior decisions for everything we considered, then re-apply.
    considered = [it["id"] for it in items]
    conn.executemany(
@@ -142,7 +152,19 @@ def cluster_duplicates(
        if len(cl["members"]) < 2:
            continue
        dup_clusters += 1
-        rep = max(cl["members"], key=lambda m: (m["score"], -m["id"]))
+        # Representative priority (highest wins), in order:
+        #   1. accepted/serveable — an accepted page must never be retired to a REJECTED
+        #      rep (that page would 404 with nothing to redirect to).
+        #   2. established rep — if a member is already the cluster's canonical, keep it,
+        #      so an indexed URL doesn't churn when a newer twin arrives.
+        #   3. quality score — decides genuinely-new clusters.
+        #   4. -id — deterministic final tiebreak (older wins).
+        rep = max(cl["members"], key=lambda m: (
+            1 if m["accepted"] else 0,
+            1 if m["id"] in prior_reps else 0,
+            m["score"],
+            -m["id"],
+        ))
        for m in cl["members"]:
            if m["id"] != rep["id"]:
                conn.execute(
@@ -243,6 +243,11 @@ def poll_source(conn: sqlite3.Connection, source: sqlite3.Row) -> dict:
        }


+# Deep-preview accessibility sample bounds (module-level so tests can shrink them).
+_ACCESS_FETCH_TIMEOUT = 6     # per-article socket timeout (seconds)
+_ACCESS_DEADLINE_S = 12.0     # hard wall-clock cap for the whole access phase
+
+
 def preview_feed(url: str, sample: int = 25, pr_risk_default: int = 3, client=None, fetcher=None) -> dict:
    """Fetch and score a sample of a feed WITHOUT persisting anything.

@@ -302,12 +307,85 @@ def preview_feed(url: str, sample: int = 25, pr_risk_default: int = 3, client=No
                    cortisol=ns["cortisol_score"],
                    ragebait=ns["ragebait_score"],
                    pr_risk=ns["pr_risk_score"],
+                    reason_code=ns["reason_code"],
+                    language=ns.get("language", ""),
                )
            except Exception:
                pass  # one bad item shouldn't sink the whole preview

    total = len(rows)
    accepted = sum(1 for r in rows if r["accepted"])
+    # Non-English items are HELD (English-only feed for now), not calm-filter
+    # rejections — surface the count and judge acceptance over English items only, so
+    # a multilingual wire (e.g. PR Newswire) isn't unfairly penalized in the preview.
+    non_english = sum(1 for r in rows if r.get("reason_code") == "non_english")
+    judged = total - non_english
+
+    # Accessibility sample — deep preview only (it already means "spend ~a minute to
+    # really know"). Layered per Codex: the instant DOMAIN rule + a small sampled
+    # article fetch, so a paywall verdict rests on evidence, not domain alone (NYT
+    # Learning proved domain rules false-positive).
+    from .paywall import check_article_access, is_paywalled
+    domain_paywalled = is_paywalled(url)
+    access = None
+    access_verdict = None
+    if classified and rows:
+        from concurrent.futures import ThreadPoolExecutor, as_completed
+        # prefer the URLs the model would actually surface, then fill from the rest
+        ordered = [r["url"] for r in rows if r["accepted"] and r["url"]] + \
+                  [r["url"] for r in rows if not r["accepted"] and r["url"]]
+        seen, sample_urls = set(), []
+        for u in ordered:
+            if u not in seen:
+                seen.add(u)
+                sample_urls.append(u)
+            if len(sample_urls) >= 6:
+                break
+        results = []
+        if sample_urls:
+            af = fetcher or fetch_feed
+            ex = ThreadPoolExecutor(max_workers=min(6, len(sample_urls)))
+            futs = {ex.submit(check_article_access, u, af, _ACCESS_FETCH_TIMEOUT): u for u in sample_urls}
+            done = {}
+            try:
+                # Hard wall-clock cap: the access step can NEVER stall the whole
+                # preview. Fetches run in parallel; whatever hasn't finished by the
+                # deadline is left 'unknown' (unverified — never counts as walled).
+                # shutdown(wait=False, cancel_futures=True) below means we don't block
+                # on stragglers (no `with ... as ex` join), so wall-clock == the cap.
+                for fut in as_completed(futs, timeout=_ACCESS_DEADLINE_S):
+                    done[futs[fut]] = fut.result()
+            except Exception:  # noqa: BLE001 — overall deadline hit; use what finished
+                pass
+            ex.shutdown(wait=False, cancel_futures=True)
+            results = [(u, done.get(u, "unknown")) for u in sample_urls]
+        counts = Counter(a for _, a in results)
+        readable, paywalled = counts.get("readable", 0), counts.get("paywalled", 0)
+        assessable = readable + paywalled
+        inacc = (paywalled / assessable) if assessable else None
+        # `blocked` is deliberately NOT counted as inaccessible: a bot-block isn't a
+        # reader paywall (it may open fine in a browser), so it can never push a
+        # source to reject-ready — only readable-vs-paywalled evidence does. Need a
+        # few clearly-assessable samples before judging confidently.
+        ENOUGH = 3
+        if assessable < ENOUGH:
+            access_verdict = "review"            # mostly blocked/unknown — can't confirm; click examples
+        elif domain_paywalled and inacc >= 0.7:
+            access_verdict = "reject-ready"      # domain rule AND sample agree it's walled
+        elif domain_paywalled:
+            access_verdict = "review"            # domain says walled but the sample isn't — likely a false positive, look
+        elif inacc >= 0.7:
+            access_verdict = "review"            # not on the list but mostly walled — candidate for the rule
+        elif inacc <= 0.3:
+            access_verdict = "fine"
+        else:
+            access_verdict = "review"            # mixed
+        access = {
+            "checked": len(results),
+            "readable": readable, "paywalled": paywalled,
+            "blocked": counts.get("blocked", 0), "unknown": counts.get("unknown", 0),
+            "examples": [{"url": u, "access": a} for u, a in results][:5],
+        }

    def _avg(key: str) -> float:
        return round(sum(r[key] for r in rows) / total, 1) if total else 0.0
@@ -329,12 +407,17 @@ def preview_feed(url: str, sample: int = 25, pr_risk_default: int = 3, client=No
        "sampled": total,
        "classified": classified,
        "accepted": accepted,
-        "acceptance_rate": round(accepted / total, 2) if total else 0.0,
+        "non_english": non_english,         # held for language (English-only feed for now)
+        # None (not 0%) when there are no English items to judge — "all held", not "all rejected".
+        "acceptance_rate": round(accepted / judged, 2) if judged else None,
        "avg_cortisol": _avg("cortisol"),
        "avg_ragebait": _avg("ragebait"),
        "avg_pr_risk": _avg("pr_risk"),
        "newest_published": newest,
        "recent_7d": recent_7d,
+        "paywall_rule": domain_paywalled,   # instant domain hint
+        "access": access,                   # sampled readable/paywalled/blocked/unknown (deep only)
+        "access_verdict": access_verdict,   # fine | review | reject-ready
        "topic_mix": dict(Counter(r["topic"] for r in rows if r["topic"])),
        "flavor_mix": dict(Counter(r["flavor"] for r in rows if r["flavor"])),
        "examples_accepted": [r["title"] for r in rows if r["accepted"]][:5],
@@ -17,6 +17,8 @@ import re
 import sqlite3
 from pathlib import Path

+from . import bloom
+
 _DATA = Path(__file__).parent / "data"
 _POOL = json.loads((_DATA / "wordpool.json").read_text())  # curated static answer pool
 # Guess dictionaries (same lists the client validates against) — used server-side to
@@ -26,6 +28,9 @@ _DICT = {v: set(json.loads((_DATA / f"words-{v}.json").read_text())) for v in ("
 # Daily Word: 5 letters / 6 guesses · Long Word: 6 letters / 7 guesses.
 WORD_VARIANTS = {"5": {"length": 5, "guesses": 6}, "6": {"length": 6, "guesses": 7}}

+# Memory Match daily sync variants = "<tier>-<format>" (free play stays local).
+MATCH_VARIANTS = {f"{t}-{f}" for t in ("gentle", "standard", "expert") for f in ("icons", "colors")}
+

 def _seed(*parts: str) -> int:
    """Derive a deterministic integer from the given string parts.

    The parts are joined with ':' and hashed with SHA-256; the hex digest is
    returned as one large non-negative int.
    """
    joined = ":".join(parts)
    digest = hashlib.sha256(joined.encode()).hexdigest()
    return int(digest, 16)
@@ -625,12 +630,29 @@ def _merge_word(a: dict, b: dict) -> dict:
    return a if _word_rank(a) >= _word_rank(b) else b


+def _merge_bloom(a: dict, b: dict) -> dict:
+    """Merge two Bloom states by taking the union of their found words.
+
+    Words from `a` come first, then any new ones from `b`, each kept once in
+    first-seen order; non-string entries are dropped. Only the "found" key is
+    returned — no score is carried over here.
+    """
+    combined = [*(a.get("found") or []), *(b.get("found") or [])]
+    # dict.fromkeys de-duplicates while keeping first-seen order.
+    unique = dict.fromkeys(w for w in combined if isinstance(w, str))
+    return {"found": list(unique)}
+
+
 def merge_game_state(game: str, a: dict | None, b: dict | None) -> dict:
+    """Combine two saved states for the same game into one.
+
+    If either side is empty/None, a shallow copy of the other is returned (or {}
+    when both are). Otherwise the pair is handed to the per-game merger; any game
+    name not listed below uses the word-game merge.
+    """
    if not a:
        return dict(b or {})
    if not b:
        return dict(a or {})
-    return _merge_wordsearch(a, b) if game == "wordsearch" else _merge_word(a, b)
+    if game == "wordsearch":
+        return _merge_wordsearch(a, b)
+    if game == "bloom":
+        return _merge_bloom(a, b)
+    if game == "match":
+        return _merge_match(a, b)
+    return _merge_word(a, b)


 def load_game_state(conn: sqlite3.Connection, user_id: int, game: str, variant: str, date: str) -> dict | None:
@@ -729,10 +751,92 @@ def _sanitize_word(variant: str, state: dict) -> dict:
    return out


+def _sanitize_bloom(conn: sqlite3.Connection, date: str, state: dict) -> dict:
+    """Normalize a client-sent Bloom state for `date` before it is merged/stored.
+
+    Keeps only finds that are real for THIS wheel: when the day's puzzle exists,
+    a word must be in the set returned by `bloom.accepted_words` for the stored
+    center/outer letters; when no puzzle is stored yet, a shape-only check is
+    used (at least 4 letters, alphabetic, no "s"). Words are stripped,
+    lowercased, de-duplicated and sorted. The score is recomputed here via
+    `bloom.score_words` (0 without a puzzle) and "full" is set only when that
+    score reaches the puzzle's max_score — a client-sent score/full is ignored.
+
+    A "found" value that is not a list/tuple (e.g. a number or object in the
+    client JSON) is treated as "nothing found" instead of raising.
+
+    Returns {"found": [...], "score": int} plus "full": True when earned.
+    """
+    payload = bloom.stored_payload(conn, date)
+    valid = (set(bloom.accepted_words(conn, payload["center"], payload["outer"], True))
+             if payload else None)
+    raw = state.get("found")
+    if not isinstance(raw, (list, tuple)):
+        raw = []  # untrusted JSON: iterating a truthy non-iterable would raise TypeError
+    clean, seen = [], set()
+    for w in raw:
+        if not isinstance(w, str):
+            continue
+        w = w.strip().lower()
+        if not w or w in seen:
+            continue
+        if valid is not None:
+            if w not in valid:
+                continue
+        elif not (len(w) >= 4 and w.isalpha() and "s" not in w):  # no puzzle yet → shape only
+            continue
+        seen.add(w)
+        clean.append(w)
+    clean.sort()
+    score = bloom.score_words(payload, clean) if payload else 0
+    out = {"found": clean, "score": score}
+    if payload and clean and score >= payload.get("max_score", 1):
+        out["full"] = True  # Full Bloom — found the whole designed puzzle
+    return out
+
+
+_MATCH_MAX_FACES = 12  # the largest board uses 8 faces; cap generously
+_MATCH_FACES = {"gentle": 6, "standard": 8, "expert": 8}  # faces per tier = completion target
+# Valid face keys — MIRRORS the frontend (icons.js ICON_KEYS + palette.js COLOR_KEYS).
+# Matched keys are validated against this so bogus/junk keys can't inflate the
+# completion count. Adding a face on the frontend? Add it here too; a missing key only
+# under-counts (benign, self-heals once synced), never crashes.
+_MATCH_FACE_KEYS = frozenset({
+    "sun", "moon", "star", "cloud", "raindrop", "wave", "leaf", "flower", "seedling",
+    "tree", "mountain", "shell", "feather", "acorn", "butterfly", "rainbow", "heart",
+    "sparkle", "home", "book", "teacup", "candle", "lantern", "compass", "kite", "note",
+    "boat", "fish", "bird", "mushroom", "bell", "snowflake", "clover",
+    "color-rose", "color-coral", "color-amber", "color-gold", "color-lime", "color-green",
+    "color-teal", "color-cyan", "color-sky", "color-blue", "color-indigo", "color-violet",
+    "color-plum", "color-brown", "color-sand", "color-slate", "color-charcoal", "color-cream",
+})
+
+
+def _match_faces(variant: str) -> int:
+    """Return how many matched faces complete a board for `variant`.
+
+    Only the text before the first '-' (the tier part of "<tier>-<format>") is
+    looked up in _MATCH_FACES; an empty, None, or unrecognised tier falls back to 8.
+    """
+    tier, _, _ = (variant or "").partition("-")
+    return _MATCH_FACES.get(tier, 8)
+
+
+def _sanitize_match(variant: str, state: dict) -> dict:
+    """Light, durability-only sanitize of a client-sent Memory Match state.
+
+    Keeps matched FACE KEYS only (strings present in _MATCH_FACE_KEYS, so junk
+    can't count), de-duplicated in first-seen order and capped at
+    _MATCH_MAX_FACES. The move count is clamped to 0..100_000. `done` is DERIVED
+    from the matched count versus the tier's face target (`_match_faces`) and is
+    never taken from the client.
+
+    A "matched" value that is not a list/tuple (e.g. a number or object in the
+    client JSON) is treated as "nothing matched" instead of raising.
+
+    Returns {"matched": [...], "moves": int, "done": bool}.
+    """
+    raw = state.get("matched")
+    if not isinstance(raw, (list, tuple)):
+        raw = []  # untrusted JSON: iterating a truthy non-iterable would raise TypeError
+    seen: set[str] = set()
+    matched: list[str] = []
+    for k in raw:
+        if isinstance(k, str) and k in _MATCH_FACE_KEYS and k not in seen:
+            seen.add(k)
+            matched.append(k)
+        if len(matched) >= _MATCH_MAX_FACES:
+            break
+    return {"matched": matched, "moves": max(0, min(_int(state.get("moves")), 100_000)),
+            "done": len(matched) >= _match_faces(variant)}
+
+
+def _merge_match(a: dict, b: dict) -> dict:
+    """Merge two Memory Match states from different devices.
+
+    The matched faces are the first-seen-order union of both sides, capped at
+    _MATCH_MAX_FACES; the move count is the larger of the two. No `done` key is
+    produced here.
+    """
+    combined = [*(a.get("matched") or []), *(b.get("matched") or [])]
+    unique = list(dict.fromkeys(combined))  # de-duplicate, keep first-seen order
+    moves = max(_int(a.get("moves")), _int(b.get("moves")))
+    return {"matched": unique[:_MATCH_MAX_FACES], "moves": moves}
+
+
 def sanitize_game_state(conn: sqlite3.Connection, game: str, variant: str, date: str, state: dict) -> dict:
    """Never trust client JSON at the storage layer — normalize before merge/store."""
+    # Dispatch on the game name; a None/empty state is sanitized as {}.
    if game == "wordsearch":
        return _sanitize_wordsearch(conn, variant, date, state or {})
+    if game == "bloom":
+        return _sanitize_bloom(conn, date, state or {})
+    if game == "match":
+        return _sanitize_match(variant, state or {})
+    # Any other game name is handled by the word-game sanitizer.
    return _sanitize_word(variant, state or {})


@@ -770,6 +874,31 @@ def game_stats(conn: sqlite3.Connection, user_id: int, game: str, variant: str)
    if game == "wordsearch":
        times = [s.get("ms") for s in states if s.get("ms")]
        return {"completed": sum(1 for s in states if s.get("ms")), "best": min(times) if times else 0}
+    if game == "bloom":
+        # Calm, no-pressure record: days played, lifetime words, Full Blooms, and
+        # the best tier ever reached (computed per day from that wheel's tiers).
+        tier_names = [t[0] for t in bloom.TIER_PCTS]
+        played = words = full = 0
+        best_idx = -1
+        for r in rows:
+            try:
+                s = json.loads(r["state_json"])
+            except (ValueError, TypeError):
+                continue
+            found = s.get("found") or []
+            if not found:
+                continue
+            played += 1
+            words += len(found)
+            if s.get("full"):
+                full += 1
+            p = bloom.stored_payload(conn, r["puzzle_date"])
+            if p:
+                sc = s.get("score") or 0
+                idx = max((i for i, t in enumerate(p["tiers"]) if sc >= t["score"]), default=0)
+                best_idx = max(best_idx, idx)
+        return {"played": played, "words": words, "full_blooms": full,
+                "best_tier": tier_names[best_idx] if best_idx >= 0 else None}
    played = won = 0
    dist: dict[int, int] = {}
    streak = 0
@@ -823,4 +952,9 @@ def generate_daily_puzzles(conn: sqlite3.Connection, date: str, client=None) ->
    ).fetchone():
        generate_wordsearch_puzzle(conn, date, client=client)
        made += 1
+    if not conn.execute(
+        "SELECT 1 FROM daily_puzzles WHERE puzzle_date=? AND game='bloom' AND variant=''", (date,)
+    ).fetchone():
+        bloom.generate_bloom_puzzle(conn, date)  # pure code, no LLM
+        made += 1
    return made
@@ -49,6 +49,7 @@ CLASSIFICATION_SCHEMA = {
        "tags",
        "reason_code",
        "reason_text",
+        "language",
    ],
    "properties": {
        "constructive_score": _SCORE_FIELD,
@@ -64,6 +65,7 @@ CLASSIFICATION_SCHEMA = {
        "tags": {"type": "array", "items": {"type": "string", "enum": list(ALLOWED_TAGS)}, "maxItems": MAX_TAGS},
        "reason_code": {"type": "string"},
        "reason_text": {"type": "string"},
+        "language": {"type": "string"},  # ISO 639-1 of the article's own text (en, de, es…)
    },
 }

@@ -104,6 +106,11 @@ Grouping tags — choose ONLY from this controlled vocabulary:
 Tag discipline: assign 1-4 tags; prefer fewer, stronger ones; never tag by weak
 association; pick tags a reader would reasonably use to find this story later.

+Also report `language`: the ISO 639-1 code of the article's OWN text (the title and
+description), e.g. "en", "de", "es", "fr". Judge the language of the words, not the
+subject. This is detection only — score and accept the story on its merits as usual;
+the site decides separately what to do with non-English items.
+
 Return only JSON with this exact shape:
 {{
  "constructive_score": 0,
@@ -118,7 +125,8 @@ Return only JSON with this exact shape:
  "flavor": "one_of_the_allowed_flavors",
  "tags": ["one_to_four_allowed_tags"],
  "reason_code": "short_snake_case",
-  "reason_text": "one concise sentence"
+  "reason_text": "one concise sentence",
+  "language": "en"
 }}
 """.format(topics=topics_prompt_block(), flavors=flavors_prompt_block(), tags=tags_prompt_block())

@@ -222,6 +230,60 @@ class LocalModelClient:
        """
        return self._raw_content(self._build_payload(messages, None))

+    def rank_for_social(self, candidates: list[dict]) -> list[dict]:
+        """Rank a small candidate set for social sharing in ONE model call.
+
+        All candidates go into a single comparative prompt (not one call each).
+        Returns the model's entries, in the order the model listed them, as dicts
+        of {id, social_score, why, talking_points, angle, entities}. An empty
+        `candidates` returns [] without calling the model.
+
+        Raises RuntimeError when the reply has no 'ranked' list; errors from
+        `chat_text` / `parse_classifier_json` are not caught here, so callers are
+        expected to fall back to their own ranking on any failure.
+        """
+        if not candidates:
+            return []
+
+        def describe(c: dict) -> str:
+            # Collapse whitespace in the summary and cap it to keep the prompt small.
+            summ = " ".join((c.get("summary") or "").split())[:280]
+            return f'- id={int(c["id"])} | topic={c.get("topic")} | {c["title"]} :: {summ}'
+
+        listing = "\n".join([describe(c) for c in candidates])
+        user = (
+            "These are constructive-news articles. Compare them as candidates for a SHORT X "
+            "(Twitter) post from a calm good-news account, and rank best-first by SOCIAL "
+            "share-worthiness — would someone stop scrolling? That differs from how 'good' the "
+            "article is.\n\n" + listing + "\n\n"
+            'Reply with JSON only, exactly this shape:\n'
+            '{"ranked": [{"id": <one of the ids above>, "social_score": <0-10>, '
+            '"why": "one sentence: why it stops the scroll", '
+            '"talking_points": ["3 short factual points a writer could use"], '
+            '"angle": "a possible conversational angle", '
+            '"entities": ["real org/person names mentioned, for tagging"]}]}\n'
+            "Only use ids from the list above. Order best-first."
+        )
+        messages = [
+            {"role": "system", "content": "You rank constructive news for social sharing. Reply with JSON only."},
+            {"role": "user", "content": user},
+        ]
+        data = parse_classifier_json(self.chat_text(messages))
+        ranked = data.get("ranked") if isinstance(data, dict) else None
+        if not isinstance(ranked, list):
+            raise RuntimeError("rank_for_social: missing 'ranked' list")
+
+        def text_list(value, item_cap: int, count_cap: int) -> list[str]:
+            # Require an ACTUAL list — a bare string must not be split into
+            # characters ("fact" → ["f","a","c","t"]).
+            if not isinstance(value, list):
+                return []
+            return [str(item)[:item_cap] for item in value[:count_cap]]
+
+        results = []
+        for entry in ranked:
+            if not isinstance(entry, dict):
+                continue
+            try:
+                article_id = int(entry.get("id"))
+            except (TypeError, ValueError):
+                continue  # entry without a usable id is dropped
+            results.append({
+                "id": article_id,
+                "social_score": _bounded_int(entry.get("social_score")),
+                "why": str(entry.get("why") or "")[:300],
+                "talking_points": text_list(entry.get("talking_points"), 200, 4),
+                "angle": str(entry.get("angle") or "")[:300],
+                "entities": text_list(entry.get("entities"), 80, 8),
+            })
+        return results
+
    def _raw_content(self, payload: dict) -> str:
        body = json.dumps(payload).encode("utf-8")
        headers = {"Content-Type": "application/json"}
@@ -304,7 +366,29 @@ def parse_classifier_json(content: str) -> dict:
        return json.loads(content[start : end + 1])


+def _is_english(language: str) -> bool:
+    """Return True unless the model clearly reported a non-English language.
+
+    Conservative on purpose: missing/blank/undetermined values ("und",
+    "unknown", "mul", "zxx") count as English, so a model hiccup never silently
+    drops genuine English content. English is recognised as "en", a regional
+    form such as "en-US" / "en_gb", and also "eng" or "english" — the field is a
+    free-form string, so a model may answer with the ISO 639-2 code or the
+    language name instead of the requested ISO 639-1 code. Matching is
+    case-insensitive and ignores surrounding whitespace.
+    """
+    lang = (language or "").strip().lower()
+    if not lang or lang in ("und", "unknown", "mul", "zxx"):
+        return True
+    if lang in ("en", "eng", "english"):
+        return True
+    return lang.startswith(("en-", "en_"))
+
+
 def normalize_scores(data: dict, model_name: str) -> dict:
+    language = str(data.get("language") or "").strip().lower()[:16]
+    accepted = 1 if bool(data.get("accepted")) else 0
+    reason_code = str(data.get("reason_code") or "model_no_reason")[:120]
+    reason_text = str(data.get("reason_text") or "")[:1000]
+    # Language gate (code disposes): the public feed is English-only for now. A
+    # non-English article is HELD — never shown — but PRESERVED with a distinct
+    # reason so it isn't counted as a calm-filter rejection or a source failure, and
+    # can be revisited when translation support lands (Phase 4 / GDELT).
+    if not _is_english(language):
+        accepted = 0
+        reason_code = "non_english"
+        reason_text = f"Held — non-English ({language}); awaiting translation support."
    return {
        "constructive_score": _bounded_int(data.get("constructive_score")),
        "cortisol_score": _bounded_int(data.get("cortisol_score")),
@@ -313,12 +397,13 @@ def normalize_scores(data: dict, model_name: str) -> dict:
        "human_benefit_score": _bounded_int(data.get("human_benefit_score")),
        "novelty_score": _bounded_int(data.get("novelty_score")),
        "pr_risk_score": _bounded_int(data.get("pr_risk_score")),
-        "accepted": 1 if bool(data.get("accepted")) else 0,
+        "accepted": accepted,
        "topic": coerce_topic(data.get("topic")),
        "flavor": coerce_flavor(data.get("flavor")),
        "tags": coerce_tags(data.get("tags")),
-        "reason_code": str(data.get("reason_code") or "model_no_reason")[:120],
-        "reason_text": str(data.get("reason_text") or "")[:1000],
+        "reason_code": reason_code,
+        "reason_text": reason_text,
+        "language": language,
        "model_name": model_name,
    }

@@ -329,9 +414,9 @@ def upsert_article_score(conn: sqlite3.Connection, article_id: int, scores: dict
        INSERT INTO article_scores (
            article_id, constructive_score, cortisol_score, ragebait_score,
            agency_score, human_benefit_score, novelty_score, pr_risk_score,
-            accepted, topic, flavor, reason_code, reason_text, model_name, scored_at
+            accepted, topic, flavor, reason_code, reason_text, language, model_name, scored_at
        )
-        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
        ON CONFLICT(article_id) DO UPDATE SET
            constructive_score = excluded.constructive_score,
            cortisol_score = excluded.cortisol_score,
@@ -345,6 +430,7 @@ def upsert_article_score(conn: sqlite3.Connection, article_id: int, scores: dict
            flavor = excluded.flavor,
            reason_code = excluded.reason_code,
            reason_text = excluded.reason_text,
+            language = excluded.language,
            model_name = excluded.model_name,
            scored_at = CURRENT_TIMESTAMP
        """,
@@ -362,6 +448,7 @@ def upsert_article_score(conn: sqlite3.Connection, article_id: int, scores: dict
            scores["flavor"],
            scores["reason_code"],
            scores["reason_text"],
+            scores.get("language"),
            scores["model_name"],
        ),
    )
@@ -8,6 +8,7 @@ and for replacements. It will never be perfect; it's an honest hint, not a gate.

 from __future__ import annotations

+import re
 from urllib.parse import urlsplit

 # Host suffixes considered paywalled. Subdomains match (news.nature.com → nature.com).
@@ -53,3 +54,54 @@ def is_paywalled_for_source(url: str | None, override: str | None = None) -> boo
    if override == "paywalled":
        return True
    return is_paywalled(url)
+
+
+# --- Content-level accessibility (deep-preview only; the live pipeline still never
+#     fetches article pages) -----------------------------------------------------
+
+# Wall phrases that appear in the rendered, walled state. Kept specific so a footer
+# "subscribe to our newsletter" doesn't read as a paywall.
+# All-lowercase phrases; check_article_access looks for them as substrings of the
+# lowercased page HTML.
+_WALL_MARKERS = (
+    "subscribe to continue", "subscribe to keep reading", "subscribe to read",
+    "to continue reading", "already a subscriber", "subscribers only",
+    "this article is for subscribers", "this content is for subscribers",
+    "create a free account to continue", "create an account to keep reading",
+    "unlock this article", "register to continue reading",
+)
+# `"isAccessibleForFree": false` / `true` (case-insensitive). The value may be bare
+# or double-quoted; the \1 backreference requires the closing quote to match the
+# opening one.
+_ACCESS_FALSE = re.compile(r'"isaccessibleforfree"\s*:\s*("?)(false)\1', re.I)
+_ACCESS_TRUE = re.compile(r'"isaccessibleforfree"\s*:\s*("?)(true)\1', re.I)
+# `content-tier"` / `content_tier"` followed, with no `>` in between, by
+# `content="locked` — presumably a <meta> tag; confirm against real pages.
+_CONTENT_LOCKED = re.compile(r'content[_-]tier"[^>]*content="locked', re.I)
+# Whole <script>/<style>/<noscript>/<template> elements including their contents
+# (case-insensitive; '.' also matches newlines).
+_STRIP_BLOCKS = re.compile(r"(?is)<(script|style|noscript|template)[^>]*>.*?</\1>")
+# Any single remaining tag.
+_STRIP_TAGS = re.compile(r"(?s)<[^>]+>")
+# Runs of whitespace, collapsed to one space when measuring visible text.
+_WS = re.compile(r"\s+")
+
+
+def check_article_access(url: str, fetcher, timeout: int = 8) -> str:
+    """Best-effort readability check of ONE article URL.
+
+    Calls `fetcher(url, timeout=timeout)` and classifies the page as
+    'readable' | 'paywalled' | 'blocked' | 'unknown':
+
+    - 'blocked'   — the fetch raised anything.
+    - 'unknown'   — the result could not be decoded, or the page is too thin to judge.
+    - 'paywalled' — an explicit signal is present (isAccessibleForFree=false,
+      content-tier=locked, or one of the wall phrases).
+    - 'readable'  — no wall signal and enough visible text: 600+ characters when
+      the page declares isAccessibleForFree=true, otherwise 1500+.
+
+    Heuristic by nature; it only reports a label and takes no action itself.
+    """
+    try:
+        raw = fetcher(url, timeout=timeout)
+    except Exception:  # noqa: BLE001 — any fetch failure = can't read it right now
+        return "blocked"
+    try:
+        html = raw.decode("utf-8", "ignore")
+    except Exception:  # noqa: BLE001
+        return "unknown"
+
+    # Structured signals first, then the phrase list on the lowercased page.
+    if _ACCESS_FALSE.search(html) or _CONTENT_LOCKED.search(html):
+        return "paywalled"
+    lowered = html.lower()
+    for marker in _WALL_MARKERS:
+        if marker in lowered:
+            return "paywalled"
+
+    # No wall signal — judge by how much visible text is left once script/style
+    # blocks and tags are removed and whitespace is collapsed.
+    without_blocks = _STRIP_BLOCKS.sub(" ", html)
+    without_tags = _STRIP_TAGS.sub(" ", without_blocks)
+    text = _WS.sub(" ", without_tags).strip()
+    needed = 600 if _ACCESS_TRUE.search(html) else 1500
+    return "readable" if len(text) >= needed else "unknown"
@@ -0,0 +1,400 @@
+"""Publishing Desk — the platform-neutral outbound-share queue (X first).
+
+Pattern (Claude + Codex): code reduces the corpus to a small set of strong,
+*eligible* candidates; ONE bounded comparative LLM call ranks them together and
+returns talking points / angle / entities; code validates, applies diversity, and
+tops the queue up to a target. If the model is down or returns junk, a deterministic
+ranking is the fallback — the Desk always works.
+
+The human writes every blurb; the LLM never writes the post and never invents a
+@handle (handles come only from the verified `entity_handles` table or a source's
+own `x_handle`).
+"""
+from __future__ import annotations
+
+import json
+import re
+import sqlite3
+from datetime import datetime, timezone
+
+from .paywall import is_paywalled_for_source
+
+PLATFORM_X = "x"
+QUEUE_TARGET = 8        # how many active items the Desk tries to keep ready
+_LLM_POOL = 15         # most candidates handed to the one comparative LLM call
+_RECENT = "-3 days"     # "timely" window for share candidates
+# Active = occupying a slot in the working queue (so we don't re-add or duplicate).
+_ACTIVE = ("queued", "drafting", "opened")
+
+# Legal suffixes are dropped ("Apple Inc" ≡ "Apple") but ONLY from the END, and "the"
+# is NEVER dropped. Removing them anywhere collapsed "The Who"→"who" (collides with
+# WHO) and "Inc. Magazine"→"magazine". Identity words (university, institute, lab…) are
+# preserved; short forms/abbreviations need explicit alias rows.
+_LEGAL_SUFFIXES = {"inc", "llc", "ltd", "corp", "corporation", "plc", "gmbh", "co"}
+
+
+def normalize_entity(name: str) -> str:
+    """Return the lookup key for an entity name.
+
+    Lowercases, turns every character outside a-z / 0-9 / space into a space,
+    collapses whitespace, and drops legal suffixes from the END only ("Apple
+    Inc." → "apple", while "Inc. Magazine" → "inc magazine"). A None/empty name
+    — or one made up solely of legal suffixes — yields "".
+    """
+    cleaned = re.sub(r"[^a-z0-9 ]", " ", (name or "").lower())
+    words = cleaned.split()
+    end = len(words)
+    while end and words[end - 1] in _LEGAL_SUFFIXES:   # trailing only
+        end -= 1
+    return " ".join(words[:end])
+
+
+_HANDLE_RE = re.compile(r"^[A-Za-z0-9_]{1,15}$")   # X: 1-15 chars, letters/digits/underscore
+
+
+def valid_handle(handle: str | None) -> str | None:
+    """Return the canonical handle WITHOUT the @, or None when it is not usable.
+
+    Surrounding whitespace and ONE optional leading @ are tolerated. Anything
+    that is then not 1-15 letters/digits/underscores is rejected — empty input,
+    spaces, URLs, punctuation — so '@', '@not a handle', '@https://x.com/NASA'
+    and '@NASA!' all give None.
+    """
+    candidate = (handle or "").strip()
+    candidate = candidate[1:] if candidate.startswith("@") else candidate
+    if _HANDLE_RE.match(candidate):
+        return candidate
+    return None
+
+
+# --- verified handle resolution -------------------------------------------------
+
+def resolve_handles(conn: sqlite3.Connection, entities: list[str], source_handle: str | None = None,
+                    platform: str = PLATFORM_X, cap: int = 2) -> list[dict]:
+    """Resolve up to `cap` verified handles for a share.
+
+    The source's own handle (if any) is tried first; then each entity name is
+    normalized and looked up in `entity_handles` for `platform`. Every handle
+    is validated with `valid_handle` and de-duplicated case-insensitively.
+    Names with no row in the table are skipped — nothing is guessed.
+
+    Returns a list of {"handle": "@name", "profile_url": ..., "via": "source" |
+    "entity"}; profile_url defaults to the handle's x.com URL when none is stored.
+    """
+    resolved: list[dict] = []
+    used: set[str] = set()
+
+    def add(handle: str | None, profile_url: str | None, via: str) -> None:
+        canon = valid_handle(handle)   # validate even verified/source handles before display
+        if not canon or canon.lower() in used:
+            return
+        used.add(canon.lower())
+        resolved.append({"handle": "@" + canon, "profile_url": profile_url or f"https://x.com/{canon}", "via": via})
+
+    if source_handle:
+        add(source_handle, None, "source")
+
+    lookup = "SELECT handle, profile_url FROM entity_handles WHERE normalized_name=? AND platform=?"
+    for name in entities or []:
+        if len(resolved) >= cap:
+            break
+        norm = normalize_entity(name)
+        if norm:
+            row = conn.execute(lookup, (norm, platform)).fetchone()
+            if row:
+                add(row["handle"], row["profile_url"], "entity")
+    return resolved[:cap]
+
+
+def add_entity_handle(conn: sqlite3.Connection, entity_name: str, handle: str,
+                      profile_url: str | None = None, platform: str = PLATFORM_X) -> bool:
+    """Store (or refresh) a verified handle for an entity and commit.
+
+    The row is keyed on (normalized_name, platform): a second save for the same
+    entity overwrites handle, profile_url and entity_name and bumps verified_at.
+    Returns False without touching the database when the name normalizes to ""
+    or the handle fails `valid_handle`; True after a successful write.
+    """
+    norm = normalize_entity(entity_name)
+    canon = valid_handle(handle)
+    if not (norm and canon):   # reject junk handles before they're ever stored
+        return False
+    url = profile_url or f"https://x.com/{canon}"
+    params = (entity_name.strip(), norm, platform, canon, url)
+    conn.execute(
+        """INSERT INTO entity_handles (entity_name, normalized_name, platform, handle, profile_url)
+           VALUES (?, ?, ?, ?, ?)
+           ON CONFLICT(normalized_name, platform) DO UPDATE SET
+               handle=excluded.handle, profile_url=excluded.profile_url,
+               entity_name=excluded.entity_name, verified_at=CURRENT_TIMESTAMP""",
+        params,
+    )
+    conn.commit()
+    return True
+
+
+# --- candidate eligibility + ranking --------------------------------------------
+
+def eligible_candidates(conn: sqlite3.Connection, platform: str = PLATFORM_X, limit: int = _LLM_POOL) -> list[dict]:
+    """Return the top `limit` share candidates, pre-ranked by `_det_score`.
+
+    SQL applies the hard filters: accepted, not a duplicate, source content
+    visible, all four summary fields present, published/discovered within the
+    _RECENT window, and no outbound_shares row for `platform` that is
+    queued/drafting/opened/posted/skipped or still snoozed (a snooze with no end
+    date counts as still snoozed). At most the 200 most recent matches are
+    fetched. Paywalled URLs are then dropped in Python via
+    `is_paywalled_for_source`, and the rest are sorted best-first.
+    """
+    rows = conn.execute(
+        f"""
+        SELECT a.id, a.title, a.canonical_url, a.image_url, a.published_at, a.discovered_at,
+               a.source_id, src.name AS source_name, src.x_handle AS source_handle,
+               src.default_category AS category, src.paywall_override,
+               s.constructive_score, s.novelty_score, s.topic,
+               m.summary, m.what_happened, m.why_matters, m.why_belongs
+        FROM articles a
+        JOIN article_scores s ON s.article_id = a.id
+        JOIN sources src ON src.id = a.source_id
+        JOIN article_summaries m ON m.article_id = a.id
+        WHERE s.accepted = 1
+          AND a.duplicate_of IS NULL
+          AND src.content_visible = 1
+          AND m.summary IS NOT NULL AND m.what_happened IS NOT NULL
+              AND m.why_matters IS NOT NULL AND m.why_belongs IS NOT NULL
+          AND COALESCE(a.published_at, a.discovered_at) >= datetime('now', ?)
+          AND a.id NOT IN (
+              SELECT article_id FROM outbound_shares WHERE platform = ? AND (
+                  status IN ('queued','drafting','opened','posted','skipped')
+                  OR (status = 'snoozed' AND (snooze_until IS NULL OR snooze_until > datetime('now')))
+              )
+          )
+        ORDER BY COALESCE(a.published_at, a.discovered_at) DESC
+        LIMIT 200
+        """,
+        (_RECENT, platform),
+    ).fetchall()
+    readable = (
+        dict(row) for row in rows
+        if not is_paywalled_for_source(row["canonical_url"], row["paywall_override"])
+    )
+    # sorted() is stable, so equal scores keep the query's newest-first order.
+    return sorted(readable, key=_det_score, reverse=True)[:limit]
+
+
+def _det_score(c: dict) -> float:
+    """Deterministic shareability score — the pre-rank and the LLM-failure fallback.
+
+    'Good article' and 'good post' differ, so novelty is weighted 1.5x against
+    1.0x for the constructive score, and a candidate with an image gets a flat
+    +2.0. Missing or None scores count as 0. No recency term is computed here.
+    """
+    novelty = c.get("novelty_score") or 0
+    constructive = c.get("constructive_score") or 0
+    image_bonus = 2.0 if c.get("image_url") else 0.0
+    return 1.5 * novelty + 1.0 * constructive + image_bonus
+
+
+def _diverse_pick(cands: list[dict], need: int, per_source: int = 1, per_topic: int = 2) -> list[dict]:
+    """Choose `need` candidates from an already-ranked list, spread across sources/topics.
+
+    First pass walks the list in rank order and takes a candidate unless its
+    source already has `per_source` picks or its (non-empty) topic already has
+    `per_topic`. If that leaves fewer than `need`, the skipped candidates are
+    appended in rank order to fill up. Never returns more than `need` items.
+    """
+    picked: list[dict] = []
+    source_counts: dict = {}
+    topic_counts: dict = {}
+    for cand in cands:
+        if len(picked) >= need:
+            break
+        source, topic = cand.get("source_id"), cand.get("topic")
+        if source_counts.get(source, 0) >= per_source or (topic and topic_counts.get(topic, 0) >= per_topic):
+            continue
+        picked.append(cand)
+        source_counts[source] = source_counts.get(source, 0) + 1
+        if topic:
+            topic_counts[topic] = topic_counts.get(topic, 0) + 1
+    # If diversity caps left us short (small pool), fill from the remainder in rank order.
+    if len(picked) < need:
+        taken = {cand["id"] for cand in picked}
+        picked += [cand for cand in cands if cand["id"] not in taken]
+    return picked[:need]
+
+
+# --- queue build (background job) -----------------------------------------------
+
+def _share_url(base_url: str, article_id: int, platform: str = PLATFORM_X) -> str:
+    """Build the tracked link to an article's /a/{id} page.
+
+    Trailing slashes on `base_url` are removed (None/"" gives a site-relative
+    URL), and UTM parameters naming `platform` as the source are appended.
+    """
+    root = (base_url or "").rstrip("/")
+    return f"{root}/a/{article_id}?utm_source={platform}&utm_medium=social&utm_campaign=publishing_desk"
+
+
+def _llm_score(entry: dict) -> float | None:
+    """Return the model's `social_score` as a finite number, or None when it is
+    missing or unusable (bool, NaN/inf, non-numeric text, containers)."""
+    raw = entry.get("social_score")
+    if isinstance(raw, bool):
+        return None
+    if isinstance(raw, str):
+        try:
+            raw = float(raw)
+        except ValueError:
+            return None
+    if not isinstance(raw, (int, float)):
+        return None
+    if raw != raw or abs(raw) == float("inf"):   # NaN / ±inf are not rankable scores
+        return None
+    return raw
+
+
+def _bindable(value):
+    """Pass through values sqlite3 can bind as text/number; anything else
+    (list, dict, arbitrary object) becomes None."""
+    return value if isinstance(value, (str, int, float)) else None
+
+
+def build_queue(conn: sqlite3.Connection, base_url: str, client=None,
+                platform: str = PLATFORM_X, target: int = QUEUE_TARGET) -> dict:
+    """Top the active queue up to `target`. Comparative LLM ranks the eligible pool;
+    deterministic fallback if the model is unavailable or returns junk. Never
+    overwrites saved draft/final text on a re-queue.
+
+    `client`, when given, must expose `rank_for_social(list_of_dicts)`; its result is
+    treated as untrusted: entries that are not dicts, carry an unknown/duplicate/
+    unhashable id, or a non-numeric score are ignored or neutralised instead of
+    raising. Returns `{"added", "active", "ranked_by"}` where `ranked_by` is
+    "llm", "deterministic", or "none" (nothing needed / nothing eligible)."""
+    active = conn.execute(
+        "SELECT COUNT(*) FROM outbound_shares WHERE platform=? AND status IN (?,?,?)",
+        (platform, *_ACTIVE),
+    ).fetchone()[0]
+    need = target - active
+    if need <= 0:
+        return {"added": 0, "active": active, "ranked_by": "none"}
+
+    cands = eligible_candidates(conn, platform=platform, limit=_LLM_POOL)
+    if not cands:
+        return {"added": 0, "active": active, "ranked_by": "none"}
+
+    by_id = {c["id"]: c for c in cands}
+    ranked_by = "deterministic"
+    llm = None
+    if client is not None:
+        try:
+            llm = client.rank_for_social(
+                [{"id": c["id"], "title": c["title"], "summary": c.get("summary") or "",
+                  "topic": c.get("topic")} for c in cands]
+            )
+        except Exception:  # noqa: BLE001 — model down/slow/garbage → deterministic fallback
+            llm = None
+    if llm:
+        # validate ids against the eligible pool AND dedupe (a model that repeats an id
+        # must not inflate the chosen set); attach LLM fields; rank by social score.
+        try:
+            rows = list(llm)
+        except TypeError:   # not iterable at all → treat as no usable ranking
+            rows = []
+        seen_ids, ordered = set(), []
+        for r in rows:
+            if not isinstance(r, dict):
+                continue
+            rid = r.get("id")
+            try:
+                usable = rid in by_id and rid not in seen_ids
+            except TypeError:   # unhashable id (list/dict) can never match the pool
+                continue
+            if usable:
+                seen_ids.add(rid)
+                by_id[rid]["_llm"] = r
+                ordered.append(by_id[rid])
+        if ordered:
+            ranked_by = "llm"
+            # A missing/unusable score ranks as 0 rather than breaking the sort.
+            ordered.sort(key=lambda c: _llm_score(c["_llm"]) or 0, reverse=True)
+            rest = sorted((c for c in cands if "_llm" not in c), key=_det_score, reverse=True)
+            cands = ordered + rest
+
+    chosen = _diverse_pick(cands, need)
+    before = conn.total_changes
+    for c in chosen:
+        m = c.get("_llm")
+        if m:
+            social, angle = _llm_score(m), _bindable(m.get("angle"))
+            rationale = _bindable(m.get("why")) or _bindable(m.get("rationale"))
+            points = m.get("talking_points") if isinstance(m.get("talking_points"), list) else []
+            entities = m.get("entities") if isinstance(m.get("entities"), list) else []
+        else:
+            # Deterministic fallback (model down): seed the writing aids from the
+            # already-generated summary/explanation so the card is still useful.
+            # interest score + angle stay None on purpose — they're LLM-only judgments
+            # the UI hides when absent; we don't manufacture a fake angle/score.
+            social, angle, entities = None, None, []
+            rationale = c.get("summary")
+            points = [p for p in (c.get("what_happened"), c.get("why_matters"), c.get("why_belongs")) if p]
+        handles = resolve_handles(conn, entities, c.get("source_handle"), platform=platform)
+        # ON CONFLICT re-queues ONLY an (expired) snoozed row — eligibility already
+        # excludes active/posted/skipped, and the WHERE guard makes that defense-in-depth
+        # so a re-build can never clobber an active draft or a terminal status. draft_text
+        # / final_text are never in the SET, so saved work survives a re-queue.
+        conn.execute(
+            """INSERT INTO outbound_shares
+                 (article_id, platform, status, social_score, rationale, talking_points,
+                  angle, entities, suggested_handles, share_url)
+               VALUES (?, ?, 'queued', ?, ?, ?, ?, ?, ?, ?)
+               ON CONFLICT(article_id, platform) DO UPDATE SET
+                  status='queued', social_score=excluded.social_score,
+                  rationale=excluded.rationale, talking_points=excluded.talking_points,
+                  angle=excluded.angle, entities=excluded.entities,
+                  suggested_handles=excluded.suggested_handles, share_url=excluded.share_url,
+                  snooze_until=NULL, updated_at=CURRENT_TIMESTAMP
+               WHERE outbound_shares.status = 'snoozed'
+                 AND outbound_shares.snooze_until IS NOT NULL
+                 AND outbound_shares.snooze_until <= datetime('now')""",
+            (c["id"], platform, social, rationale,
+             json.dumps(points), angle,
+             json.dumps(entities), json.dumps(handles), _share_url(base_url, c["id"], platform)),
+        )
+    conn.commit()
+    # Counts come from ACTUAL persisted rows, not loop iterations (a skipped conflict
+    # changes nothing, so it can't falsely report a fuller queue).
+    # NOTE(review): total_changes is connection-wide, so this assumes resolve_handles()
+    # does not write through `conn` — confirm.
+    added = conn.total_changes - before
+    active_now = conn.execute(
+        "SELECT COUNT(*) FROM outbound_shares WHERE platform=? AND status IN (?,?,?)",
+        (platform, *_ACTIVE),
+    ).fetchone()[0]
+    return {"added": added, "active": active_now, "ranked_by": ranked_by}
+
+
+# --- queue read + status transitions --------------------------------------------
+
+def _row_to_item(r: sqlite3.Row) -> dict:
+    """Copy a result row into a plain dict, decoding the JSON-encoded list columns.
+
+    `talking_points`, `entities` and `suggested_handles` are always set on the
+    result: the decoded JSON when the stored value is truthy and parses, otherwise
+    an empty list (missing key, NULL, empty string, or undecodable value)."""
+    item = dict(r)
+    for field in ("talking_points", "entities", "suggested_handles"):
+        raw = item.get(field)
+        decoded = []
+        if raw:
+            try:
+                decoded = json.loads(raw)
+            except (ValueError, TypeError):
+                decoded = []   # corrupt or non-text value → present as "no entries"
+        item[field] = decoded
+    return item
+
+
+def list_queue(conn: sqlite3.Connection, platform: str = PLATFORM_X, include_archived: bool = False) -> list[dict]:
+    """Return the working queue for `platform` as decoded item dicts.
+
+    Rows are limited to the active statuses; `include_archived=True` additionally
+    returns skipped/snoozed rows (the recoverable tray). Posted rows are never
+    requested here. Ordering: opened first, then drafting, then everything else;
+    within a group by social_score descending, then newest created_at."""
+    wanted = list(_ACTIVE)
+    if include_archived:
+        wanted += ["skipped", "snoozed"]
+    # One "?" placeholder per status — values are always bound, never interpolated.
+    marks = ",".join("?" * len(wanted))
+    sql = f"""
+        SELECT o.id, o.article_id, o.platform, o.status, o.social_score, o.rationale,
+               o.talking_points, o.angle, o.entities, o.suggested_handles, o.draft_text,
+               o.final_text, o.share_url, o.post_url, o.snooze_until, o.opened_at, o.posted_at,
+               a.title, a.canonical_url, a.image_url, src.name AS source_name
+        FROM outbound_shares o
+        JOIN articles a ON a.id = o.article_id
+        JOIN sources src ON src.id = a.source_id
+        WHERE o.platform = ? AND o.status IN ({marks})
+        ORDER BY CASE o.status WHEN 'opened' THEN 0 WHEN 'drafting' THEN 1 ELSE 2 END,
+                 o.social_score DESC, o.created_at DESC
+        """
+    fetched = conn.execute(sql, (platform, *wanted)).fetchall()
+    return list(map(_row_to_item, fetched))
+
+
+# Statuses of a share that is still in the working queue; set_status() refuses to
+# transition a row whose current status is not one of these.
+# NOTE(review): looks like the set form of the `_ACTIVE` sequence used by
+# build_queue()/list_queue() — confirm the two stay in sync.
+_ACTIVE_SET = {"queued", "drafting", "opened"}
+# Every status value set_status() accepts as a transition target.
+_VALID_STATUS = {"queued", "drafting", "opened", "posted", "skipped", "snoozed"}
+
+
+def _is_future(ts: str | None) -> bool:
+    """True only when `ts` parses as an ISO-style timestamp strictly later than now.
+
+    "Z" is dropped and "T" becomes a space before parsing; a value without an
+    offset is interpreted as UTC. Empty, None, or unparseable input yields False."""
+    if not ts:
+        return False
+    try:
+        cleaned = str(ts).strip().replace("Z", "").replace("T", " ")
+        parsed = datetime.fromisoformat(cleaned)
+    except (ValueError, TypeError):
+        return False
+    aware = parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
+    return datetime.now(timezone.utc) < aware
+
+
+def _sqlite_utc(ts: str | None) -> str | None:
+    """Render an ISO-style timestamp as UTC 'YYYY-MM-DD HH:MM:SS' — the text form
+    SQLite's datetime('now') produces — or None if it cannot be parsed. A value
+    without an offset is taken as UTC; fractional seconds are dropped."""
+    try:
+        dt = datetime.fromisoformat(str(ts).strip().replace("Z", "").replace("T", " "))
+    except (ValueError, TypeError):
+        return None
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=timezone.utc)
+    return dt.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+
+
+def set_status(conn: sqlite3.Connection, share_id: int, status: str, *,
+               draft_text: str | None = None, final_text: str | None = None,
+               post_url: str | None = None, snooze_until: str | None = None) -> bool:
+    """Transition an ACTIVE share. Enforces the lifecycle: only queued/drafting/opened
+    items transition here — `posted` is permanently terminal and skipped/snoozed recover
+    via restore() (so dedup can't be undone and an item can't be reposted). `snoozed`
+    requires a valid FUTURE timestamp (a null/past date would exclude it forever);
+    leaving snooze otherwise clears snooze_until. opened/posted stamp their times.
+
+    The snooze timestamp is stored normalized to UTC 'YYYY-MM-DD HH:MM:SS' because the
+    re-queue guard compares it as text against datetime('now'); a raw ISO value with
+    'T'/'Z' or a non-UTC offset would not order correctly in that comparison.
+    draft_text/final_text/post_url are written only when given (not None).
+    Returns True when a row was actually updated, False for an unknown status, an
+    invalid snooze time, a missing row, or a row that is no longer active."""
+    if status not in _VALID_STATUS:
+        return False
+    if status == "snoozed":
+        if not _is_future(snooze_until):
+            return False
+        snooze_until = _sqlite_utc(snooze_until)
+        if snooze_until is None:
+            return False
+    row = conn.execute("SELECT status FROM outbound_shares WHERE id = ?", (share_id,)).fetchone()
+    # Positional access works whether or not the connection uses sqlite3.Row.
+    if not row or row[0] not in _ACTIVE_SET:   # terminal/archived → use restore()
+        return False
+    # snooze_until is set only when snoozing; cleared on every other transition.
+    sets = ["status = ?", "updated_at = CURRENT_TIMESTAMP", "snooze_until = ?"]
+    params: list = [status, snooze_until if status == "snoozed" else None]
+    if status == "opened":
+        sets.append("opened_at = CURRENT_TIMESTAMP")
+    if status == "posted":
+        sets.append("posted_at = CURRENT_TIMESTAMP")
+    if draft_text is not None:
+        sets.append("draft_text = ?")
+        params.append(draft_text)
+    if final_text is not None:
+        sets.append("final_text = ?")
+        params.append(final_text)
+    if post_url is not None:
+        sets.append("post_url = ?")
+        params.append(post_url)
+    params.append(share_id)
+    cur = conn.execute(
+        f"UPDATE outbound_shares SET {', '.join(sets)} WHERE id = ? "
+        "AND status IN ('queued','drafting','opened')",   # atomic: don't transition a row that just changed
+        params,
+    )
+    conn.commit()
+    return cur.rowcount > 0
+
+
+def save_draft(conn: sqlite3.Connection, share_id: int, draft_text: str) -> bool:
+    """Store `draft_text` on an active share and report whether a row was updated.
+
+    A `queued` row is promoted to `drafting`; `drafting`/`opened` keep their status.
+    Rows in any other status are left untouched, so a late autosave arriving after
+    Posted/Skip/Snooze is a no-op and returns False."""
+    sql = (
+        "UPDATE outbound_shares SET draft_text = ?, status = CASE status WHEN 'queued' THEN 'drafting' ELSE status END, "
+        "updated_at = CURRENT_TIMESTAMP WHERE id = ? AND status IN ('queued','drafting','opened')"
+    )
+    result = conn.execute(sql, (draft_text, share_id))
+    conn.commit()
+    updated = result.rowcount
+    return updated > 0
+
+
+def restore(conn: sqlite3.Connection, share_id: int) -> bool:
+    """Return a skipped or snoozed share to `queued`, clearing its snooze time.
+
+    Rows in any other status are not modified. Returns True when a row changed."""
+    sql = (
+        "UPDATE outbound_shares SET status='queued', snooze_until=NULL, updated_at=CURRENT_TIMESTAMP "
+        "WHERE id = ? AND status IN ('skipped','snoozed')"
+    )
+    result = conn.execute(sql, (share_id,))
+    conn.commit()
+    revived = result.rowcount
+    return revived > 0
@@ -11,6 +11,7 @@ import sqlite3
 from datetime import UTC, datetime, timedelta

 from .feeds import MAX_BACKOFF_MINUTES
+from .localtime import local_now
 from .paywall import is_paywalled, is_paywalled_for_source

 # UA substrings that mark automated clients. Crawlers run JS on a throttled
@@ -78,6 +79,7 @@ def feed(
    follow_sources: list[int] | None = None,
    follow_tags: list[str] | None = None,
    since: str | None = None,
+    match: str | None = None,
 ) -> list[dict]:
    """Return articles with categorical filters applied in SQL.

@@ -92,6 +94,14 @@ def feed(
    """
    clauses = ["a.duplicate_of IS NULL", "src.content_visible = 1"]
    params: list = []
+    # Full-text search: join the FTS index and MATCH first, so its bound param
+    # leads and relevance can drive the ordering. All the boundary clauses below
+    # still apply, so search mirrors exactly what the visitor feed would show.
+    fts_join = ""
+    if match:
+        fts_join = "JOIN article_search ON article_search.article_id = a.id"
+        clauses.append("article_search MATCH ?")
+        params.append(match)
    if accepted_only:
        clauses.append("s.accepted = 1")
    if topic:
@@ -155,17 +165,19 @@ def feed(
    where = "WHERE " + " AND ".join(clauses)
    params.extend([limit, offset])

-    order_by = (
-        "COALESCE(a.published_at, a.discovered_at) DESC, rank_score DESC"
-        if sort == "latest"
-        else "rank_score DESC, COALESCE(a.published_at, a.discovered_at) DESC"
-    )
+    if match:
+        order_by = "bm25(article_search), COALESCE(a.published_at, a.discovered_at) DESC"  # relevance, then recency
+    elif sort == "latest":
+        order_by = "COALESCE(a.published_at, a.discovered_at) DESC, rank_score DESC"
+    else:
+        order_by = "rank_score DESC, COALESCE(a.published_at, a.discovered_at) DESC"
    rows = conn.execute(
        f"""
        SELECT {_ARTICLE_COLUMNS}
        FROM articles a
        JOIN sources src ON src.id = a.source_id
        JOIN article_scores s ON s.article_id = a.id
+        {fts_join}
        {where}
        ORDER BY {order_by}
        LIMIT ? OFFSET ?
@@ -175,6 +187,27 @@ def feed(
    return [dict(row) for row in rows]


+def reindex_search(conn: sqlite3.Connection) -> int:
+    """Rebuild the article_search FTS index from the accepted, non-duplicate corpus
+    (title/description/source name/tags). A cheap full rebuild (a few thousand
+    rows); run on each ingest cycle and lazily on first search. Live visibility /
+    boundary filtering is applied at query time, so it doesn't need reindexing.
+
+    The wipe and refill are one transaction: if the INSERT raises, the DELETE is
+    rolled back and the previous index stays intact (the exception still propagates).
+    Returns the number of rows in the index after the rebuild."""
+    # `with conn` commits on success and rolls back on error. Without it a failed
+    # INSERT would leave the DELETE pending, and the next commit on this connection
+    # would persist an empty search index.
+    with conn:
+        conn.execute("DELETE FROM article_search")
+        conn.execute(
+            """
+        INSERT INTO article_search (article_id, title, body, source_name, tags)
+        SELECT a.id, a.title, COALESCE(a.description, ''), src.name,
+               COALESCE((SELECT group_concat(t.tag, ' ') FROM article_tags t WHERE t.article_id = a.id), '')
+        FROM articles a
+        JOIN sources src ON src.id = a.source_id
+        JOIN article_scores s ON s.article_id = a.id
+        WHERE s.accepted = 1 AND a.duplicate_of IS NULL
+        """
+        )
+    return conn.execute("SELECT COUNT(*) FROM article_search").fetchone()[0]
+
+
 def brief(conn: sqlite3.Connection, brief_date: str | None = None, limit: int = 10) -> dict:
    """Return a stored daily brief (latest if no date) with its ranked items."""
    target_date = brief_date or _latest_brief_date(conn)
@@ -344,6 +377,8 @@ def source_health(conn: sqlite3.Connection) -> list[dict]:
            (SELECT COUNT(*) FROM articles a WHERE a.source_id = s.id) AS total_articles,
            (SELECT COUNT(*) FROM articles a JOIN article_scores sc ON sc.article_id = a.id
             WHERE a.source_id = s.id AND sc.accepted = 1) AS accepted_total,
+            (SELECT COUNT(*) FROM articles a JOIN article_scores sc ON sc.article_id = a.id
+             WHERE a.source_id = s.id AND sc.reason_code = 'non_english') AS non_english,
            (SELECT COUNT(*) FROM articles a WHERE a.source_id = s.id AND a.duplicate_of IS NOT NULL) AS duplicates,
            (SELECT COUNT(*) FROM articles a JOIN article_scores sc ON sc.article_id = a.id
             WHERE a.source_id = s.id AND sc.accepted = 1 AND a.duplicate_of IS NULL) AS served,
@@ -365,7 +400,14 @@ def source_health(conn: sqlite3.Connection) -> list[dict]:
        d = dict(r)
        total = d["total_articles"] or 0
        accepted = d["accepted_total"] or 0
-        d["acceptance_rate"] = round(100 * accepted / total) if total else None
+        non_english = d.get("non_english") or 0
+        # Acceptance is judged over articles actually scored in English — non-English
+        # items are HELD (awaiting translation), not calm-filter rejections, so they
+        # don't drag a multilingual source's rate down.
+        judged = total - non_english
+        d["acceptance_rate"] = round(100 * accepted / judged) if judged else None
+        d["non_english"] = non_english
+        d["non_english_rate"] = round(100 * non_english / total) if total else None
        d["duplicate_rate"] = round(100 * d["duplicates"] / total) if total else None
        # Curation quality: of what this source got ACCEPTED, how much was a
        # duplicate of content already served (accepted_total − served = accepted dupes).
@@ -459,7 +501,9 @@ def _attention(content: dict, sources: list[dict], feedback_unread: int, now: da

 _SRC_ART_FILTERS = {
    "accepted": "AND s.accepted = 1",
-    "rejected": "AND s.accepted = 0",
+    # 'rejected' = calm-filter rejections only; non-English is HELD, its own bucket.
+    "rejected": "AND s.accepted = 0 AND COALESCE(s.reason_code,'') != 'non_english'",
+    "held": "AND s.reason_code = 'non_english'",
    "no_image": "AND (a.image_url IS NULL OR a.image_url = '')",
    "duplicates": "AND a.duplicate_of IS NOT NULL",
 }
@@ -493,6 +537,7 @@ def source_articles(conn: sqlite3.Connection, source_id: int, filter: str = "all
            "published_at": r["published_at"] or r["discovered_at"],
            "accepted": r["accepted"],
            "reason": r["reason_text"] or r["reason_code"],  # the "why" behind accept/reject
+            "held": r["reason_code"] == "non_english",       # held for language, not rejected
            "topic": r["topic"],
            "flavor": r["flavor"],
            "paywalled": is_paywalled_for_source(r["canonical_url"], override),  # effective (domain rule + override)
@@ -510,7 +555,8 @@ def source_articles_summary(conn: sqlite3.Connection, source_id: int) -> dict:
        """
        SELECT COUNT(*) total,
               COALESCE(SUM(s.accepted = 1), 0) accepted,
-               COALESCE(SUM(s.accepted = 0), 0) rejected,
+               COALESCE(SUM(s.accepted = 0 AND COALESCE(s.reason_code,'') != 'non_english'), 0) rejected,
+               COALESCE(SUM(s.reason_code = 'non_english'), 0) non_english,
               COALESCE(SUM(a.image_url IS NULL OR a.image_url = ''), 0) no_image,
               COALESCE(SUM(a.duplicate_of IS NOT NULL), 0) duplicates
        FROM articles a LEFT JOIN article_scores s ON s.article_id = a.id
@@ -523,6 +569,7 @@ def source_articles_summary(conn: sqlite3.Connection, source_id: int) -> dict:
    url = (srow["homepage_url"] or srow["feed_url"]) if srow else None
    return {
        "total": agg["total"], "accepted": agg["accepted"], "rejected": agg["rejected"],
+        "non_english": agg["non_english"],   # held for language (not a calm-filter rejection)
        "no_image": agg["no_image"], "duplicates": agg["duplicates"],
        "paywalled": is_paywalled_for_source(url, override),  # effective
        "paywall_domain": is_paywalled(url),                  # what the domain rule alone says
@@ -533,6 +580,11 @@ def source_articles_summary(conn: sqlite3.Connection, source_id: int) -> dict:
 def admin_stats(conn: sqlite3.Connection, days: int = 30) -> dict:
    """Aggregate, non-personal usage stats for the admin dashboard."""
    since = f"-{days} days"
+    # "Today" for timestamp-based counters is the SITE-LOCAL day (GOODNEWS_TZ), not
+    # UTC: otherwise an evening error (e.g. 22:53 local) lands on the next UTC day and
+    # reads as a fresh "today" the following morning — the exact false-alarm we hit.
+    local_day_start = (local_now().replace(hour=0, minute=0, second=0, microsecond=0)
+                       .astimezone(UTC).strftime("%Y-%m-%d %H:%M:%S"))

    def scalar(sql, params=()):
        return conn.execute(sql, params).fetchone()[0] or 0
@@ -658,7 +710,8 @@ def admin_stats(conn: sqlite3.Connection, days: int = 30) -> dict:
        # check routinely and would read as real users seeing blank screens.
        "client_errors": {
            "today": scalar(
-                f"SELECT COUNT(*) FROM client_errors WHERE date(created_at)=date('now') AND {_NOT_BOT_SQL}"
+                f"SELECT COUNT(*) FROM client_errors WHERE created_at >= ? AND {_NOT_BOT_SQL}",
+                (local_day_start,),
            ),
            "window": scalar(
                f"SELECT COUNT(*) FROM client_errors WHERE created_at>=date('now',?) AND {_NOT_BOT_SQL}",
@@ -175,6 +175,18 @@ def reject_candidate(conn: sqlite3.Connection, candidate_id: int) -> bool:
    return cur.rowcount > 0


+def restore_candidate(conn: sqlite3.Connection, candidate_id: int) -> bool:
+    """Move a REJECTED source candidate back to 'suggested' so it is reviewed again.
+
+    Only rows whose status is exactly 'rejected' are changed; any other status
+    (e.g. a promoted candidate) is left as is. Returns True when a row changed."""
+    sql = (
+        "UPDATE source_candidates SET status = 'suggested', updated_at = CURRENT_TIMESTAMP "
+        "WHERE id = ? AND status = 'rejected'"
+    )
+    result = conn.execute(sql, (candidate_id,))
+    conn.commit()
+    reopened = result.rowcount
+    return reopened > 0
+
+
 def promote_candidate(
    conn: sqlite3.Connection,
    candidate_id: int,
@@ -419,7 +419,9 @@
        box.append(d);
      };
      stat("Mode:", p.classified ? "model (accurate)" : "heuristic (quick, conservative)");
-      stat("Acceptance:", `${Math.round(p.acceptance_rate * 100)}% (${p.accepted}/${p.sampled})`);
+      stat("Acceptance:", p.acceptance_rate == null
+        ? `— (all held · ${p.accepted}/${p.sampled})`
+        : `${Math.round(p.acceptance_rate * 100)}% (${p.accepted}/${p.sampled})`);
      stat("Freshness:", `${p.recent_7d}/${p.sampled} in last 7 days · newest ${(p.newest_published||"unknown").slice(0,10)}`);
      stat("Calm averages:", `cortisol ${p.avg_cortisol} · ragebait ${p.avg_ragebait} · PR ${p.avg_pr_risk}`);
      const mix = (m) => Object.entries(m).map(([k, v]) => `${k} ${v}`).join(" · ") || "—";
				`@@ -0,0 +1 @@`
				`["vagina", "vulva", "nipple", "rectum", "anal", "fecal", "ejaculation", "eunuch", "nude", "nudity", "butt"]`
				`@@ -0,0 +1 @@`
				["death","dying","died","killed","killing","murder","murdered","corpse","coffin","funeral","grave","buried","burial","weapon","gunshot","warfare","violent","violence","deadly","lethal","poison","poisoned","suicide","slaughter","victim","bleeding","wound","wounded","vomit","vomiting","vomited","diarrhea","disease","diseased","cancer","tumor","illness","infection","infected","plague","disabled","lucifer","satan","demon","demonic","devil","damned","hatred","hateful","terror","terrorize","hostage","kidnap","kidnapped","abuse","abused","assault","trauma","traumatic","anxiety","depression","depressed","divorce","divorced","bankrupt","eviction","evicted","layoff","drowned","drowning","choking","suffocate","starving","famine","poverty","despair","misery","miserable","tragic","tragedy","horror","horrible","nightmare","panic","dread","grief","grieving","mourning","rotting","decay","decayed","maggot","vermin","filth","sewage","manure"]