Sync repo to deployed state: SEO recovery, Publishing Desk, Play games, emoji picker

The deploy pipeline runs from the working tree, so a wave of shipped features had never been committed. This snapshots git to what's actually running. SEO impression recovery (live + verified): - Duplicate /a/{id} now 301-redirect to their canonical twin instead of 404 (a hard 404 silently dropped already-indexed URLs and tanked impressions). - Dedup representative selection reworked: accepted/serveable -> established rep (URL stability) -> quality score, so an accepted page never retires to a rejected rep and an indexed canonical doesn't churn when a newer twin arrives. - HEAD /a/{id} returns the same status as GET (api_route GET+HEAD) instead of falling through to the static mount and 404ing. - `dedup --force-recluster`: cycle-locked, model-free re-cluster to re-apply the policy to the existing corpus (shared cycle_lock context manager). - CLI honors GOODNEWS_DB for its default --db (was silently ignored). Publishing Desk (admin tool to post highlights to X via Web Intents): - publishing.py queue/rank/handle-resolution; admin UI; full searchable emoji picker (bundled data, no CDN) for the blurb editor. Play games + site: - Bloom (word-wheel), Memory Match, daily ritual set, Zen Den (dev-gated). - English-only language gate; source prospecting; paywall + dedup hardening. Tests: full suite green (349). Ignores tightened (node_modules, data/*.db). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-18 11:32:27 -04:00
parent 2dbe73430c
commit 89c0fbe1f6
66 changed files with 6138 additions and 109 deletions
@@ -0,0 +1,82 @@
+#!/usr/bin/env python3
+"""Prototype Bloom (Center Circle) generator — prints real sample wheels so we
+can feel the quality before building any UI. The validated logic here becomes
+goodnews/bloom.py."""
+import hashlib
+import json
+import random
+from pathlib import Path
+
+# Vendored word data lives in <repo>/goodnews/data; this script sits one
+# directory below the repo root, hence parents[1].
+_DATA = Path(__file__).resolve().parents[1] / "goodnews" / "data"
+d = json.loads((_DATA / "bloom_words.json").read_text(encoding="utf-8"))
+ACCEPT = d["accept"]
+COMMON = set(d["common"])
+# Pre-compute each accepted word's letter set once so build() only has to do a
+# subset test per word.
+ACCEPT_LS = [(w, frozenset(w)) for w in ACCEPT]
+# Off-brand words we never CELEBRATE as the day's pangram (accept-list unaffected).
+AVOID = set(json.loads((_DATA / "bloom_avoid.json").read_text(encoding="utf-8")))
+
+# Candidate wheels = letter-sets of COMMON 7-distinct-letter words (so the day's
+# pangram is always a recognizable word). No 'S' already guaranteed by the list.
+# Iterate in sorted order: COMMON is a set of str, and str hashing is randomized
+# per interpreter run, so plain set iteration would give this dict a different
+# insertion order every run and make anything derived from it irreproducible.
+PANGRAM_SETS: dict[frozenset, list[str]] = {}
+for w in sorted(COMMON):
+    s = frozenset(w)
+    if len(s) == 7:
+        PANGRAM_SETS.setdefault(s, []).append(w)
+
+# Wheel acceptance thresholds used by valid():
+#   MIN_WORDS / MAX_WORDS — inclusive bounds on the number of accepted words
+#   MIN_COMMON            — minimum number of those words that are in COMMON
+#   TOP_TIER              — fraction of max_score that common words must reach
+MIN_WORDS, MAX_WORDS, MIN_COMMON, TOP_TIER = 24, 60, 14, 0.70
+
+
+def score(w: str) -> int:
+    """Return the points for word *w*: 1 for a four-letter word, else its length."""
+    length = len(w)
+    if length == 4:
+        return 1
+    return length
+
+
+def build(letters: frozenset, center: str):
+    """Collect the playable words for a wheel and total up their scores.
+
+    A word from ACCEPT_LS is playable when it contains *center* and uses only
+    letters from *letters*.
+
+    Returns a 5-tuple ``(words, pangrams, commons, max_score, common_score)``:
+    all playable words, those using every letter of the wheel, those in COMMON,
+    the total score of all words (plus 7 per pangram), and the total score of
+    the common words (plus 7 per pangram that is itself in COMMON).
+    """
+    words, pangrams, commons = [], [], []
+    for word, letter_set in ACCEPT_LS:
+        if center not in word or not letter_set <= letters:
+            continue
+        words.append(word)
+        if letter_set == letters:
+            pangrams.append(word)
+        if word in COMMON:
+            commons.append(word)
+    common_pangram_count = sum(1 for p in pangrams if p in COMMON)
+    max_score = sum(map(score, words)) + 7 * len(pangrams)
+    common_score = sum(map(score, commons)) + 7 * common_pangram_count
+    return words, pangrams, commons, max_score, common_score
+
+
+def valid(letters, center):
+    """Return the wheel's build data if it passes every quality gate, else None.
+
+    On success the result is ``(words, display, commons, max_score,
+    common_score)``, where ``display`` replaces build()'s pangram list with only
+    the pangrams fit to be shown, shortest first.
+    """
+    words, pangrams, commons, max_score, common_score = build(letters, center)
+    word_count = len(words)
+    if word_count < MIN_WORDS or word_count > MAX_WORDS:
+        return None
+    if len(commons) < MIN_COMMON:
+        return None
+    # The DISPLAY pangram must be calm + recognizable: common, not on the avoid
+    # list. (Off-brand pangrams like LUCIFER/VOMITING are still accepted if typed,
+    # just never the day's celebrated word.)
+    display = [p for p in pangrams if p in COMMON and p not in AVOID]
+    if not display:
+        return None
+    # Top tier must be reachable from common vocabulary alone.
+    if common_score < TOP_TIER * max_score:
+        return None
+    display.sort(key=len)
+    return words, display, commons, max_score, common_score
+
+
+def generate(date: str):
+    """Pick the wheel for *date*, reproducibly.
+
+    The RNG is seeded from a SHA-256 of ``"bloom:<date>"``; candidate letter
+    sets and then candidate centers are shuffled with it, and the first
+    (set, center) pair accepted by valid() wins.
+
+    Returns ``(letters, center, result)`` where ``result`` is valid()'s tuple,
+    or None when no candidate wheel passes.
+    """
+    seed = int(hashlib.sha256(f"bloom:{date}".encode()).hexdigest(), 16)
+    rng = random.Random(seed)
+    # Put the candidates in a canonical order BEFORE the seeded shuffle. The
+    # dict's own order may derive from set iteration, which varies between
+    # interpreter runs (str hash randomization); shuffling an unstable order
+    # with a fixed seed would still give a different wheel per process.
+    sets = sorted(PANGRAM_SETS, key=sorted)
+    rng.shuffle(sets)
+    for s in sets:
+        centers = sorted(s)  # same reason: frozenset order is not stable
+        rng.shuffle(centers)
+        for c in centers:
+            res = valid(s, c)
+            if res:
+                return s, c, res
+    return None
+
+
+# Print a handful of sample wheels so their quality can be judged by eye.
+print(f"loaded accept={len(ACCEPT)} common={len(COMMON)} | candidate wheels={len(PANGRAM_SETS)}\n")
+for date in ("2026-06-15", "2026-06-16", "2026-06-17", "2026-06-18", "2026-06-19"):
+    picked = generate(date)
+    if picked is None:
+        # generate() returns None when no candidate passes valid(); report it
+        # instead of crashing on the tuple unpack below.
+        print(f"── {date} ──  no valid wheel found\n")
+        continue
+    s, c, (words, pangrams, commons, max_score, common_score) = picked
+    outer = sorted(s - {c})
+    # Tier thresholds as fractions of max_score; the top tier uses the same
+    # TOP_TIER fraction that valid() checks against.
+    tiers = {"Sprouting": 0, "Budding": int(0.08 * max_score),
+             "Blooming": int(0.30 * max_score), "Flourishing": int(TOP_TIER * max_score)}
+    longest = sorted(words, key=len, reverse=True)[:3]
+    # Fixed seed so the printed sample is the same for a given word list.
+    sample = sorted(random.Random(1).sample(words, min(16, len(words))))
+    print(f"── {date} ──  center [{c.upper()}]  outer {[x.upper() for x in outer]}")
+    print(f"   words={len(words)} (common={len(commons)})  pangram(s)={[p.upper() for p in pangrams]}")
+    print(f"   max_score={max_score}  tiers={tiers}")
+    print(f"   longest={longest}  sample={sample}\n")
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+"""Build Bloom's accepted-word dictionary (one-time / regenerable build step).
+
+The make-or-break of Bloom is the accepted-word list: large and natural enough
+that a normal word is never rejected, but free of obscure crossword-ese and of
+anything offensive (so a shared board can't be made abusive).
+
+Recipe:
+  base   = ENABLE (~173k word-game words, NO proper nouns)  → "is it a real word"
+  ∩ keep words with wordfreq zipf >= COMMON_MIN            → "is it natural/common" ("common" tier only)
+  − profanity/slur blocklist (LDNOOBW en)                   → "is it safe to share"
+  − any word containing 's'   (the wheel never has S, so an S-word can never be
+                               formed → it can never be accepted → drop it)
+  − words < 4 letters
+
+Two tiers are vendored to goodnews/data/bloom_words.json:
+  "accept" (no frequency floor)  — every candidate word; the generous set that COUNTS when typed
+  "common" (zipf >= COMMON_MIN)  — a tighter subset used only to DESIGN puzzles
+                                   (pangram is always recognizable; top tier is
+                                   reachable with everyday vocabulary)
+Pre-filtered + vendored so the game needs no wordfreq at runtime.
+
+Usage:
+  python scripts/build_bloom_words.py preview        # show sizes+samples per threshold
+  python scripts/build_bloom_words.py write          # vendor at the chosen thresholds
+"""
+from __future__ import annotations
+
+import json
+import random
+import sys
+from pathlib import Path
+
+import wordfreq
+
+# Repo root: this script sits one directory below it.
+ROOT = Path(__file__).resolve().parents[1]
+# Destination of the vendored {"accept": [...], "common": [...]} JSON.
+OUT = ROOT / "goodnews" / "data" / "bloom_words.json"
+# Input word lists, read from fixed /tmp paths; this script does not fetch them.
+BASE = Path("/tmp/enable1.txt")      # base dictionary (ENABLE)
+BAD = Path("/tmp/ldnoobw_en.txt")    # profanity/slur blocklist (LDNOOBW en)
+MIN_LEN = 4
+# Accept is VERY generous so a normal word (incl. inflected forms like "beefed",
+# "aced") is never rejected. Tiers are based on `common` (below), NOT on accept,
+# so generosity never makes the game harder.
+# NOTE(review): ACCEPT_MIN is not referenced by any code in this file — the
+# `write` command vendors every candidate as "accept" with no frequency floor.
+ACCEPT_MIN = 2.0
+COMMON_MIN = 3.3   # the DESIGNED puzzle: recognizable words; drives tiers + pangram
+
+
+def _load_candidates() -> tuple[list[str], set[str]]:
+    """Load the base dictionary and reduce it to the candidate words.
+
+    A base word is kept when it is at least MIN_LEN letters, purely alphabetic,
+    contains no "s", and is not on the effective blocklist (the BAD list minus
+    the entries in bloom_allow.json).
+
+    Returns:
+        ``(candidates, blocklist)`` — the surviving words, in no particular
+        order (they come from a set; callers sort), and the effective
+        blocklist that was applied.
+    """
+    base = {
+        w.strip().lower()
+        for w in BASE.read_text(encoding="utf-8").splitlines()
+        if w.strip()
+    }
+    bad = {
+        w.strip().lower()
+        for w in BAD.read_text(encoding="utf-8").splitlines()
+        if w.strip()
+    }
+    # LDNOOBW conflates clinical anatomy/biology with profanity — "block abuse,
+    # not biology": allow legitimate medical/anatomical/normal words back in.
+    allow_path = ROOT / "goodnews" / "data" / "bloom_allow.json"
+    allow = set(json.loads(allow_path.read_text(encoding="utf-8")))
+    bad -= allow
+    out = [
+        w
+        for w in base
+        if len(w) >= MIN_LEN
+        and w.isalpha()
+        and "s" not in w  # wheel never contains S → an S-word is never makeable
+        and w not in bad
+    ]
+    return out, bad
+
+
+def _filter(cands: list[str], zipf_min: float) -> list[str]:
+    """Return, sorted, the candidates whose English zipf frequency is >= *zipf_min*."""
+    kept = []
+    for word in cands:
+        if wordfreq.zipf_frequency(word, "en") >= zipf_min:
+            kept.append(word)
+    kept.sort()
+    return kept
+
+
+def main() -> None:
+    """Run the ``preview`` (default) or ``write`` command from ``sys.argv[1]``.
+
+    ``preview`` prints the size and a fixed-seed sample of the word list at
+    several zipf thresholds. ``write`` vendors the accept/common lists to OUT.
+    Any other command exits with a non-zero status and a message on stderr.
+    """
+    cmd = sys.argv[1] if len(sys.argv) > 1 else "preview"
+    if cmd not in ("preview", "write"):
+        # Fail fast, before loading the word lists, and with a non-zero exit
+        # status so a typo in a build script is not mistaken for success.
+        sys.exit(f"unknown command: {cmd}")
+    cands, bad = _load_candidates()
+    print(f"candidates (real, alpha, >=4, no-S, not-blocked): {len(cands)}  | blocklist {len(bad)}")
+    if cmd == "preview":
+        rng = random.Random(7)
+        for z in (2.5, 2.8, 3.0, 3.3):
+            words = _filter(cands, z)
+            # Cap at the population size: random.sample raises ValueError when
+            # asked for more items than exist.
+            sample = rng.sample(words, min(18, len(words)))
+            print(f"\nzipf>={z}: {len(words)} words")
+            print("  sample:", ", ".join(sorted(sample)))
+    else:  # cmd == "write"
+        # ACCEPT is now BROAD: every valid dictionary word (real ENABLE word, ≥4,
+        # no-S, not profane). No frequency floor — tiers are decoupled (common-based),
+        # so obscure-but-real words like "arraign" count automatically as bonus finds
+        # without ever becoming a pangram or making the game harder. Runtime curation
+        # (allow/block individual words) is DB-backed (bloom_word_overrides), no deploy.
+        accept = sorted(cands)
+        common = _filter(cands, COMMON_MIN)
+        OUT.write_text(json.dumps({"accept": accept, "common": common}))
+        print(f"\nwrote accept={len(accept)} (ALL valid words), "
+              f"common={len(common)} (zipf>={COMMON_MIN}) → {OUT}")
+
+
+if __name__ == "__main__":
+    main()