Sync repo to deployed state: SEO recovery, Publishing Desk, Play games, emoji picker

The deploy pipeline runs from the working tree, so a wave of shipped features had never been committed. This snapshots git to what's actually running. SEO impression recovery (live + verified): - Duplicate /a/{id} pages now 301-redirect to their canonical twin instead of returning 404 (a hard 404 silently dropped already-indexed URLs and tanked impressions). - Dedup representative selection reworked: accepted/serveable -> established rep (URL stability) -> quality score, so an accepted page never retires to a rejected rep and an indexed canonical doesn't churn when a newer twin arrives. - HEAD /a/{id} returns the same status as GET (api_route GET+HEAD) instead of falling through to the static mount and 404ing. - `dedup --force-recluster`: cycle-locked, model-free re-cluster to re-apply the policy to the existing corpus (shared cycle_lock context manager). - CLI honors GOODNEWS_DB for its default --db (was silently ignored). Publishing Desk (admin tool to post highlights to X via Web Intents): - publishing.py queue/rank/handle-resolution; admin UI; full searchable emoji picker (bundled data, no CDN) for the blurb editor. Play games + site: - Bloom (word-wheel), Memory Match, daily ritual set, Zen Den (dev-gated). - English-only language gate; source prospecting; paywall + dedup hardening. Tests: full suite green (349). Ignores tightened (node_modules, data/*.db). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-18 11:32:27 -04:00
parent 2dbe73430c
commit 89c0fbe1f6
66 changed files with 6138 additions and 109 deletions
@@ -0,0 +1,134 @@
+"""Deep-preview accessibility check — content-level readable/paywalled/blocked/unknown,
+and the layered verdict (domain rule + sampled access, evidence over domain alone)."""
+import time
+
+from goodnews import feeds
+from goodnews.paywall import check_article_access
+
+READABLE = b"<html><body><article>" + (b"<p>Real article text here. </p>" * 80) + b"</article></body></html>"
+WALLED_SCHEMA = b'<html><head><script type="application/ld+json">{"isAccessibleForFree": false}</script></head><body><p>teaser</p></body></html>'
+WALLED_PHRASE = b"<html><body><p>Subscribe to continue reading this story.</p></body></html>"
+THIN = b"<html><body><p>hi</p></body></html>"
+
+
+def _fetcher(mapping):
+    """Build a fake fetcher backed by ``mapping`` (url -> canned response).
+
+    A URL mapped to the sentinel "ERR" raises RuntimeError to simulate a failed
+    fetch; a URL absent from ``mapping`` raises KeyError.
+    """
+    def fetch(url, timeout=8):
+        # timeout is accepted for signature compatibility and otherwise unused
+        body = mapping[url]
+        if body == "ERR":
+            raise RuntimeError("boom")
+        return body
+    return fetch
+
+
+def test_classifies_each_access_state():
+    """Each canned page maps to the access state check_article_access reports for it."""
+    pages = {"r": READABLE, "s": WALLED_SCHEMA, "p": WALLED_PHRASE, "t": THIN, "b": "ERR"}
+    fetch = _fetcher(pages)
+    expected = [
+        ("r", "readable"),
+        ("s", "paywalled"),   # schema.org isAccessibleForFree:false
+        ("p", "paywalled"),   # explicit wall phrase
+        ("t", "unknown"),     # too thin to tell
+        ("b", "blocked"),     # fetch failed
+    ]
+    for url, state in expected:
+        assert check_article_access(url, fetch) == state
+
+
+def test_does_not_falseflag_a_readable_page():
+    """A long article that merely links "subscribe to our newsletter" in the footer
+    must still classify as readable."""
+    paragraphs = b"<p>Lots of real content. </p>" * 100
+    footer = b"<footer>Subscribe to our newsletter</footer>"
+    html = b"<html><body><article>" + paragraphs + footer + b"</article></body></html>"
+    fetch = _fetcher({"x": html})
+    assert check_article_access("x", fetch) == "readable"
+
+
+def _items(urls):
+    """Wrap each URL in a minimal FeedItem titled T0, T1, ... in input order."""
+    items = []
+    for idx, link in enumerate(urls):
+        items.append(feeds.FeedItem(title=f"T{idx}", url=link, description="d", published_at=None))
+    return items
+
+
+def test_preview_verdict_layers_domain_and_sample(monkeypatch):
+    # a non-paywall-domain feed whose sampled articles mostly read fine -> "fine"
+    urls = ["https://good.example/a1", "https://good.example/a2", "https://good.example/a3"]
+    monkeypatch.setattr(feeds, "parse_feed", lambda raw: _items(urls))
+
+    class FakeClient:
+        model = "test"
+        def classify(self, art):
+            return {"accepted": True, "topic": "science", "flavor": "discovery",
+                    "cortisol_score": 1, "ragebait_score": 1, "pr_risk_score": 2}
+
+    def fetcher(url, timeout=10):
+        return READABLE  # every sampled article reads fine
+
+    out = feeds.preview_feed("https://good.example/feed", sample=8, client=FakeClient(), fetcher=fetcher)
+    assert out["paywall_rule"] is False
+    assert out["access"]["readable"] >= 1 and out["access"]["paywalled"] == 0
+    assert out["access_verdict"] == "fine"
+
+
+def test_mostly_blocked_is_review_not_fine(monkeypatch):
+    # bot-blocked sites (readable in a browser, blocked to our fetcher) must NOT read
+    # as 'fine' off one sample, nor as 'reject-ready' — they land in 'review'.
+    urls = [f"https://blocky.example/a{i}" for i in range(6)]
+    monkeypatch.setattr(feeds, "parse_feed", lambda raw: _items(urls))
+
+    class FakeClient:
+        model = "test"
+        def classify(self, art):
+            return {"accepted": True, "topic": "science", "flavor": "discovery",
+                    "cortisol_score": 1, "ragebait_score": 1, "pr_risk_score": 2}
+
+    def fetcher(url, timeout=10):
+        if url.endswith("/feed") or url.endswith("a0"):
+            return READABLE   # the feed fetch + one readable article
+        raise RuntimeError("403 blocked")  # the rest block (bot-blocked)
+
+    out = feeds.preview_feed("https://blocky.example/feed", sample=8, client=FakeClient(), fetcher=fetcher)
+    assert out["access"]["blocked"] >= 4 and out["access"]["readable"] == 1
+    assert out["access_verdict"] == "review"   # thin assessable evidence → not 'fine', not 'reject-ready'
+
+
+def test_source_preview_endpoint_handles_null_rate(tmp_path, monkeypatch):
+    # All-held (non-English) sample → acceptance_rate is None; the legacy
+    # /api/source-preview must not 500 on it (SourcePreview.acceptance_rate is nullable).
+    db = tmp_path / "t.sqlite3"
+    monkeypatch.setenv("GOODNEWS_DB", str(db))
+    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "http://testserver")
+    import importlib
+    import goodnews.api as api
+    importlib.reload(api)
+    from goodnews.db import connect, init_db
+    c = connect(str(db)); init_db(c); c.commit(); c.close()
+    all_held = {
+        "url": "http://x/feed", "sampled": 4, "classified": True, "accepted": 0,
+        "non_english": 4, "acceptance_rate": None, "avg_cortisol": 0.0, "avg_ragebait": 0.0,
+        "avg_pr_risk": 0.0, "newest_published": None, "recent_7d": 0,
+        "topic_mix": {}, "flavor_mix": {}, "examples_accepted": [], "examples_rejected": [],
+    }
+    monkeypatch.setattr(feeds, "preview_feed", lambda *a, **k: all_held)
+    from fastapi.testclient import TestClient
+    r = TestClient(api.create_app()).get("/api/source-preview?url=http://x/feed")
+    assert r.status_code == 200                  # was 500: None rejected by float field
+    assert r.json()["acceptance_rate"] is None
+
+
+def test_one_hung_fetch_does_not_stall_the_preview(monkeypatch):
+    # Codex's wall-clock audit: one article that sleeps WAY past the deadline must
+    # not pin Deep Preview — it returns at the cap, with the slow one left 'unknown'.
+    monkeypatch.setattr(feeds, "_ACCESS_DEADLINE_S", 0.5)   # shrink the cap for the test
+    urls = [f"https://mixed.example/a{i}" for i in range(6)]
+    monkeypatch.setattr(feeds, "parse_feed", lambda raw: _items(urls))
+
+    class FakeClient:
+        model = "test"
+        def classify(self, art):
+            return {"accepted": True, "topic": "science", "flavor": "discovery",
+                    "cortisol_score": 1, "ragebait_score": 1, "pr_risk_score": 2}
+
+    def fetcher(url, timeout=10):
+        if url.endswith("a0"):
+            time.sleep(5)          # one ugly site hangs far past the 0.5s cap
+        return READABLE
+
+    start = time.monotonic()
+    out = feeds.preview_feed("https://mixed.example/feed", sample=8, client=FakeClient(), fetcher=fetcher)
+    elapsed = time.monotonic() - start
+    assert elapsed < 2.5          # returned at the cap (~0.5s), NOT after the 5s sleep
+    # the hung one is 'unknown' (unverified), the rest read fine
+    slow = next(e for e in out["access"]["examples"] if e["url"].endswith("a0"))
+    assert slow["access"] == "unknown"
+    assert out["access"]["readable"] >= 4
@@ -114,3 +114,27 @@ def test_brief_cache_boundary(client):
    assert "public" in client.get("/api/brief").headers.get("cache-control", "")
    assert client.get("/api/brief", params={"prefs": json.dumps({"mute_topics": ["health"]})}).headers.get("cache-control") == "private, no-store"
    assert client.get("/api/brief", params={"exclude": "3"}).headers.get("cache-control") == "private, no-store"
+
+
+def test_search_relevance_source_and_boundaries(client):
+    import os, sqlite3, json as _j
+    # A distinctively-named source proves source-name matching (the NYT use case).
+    c = sqlite3.connect(os.environ["GOODNEWS_DB"])
+    c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (2,'Nature Digest','http://n/f',7)")
+    c.execute("INSERT INTO articles (id,source_id,canonical_url,title,published_at,url_hash) "
+              "VALUES (3,2,'http://n/3','Coral reefs rebound','2026-05-30T10:00:00+00:00','h3')")
+    c.execute("INSERT INTO article_scores (article_id,accepted,topic,flavor) VALUES (3,1,'environment','hopeful')")
+    c.commit(); c.close()
+    # title match (index builds lazily on first search)
+    assert client.get("/api/search?q=coral").json()["items"][0]["id"] == 3
+    # SOURCE-NAME match — searching the publication finds its articles (Codex's requirement)
+    assert 3 in [it["id"] for it in client.get("/api/search?q=nature").json()["items"]]
+    # empty / junk query → empty, no error
+    assert client.get("/api/search?q=").json()["count"] == 0
+    assert client.get("/api/search?q=%20%21%21").json()["count"] == 0
+    # boundary: a muted topic is excluded from search too (mirrors the visitor view)
+    muted = client.get("/api/search", params={"q": "coral", "prefs": _j.dumps({"mute_topics": ["environment"]})}).json()
+    assert muted["count"] == 0
+    # boundary: a hard avoid-term filters a textual match
+    avoided = client.get("/api/search", params={"q": "election", "prefs": _j.dumps({"avoid_terms": ["election"]})}).json()
+    assert all(it["id"] != 2 for it in avoided["items"])
@@ -0,0 +1,255 @@
+"""Bloom — the daily word wheel. Locks the design/acceptance split:
+
+  • DESIGN (deterministic, stored): wheel + tiers + pangram + Full-Bloom target,
+    from the COMMON list. The PERMANENT guardrail — Flourishing reachable with
+    common words — still holds.
+  • ACCEPTANCE (broad + dynamic): every valid word buildable from the wheel,
+    computed live as broad dict ∪ {allow} − {block}; runtime admin overrides +
+    player reports drive curation with no deploy.
+"""
+import os
+from pathlib import Path
+
+import pytest
+from fastapi.testclient import TestClient
+
+from goodnews import bloom, games
+from goodnews.db import connect, init_db
+
+DATES = [f"2026-06-{d:02d}" for d in range(10, 25)]  # 15 sample days
+
+
+@pytest.fixture(scope="module")
+def designs():
+    return {d: bloom.build_puzzle(d) for d in DATES}
+
+
+@pytest.fixture
+def conn(tmp_path):
+    """Yield a connection to a fresh per-test database seeded with one user row.
+
+    The connection is closed on teardown so each test releases its handle on the
+    database file under tmp_path instead of leaking it.
+    """
+    c = connect(str(tmp_path / "t.db"))
+    try:
+        init_db(c)
+        c.execute("INSERT INTO users (email) VALUES ('a@b.c')")
+        c.commit()
+        yield c
+    finally:
+        c.close()
+
+
+def _letters(p):
+    """Return the wheel's full letter set: the center letter plus the outer letters."""
+    return frozenset(p["center"]).union(p["outer"])
+
+
+def _commons_for(p):
+    """COMMON words for a center-mode wheel (the designed puzzle): each word must
+    contain the center letter and use only letters on the wheel."""
+    wheel = _letters(p)
+    center = p["center"]
+    return [word for word in bloom._COMMON if center in word and frozenset(word) <= wheel]
+
+
+def _assert_no_answer_leak(resp):
+    """Assert the response exposes no plaintext answers: no "words" key, and a
+    non-empty "accepted" list made only of 64-char lowercase-hex strings."""
+    hex_digits = set("0123456789abcdef")
+    assert "words" not in resp
+    hashes = resp["accepted"]
+    assert hashes
+    for h in hashes:
+        assert isinstance(h, str) and len(h) == 64 and set(h) <= hex_digits
+
+
+# --- DESIGN (deterministic, common-based) --------------------------------------
+
+def test_build_is_deterministic():
+    """Building the same date twice yields equal puzzles."""
+    first = bloom.build_puzzle("2026-06-15")
+    second = bloom.build_puzzle("2026-06-15")
+    assert first == second
+
+
+@pytest.mark.parametrize("date", DATES)
+def test_design_shape(designs, date):
+    p = designs[date]
+    L = _letters(p)
+    assert len(L) == 7 and "s" not in L
+    assert p["center"] in L and len(p["outer"]) == 6
+    assert bloom.MIN_COMMON_WORDS <= len(_commons_for(p)) <= bloom.MAX_COMMON_WORDS
+    assert frozenset(p["pangram"]) == L                  # display pangram uses all 7
+    assert p["pangram"] in bloom._COMMON and p["pangram"] not in bloom._AVOID
+
+
+@pytest.mark.parametrize("date", DATES)
+def test_PERMANENT_top_tier_reachable_with_common_words(designs, date):
+    """Flourishing reachable from COMMON words alone — never obscure-word hunting."""
+    p = designs[date]
+    flourishing = next(t["score"] for t in p["tiers"] if t["name"] == "Flourishing")
+    assert bloom.score_words(p, _commons_for(p)) >= flourishing
+
+
+def test_tiers_are_8_30_70_of_common_and_max_is_common_total():
+    p = bloom.build_puzzle("2026-06-15")
+    assert [t["name"] for t in p["tiers"]] == ["Sprouting", "Budding", "Blooming", "Flourishing"]
+    common_total = bloom.score_words(p, _commons_for(p))
+    assert p["max_score"] == common_total                # Full Bloom = the designed puzzle
+    flour = next(t["score"] for t in p["tiers"] if t["name"] == "Flourishing")
+    assert flour == int(0.70 * common_total) and flour <= p["max_score"]
+
+
+# --- ACCEPTANCE (broad + dynamic) ----------------------------------------------
+
+def test_accept_is_broad_and_obeys_center_rule(conn):
+    p = bloom.build_puzzle("2026-06-15")
+    acc = bloom.accepted_words(conn, p["center"], p["outer"], require_center=True)
+    L = _letters(p)
+    for w in acc:
+        assert len(w) >= 4 and "s" not in w and frozenset(w) <= L and p["center"] in w
+    # broad accept is a SUPERSET of the common puzzle (bonus words beyond design)
+    assert set(_commons_for(p)) <= set(acc)
+    assert len(acc) > len(_commons_for(p))
+
+def test_arraign_class_words_auto_accepted():
+    """Real-but-rare words are present in bloom.ACCEPT with no include-list."""
+    # broad dict includes real-but-rare words without any include-list
+    accept = set(bloom.ACCEPT)   # build the lookup set once, not once per word
+    for w in ("arraign", "feign", "crwth"):
+        assert w in accept
+
+def test_overrides_block_and_allow(conn):
+    """A block override removes a word from the live accept set, an allow override
+    adds one, and clearing the block restores the original word."""
+    p = bloom.build_puzzle("2026-06-15")
+    acc0 = set(bloom.accepted_words(conn, p["center"], p["outer"], True))
+    victim = sorted(acc0)[0]
+    bloom.set_override(conn, victim, "block", by="t")
+    assert victim not in set(bloom.accepted_words(conn, p["center"], p["outer"], True))
+    # allow a made-up letter-combo that fits the wheel + center
+    fake = (p["center"] + "".join(p["outer"][:3]))[:5]
+    # Assert the precondition instead of guarding on it: an `if` here would let the
+    # allow-path check be skipped silently if the wheel design ever changed.
+    assert "s" not in fake and len(fake) >= 4
+    bloom.set_override(conn, fake, "allow", by="t")
+    assert fake in set(bloom.accepted_words(conn, p["center"], p["outer"], True))
+    bloom.clear_override(conn, victim)
+    assert victim in set(bloom.accepted_words(conn, p["center"], p["outer"], True))
+
+def test_allow_override_rejects_inert_hard_rule_words(conn):
+    # an allow that could never count (too short / has 's') is rejected, not stored
+    assert bloom.set_override(conn, "cat", "allow") is False      # < 4 letters
+    assert bloom.set_override(conn, "roses", "allow") is False    # contains 's'
+    assert bloom.set_override(conn, "bloom", "allow") is True     # valid → stored
+    allow, _ = bloom.overrides(conn)
+    assert allow == {"bloom"}
+    # block stays permissive (can block anything)
+    assert bloom.set_override(conn, "roses", "block") is True
+
+
+def test_wild_accepts_words_without_center(conn):
+    p = bloom.build_free("seed-w", "wild")
+    acc = bloom.accepted_words(conn, p["center"], p["outer"], require_center=False)
+    assert any(p["center"] not in w for w in acc)        # Wild's defining trait
+    assert all(frozenset(w) <= _letters(p) for w in acc)
+
+
+# --- responses + storage -------------------------------------------------------
+
+def test_generate_is_idempotent_and_stored(conn):
+    a = bloom.generate_bloom_puzzle(conn, "2026-06-15")
+    assert a == bloom.generate_bloom_puzzle(conn, "2026-06-15") == bloom.stored_payload(conn, "2026-06-15")
+    assert "words" not in a                              # design payload holds no answers
+
+def test_response_no_leak_and_hash_roundtrip(conn):
+    r = bloom.bloom_response(conn, "2026-06-15")
+    _assert_no_answer_leak(r)
+    p = bloom.stored_payload(conn, "2026-06-15")
+    real = bloom.accepted_words(conn, p["center"], p["outer"], True)[0]
+    assert bloom.word_hash("2026-06-15", real) in set(r["accepted"])
+    assert bloom.word_hash("2026-06-15", "zzzzq") not in set(r["accepted"])
+    assert r["max_score"] == p["max_score"]
+
+def test_free_endpoint_resumes_and_leaks_nothing(api_app):
+    tc = TestClient(api_app)
+    r1 = tc.get("/api/puzzle/bloom/free?format=wild").json()
+    seed = r1["seed"]
+    assert r1["mode"] == "free" and r1["format"] == "wild" and seed
+    r2 = tc.get(f"/api/puzzle/bloom/free?format=wild&seed={seed}").json()
+    assert r2["center"] == r1["center"] and r2["outer"] == r1["outer"]
+    _assert_no_answer_leak(r1)
+
+
+# --- server-side state ---------------------------------------------------------
+
+def test_sanitize_drops_junk_recomputes_score_and_full(conn):
+    p = bloom.generate_bloom_puzzle(conn, "2026-06-15")
+    acc = bloom.accepted_words(conn, p["center"], p["outer"], True)
+    good = acc[:3]
+    clean = games.sanitize_game_state(conn, "bloom", "", "2026-06-15",
+                                      {"found": good + ["zzzz", "ab", good[0], 9], "score": 9999})
+    assert sorted(clean["found"]) == sorted(set(good))
+    assert clean["score"] == bloom.score_words(p, good)
+    assert "full" not in clean
+    # finding the whole common puzzle ⇒ Full Bloom (score ≥ max_score)
+    full = games.sanitize_game_state(conn, "bloom", "", "2026-06-15", {"found": _commons_for(p)})
+    assert full.get("full") is True
+
+def test_merge_unions_found():
+    """Merging two bloom states unions their found words without duplicates."""
+    left = {"found": ["able", "bake"]}
+    right = {"found": ["bake", "tale"]}
+    merged = games.merge_game_state("bloom", left, right)
+    assert sorted(merged["found"]) == ["able", "bake", "tale"]
+
+def test_block_override_takes_effect_without_regen(conn):
+    # the live response reflects an override with no puzzle regeneration
+    p = bloom.generate_bloom_puzzle(conn, "2026-06-15")
+    victim = bloom.accepted_words(conn, p["center"], p["outer"], True)[0]
+    before = set(bloom.bloom_response(conn, "2026-06-15")["accepted"])
+    bloom.set_override(conn, victim, "block", by="t")
+    after = set(bloom.bloom_response(conn, "2026-06-15")["accepted"])
+    assert bloom.word_hash("2026-06-15", victim) in before
+    assert bloom.word_hash("2026-06-15", victim) not in after
+
+
+# --- reports → admin queue → overrides -----------------------------------------
+
+def test_report_then_approve_creates_allow_override(conn):
+    assert bloom.add_report(conn, "arraign", "2026-06-15", "daily", "center", "aceglnr", "not in the word list")
+    assert bloom.add_report(conn, "arraign", "2026-06-15", "daily", "center", "aceglnr", "x")  # dedup pending
+    pending = bloom.list_reports(conn, "pending")
+    assert len(pending) == 1 and pending[0]["word"] == "arraign"
+    assert bloom.resolve_report(conn, pending[0]["id"], "approve", by="admin")
+    allow, _ = bloom.overrides(conn)
+    assert "arraign" in allow
+    assert not bloom.list_reports(conn, "pending")
+    assert bloom.list_reports(conn, "approved")
+
+def test_report_block_creates_block_override(conn):
+    bloom.add_report(conn, "uglyword", None, "free", "wild", "abcdefg", "x")
+    rid = bloom.list_reports(conn, "pending")[0]["id"]
+    bloom.resolve_report(conn, rid, "block", by="admin")
+    _, block = bloom.overrides(conn)
+    assert "uglyword" in block
+
+
+# --- API: public report + admin endpoints --------------------------------------
+
+@pytest.fixture
+def api_app(tmp_path, monkeypatch):
+    """Return a freshly created app wired to a per-test SQLite file, with
+    admin@b.com configured through GOODNEWS_ADMIN_EMAILS."""
+    db = tmp_path / "t.sqlite3"
+    monkeypatch.setenv("GOODNEWS_DB", str(db))
+    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "http://testserver")
+    monkeypatch.setenv("GOODNEWS_ADMIN_EMAILS", "admin@b.com")
+    import importlib
+    import goodnews.api as api
+    # reload only after the env vars are set — presumably goodnews.api reads them
+    # at import time; verify against the module
+    importlib.reload(api)
+    # create the schema up front, then release the handle before the app is built
+    c = connect(str(db)); init_db(c); c.commit(); c.close()
+    return api.create_app()
+
+
+def _admin(app):
+    """Return a TestClient that has completed the email magic-link start/verify
+    flow for admin@b.com, with email sending stubbed to capture the link."""
+    tc = TestClient(app)
+    sent = {}
+    import goodnews.email_send as es
+    orig = es.send_magic_link
+    # swap the sender for a stub that records the link instead of sending mail
+    es.send_magic_link = lambda to, link: sent.update(link=link)
+    try:
+        tc.post("/api/auth/email/start", json={"email": "admin@b.com"})
+        # the token is whatever follows "token=" in the captured link
+        tc.post("/api/auth/email/verify", json={"token": sent["link"].split("token=")[1]})
+    finally:
+        # always restore the real sender so later tests are unaffected
+        es.send_magic_link = orig
+    return tc
+
+
+def test_public_report_then_admin_queue_flow(api_app):
+    pub = TestClient(api_app)
+    assert pub.post("/api/bloom/report", json={"word": "arraign", "date": "2026-06-15",
+                    "mode": "daily", "format": "center", "letters": "aceglnr",
+                    "reason": "not in the word list"}).json()["ok"]
+    # admin-only queue
+    assert TestClient(api_app).get("/api/admin/bloom/reports").status_code == 401
+    tc = _admin(api_app)
+    q = tc.get("/api/admin/bloom/reports").json()
+    assert len(q["reports"]) == 1
+    rid = q["reports"][0]["id"]
+    assert tc.post(f"/api/admin/bloom/reports/{rid}", json={"action": "approve"}).json()["ok"]
+    ovr = tc.get("/api/admin/bloom/reports").json()["overrides"]
+    assert any(o["word"] == "arraign" and o["action"] == "allow" for o in ovr)
@@ -5,6 +5,7 @@ from goodnews.sources import (
    list_candidates,
    promote_candidate,
    reject_candidate,
+    restore_candidate,
    save_candidate,
 )

@@ -32,6 +33,18 @@ def test_re_preview_preserves_curator_status(conn):
    assert list_candidates(conn)[0]["status"] == "rejected"


+def test_restore_sends_rejected_back_to_staging(conn):
+    save_candidate(conn, "http://x/feed")
+    cid = list_candidates(conn)[0]["id"]
+    reject_candidate(conn, cid)
+    assert list_candidates(conn)[0]["status"] == "rejected"
+    # restore → back to staging ('suggested'), re-enters the pending queue
+    assert restore_candidate(conn, cid) is True
+    assert list_candidates(conn)[0]["status"] == "suggested"
+    # restoring a non-rejected candidate is a no-op (only un-rejects)
+    assert restore_candidate(conn, cid) is False
+
+
 def test_promote_creates_inactive_source_and_marks_promoted(conn):
    cand = save_candidate(conn, "http://x/feed", name="Lovely Feed")
    source_id = promote_candidate(conn, cand["id"])  # inactive by default
@@ -0,0 +1,15 @@
+"""CLI honors GOODNEWS_DB for its default --db, matching db.connect. Without this, a
+copy-DB maintenance run (e.g. `dedup --force-recluster`) silently targets production."""
+from pathlib import Path
+
+from goodnews.cli import DEFAULT_DB, _default_db
+
+
+def test_default_db_honors_env(monkeypatch):
+    """With GOODNEWS_DB set, the CLI's default --db resolves to that path."""
+    override = "/tmp/some-copy.sqlite3"
+    monkeypatch.setenv("GOODNEWS_DB", override)
+    assert _default_db() == Path(override)
+
+
+def test_default_db_falls_back_to_bundled(monkeypatch):
+    monkeypatch.delenv("GOODNEWS_DB", raising=False)
+    assert _default_db() == DEFAULT_DB
@@ -35,7 +35,7 @@ def conn():
    c.close()


-def _add(conn, article_id, vector, constructive, when="2026-05-30T10:00:00+00:00"):
+def _add(conn, article_id, vector, constructive, when="2026-05-30T10:00:00+00:00", accepted=1):
    conn.execute(
        "INSERT INTO articles (id, source_id, canonical_url, title, published_at, url_hash) "
        "VALUES (?, 1, ?, ?, ?, ?)",
@@ -44,8 +44,8 @@ def _add(conn, article_id, vector, constructive, when="2026-05-30T10:00:00+00:00
    conn.execute(
        "INSERT INTO article_scores (article_id, constructive_score, agency_score, "
        "human_benefit_score, cortisol_score, ragebait_score, pr_risk_score, accepted) "
-        "VALUES (?, ?, 0, 0, 0, 0, 0, 1)",
-        (article_id, constructive),
+        "VALUES (?, ?, 0, 0, 0, 0, 0, ?)",
+        (article_id, constructive, accepted),
    )
    conn.execute(
        "INSERT INTO article_embeddings (article_id, vector, dim, model) VALUES (?, ?, ?, 'test')",
@@ -69,6 +69,34 @@ def test_near_duplicates_collapse_to_highest_ranked(conn):
    assert dup_of[3] is None  # C stands alone


+def test_accepted_member_beats_a_higher_quality_rejected_one(conn):
+    # The rep must be SERVEABLE: an accepted page may never be retired to a rejected
+    # representative (that page would 404 with nothing to 301 to). Accepted wins even
+    # though the rejected twin scores higher on quality.
+    _add(conn, 1, [1.0, 0.0, 0.0, 0.0], constructive=9, accepted=0)   # higher quality, REJECTED
+    _add(conn, 2, [0.99, 0.02, 0.0, 0.0], constructive=3, accepted=1) # lower quality, accepted
+    cluster_duplicates(conn, threshold=0.86, window_days=3)
+    dup_of = {r["id"]: r["duplicate_of"] for r in conn.execute("SELECT id, duplicate_of FROM articles")}
+    assert dup_of[2] is None   # the accepted article is the representative (serves 200)
+    assert dup_of[1] == 2      # the rejected one points at it
+
+
+def test_established_rep_stays_stable_when_a_better_twin_arrives(conn):
+    # An already-indexed canonical shouldn't churn just because a higher-quality near
+    # duplicate shows up later. Establish 1 as rep (with follower 3), then a stronger 2
+    # arrives — 1 must remain the representative for URL stability.
+    _add(conn, 1, [1.0, 0.0, 0.0, 0.0], constructive=5)
+    _add(conn, 3, [0.99, 0.01, 0.0, 0.0], constructive=1)
+    cluster_duplicates(conn, threshold=0.86, window_days=3)   # run 1: 1 is rep (score 5 > 1)
+    assert conn.execute("SELECT duplicate_of FROM articles WHERE id=1").fetchone()[0] is None
+
+    _add(conn, 2, [0.995, 0.01, 0.0, 0.0], constructive=9)   # higher quality newcomer
+    cluster_duplicates(conn, threshold=0.86, window_days=3)   # run 2
+    dup_of = {r["id"]: r["duplicate_of"] for r in conn.execute("SELECT id, duplicate_of FROM articles")}
+    assert dup_of[1] is None   # incumbent stays canonical despite 2's higher score
+    assert dup_of[2] == 1 and dup_of[3] == 1
+
+
 def test_distinct_articles_are_not_clustered(conn):
    _add(conn, 1, [1.0, 0.0, 0.0, 0.0], constructive=5)
    _add(conn, 2, [0.0, 1.0, 0.0, 0.0], constructive=5)
@@ -0,0 +1,63 @@
+"""English-only gate: non-English articles are HELD (reason_code='non_english'),
+preserved (not deleted) and distinct from calm-filter rejections, so they don't
+penalize a multilingual source and can be revisited when translation lands."""
+from goodnews import queries
+from goodnews.db import connect, init_db
+from goodnews.llm import normalize_scores, upsert_article_score
+
+
+def _data(**kw):
+    """Return a baseline accepted-article score payload, with ``kw`` overriding or
+    extending the defaults."""
+    defaults = {
+        "constructive_score": 7, "cortisol_score": 1, "ragebait_score": 1, "agency_score": 5,
+        "human_benefit_score": 6, "novelty_score": 4, "pr_risk_score": 2, "accepted": True,
+        "topic": "science", "flavor": "discovery", "tags": [],
+        "reason_code": "ok", "reason_text": "good",
+    }
+    return {**defaults, **kw}
+
+
+def test_english_passes_through():
+    s = normalize_scores(_data(language="en"), "m")
+    assert s["accepted"] == 1 and s["reason_code"] == "ok" and s["language"] == "en"
+
+
+def test_en_variants_count_as_english():
+    for lang in ("en-US", "EN", "en_us", "en-GB"):
+        assert normalize_scores(_data(language=lang), "m")["accepted"] == 1
+
+
+def test_non_english_is_held_not_a_rejection():
+    s = normalize_scores(_data(language="de"), "m")
+    assert s["accepted"] == 0
+    assert s["reason_code"] == "non_english"      # distinct bucket, not a calm-filter reject
+    assert s["language"] == "de"
+    assert "non-English" in s["reason_text"]
+
+
+def test_missing_or_unknown_language_defaults_to_english():
+    # a model hiccup must never silently drop genuine English content
+    assert normalize_scores(_data(language=""), "m")["accepted"] == 1
+    assert normalize_scores(_data(language="und"), "m")["accepted"] == 1
+    assert normalize_scores(_data(), "m")["accepted"] == 1   # no language key at all
+
+
+def test_non_english_buckets_even_a_content_reject():
+    # a non-English item that was also content-rejected is still 'held', so source
+    # metrics can separate language-holds from calm rejections cleanly
+    s = normalize_scores(_data(language="es", accepted=False, reason_code="ragebait"), "m")
+    assert s["accepted"] == 0 and s["reason_code"] == "non_english"
+
+
+def test_language_persisted_structurally_and_inspector_marks_held():
+    c = connect(":memory:"); init_db(c)
+    c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'S','http://s/f',5)")
+    c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash) VALUES (1,1,'http://x','T','h1')")
+    c.commit()
+    upsert_article_score(c, 1, normalize_scores(_data(language="de"), "m"))
+    row = c.execute("SELECT accepted, reason_code, language FROM article_scores WHERE article_id=1").fetchone()
+    assert row["language"] == "de" and row["reason_code"] == "non_english" and row["accepted"] == 0  # structured, not parsed
+    # inspector: shows under 'held', flagged held=True, and NOT under 'rejected'
+    held = queries.source_articles(c, 1, filter="held")
+    assert len(held) == 1 and held[0]["held"] is True
+    assert queries.source_articles(c, 1, filter="rejected") == []
@@ -0,0 +1,134 @@
+"""Memory Match server state — light, durability-only (no anti-cheat; the board is
+deterministic and fully visible). Locks: malformed keys dropped, matched stored as
+deduped face KEYS, `done` DERIVED from the matched count vs the tier's target (never
+trusted from the client), cross-device merge unions matched, and the sync endpoint
+accepts only valid match variants."""
+import pytest
+from fastapi.testclient import TestClient
+
+from goodnews import games
+from goodnews.db import connect, init_db
+
+
+def _san(variant, state):
+    return games.sanitize_game_state(None, "match", variant, "2026-06-16", state)
+
+
+def test_sanitize_drops_junk_and_dedupes():
+    s = _san("standard-icons", {
+        "matched": ["leaf", "leaf", "color-rose", "banana", "BAD KEY!", 42, "x" * 40, "sun"],
+        "moves": -5, "done": "yes",
+    })
+    # deduped + validated against the real face set ("banana"/junk dropped), order kept
+    assert s["matched"] == ["leaf", "color-rose", "sun"]
+    assert s["moves"] == 0                                 # clamped ≥ 0
+    assert s["done"] is False                              # 3 < 8 faces — client's "yes" ignored
+
+
+def test_done_is_derived_from_matched_count_not_client_flag():
+    real8 = ["leaf", "sun", "star", "moon", "cloud", "wave", "tree", "heart"]   # real faces
+    # client lies that it's done with no progress → server says not done
+    assert _san("standard-icons", {"matched": [], "done": True})["done"] is False
+    # reaching the tier's face target (standard = 8) → done
+    assert _san("standard-icons", {"matched": real8, "done": False})["done"] is True
+    # gentle target is only 6
+    assert _san("gentle-icons", {"matched": real8[:6]})["done"] is True
+    assert _san("standard-icons", {"matched": real8[:6]})["done"] is False
+
+
+def test_sanitize_caps_face_count():
+    many = ["color-rose", "color-coral", "color-amber", "color-gold", "color-lime",
+            "color-green", "color-teal", "color-cyan", "color-sky", "color-blue",
+            "color-indigo", "color-violet", "color-plum", "color-brown", "color-sand"]  # 15 real
+    s = _san("expert-colors", {"matched": many})
+    assert len(s["matched"]) == 12                          # _MATCH_MAX_FACES
+
+
+def test_merge_unions_matched_and_keeps_moves_without_trusting_done():
+    a = {"matched": ["leaf", "sun"], "moves": 7, "done": False}
+    b = {"matched": ["sun", "star"], "moves": 4, "done": True}
+    m = games.merge_game_state("match", a, b)
+    assert sorted(m["matched"]) == ["leaf", "star", "sun"]  # union
+    assert m["moves"] == 7                                  # larger move count
+    assert "done" not in m                                  # merge doesn't carry done; sanitize derives it
+
+
+@pytest.fixture
+def api_app(tmp_path, monkeypatch):
+    """Return a freshly created API app backed by a new SQLite file under `tmp_path`.
+
+    The environment variables are set before `goodnews.api` is reloaded —
+    presumably because the module reads them at import time (TODO confirm).
+    """
+    db = tmp_path / "t.sqlite3"
+    monkeypatch.setenv("GOODNEWS_DB", str(db))
+    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "http://testserver")
+    monkeypatch.setenv("GOODNEWS_ADMIN_EMAILS", "admin@b.com")
+    import importlib
+    import goodnews.api as api
+    importlib.reload(api)
+    # initialise the database file and release the connection before the app is built
+    c = connect(str(db)); init_db(c); c.commit(); c.close()
+    return api.create_app()
+
+
+def _signin(app, email="p@b.com"):
+    tc = TestClient(app)
+    sent = {}
+    import goodnews.email_send as es
+    orig = es.send_magic_link
+    es.send_magic_link = lambda to, link: sent.update(link=link)
+    try:
+        tc.post("/api/auth/email/start", json={"email": email})
+        tc.post("/api/auth/email/verify", json={"token": sent["link"].split("token=")[1]})
+    finally:
+        es.send_magic_link = orig
+    return tc
+
+
+def _put(tc, variant, state):
+    return tc.put("/api/games/state", json={
+        "game": "match", "variant": variant, "date": "2026-06-16", "state": state})
+
+
+def test_sync_endpoint_flow(api_app):
+    """Walk the single-state sync endpoint through three PUTs as a signed-in user.
+
+    Each PUT goes to the same variant and date, so the later assertions depend
+    on the state left by the earlier requests.
+    """
+    tc = _signin(api_app)
+    r1 = _put(tc, "standard-icons", {"matched": ["leaf", "sun"], "moves": 3, "done": False})
+    assert r1.status_code == 200
+    assert sorted(r1.json()["state"]["matched"]) == ["leaf", "sun"]
+    assert r1.json()["state"]["done"] is False
+    # a second device merges in (still partial → not done)
+    r2 = _put(tc, "standard-icons", {"matched": ["star"], "moves": 1, "done": True})
+    assert sorted(r2.json()["state"]["matched"]) == ["leaf", "star", "sun"]
+    assert r2.json()["state"]["done"] is False              # 3 < 8, client's done ignored
+    # completing the board (8 real faces) → done
+    r3 = _put(tc, "standard-icons",
+              {"matched": ["leaf", "sun", "star", "moon", "cloud", "wave", "tree", "heart"], "moves": 12})
+    assert r3.json()["state"]["done"] is True
+    # unknown variant rejected
+    assert _put(tc, "huge-icons", {}).status_code == 404
+
+
+def test_batch_endpoint_reconciles_many_and_drops_bad(api_app):
+    """Send a three-item batch as a signed-in user, then a follow-up batch.
+
+    The first batch mixes two known game/variant pairs with one unknown variant;
+    the second batch reuses the same variant and date and is expected to merge
+    with what the first one stored.
+    """
+    tc = _signin(api_app)
+    body = {"date": "2026-06-16", "items": [
+        {"game": "match", "variant": "standard-icons", "state": {"matched": ["leaf", "sun"], "moves": 2}},
+        {"game": "bloom", "variant": "", "state": {"found": []}},
+        {"game": "match", "variant": "bogus-xyz", "state": {}},        # unknown variant → dropped
+    ]}
+    r = tc.put("/api/games/state/batch", json=body)
+    assert r.status_code == 200
+    states = r.json()["states"]
+    variants = {(s["game"], s["variant"]) for s in states}
+    assert ("match", "standard-icons") in variants
+    assert ("bloom", "") in variants
+    assert ("match", "bogus-xyz") not in variants            # invalid item dropped, not fatal
+    m = next(s for s in states if s["variant"] == "standard-icons")
+    assert sorted(m["state"]["matched"]) == ["leaf", "sun"]   # merged + sanitized
+    # a second device merges via the same batch path
+    r2 = tc.put("/api/games/state/batch", json={"date": "2026-06-16", "items": [
+        {"game": "match", "variant": "standard-icons", "state": {"matched": ["star"], "moves": 5}}]})
+    m2 = r2.json()["states"][0]["state"]
+    assert sorted(m2["matched"]) == ["leaf", "star", "sun"] and m2["moves"] == 5
+
+
+def test_batch_endpoint_signed_out_echoes(api_app):
+    from fastapi.testclient import TestClient
+    r = TestClient(api_app).put("/api/games/state/batch", json={"date": "2026-06-16", "items": [
+        {"game": "match", "variant": "gentle-colors", "state": {"matched": ["color-rose"]}}]})
+    assert r.status_code == 200
+    assert r.json()["states"][0]["state"] == {"matched": ["color-rose"]}  # echo, no sync
@@ -0,0 +1,329 @@
+"""Publishing Desk Phase 1 — queue logic, top-up/dedup semantics, comparative LLM
+ranking with deterministic fallback, verified handle resolution, status transitions."""
+from datetime import datetime, timedelta, timezone
+
+import pytest
+
+from goodnews import publishing
+from goodnews.db import connect, init_db
+
+BASE = "https://ub.test"
+
+
+def _ts(hours_ago: float) -> str:
+    return (datetime.now(timezone.utc) - timedelta(hours=hours_ago)).strftime("%Y-%m-%d %H:%M:%S")
+
+
+@pytest.fixture
+def conn():
+    c = connect(":memory:"); init_db(c)
+    yield c
+    c.close()
+
+
+def _src(c, sid, x_handle=None, paywall_override=None, content_visible=1):
+    c.execute(
+        "INSERT INTO sources (id,name,feed_url,trust_score,content_visible,x_handle,paywall_override) "
+        "VALUES (?,?,?,?,?,?,?)",
+        (sid, f"Source {sid}", f"http://s{sid}/feed", 5, content_visible, x_handle, paywall_override),
+    )
+
+
+def _article(c, aid, sid, *, accepted=1, dup=None, novelty=5, constructive=5, topic="science",
+             url=None, image="http://img/x.jpg", hours_ago=1.0, complete=True):
+    c.execute(
+        "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url,published_at) "
+        "VALUES (?,?,?,?,?,?,?)",
+        (aid, sid, url or f"https://ex{aid}.com/a", f"Title {aid}", f"h{aid}", image, _ts(hours_ago)),
+    )
+    if dup is not None:
+        c.execute("UPDATE articles SET duplicate_of=? WHERE id=?", (dup, aid))
+    c.execute(
+        "INSERT INTO article_scores (article_id,accepted,novelty_score,constructive_score,topic,reason_code) "
+        "VALUES (?,?,?,?,?, 'ok')", (aid, accepted, novelty, constructive, topic),
+    )
+    if complete:
+        c.execute(
+            "INSERT INTO article_summaries (article_id,summary,what_happened,why_matters,why_belongs) "
+            "VALUES (?,?,?,?,?)", (aid, f"Summary {aid}", "wh", "wm", "wb"),
+        )
+
+
+def _seed_n(c, n):
+    """n eligible articles, each from its own source (so diversity caps don't interfere)."""
+    for i in range(1, n + 1):
+        _src(c, i)
+        _article(c, i, i, novelty=10 - i, topic=f"t{i}")
+    c.commit()
+
+
+class FakeClient:
+    def __init__(self, ranked):
+        self._ranked = ranked
+    def rank_for_social(self, candidates):
+        return self._ranked
+
+
+class BoomClient:
+    def rank_for_social(self, candidates):
+        raise RuntimeError("model down")
+
+
+# --- handle resolution ----------------------------------------------------------
+
+def test_handles_source_first_then_entities_deduped_capped(conn):
+    publishing.add_entity_handle(conn, "Anthropic", "AnthropicAI", "https://x.com/AnthropicAI")
+    publishing.add_entity_handle(conn, "NASA", "NASA")
+    out = publishing.resolve_handles(conn, ["Anthropic", "NASA", "Unknown Org"], source_handle="Phys_org")
+    assert out[0]["via"] == "source" and out[0]["handle"] == "@Phys_org"
+    assert len(out) == 2                                   # capped at 2
+    assert out[1]["handle"] == "@AnthropicAI"              # first matched entity; NASA dropped by cap
+    assert all(h["handle"].startswith("@") for h in out)
+
+
+def test_handles_aliases_resolve_consistently(conn):
+    publishing.add_entity_handle(conn, "Johns Hopkins University", "HopkinsMedicine")
+    publishing.add_entity_handle(conn, "Johns Hopkins", "HopkinsMedicine")   # alias row, same handle
+    a = publishing.resolve_handles(conn, ["Johns Hopkins University"])
+    b = publishing.resolve_handles(conn, ["johns hopkins"])
+    assert a and b and a[0]["handle"] == b[0]["handle"] == "@HopkinsMedicine"
+
+
+def test_handles_unknown_entity_is_not_guessed(conn):
+    assert publishing.resolve_handles(conn, ["Some Random Startup"]) == []
+
+
+def test_normalization_does_not_collide_identity_words(conn):
+    # a handle stored for the SCHOOL must not get suggested for the STATE
+    publishing.add_entity_handle(conn, "University of California", "UCBerkeley")
+    assert publishing.resolve_handles(conn, ["California"]) == []                 # no false match
+    got = publishing.resolve_handles(conn, ["University of California"])
+    assert got and got[0]["handle"] == "@UCBerkeley"                              # exact still resolves
+
+
+def test_normalization_preserves_the_and_strips_only_trailing_legal(conn):
+    """Pin three `normalize_entity` outputs, then check "The Who" and "WHO" stay separate."""
+    # "the" is never dropped, and legal suffixes only strip from the END
+    assert publishing.normalize_entity("The Who") == "the who"        # not "who"
+    assert publishing.normalize_entity("Inc. Magazine") == "inc magazine"  # leading legal kept
+    assert publishing.normalize_entity("Apple Inc") == "apple"        # trailing legal stripped
+    # so "The Who" and "WHO" resolve to their OWN handles, no cross-match
+    publishing.add_entity_handle(conn, "The Who", "TheWho")
+    publishing.add_entity_handle(conn, "WHO", "WHO")
+    assert publishing.resolve_handles(conn, ["The Who"])[0]["handle"] == "@TheWho"
+    assert publishing.resolve_handles(conn, ["WHO"])[0]["handle"] == "@WHO"
+
+
+def test_invalid_handles_are_rejected_not_stored(conn):
+    """Malformed handles fail `valid_handle` and `add_entity_handle`; "@NASA" is accepted.
+
+    The bad inputs cover an empty string, a lone "@", embedded spaces, a full
+    URL, punctuation, and an over-long name.
+    """
+    for bad in ("", "@", "not a handle", "https://x.com/NASA", "NASA!", "way_too_long_handle_x"):
+        assert publishing.valid_handle(bad) is None
+        assert publishing.add_entity_handle(conn, "Some Org", bad) is False
+    # good ones: tolerate one leading @, store canonical
+    assert publishing.valid_handle("@NASA") == "NASA"
+    assert publishing.add_entity_handle(conn, "NASA", "@NASA") is True
+    assert publishing.resolve_handles(conn, ["NASA"])[0]["handle"] == "@NASA"
+    # a junk source handle is never suggested either
+    assert publishing.resolve_handles(conn, [], source_handle="@bad handle!") == []
+
+
+# --- eligibility ----------------------------------------------------------------
+
+def test_eligibility_excludes_the_unfit(conn):
+    _src(c=conn, sid=1)
+    _article(conn, 1, 1)                       # eligible
+    _article(conn, 2, 1, accepted=0)           # rejected
+    _article(conn, 3, 1, dup=1)                # duplicate
+    _article(conn, 4, 1, complete=False)       # no complete summary
+    _article(conn, 5, 1, hours_ago=24 * 10)    # too old
+    _src(conn, 2, content_visible=0)
+    _article(conn, 6, 2)                       # source hidden
+    _src(conn, 3, paywall_override="paywalled")
+    _article(conn, 7, 3)                       # paywalled
+    conn.commit()
+    ids = {c["id"] for c in publishing.eligible_candidates(conn)}
+    assert ids == {1}
+
+
+# --- build: deterministic fallback + top-up/dedup -------------------------------
+
+def test_build_tops_up_to_target_and_dedups(conn):
+    """Build to a target of 3 from six seeded articles, rebuild, post one, rebuild again.
+
+    The three builds share one connection, so each assertion depends on the
+    queue state left by the previous step.
+    """
+    _seed_n(conn, 6)
+    r1 = publishing.build_queue(conn, BASE, client=None, target=3)
+    assert r1["added"] == 3 and r1["ranked_by"] == "deterministic"
+    q = publishing.list_queue(conn)
+    assert len(q) == 3 and all(i["share_url"].startswith(BASE + "/a/") for i in q)
+    assert "utm_source=x" in q[0]["share_url"]
+
+    # rebuild at same target → already full → adds nothing (no duplicates)
+    assert publishing.build_queue(conn, BASE, client=None, target=3)["added"] == 0
+
+    # post one → a slot frees → next rebuild tops up with a NEW article, never the posted one
+    posted_id = q[0]["id"]; posted_article = q[0]["article_id"]
+    publishing.set_status(conn, posted_id, "posted")
+    r3 = publishing.build_queue(conn, BASE, client=None, target=3)
+    assert r3["added"] == 1
+    active_articles = {i["article_id"] for i in publishing.list_queue(conn)}
+    assert posted_article not in active_articles            # posted never re-queued
+
+
+def test_build_preserves_saved_draft_on_requeue(conn):
+    """Save a draft, force an expired snooze via SQL, rebuild, and check the draft text."""
+    # a snoozed item that becomes eligible again must keep its draft text
+    _seed_n(conn, 1)
+    publishing.build_queue(conn, BASE, client=None, target=1)
+    sid = publishing.list_queue(conn)[0]["id"]
+    publishing.save_draft(conn, sid, "my carefully written blurb")
+    # force an EXPIRED snooze directly (set_status rightly refuses a past date)
+    conn.execute("UPDATE outbound_shares SET status='snoozed', snooze_until=? WHERE id=?", (_ts(1), sid))
+    conn.commit()
+    publishing.build_queue(conn, BASE, client=None, target=1)          # re-queues it
+    row = conn.execute("SELECT status, draft_text FROM outbound_shares WHERE id=?", (sid,)).fetchone()
+    assert row["status"] == "queued" and row["draft_text"] == "my carefully written blurb"
+
+
+# --- build: comparative LLM ranking + fallback ----------------------------------
+
+def test_build_uses_llm_ranking_and_attaches_fields(conn):
+    """Build with a canned ranking and check the top item carries the ranked fields.
+
+    `FakeClient` returns article 3 ahead of article 1; a NASA handle is stored
+    first so the top item's `entities` entry has something to resolve against.
+    """
+    _seed_n(conn, 3)
+    publishing.add_entity_handle(conn, "NASA", "NASA")
+    ranked = [
+        {"id": 3, "social_score": 9, "why": "wow", "talking_points": ["a", "b", "c"],
+         "angle": "ask a question", "entities": ["NASA"]},
+        {"id": 1, "social_score": 4, "why": "ok", "talking_points": [], "angle": "", "entities": []},
+    ]
+    r = publishing.build_queue(conn, BASE, client=FakeClient(ranked), target=2)
+    assert r["ranked_by"] == "llm" and r["added"] == 2
+    q = publishing.list_queue(conn)
+    top = q[0]
+    assert top["article_id"] == 3 and top["social_score"] == 9       # LLM order wins
+    assert top["talking_points"] == ["a", "b", "c"] and top["angle"] == "ask a question"
+    assert any(h["handle"] == "@NASA" for h in top["suggested_handles"])
+
+
+def test_build_falls_back_when_llm_errors(conn):
+    _seed_n(conn, 3)
+    r = publishing.build_queue(conn, BASE, client=BoomClient(), target=2)
+    assert r["ranked_by"] == "deterministic" and r["added"] == 2     # model down ≠ broken Desk
+
+
+def test_deterministic_fallback_seeds_aids_but_leaves_score_and_angle_empty(conn):
+    """Build with `client=None` and check which queue-item fields are filled.
+
+    The expected values ("Summary 1", "wh"/"wm"/"wb") are the ones `_article`
+    writes into `article_summaries` for article 1.
+    """
+    # Codex Fix-1: with no LLM, the card still carries writing aids (rationale +
+    # talking points from the already-generated summary), but interest score and
+    # angle stay None on purpose — those are LLM-only judgments, never manufactured.
+    _seed_n(conn, 1)
+    publishing.build_queue(conn, BASE, client=None, target=1)
+    item = publishing.list_queue(conn)[0]
+    assert item["rationale"] == "Summary 1"                  # seeded from the summary
+    assert item["talking_points"] == ["wh", "wm", "wb"]      # seeded from the explanation
+    assert item["social_score"] is None and item["angle"] is None   # LLM-only, left empty
+
+
+# --- adversarial: malformed LLM output ------------------------------------------
+
+def test_duplicate_llm_ids_do_not_inflate_the_queue(conn):
+    # the model repeats id 1; only 2 real articles exist. added/active must reflect
+    # ACTUAL unique rows, never the inflated loop count Codex saw.
+    _seed_n(conn, 2)
+    ranked = [{"id": 1, "social_score": 9}, {"id": 1, "social_score": 9},
+              {"id": 1, "social_score": 9}, {"id": 2, "social_score": 5}]
+    r = publishing.build_queue(conn, BASE, client=FakeClient(ranked), target=5)
+    q = publishing.list_queue(conn)
+    assert r["added"] == len(q) == 2                          # not 5, not 3
+    assert len({i["article_id"] for i in q}) == 2            # unique articles
+
+
+def test_string_fields_do_not_become_char_arrays(conn):
+    # model returns strings where lists are expected; build must store [], not ['f','a'..]
+    _seed_n(conn, 1)
+    ranked = [{"id": 1, "social_score": 7, "talking_points": "fact", "entities": "NASA"}]
+    publishing.build_queue(conn, BASE, client=FakeClient(ranked), target=1)
+    item = publishing.list_queue(conn)[0]
+    assert item["talking_points"] == [] and item["entities"] == []
+
+
+# --- lifecycle enforcement ------------------------------------------------------
+
+def test_posted_is_terminal_and_cannot_be_requeued(conn):
+    """After an item is posted, `set_status(..., "queued")` and `restore` both return False."""
+    _seed_n(conn, 1)
+    publishing.build_queue(conn, BASE, client=None, target=1)
+    sid = publishing.list_queue(conn)[0]["id"]
+    assert publishing.set_status(conn, sid, "posted") is True
+    assert publishing.set_status(conn, sid, "queued") is False        # no resurrection
+    assert publishing.restore(conn, sid) is False                     # restore won't revive posted
+    # the stored status is read back directly to confirm nothing changed it
+    assert conn.execute("SELECT status FROM outbound_shares WHERE id=?", (sid,)).fetchone()["status"] == "posted"
+
+
+def test_late_autosave_is_rejected_after_terminal(conn):
+    """A `save_draft` call made after posting returns False and leaves the text untouched."""
+    # Codex Fix-2: a debounced autosave that lands AFTER the item is posted must
+    # not write to the terminal row (no clobbering what was actually published).
+    _seed_n(conn, 1)
+    publishing.build_queue(conn, BASE, client=None, target=1)
+    sid = publishing.list_queue(conn)[0]["id"]
+    assert publishing.save_draft(conn, sid, "draft while active") is True
+    publishing.set_status(conn, sid, "posted")
+    assert publishing.save_draft(conn, sid, "late autosave") is False   # no-op on terminal
+    row = conn.execute("SELECT draft_text FROM outbound_shares WHERE id=?", (sid,)).fetchone()
+    assert row["draft_text"] == "draft while active"        # the late write was ignored
+
+
+def test_posted_rows_never_appear_in_queue_or_archived_tray(conn):
+    # Codex Fix-4: posted history is terminal and excluded everywhere the UI lists
+    # rows — neither the working queue nor the archived tray ever grows with it.
+    _seed_n(conn, 1)
+    publishing.build_queue(conn, BASE, client=None, target=1)
+    sid = publishing.list_queue(conn)[0]["id"]
+    publishing.set_status(conn, sid, "posted")
+    assert publishing.list_queue(conn) == []                            # not in working queue
+    assert publishing.list_queue(conn, include_archived=True) == []     # not in archived tray
+
+
+def test_snooze_requires_a_future_date(conn):
+    """Snoozing with no date or a past date returns False; a future date returns True.
+
+    `_ts` subtracts its argument from now, so `_ts(1)` is an hour ago and
+    `_ts(-48)` is 48 hours ahead.
+    """
+    _seed_n(conn, 1)
+    publishing.build_queue(conn, BASE, client=None, target=1)
+    sid = publishing.list_queue(conn)[0]["id"]
+    assert publishing.set_status(conn, sid, "snoozed", snooze_until=None) is False    # null
+    assert publishing.set_status(conn, sid, "snoozed", snooze_until=_ts(1)) is False  # past
+    assert publishing.set_status(conn, sid, "snoozed", snooze_until=_ts(-48)) is True # future
+    # leaving snooze later (via restore) clears the date
+    publishing.restore(conn, sid)
+    assert conn.execute("SELECT snooze_until FROM outbound_shares WHERE id=?", (sid,)).fetchone()["snooze_until"] is None
+
+
+# --- status transitions + restore + snooze --------------------------------------
+
+def test_skip_is_reversible_and_snooze_is_separate(conn):
+    """Skip and restore one queued item, then snooze the other and check it leaves the queue."""
+    _seed_n(conn, 2)
+    publishing.build_queue(conn, BASE, client=None, target=2)
+    q = publishing.list_queue(conn)
+    a, b = q[0]["id"], q[1]["id"]
+    publishing.set_status(conn, a, "skipped")
+    assert a not in {i["id"] for i in publishing.list_queue(conn)}    # gone from working queue
+    assert a in {i["id"] for i in publishing.list_queue(conn, include_archived=True)}  # but in the tray
+    assert publishing.restore(conn, a) is True
+    assert a in {i["id"] for i in publishing.list_queue(conn)}        # restored
+
+    # snooze: not in working queue, holds a snooze_until, restorable
+    # NOTE(review): restoring the snoozed item is not exercised in this test
+    publishing.set_status(conn, b, "snoozed", snooze_until=_ts(-24))  # 24h in the future
+    row = conn.execute("SELECT status, snooze_until FROM outbound_shares WHERE id=?", (b,)).fetchone()
+    assert row["status"] == "snoozed" and row["snooze_until"]
+    assert b not in {i["id"] for i in publishing.list_queue(conn)}
+
+
+def test_inflight_build_does_not_clobber_a_freshly_extended_snooze(conn):
+    """Extend a snooze from inside the ranking call and check the build leaves it alone.
+
+    The item starts with an expired snooze written straight to the table; the
+    `RaceClient` stub then rewrites `snooze_until` to a future value while
+    `build_queue` is running.
+    """
+    # Build snapshots eligibility, then the model ranks. If the user RE-SNOOZES to the
+    # future mid-rank, the finished build must NOT revive it (only EXPIRED snoozes revive).
+    _seed_n(conn, 1)
+    publishing.build_queue(conn, BASE, client=None, target=1)
+    sid = publishing.list_queue(conn)[0]["id"]
+    conn.execute("UPDATE outbound_shares SET status='snoozed', snooze_until=? WHERE id=?", (_ts(1), sid))  # expired → eligible
+    conn.commit()
+    future = _ts(-48)   # 48h ahead
+
+    class RaceClient:
+        # ranking stub that mutates the row on the same connection before answering
+        def rank_for_social(self, candidates):
+            # mid-build interleave: user extends the snooze into the future
+            conn.execute("UPDATE outbound_shares SET snooze_until=? WHERE id=?", (future, sid))
+            conn.commit()
+            return [{"id": 1, "social_score": 9}]
+
+    publishing.build_queue(conn, BASE, client=RaceClient(), target=1)
+    row = conn.execute("SELECT status, snooze_until FROM outbound_shares WHERE id=?", (sid,)).fetchone()
+    assert row["status"] == "snoozed" and row["snooze_until"] == future   # left alone, not re-queued
@@ -0,0 +1,118 @@
+"""Publishing Desk Phase 1 — admin API: gating, background build (deterministic
+fallback), lifecycle enforcement, snooze validation, draft preservation, restore."""
+from datetime import datetime, timedelta, timezone
+
+import pytest
+from fastapi.testclient import TestClient
+
+from goodnews.db import connect, init_db
+
+
+def _future(hours: int = 24) -> str:
+    return (datetime.now(timezone.utc) + timedelta(hours=hours)).strftime("%Y-%m-%d %H:%M:%S")
+
+
+def _recent() -> str:
+    return (datetime.now(timezone.utc) - timedelta(hours=1)).strftime("%Y-%m-%d %H:%M:%S")
+
+
+@pytest.fixture
+def api_app(tmp_path, monkeypatch):
+    """Return a freshly created API app whose database holds one seeded article.
+
+    The environment variables are set before `goodnews.api` is reloaded —
+    presumably because the module reads them at import time (TODO confirm).
+    The seed is one source, one article published an hour ago, an accepted
+    score row and a summary row with every explanation column filled.
+    """
+    db = tmp_path / "t.sqlite3"
+    monkeypatch.setenv("GOODNEWS_DB", str(db))
+    # http (not https) so the session cookie isn't Secure-only — TestClient runs over http
+    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "http://testserver")
+    monkeypatch.setenv("GOODNEWS_ADMIN_EMAILS", "admin@b.com")
+    monkeypatch.setenv("GOODNEWS_LLM_BASE_URL", "http://127.0.0.1:9")  # dead → deterministic fallback, fast
+    import importlib
+    import goodnews.api as api
+    importlib.reload(api)
+    c = connect(str(db)); init_db(c)
+    # one eligible article (accepted, visible, complete summary, recent, readable)
+    c.execute("INSERT INTO sources (id,name,feed_url,trust_score,content_visible) VALUES (1,'S','http://s/f',5,1)")
+    c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url,published_at) "
+              "VALUES (1,1,'https://ex.com/a','Title','h1','http://img/x.jpg',?)", (_recent(),))
+    c.execute("INSERT INTO article_scores (article_id,accepted,novelty_score,constructive_score,topic,reason_code) "
+              "VALUES (1,1,7,7,'science','ok')")
+    c.execute("INSERT INTO article_summaries (article_id,summary,what_happened,why_matters,why_belongs) "
+              "VALUES (1,'Sum','wh','wm','wb')")
+    c.commit(); c.close()
+    return api.create_app()
+
+
+def _admin(app):
+    tc = TestClient(app)
+    sent = {}
+    import goodnews.email_send as es
+    orig = es.send_magic_link
+    es.send_magic_link = lambda to, link: sent.update(link=link)
+    try:
+        tc.post("/api/auth/email/start", json={"email": "admin@b.com"})
+        tc.post("/api/auth/email/verify", json={"token": sent["link"].split("token=")[1]})
+    finally:
+        es.send_magic_link = orig
+    return tc
+
+
+def test_admin_gating(api_app):
+    anon = TestClient(api_app)
+    assert anon.get("/api/admin/publishing/queue").status_code == 401
+    assert anon.post("/api/admin/publishing/build").status_code == 401
+
+
+def test_build_then_queue_deterministic(api_app):
+    """Trigger a build as admin, read the queue, then build again and re-read it."""
+    tc = _admin(api_app)
+    assert tc.post("/api/admin/publishing/build").json() == {"building": True}
+    # TestClient runs the background task before returning; LLM URL is dead → fallback.
+    q = tc.get("/api/admin/publishing/queue").json()
+    assert q["building"] is False and q["last"]["ranked_by"] == "deterministic"
+    assert len(q["items"]) == 1 and q["items"][0]["article_id"] == 1
+    assert "utm_source=x" in q["items"][0]["share_url"]
+    # a second build is a no-op (already full) — never duplicates
+    tc.post("/api/admin/publishing/build")
+    assert len(tc.get("/api/admin/publishing/queue").json()["items"]) == 1
+
+
+def _one(tc):
+    tc.post("/api/admin/publishing/build")
+    return tc.get("/api/admin/publishing/queue").json()["items"][0]["id"]
+
+
+def test_invalid_transition_rejected(api_app):
+    tc = _admin(api_app)
+    sid = _one(tc)
+    assert tc.post(f"/api/admin/publishing/{sid}/status", json={"status": "posted"}).status_code == 200
+    # posted is terminal — resurrection refused
+    assert tc.post(f"/api/admin/publishing/{sid}/status", json={"status": "queued"}).status_code == 400
+
+
+def test_snooze_validation(api_app):
+    tc = _admin(api_app)
+    sid = _one(tc)
+    assert tc.post(f"/api/admin/publishing/{sid}/status", json={"status": "snoozed"}).status_code == 400  # null
+    assert tc.post(f"/api/admin/publishing/{sid}/status",
+                   json={"status": "snoozed", "snooze_until": "2000-01-01 00:00:00"}).status_code == 400  # past
+    assert tc.post(f"/api/admin/publishing/{sid}/status",
+                   json={"status": "snoozed", "snooze_until": _future()}).status_code == 200
+
+
+def test_draft_preserved_through_skip_and_restore(api_app):
+    """Save a draft, skip the item, restore it, and check the draft text is still there."""
+    tc = _admin(api_app)
+    sid = _one(tc)
+    assert tc.post(f"/api/admin/publishing/{sid}/draft", json={"draft_text": "my blurb"}).status_code == 200
+    assert tc.post(f"/api/admin/publishing/{sid}/status", json={"status": "skipped"}).status_code == 200
+    assert sid not in {i["id"] for i in tc.get("/api/admin/publishing/queue").json()["items"]}  # left the queue
+    assert tc.post(f"/api/admin/publishing/{sid}/restore").status_code == 200
+    items = tc.get("/api/admin/publishing/queue").json()["items"]
+    # raises StopIteration if the restored item is missing from the queue
+    back = next(i for i in items if i["id"] == sid)
+    assert back["draft_text"] == "my blurb"     # work survived skip→restore
+
+
+def test_save_handle_validates(api_app):
+    tc = _admin(api_app)
+    assert tc.post("/api/admin/publishing/handles",
+                   json={"entity_name": "NASA", "handle": "@not a handle"}).status_code == 400
+    assert tc.post("/api/admin/publishing/handles",
+                   json={"entity_name": "NASA", "handle": "https://x.com/NASA"}).status_code == 400
+    assert tc.post("/api/admin/publishing/handles",
+                   json={"entity_name": "NASA", "handle": "@NASA"}).status_code == 200
@@ -42,9 +42,56 @@ def test_share_page_missing_and_malformed(client):
    assert tc.get("/a/999").status_code == 404      # unknown
    assert tc.get("/a/not-a-number").status_code == 404  # malformed → calm 404
    assert tc.get("/a/2").status_code == 404         # rejected article
-    assert tc.get("/a/3").status_code == 404         # duplicate
+
+
+def test_share_page_duplicate_redirects_to_canonical(client):
+    # article 3 is a duplicate of the live article 1 — its URL may be indexed, so it
+    # 301s to the canonical (consolidates) rather than 404ing and dropping from Google.
+    r = TestClient(client).get("/a/3", follow_redirects=False)
+    assert r.status_code == 301 and r.headers["location"] == "/a/1"


 def test_share_page_no_image_uses_summary_card(client, tmp_path, monkeypatch):
     """Check that the /a/1 share page contains the `summary_large_image` marker."""
     # NOTE(review): only the with-image case is asserted in the lines shown here;
     # `tmp_path` and `monkeypatch` are not used by this body.
     # article 1 has an image → large card
     assert 'summary_large_image' in TestClient(client).get("/a/1").text
+
+
+def test_incomplete_page_is_not_cached(client):
+    """GET /a/1 returns 200 with `Cache-Control: no-cache` and adds no `_SHARE_CACHE` entry."""
+    # article 1 has no summary/explanation → "generating" page must not be cached,
+    # and carries no-cache so it re-fetches once the summary lands.
+    import goodnews.api as api
+    r = TestClient(client).get("/a/1")
+    assert r.status_code == 200
+    assert r.headers.get("cache-control") == "no-cache"
+    assert 1 not in api._SHARE_CACHE
+
+
+@pytest.fixture
+def app_complete(tmp_path, monkeypatch):
+    """Seed one accepted article with a full summary and return the reloaded `api` module.
+
+    Unlike a fixture that returns an app, this one returns the module itself, so
+    the test can call `create_app()` and also inspect `api._SHARE_CACHE`.
+    """
+    db = tmp_path / "t.sqlite3"
+    monkeypatch.setenv("GOODNEWS_DB", str(db))
+    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")
+    import importlib
+    import goodnews.api as api
+    importlib.reload(api)
+    from goodnews.db import connect, init_db
+    c = connect(str(db)); init_db(c)
+    c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'BBC','http://s/f',5)")
+    c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url) "
+              "VALUES (1,1,'https://bbc.com/x','Water voles return','h1','https://img/v.jpg')")
+    c.execute("INSERT INTO article_scores (article_id,accepted,reason_text) VALUES (1,1,'Hopeful.')")
+    # every explanation column is filled, so the page counts as complete
+    c.execute("INSERT INTO article_summaries (article_id,summary,what_happened,why_matters,why_belongs) "
+              "VALUES (1,'Voles are back.','They returned to the river.','Biodiversity rebound.','Quietly hopeful.')")
+    c.commit(); c.close()
+    return api
+
+
+def test_complete_page_is_cached_and_served_from_cache(app_complete):
+    api = app_complete
+    tc = TestClient(api.create_app())
+    r1 = tc.get("/a/1")
+    assert r1.status_code == 200
+    assert r1.headers.get("cache-control") == "public, max-age=300"
+    assert 1 in api._SHARE_CACHE                       # finished page cached
+    r2 = tc.get("/a/1")                                # second hit served from cache
+    assert r2.status_code == 200 and r2.text == r1.text
@@ -0,0 +1,79 @@
+"""Share page /a/{id}: a duplicate article 301-redirects to its canonical twin
+instead of 404ing. A hard 404 silently drops already-indexed URLs from Google, which
+tanked impressions when a newer duplicate retired an older, indexed page."""
+import pytest
+from fastapi.testclient import TestClient
+
+
+@pytest.fixture
+def client(tmp_path, monkeypatch):
+    db = tmp_path / "t.sqlite3"
+    monkeypatch.setenv("GOODNEWS_DB", str(db))
+    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")
+    import importlib
+    import goodnews.api as api
+    importlib.reload(api)
+    from goodnews.db import connect, init_db
+    c = connect(str(db)); init_db(c)
+    c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'BBC','http://s/f',5)")
+
+    def art(aid, *, accepted=1, dup=None, summary=True):
+        c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,published_at) "
+                  "VALUES (?,1,?,?,?,'2026-06-05T08:00:00')",
+                  (aid, f"https://bbc.com/{aid}", f"Story {aid}", f"h{aid}"))
+        if dup is not None:
+            c.execute("UPDATE articles SET duplicate_of=? WHERE id=?", (dup, aid))
+        c.execute("INSERT INTO article_scores (article_id,accepted,reason_text) VALUES (?,?,'x')", (aid, accepted))
+        if summary:
+            c.execute("INSERT INTO article_summaries (article_id,summary) VALUES (?,?)", (aid, f"Summary {aid}."))
+
+    art(10)                       # canonical, live  -> 200
+    art(11, dup=10)               # duplicate of a live canonical -> 301 /a/10
+    art(12, accepted=0, dup=10)   # REJECTED follower of an accepted rep -> still 301 /a/10
+    art(20, accepted=0)           # rejected canonical
+    art(21, dup=20)               # dup of a REJECTED canonical -> 404 (genuinely gone)
+    art(30, accepted=0)           # rejected, not a duplicate -> 404
+    c.commit(); c.close()
+    return api.create_app()
+
+
+def test_canonical_serves_200(client):
+    r = TestClient(client).get("/a/10")
+    assert r.status_code == 200 and "Story 10" in r.text
+
+
+def test_duplicate_301s_to_canonical(client):
+    r = TestClient(client).get("/a/11", follow_redirects=False)
+    assert r.status_code == 301
+    assert r.headers["location"] == "/a/10"     # consolidates onto the survivor
+
+
+def test_rejected_follower_of_accepted_rep_still_301s(client):
+    # Policy: the route resolves duplicate_of BEFORE the follower's own acceptance, so a
+    # rejected article that points at an ACCEPTED representative 301s to it rather than
+    # 404ing. That's intentional — it sends the visitor/crawler to a serveable equivalent.
+    r = TestClient(client).get("/a/12", follow_redirects=False)
+    assert r.status_code == 301 and r.headers["location"] == "/a/10"
+
+
+def test_duplicate_of_rejected_canonical_404s(client):
+    r = TestClient(client).get("/a/21", follow_redirects=False)
+    assert r.status_code == 404                  # nothing serveable to redirect to
+
+
+def test_rejected_article_404s(client):
+    assert TestClient(client).get("/a/30").status_code == 404
+
+
+def test_missing_article_404s(client):
+    assert TestClient(client).get("/a/9999").status_code == 404
+
+
+def test_head_matches_get_status(client):
+    # HEAD must return the same status as GET (not fall through to the static mount and
+    # 404). Some crawlers/link-checkers probe with HEAD.
+    tc = TestClient(client)
+    assert tc.head("/a/10").status_code == 200                              # canonical
+    r = tc.head("/a/11", follow_redirects=False)
+    assert r.status_code == 301 and r.headers["location"] == "/a/10"        # duplicate
+    assert tc.head("/a/9999").status_code == 404                            # missing