Sync repo to deployed state: SEO recovery, Publishing Desk, Play games, emoji picker

The deploy pipeline runs from the working tree, so a wave of shipped features
had never been committed. This snapshots git to what's actually running.

SEO impression recovery (live + verified):
- Duplicate /a/{id} pages now 301-redirect to their canonical twin instead of returning 404
  (a hard 404 silently dropped already-indexed URLs and tanked impressions).
- Dedup representative selection reworked: accepted/serveable -> established
  rep (URL stability) -> quality score, so an accepted page never retires to a
  rejected rep and an indexed canonical doesn't churn when a newer twin arrives.
- HEAD /a/{id} returns the same status as GET (api_route GET+HEAD) instead of
  falling through to the static mount and 404ing.
- `dedup --force-recluster`: cycle-locked, model-free re-cluster to re-apply the
  policy to the existing corpus (shared cycle_lock context manager).
- CLI honors GOODNEWS_DB for its default --db (was silently ignored).

Publishing Desk (admin tool to post highlights to X via Web Intents):
- publishing.py queue/rank/handle-resolution; admin UI; full searchable emoji
  picker (bundled data, no CDN) for the blurb editor.

Play games + site:
- Bloom (word-wheel), Memory Match, daily ritual set, Zen Den (dev-gated).
- English-only language gate; source prospecting; paywall + dedup hardening.

Tests: full suite green (349). Ignores tightened (node_modules, data/*.db).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-18 11:32:27 -04:00
parent 2dbe73430c
commit 89c0fbe1f6
66 changed files with 6138 additions and 109 deletions
+134
View File
@@ -0,0 +1,134 @@
"""Deep-preview accessibility check — content-level readable/paywalled/blocked/unknown,
and the layered verdict (domain rule + sampled access, evidence over domain alone)."""
import time
from goodnews import feeds
from goodnews.paywall import check_article_access
# Canned HTML payloads handed out by the fake fetchers in the tests below.
# Long article body (80 repeated paragraphs); the tests expect "readable".
READABLE = b"<html><body><article>" + (b"<p>Real article text here. </p>" * 80) + b"</article></body></html>"
# Teaser page whose JSON-LD declares isAccessibleForFree: false; the tests expect "paywalled".
WALLED_SCHEMA = b'<html><head><script type="application/ld+json">{"isAccessibleForFree": false}</script></head><body><p>teaser</p></body></html>'
# Page carrying an explicit wall phrase; the tests expect "paywalled".
WALLED_PHRASE = b"<html><body><p>Subscribe to continue reading this story.</p></body></html>"
# Near-empty page; the tests expect "unknown" (too thin to tell).
THIN = b"<html><body><p>hi</p></body></html>"
def _fetcher(mapping):
def f(url, timeout=8):
if mapping.get(url) == "ERR":
raise RuntimeError("boom")
return mapping[url]
return f
def test_classifies_each_access_state():
    """check_article_access maps every canned page to its expected access state."""
    fetch = _fetcher({"r": READABLE, "s": WALLED_SCHEMA, "p": WALLED_PHRASE, "t": THIN, "b": "ERR"})
    expectations = (
        ("r", "readable"),
        ("s", "paywalled"),  # schema.org isAccessibleForFree:false
        ("p", "paywalled"),  # explicit wall phrase
        ("t", "unknown"),    # too thin to tell
        ("b", "blocked"),    # fetch failed
    )
    for url, state in expectations:
        assert check_article_access(url, fetch) == state
def test_does_not_falseflag_a_readable_page():
    """A long article with a footer newsletter link must still classify as readable."""
    # The word "Subscribe" appears only in the footer, after 100 real paragraphs.
    page = b"".join([
        b"<html><body><article>",
        b"<p>Lots of real content. </p>" * 100,
        b"<footer>Subscribe to our newsletter</footer></article></body></html>",
    ])
    fetch = _fetcher({"x": page})
    assert check_article_access("x", fetch) == "readable"
def _items(urls):
    """Wrap each URL in a feeds.FeedItem titled T0, T1, ... with a stub description."""
    items = []
    for index, link in enumerate(urls):
        items.append(
            feeds.FeedItem(title=f"T{index}", url=link, description="d", published_at=None)
        )
    return items
def test_preview_verdict_layers_domain_and_sample(monkeypatch):
    """A non-paywall-domain feed whose sampled articles read fine gets the "fine" verdict."""
    urls = [f"https://good.example/a{n}" for n in (1, 2, 3)]
    monkeypatch.setattr(feeds, "parse_feed", lambda raw: _items(urls))

    class FakeClient:
        model = "test"

        def classify(self, art):
            # Every article is accepted with calm scores.
            return dict(
                accepted=True, topic="science", flavor="discovery",
                cortisol_score=1, ragebait_score=1, pr_risk_score=2,
            )

    def fetcher(url, timeout=10):
        # every sampled article reads fine
        return READABLE

    preview = feeds.preview_feed(
        "https://good.example/feed", sample=8, client=FakeClient(), fetcher=fetcher
    )
    access = preview["access"]
    assert preview["paywall_rule"] is False
    assert access["readable"] >= 1
    assert access["paywalled"] == 0
    assert preview["access_verdict"] == "fine"
def test_mostly_blocked_is_review_not_fine(monkeypatch):
    """Mostly-blocked samples land in 'review', neither 'fine' nor 'reject-ready'.

    Bot-blocked sites (readable in a browser, blocked to our fetcher) must not
    read as 'fine' off a single readable sample.
    """
    urls = ["https://blocky.example/a%d" % i for i in range(6)]
    monkeypatch.setattr(feeds, "parse_feed", lambda raw: _items(urls))

    class FakeClient:
        model = "test"

        def classify(self, art):
            return dict(
                accepted=True, topic="science", flavor="discovery",
                cortisol_score=1, ragebait_score=1, pr_risk_score=2,
            )

    def fetcher(url, timeout=10):
        # The feed fetch and exactly one article succeed; the rest are bot-blocked.
        if not url.endswith(("/feed", "a0")):
            raise RuntimeError("403 blocked")
        return READABLE

    preview = feeds.preview_feed(
        "https://blocky.example/feed", sample=8, client=FakeClient(), fetcher=fetcher
    )
    access = preview["access"]
    assert access["blocked"] >= 4
    assert access["readable"] == 1
    # thin assessable evidence → not 'fine', not 'reject-ready'
    assert preview["access_verdict"] == "review"
def test_source_preview_endpoint_handles_null_rate(tmp_path, monkeypatch):
    """The legacy /api/source-preview endpoint answers 200 when acceptance_rate is None.

    An all-held (non-English) sample yields acceptance_rate=None; the endpoint
    used to 500 because None was rejected by a float field.
    """
    db_path = tmp_path / "t.sqlite3"
    environment = (
        ("GOODNEWS_DB", str(db_path)),
        ("GOODNEWS_PUBLIC_BASE_URL", "http://testserver"),
    )
    for key, value in environment:
        monkeypatch.setenv(key, value)

    # Reloaded after the env vars are set (presumably so module-level config
    # in goodnews.api picks them up).
    import importlib
    import goodnews.api as api
    importlib.reload(api)

    from goodnews.db import connect, init_db
    handle = connect(str(db_path))
    init_db(handle)
    handle.commit()
    handle.close()

    all_held = {
        "url": "http://x/feed",
        "sampled": 4,
        "classified": True,
        "accepted": 0,
        "non_english": 4,
        "acceptance_rate": None,
        "avg_cortisol": 0.0,
        "avg_ragebait": 0.0,
        "avg_pr_risk": 0.0,
        "newest_published": None,
        "recent_7d": 0,
        "topic_mix": {},
        "flavor_mix": {},
        "examples_accepted": [],
        "examples_rejected": [],
    }
    monkeypatch.setattr(feeds, "preview_feed", lambda *a, **k: all_held)

    from fastapi.testclient import TestClient
    http = TestClient(api.create_app())
    response = http.get("/api/source-preview?url=http://x/feed")
    assert response.status_code == 200  # was 500: None rejected by float field
    assert response.json()["acceptance_rate"] is None
def test_one_hung_fetch_does_not_stall_the_preview(monkeypatch):
    """One fetch sleeping far past the deadline must not pin Deep Preview.

    Codex's wall-clock audit: the preview returns at the cap, with the slow
    article left 'unknown' and the others counted as readable.
    """
    # shrink the cap for the test
    monkeypatch.setattr(feeds, "_ACCESS_DEADLINE_S", 0.5)
    urls = ["https://mixed.example/a%d" % i for i in range(6)]
    monkeypatch.setattr(feeds, "parse_feed", lambda raw: _items(urls))

    class FakeClient:
        model = "test"

        def classify(self, art):
            return dict(
                accepted=True, topic="science", flavor="discovery",
                cortisol_score=1, ragebait_score=1, pr_risk_score=2,
            )

    def fetcher(url, timeout=10):
        hangs = url.endswith("a0")
        if hangs:
            # one ugly site hangs far past the 0.5s cap
            time.sleep(5)
        return READABLE

    started = time.monotonic()
    preview = feeds.preview_feed(
        "https://mixed.example/feed", sample=8, client=FakeClient(), fetcher=fetcher
    )
    elapsed = time.monotonic() - started

    # returned at the cap (~0.5s), NOT after the 5s sleep
    assert elapsed < 2.5
    # the hung one is 'unknown' (unverified), the rest read fine
    examples = preview["access"]["examples"]
    slow = next(entry for entry in examples if entry["url"].endswith("a0"))
    assert slow["access"] == "unknown"
    assert preview["access"]["readable"] >= 4
+24
View File
@@ -114,3 +114,27 @@ def test_brief_cache_boundary(client):
assert "public" in client.get("/api/brief").headers.get("cache-control", "")
assert client.get("/api/brief", params={"prefs": json.dumps({"mute_topics": ["health"]})}).headers.get("cache-control") == "private, no-store"
assert client.get("/api/brief", params={"exclude": "3"}).headers.get("cache-control") == "private, no-store"
def test_search_relevance_source_and_boundaries(client):
    """/api/search matches titles and source names and respects visitor boundaries.

    Seeds one extra source and article directly into the database named by
    GOODNEWS_DB, then checks title match, source-name match, empty/junk
    queries, muted topics and avoid-terms.
    """
    import json as _j
    import os
    import sqlite3
    from contextlib import closing

    # A distinctively-named source proves source-name matching (the NYT use case).
    # closing() releases the seeding connection even if an INSERT raises;
    # sqlite3's own context manager only manages the transaction, not the handle.
    with closing(sqlite3.connect(os.environ["GOODNEWS_DB"])) as c:
        c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (2,'Nature Digest','http://n/f',7)")
        c.execute("INSERT INTO articles (id,source_id,canonical_url,title,published_at,url_hash) "
                  "VALUES (3,2,'http://n/3','Coral reefs rebound','2026-05-30T10:00:00+00:00','h3')")
        c.execute("INSERT INTO article_scores (article_id,accepted,topic,flavor) VALUES (3,1,'environment','hopeful')")
        c.commit()

    # title match (index builds lazily on first search)
    assert client.get("/api/search?q=coral").json()["items"][0]["id"] == 3
    # SOURCE-NAME match — searching the publication finds its articles (Codex's requirement)
    assert 3 in [it["id"] for it in client.get("/api/search?q=nature").json()["items"]]
    # empty / junk query → empty, no error
    assert client.get("/api/search?q=").json()["count"] == 0
    assert client.get("/api/search?q=%20%21%21").json()["count"] == 0
    # boundary: a muted topic is excluded from search too (mirrors the visitor view)
    muted = client.get(
        "/api/search",
        params={"q": "coral", "prefs": _j.dumps({"mute_topics": ["environment"]})},
    ).json()
    assert muted["count"] == 0
    # boundary: a hard avoid-term filters a textual match
    avoided = client.get(
        "/api/search",
        params={"q": "election", "prefs": _j.dumps({"avoid_terms": ["election"]})},
    ).json()
    assert all(it["id"] != 2 for it in avoided["items"])
+255
View File
@@ -0,0 +1,255 @@
"""Bloom — the daily word wheel. Locks the design/acceptance split:
• DESIGN (deterministic, stored): wheel + tiers + pangram + Full-Bloom target,
from the COMMON list. The PERMANENT guardrail — Flourishing reachable with
common words — still holds.
• ACCEPTANCE (broad + dynamic): every valid word buildable from the wheel,
computed live as broad dict {allow} {block}; runtime admin overrides +
player reports drive curation with no deploy.
"""
import os
from pathlib import Path
import pytest
from fastapi.testclient import TestClient
from goodnews import bloom, games
from goodnews.db import connect, init_db
# Consecutive ISO dates 2026-06-10 … 2026-06-24, used to build and parametrize the design tests.
DATES = [f"2026-06-{d:02d}" for d in range(10, 25)] # 15 sample days
@pytest.fixture(scope="module")
def designs():
    """Build each sample day's puzzle once per module, keyed by date string."""
    built = {}
    for day in DATES:
        built[day] = bloom.build_puzzle(day)
    return built
@pytest.fixture
def conn(tmp_path):
    """Yield a connection to a fresh, initialised temp database holding one user row.

    The connection is closed on teardown (and if setup fails) so each test's
    database handle is released instead of being leaked.
    """
    c = connect(str(tmp_path / "t.db"))
    try:
        init_db(c)
        c.execute("INSERT INTO users (email) VALUES ('a@b.c')")
        c.commit()
        yield c
    finally:
        c.close()
def _letters(p):
return frozenset(p["center"]) | frozenset(p["outer"])
def _commons_for(p):
    """List the COMMON words that contain the center letter and fit on the wheel."""
    wheel = _letters(p)
    commons = []
    for word in bloom._COMMON:
        if p["center"] in word and frozenset(word) <= wheel:
            commons.append(word)
    return commons
def _assert_no_answer_leak(resp):
assert "words" not in resp
assert resp["accepted"] and all(
isinstance(h, str) and len(h) == 64 and set(h) <= set("0123456789abcdef")
for h in resp["accepted"])
# --- DESIGN (deterministic, common-based) --------------------------------------
def test_build_is_deterministic():
    """Building the same date twice yields equal puzzles."""
    first = bloom.build_puzzle("2026-06-15")
    second = bloom.build_puzzle("2026-06-15")
    assert first == second
@pytest.mark.parametrize("date", DATES)
def test_design_shape(designs, date):
    """Each designed wheel has 7 s-free letters, a bounded common list and a common pangram."""
    puzzle = designs[date]
    wheel = _letters(puzzle)
    assert len(wheel) == 7
    assert "s" not in wheel
    assert puzzle["center"] in wheel
    assert len(puzzle["outer"]) == 6
    common_count = len(_commons_for(puzzle))
    assert bloom.MIN_COMMON_WORDS <= common_count <= bloom.MAX_COMMON_WORDS
    pangram = puzzle["pangram"]
    assert frozenset(pangram) == wheel  # display pangram uses all 7
    assert pangram in bloom._COMMON
    assert pangram not in bloom._AVOID
@pytest.mark.parametrize("date", DATES)
def test_PERMANENT_top_tier_reachable_with_common_words(designs, date):
    """The Flourishing tier is reachable using COMMON words only — no obscure-word hunting."""
    puzzle = designs[date]
    threshold = next(
        tier["score"] for tier in puzzle["tiers"] if tier["name"] == "Flourishing"
    )
    common_score = bloom.score_words(puzzle, _commons_for(puzzle))
    assert common_score >= threshold
def test_tiers_are_8_30_70_of_common_and_max_is_common_total():
    """Tier names are fixed, max_score equals the common total, Flourishing is 70% of it."""
    puzzle = bloom.build_puzzle("2026-06-15")
    tier_names = [tier["name"] for tier in puzzle["tiers"]]
    assert tier_names == ["Sprouting", "Budding", "Blooming", "Flourishing"]

    common_total = bloom.score_words(puzzle, _commons_for(puzzle))
    # Full Bloom = the designed puzzle
    assert puzzle["max_score"] == common_total

    flourishing = next(
        tier["score"] for tier in puzzle["tiers"] if tier["name"] == "Flourishing"
    )
    assert flourishing == int(0.70 * common_total)
    assert flourishing <= puzzle["max_score"]
# --- ACCEPTANCE (broad + dynamic) ----------------------------------------------
def test_accept_is_broad_and_obeys_center_rule(conn):
    """Accepted words obey the hard rules and strictly extend the common puzzle."""
    puzzle = bloom.build_puzzle("2026-06-15")
    accepted = bloom.accepted_words(conn, puzzle["center"], puzzle["outer"], require_center=True)
    wheel = _letters(puzzle)
    for word in accepted:
        assert len(word) >= 4
        assert "s" not in word
        assert frozenset(word) <= wheel
        assert puzzle["center"] in word
    # broad accept is a SUPERSET of the common puzzle (bonus words beyond design)
    commons = _commons_for(puzzle)
    assert set(commons) <= set(accepted)
    assert len(accepted) > len(commons)
def test_arraign_class_words_auto_accepted():
    """Real-but-rare words are in the broad dictionary without any include-list."""
    broad = set(bloom.ACCEPT)
    for rare in ("arraign", "feign", "crwth"):
        assert rare in broad
def test_overrides_block_and_allow(conn):
    """Block removes a word, allow adds one, and clearing a block restores the word."""
    puzzle = bloom.build_puzzle("2026-06-15")
    center, outer = puzzle["center"], puzzle["outer"]

    def accepted_now():
        # Recomputed on every call so each check sees the current overrides.
        return set(bloom.accepted_words(conn, center, outer, True))

    victim = sorted(accepted_now())[0]
    bloom.set_override(conn, victim, "block", by="t")
    assert victim not in accepted_now()

    # allow a made-up letter-combo that fits the wheel + center
    fake = (center + "".join(outer[:3]))[:5]
    if len(fake) >= 4 and "s" not in fake:
        bloom.set_override(conn, fake, "allow", by="t")
        assert fake in accepted_now()

    bloom.clear_override(conn, victim)
    assert victim in accepted_now()
def test_allow_override_rejects_inert_hard_rule_words(conn):
    """An allow that could never count is rejected and not stored; block stays permissive."""
    too_short = bloom.set_override(conn, "cat", "allow")    # < 4 letters
    assert too_short is False
    has_s = bloom.set_override(conn, "roses", "allow")      # contains 's'
    assert has_s is False
    valid = bloom.set_override(conn, "bloom", "allow")      # valid → stored
    assert valid is True

    allowed, _blocked = bloom.overrides(conn)
    assert allowed == {"bloom"}

    # block stays permissive (can block anything)
    assert bloom.set_override(conn, "roses", "block") is True
def test_wild_accepts_words_without_center(conn):
    """Wild format accepts at least one word lacking the center; all words fit the wheel."""
    puzzle = bloom.build_free("seed-w", "wild")
    accepted = bloom.accepted_words(conn, puzzle["center"], puzzle["outer"], require_center=False)
    # Wild's defining trait
    centerless = [word for word in accepted if puzzle["center"] not in word]
    assert any(True for _ in centerless)
    for word in accepted:
        assert frozenset(word) <= _letters(puzzle)
# --- responses + storage -------------------------------------------------------
def test_generate_is_idempotent_and_stored(conn):
    """Generating twice returns the same payload, which matches the stored one."""
    first = bloom.generate_bloom_puzzle(conn, "2026-06-15")
    second = bloom.generate_bloom_puzzle(conn, "2026-06-15")
    assert first == second
    stored = bloom.stored_payload(conn, "2026-06-15")
    assert second == stored
    # design payload holds no answers
    assert "words" not in first
def test_response_no_leak_and_hash_roundtrip(conn):
    """The response carries hashes only; a real word's hash is present, a junk word's is not."""
    day = "2026-06-15"
    response = bloom.bloom_response(conn, day)
    _assert_no_answer_leak(response)

    payload = bloom.stored_payload(conn, day)
    real_word = bloom.accepted_words(conn, payload["center"], payload["outer"], True)[0]
    assert bloom.word_hash(day, real_word) in set(response["accepted"])
    assert bloom.word_hash(day, "zzzzq") not in set(response["accepted"])
    assert response["max_score"] == payload["max_score"]
def test_free_endpoint_resumes_and_leaks_nothing(api_app):
    """Re-requesting a free puzzle with its seed returns the same wheel; no answers leak."""
    http = TestClient(api_app)
    fresh = http.get("/api/puzzle/bloom/free?format=wild").json()
    seed = fresh["seed"]
    assert fresh["mode"] == "free"
    assert fresh["format"] == "wild"
    assert seed

    resumed = http.get(f"/api/puzzle/bloom/free?format=wild&seed={seed}").json()
    assert resumed["center"] == fresh["center"]
    assert resumed["outer"] == fresh["outer"]
    _assert_no_answer_leak(fresh)
# --- server-side state ---------------------------------------------------------
def test_sanitize_drops_junk_recomputes_score_and_full(conn):
    """Sanitizing drops junk and duplicates, recomputes score, and derives Full Bloom."""
    day = "2026-06-15"
    puzzle = bloom.generate_bloom_puzzle(conn, day)
    accepted = bloom.accepted_words(conn, puzzle["center"], puzzle["outer"], True)
    good = accepted[:3]

    # Three valid words plus junk, a duplicate, a non-string and an inflated score.
    dirty = {"found": good + ["zzzz", "ab", good[0], 9], "score": 9999}
    clean = games.sanitize_game_state(conn, "bloom", "", day, dirty)
    assert sorted(clean["found"]) == sorted(set(good))
    assert clean["score"] == bloom.score_words(puzzle, good)
    assert "full" not in clean

    # finding the whole common puzzle ⇒ Full Bloom (score ≥ max_score)
    everything = {"found": _commons_for(puzzle)}
    full = games.sanitize_game_state(conn, "bloom", "", day, everything)
    assert full.get("full") is True
def test_merge_unions_found():
    """Merging two bloom states unions their found words."""
    left = {"found": ["able", "bake"]}
    right = {"found": ["bake", "tale"]}
    merged = games.merge_game_state("bloom", left, right)
    assert sorted(merged["found"]) == ["able", "bake", "tale"]
def test_block_override_takes_effect_without_regen(conn):
    """A block override shows up in the live response with no puzzle regeneration."""
    day = "2026-06-15"
    puzzle = bloom.generate_bloom_puzzle(conn, day)
    victim = bloom.accepted_words(conn, puzzle["center"], puzzle["outer"], True)[0]

    hashes_before = set(bloom.bloom_response(conn, day)["accepted"])
    bloom.set_override(conn, victim, "block", by="t")
    hashes_after = set(bloom.bloom_response(conn, day)["accepted"])

    assert bloom.word_hash(day, victim) in hashes_before
    assert bloom.word_hash(day, victim) not in hashes_after
# --- reports → admin queue → overrides -----------------------------------------
def test_report_then_approve_creates_allow_override(conn):
    """Duplicate pending reports collapse to one; approving it creates an allow override."""
    report = ("arraign", "2026-06-15", "daily", "center", "aceglnr")
    assert bloom.add_report(conn, *report, "not in the word list")
    # dedup pending
    assert bloom.add_report(conn, *report, "x")

    pending = bloom.list_reports(conn, "pending")
    assert len(pending) == 1
    assert pending[0]["word"] == "arraign"

    assert bloom.resolve_report(conn, pending[0]["id"], "approve", by="admin")
    allowed, _blocked = bloom.overrides(conn)
    assert "arraign" in allowed
    assert not bloom.list_reports(conn, "pending")
    assert bloom.list_reports(conn, "approved")
def test_report_block_creates_block_override(conn):
    """Resolving a report with 'block' records the word as a block override."""
    bloom.add_report(conn, "uglyword", None, "free", "wild", "abcdefg", "x")
    first_pending = bloom.list_reports(conn, "pending")[0]
    bloom.resolve_report(conn, first_pending["id"], "block", by="admin")
    _allowed, blocked = bloom.overrides(conn)
    assert "uglyword" in blocked
# --- API: public report + admin endpoints --------------------------------------
@pytest.fixture
def api_app(tmp_path, monkeypatch):
    """Create the API app against a fresh temp database with admin@b.com as admin."""
    db_path = tmp_path / "t.sqlite3"
    environment = {
        "GOODNEWS_DB": str(db_path),
        "GOODNEWS_PUBLIC_BASE_URL": "http://testserver",
        "GOODNEWS_ADMIN_EMAILS": "admin@b.com",
    }
    for key, value in environment.items():
        monkeypatch.setenv(key, value)

    # Reloaded after the env vars are set (presumably so module-level config
    # in goodnews.api picks them up).
    import importlib
    import goodnews.api as api
    importlib.reload(api)

    handle = connect(str(db_path))
    init_db(handle)
    handle.commit()
    handle.close()
    return api.create_app()
def _admin(app):
    """Return a TestClient signed in as admin@b.com through the magic-link flow.

    The email sender is swapped for a capturing stub during sign-in and is
    always restored afterwards.
    """
    import goodnews.email_send as es

    tc = TestClient(app)
    captured = {}

    def _capture(to, link):
        # Keep the link that would have been emailed.
        captured["link"] = link

    real_sender = es.send_magic_link
    es.send_magic_link = _capture
    try:
        tc.post("/api/auth/email/start", json={"email": "admin@b.com"})
        token = captured["link"].split("token=")[1]
        tc.post("/api/auth/email/verify", json={"token": token})
    finally:
        es.send_magic_link = real_sender
    return tc
def test_public_report_then_admin_queue_flow(api_app):
    """A public report reaches the admin-only queue; approving it yields an allow override."""
    report = {
        "word": "arraign",
        "date": "2026-06-15",
        "mode": "daily",
        "format": "center",
        "letters": "aceglnr",
        "reason": "not in the word list",
    }
    public = TestClient(api_app)
    assert public.post("/api/bloom/report", json=report).json()["ok"]

    # admin-only queue
    anonymous = TestClient(api_app)
    assert anonymous.get("/api/admin/bloom/reports").status_code == 401

    admin = _admin(api_app)
    queue = admin.get("/api/admin/bloom/reports").json()
    assert len(queue["reports"]) == 1
    report_id = queue["reports"][0]["id"]
    approval = admin.post(f"/api/admin/bloom/reports/{report_id}", json={"action": "approve"})
    assert approval.json()["ok"]

    overrides = admin.get("/api/admin/bloom/reports").json()["overrides"]
    assert any(
        entry["word"] == "arraign" and entry["action"] == "allow" for entry in overrides
    )
+13
View File
@@ -5,6 +5,7 @@ from goodnews.sources import (
list_candidates,
promote_candidate,
reject_candidate,
restore_candidate,
save_candidate,
)
@@ -32,6 +33,18 @@ def test_re_preview_preserves_curator_status(conn):
assert list_candidates(conn)[0]["status"] == "rejected"
def test_restore_sends_rejected_back_to_staging(conn):
    """Restoring a rejected candidate returns it to 'suggested'; a second restore is a no-op."""
    def current_status():
        return list_candidates(conn)[0]["status"]

    save_candidate(conn, "http://x/feed")
    candidate_id = list_candidates(conn)[0]["id"]
    reject_candidate(conn, candidate_id)
    assert current_status() == "rejected"

    # restore → back to staging ('suggested'), re-enters the pending queue
    assert restore_candidate(conn, candidate_id) is True
    assert current_status() == "suggested"

    # restoring a non-rejected candidate is a no-op (only un-rejects)
    assert restore_candidate(conn, candidate_id) is False
def test_promote_creates_inactive_source_and_marks_promoted(conn):
cand = save_candidate(conn, "http://x/feed", name="Lovely Feed")
source_id = promote_candidate(conn, cand["id"]) # inactive by default
+15
View File
@@ -0,0 +1,15 @@
"""CLI honors GOODNEWS_DB for its default --db, matching db.connect. Without this, a
copy-DB maintenance run (e.g. `dedup --force-recluster`) silently targets production."""
from pathlib import Path
from goodnews.cli import DEFAULT_DB, _default_db
def test_default_db_honors_env(monkeypatch):
    """With GOODNEWS_DB set, _default_db() returns that path."""
    override = "/tmp/some-copy.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", override)
    resolved = _default_db()
    assert resolved == Path(override)
def test_default_db_falls_back_to_bundled(monkeypatch):
    """With GOODNEWS_DB unset, _default_db() returns DEFAULT_DB."""
    # raising=False: the variable may already be absent from the environment.
    monkeypatch.delenv("GOODNEWS_DB", raising=False)
    resolved = _default_db()
    assert resolved == DEFAULT_DB
+31 -3
View File
@@ -35,7 +35,7 @@ def conn():
c.close()
def _add(conn, article_id, vector, constructive, when="2026-05-30T10:00:00+00:00"):
def _add(conn, article_id, vector, constructive, when="2026-05-30T10:00:00+00:00", accepted=1):
conn.execute(
"INSERT INTO articles (id, source_id, canonical_url, title, published_at, url_hash) "
"VALUES (?, 1, ?, ?, ?, ?)",
@@ -44,8 +44,8 @@ def _add(conn, article_id, vector, constructive, when="2026-05-30T10:00:00+00:00
conn.execute(
"INSERT INTO article_scores (article_id, constructive_score, agency_score, "
"human_benefit_score, cortisol_score, ragebait_score, pr_risk_score, accepted) "
"VALUES (?, ?, 0, 0, 0, 0, 0, 1)",
(article_id, constructive),
"VALUES (?, ?, 0, 0, 0, 0, 0, ?)",
(article_id, constructive, accepted),
)
conn.execute(
"INSERT INTO article_embeddings (article_id, vector, dim, model) VALUES (?, ?, ?, 'test')",
@@ -69,6 +69,34 @@ def test_near_duplicates_collapse_to_highest_ranked(conn):
assert dup_of[3] is None # C stands alone
def test_accepted_member_beats_a_higher_quality_rejected_one(conn):
    """The representative must be serveable: accepted wins over a higher-scoring reject.

    An accepted page may never be retired to a rejected representative (that
    page would 404 with nothing to 301 to).
    """
    _add(conn, 1, [1.0, 0.0, 0.0, 0.0], constructive=9, accepted=0)    # higher quality, REJECTED
    _add(conn, 2, [0.99, 0.02, 0.0, 0.0], constructive=3, accepted=1)  # lower quality, accepted
    cluster_duplicates(conn, threshold=0.86, window_days=3)

    dup_of = {}
    for row in conn.execute("SELECT id, duplicate_of FROM articles"):
        dup_of[row["id"]] = row["duplicate_of"]

    assert dup_of[2] is None  # the accepted article is the representative (serves 200)
    assert dup_of[1] == 2     # the rejected one points at it
def test_established_rep_stays_stable_when_a_better_twin_arrives(conn):
    """An established representative stays canonical when a stronger twin arrives later.

    Article 1 becomes rep (with follower 3); a higher-quality 2 then arrives
    and must not displace it, for URL stability.
    """
    settings = {"threshold": 0.86, "window_days": 3}

    _add(conn, 1, [1.0, 0.0, 0.0, 0.0], constructive=5)
    _add(conn, 3, [0.99, 0.01, 0.0, 0.0], constructive=1)
    cluster_duplicates(conn, **settings)  # run 1: 1 is rep (score 5 > 1)
    incumbent = conn.execute("SELECT duplicate_of FROM articles WHERE id=1").fetchone()
    assert incumbent[0] is None

    _add(conn, 2, [0.995, 0.01, 0.0, 0.0], constructive=9)  # higher quality newcomer
    cluster_duplicates(conn, **settings)  # run 2

    dup_of = {}
    for row in conn.execute("SELECT id, duplicate_of FROM articles"):
        dup_of[row["id"]] = row["duplicate_of"]

    assert dup_of[1] is None  # incumbent stays canonical despite 2's higher score
    assert dup_of[2] == 1
    assert dup_of[3] == 1
def test_distinct_articles_are_not_clustered(conn):
_add(conn, 1, [1.0, 0.0, 0.0, 0.0], constructive=5)
_add(conn, 2, [0.0, 1.0, 0.0, 0.0], constructive=5)
+63
View File
@@ -0,0 +1,63 @@
"""English-only gate: non-English articles are HELD (reason_code='non_english'),
preserved (not deleted) and distinct from calm-filter rejections, so they don't
penalize a multilingual source and can be revisited when translation lands."""
from goodnews import queries
from goodnews.db import connect, init_db
from goodnews.llm import normalize_scores, upsert_article_score
def _data(**kw):
base = {
"constructive_score": 7, "cortisol_score": 1, "ragebait_score": 1, "agency_score": 5,
"human_benefit_score": 6, "novelty_score": 4, "pr_risk_score": 2, "accepted": True,
"topic": "science", "flavor": "discovery", "tags": [],
"reason_code": "ok", "reason_text": "good",
}
base.update(kw)
return base
def test_english_passes_through():
    """An English item keeps its accepted verdict, reason code and language."""
    scores = normalize_scores(_data(language="en"), "m")
    assert scores["accepted"] == 1
    assert scores["reason_code"] == "ok"
    assert scores["language"] == "en"
def test_en_variants_count_as_english():
    """Regional and differently-cased 'en' tags are all treated as English."""
    variants = ("en-US", "EN", "en_us", "en-GB")
    for tag in variants:
        scores = normalize_scores(_data(language=tag), "m")
        assert scores["accepted"] == 1
def test_non_english_is_held_not_a_rejection():
    """A German item is held under 'non_english', with language and reason text recorded."""
    held = normalize_scores(_data(language="de"), "m")
    assert held["accepted"] == 0
    # distinct bucket, not a calm-filter reject
    assert held["reason_code"] == "non_english"
    assert held["language"] == "de"
    assert "non-English" in held["reason_text"]
def test_missing_or_unknown_language_defaults_to_english():
    """Empty, 'und' or absent language never drops genuine English content."""
    # a model hiccup must never silently drop genuine English content
    payloads = (
        _data(language=""),
        _data(language="und"),
        _data(),  # no language key at all
    )
    for payload in payloads:
        assert normalize_scores(payload, "m")["accepted"] == 1
def test_non_english_buckets_even_a_content_reject():
    """A non-English item that was also content-rejected is still bucketed as 'non_english'.

    This lets source metrics separate language-holds from calm rejections.
    """
    payload = _data(language="es", accepted=False, reason_code="ragebait")
    scores = normalize_scores(payload, "m")
    assert scores["accepted"] == 0
    assert scores["reason_code"] == "non_english"
def test_language_persisted_structurally_and_inspector_marks_held():
    """Language is stored as its own column and the inspector lists the item as held.

    The in-memory connection is closed in a ``finally`` so a failing assertion
    does not leak the handle.
    """
    c = connect(":memory:")
    try:
        init_db(c)
        c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'S','http://s/f',5)")
        c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash) VALUES (1,1,'http://x','T','h1')")
        c.commit()
        upsert_article_score(c, 1, normalize_scores(_data(language="de"), "m"))
        row = c.execute(
            "SELECT accepted, reason_code, language FROM article_scores WHERE article_id=1"
        ).fetchone()
        # structured, not parsed
        assert row["language"] == "de"
        assert row["reason_code"] == "non_english"
        assert row["accepted"] == 0
        # inspector: shows under 'held', flagged held=True, and NOT under 'rejected'
        held = queries.source_articles(c, 1, filter="held")
        assert len(held) == 1 and held[0]["held"] is True
        assert queries.source_articles(c, 1, filter="rejected") == []
    finally:
        c.close()
+134
View File
@@ -0,0 +1,134 @@
"""Memory Match server state — light, durability-only (no anti-cheat; the board is
deterministic and fully visible). Locks: malformed keys dropped, matched stored as
deduped face KEYS, `done` DERIVED from the matched count vs the tier's target (never
trusted from the client), cross-device merge unions matched, and the sync endpoint
accepts only valid match variants."""
import pytest
from fastapi.testclient import TestClient
from goodnews import games
from goodnews.db import connect, init_db
def _san(variant, state):
    """Sanitize a Memory Match state for ``variant`` on 2026-06-16, with no DB connection."""
    return games.sanitize_game_state(
        None,
        "match",
        variant,
        "2026-06-16",
        state,
    )
def test_sanitize_drops_junk_and_dedupes():
    """Invalid keys are dropped, duplicates removed, moves clamped and done derived."""
    dirty = {
        "matched": ["leaf", "leaf", "color-rose", "banana", "BAD KEY!", 42, "x" * 40, "sun"],
        "moves": -5,
        "done": "yes",
    }
    clean = _san("standard-icons", dirty)
    # deduped + validated against the real face set ("banana"/junk dropped), order kept
    assert clean["matched"] == ["leaf", "color-rose", "sun"]
    # clamped ≥ 0
    assert clean["moves"] == 0
    # 3 < 8 faces — client's "yes" ignored
    assert clean["done"] is False
def test_done_is_derived_from_matched_count_not_client_flag():
    """`done` follows the matched count against the tier target, never the client flag."""
    real8 = ["leaf", "sun", "star", "moon", "cloud", "wave", "tree", "heart"]  # real faces
    first_six = real8[:6]

    # client lies that it's done with no progress → server says not done
    lied = _san("standard-icons", {"matched": [], "done": True})
    assert lied["done"] is False

    # reaching the tier's face target (standard = 8) → done
    complete = _san("standard-icons", {"matched": real8, "done": False})
    assert complete["done"] is True

    # gentle target is only 6
    gentle = _san("gentle-icons", {"matched": first_six})
    assert gentle["done"] is True
    standard = _san("standard-icons", {"matched": first_six})
    assert standard["done"] is False
def test_sanitize_caps_face_count():
    """Fifteen real colour faces are capped to 12 (_MATCH_MAX_FACES)."""
    shades = (
        "rose", "coral", "amber", "gold", "lime",
        "green", "teal", "cyan", "sky", "blue",
        "indigo", "violet", "plum", "brown", "sand",
    )  # 15 real
    many = ["color-" + shade for shade in shades]
    clean = _san("expert-colors", {"matched": many})
    assert len(clean["matched"]) == 12  # _MATCH_MAX_FACES
def test_merge_unions_matched_and_keeps_moves_without_trusting_done():
    """Merge unions matched faces, keeps the larger move count and omits `done`."""
    device_a = {"matched": ["leaf", "sun"], "moves": 7, "done": False}
    device_b = {"matched": ["sun", "star"], "moves": 4, "done": True}
    merged = games.merge_game_state("match", device_a, device_b)
    # union
    assert sorted(merged["matched"]) == ["leaf", "star", "sun"]
    # larger move count
    assert merged["moves"] == 7
    # merge doesn't carry done; sanitize derives it
    assert "done" not in merged
@pytest.fixture
def api_app(tmp_path, monkeypatch):
    """Create the API app against a fresh temp database with admin@b.com as admin."""
    db_path = tmp_path / "t.sqlite3"
    environment = {
        "GOODNEWS_DB": str(db_path),
        "GOODNEWS_PUBLIC_BASE_URL": "http://testserver",
        "GOODNEWS_ADMIN_EMAILS": "admin@b.com",
    }
    for key, value in environment.items():
        monkeypatch.setenv(key, value)

    # Reloaded after the env vars are set (presumably so module-level config
    # in goodnews.api picks them up).
    import importlib
    import goodnews.api as api
    importlib.reload(api)

    handle = connect(str(db_path))
    init_db(handle)
    handle.commit()
    handle.close()
    return api.create_app()
def _signin(app, email="p@b.com"):
    """Return a TestClient signed in as ``email`` through the magic-link flow.

    The email sender is swapped for a capturing stub during sign-in and is
    always restored afterwards.
    """
    import goodnews.email_send as es

    tc = TestClient(app)
    captured = {}

    def _capture(to, link):
        # Keep the link that would have been emailed.
        captured["link"] = link

    real_sender = es.send_magic_link
    es.send_magic_link = _capture
    try:
        tc.post("/api/auth/email/start", json={"email": email})
        token = captured["link"].split("token=")[1]
        tc.post("/api/auth/email/verify", json={"token": token})
    finally:
        es.send_magic_link = real_sender
    return tc
def _put(tc, variant, state):
return tc.put("/api/games/state", json={
"game": "match", "variant": variant, "date": "2026-06-16", "state": state})
def test_sync_endpoint_flow(api_app):
    """Signed-in syncs merge across devices, derive `done`, and reject unknown variants."""
    tc = _signin(api_app)

    first = _put(tc, "standard-icons", {"matched": ["leaf", "sun"], "moves": 3, "done": False})
    assert first.status_code == 200
    first_state = first.json()["state"]
    assert sorted(first_state["matched"]) == ["leaf", "sun"]
    assert first_state["done"] is False

    # a second device merges in (still partial → not done)
    second = _put(tc, "standard-icons", {"matched": ["star"], "moves": 1, "done": True})
    second_state = second.json()["state"]
    assert sorted(second_state["matched"]) == ["leaf", "star", "sun"]
    assert second_state["done"] is False  # 3 < 8, client's done ignored

    # completing the board (8 real faces) → done
    all_faces = ["leaf", "sun", "star", "moon", "cloud", "wave", "tree", "heart"]
    third = _put(tc, "standard-icons", {"matched": all_faces, "moves": 12})
    assert third.json()["state"]["done"] is True

    # unknown variant rejected
    rejected = _put(tc, "huge-icons", {})
    assert rejected.status_code == 404
def test_batch_endpoint_reconciles_many_and_drops_bad(api_app):
    """The batch endpoint syncs valid items, drops invalid ones, and merges a later batch."""
    tc = _signin(api_app)
    items = [
        {"game": "match", "variant": "standard-icons", "state": {"matched": ["leaf", "sun"], "moves": 2}},
        {"game": "bloom", "variant": "", "state": {"found": []}},
        {"game": "match", "variant": "bogus-xyz", "state": {}},  # unknown variant → dropped
    ]
    first = tc.put("/api/games/state/batch", json={"date": "2026-06-16", "items": items})
    assert first.status_code == 200

    states = first.json()["states"]
    returned = set()
    for entry in states:
        returned.add((entry["game"], entry["variant"]))
    assert ("match", "standard-icons") in returned
    assert ("bloom", "") in returned
    assert ("match", "bogus-xyz") not in returned  # invalid item dropped, not fatal

    match_entry = next(entry for entry in states if entry["variant"] == "standard-icons")
    assert sorted(match_entry["state"]["matched"]) == ["leaf", "sun"]  # merged + sanitized

    # a second device merges via the same batch path
    follow_up = {"date": "2026-06-16", "items": [
        {"game": "match", "variant": "standard-icons", "state": {"matched": ["star"], "moves": 5}},
    ]}
    second = tc.put("/api/games/state/batch", json=follow_up)
    merged = second.json()["states"][0]["state"]
    assert sorted(merged["matched"]) == ["leaf", "star", "sun"]
    assert merged["moves"] == 5
def test_batch_endpoint_signed_out_echoes(api_app):
    """A client that never went through `_signin` gets its submitted state echoed back."""
    from fastapi.testclient import TestClient

    anonymous = TestClient(api_app)
    submitted = {"matched": ["color-rose"]}
    payload = {"date": "2026-06-16", "items": [
        {"game": "match", "variant": "gentle-colors", "state": {"matched": ["color-rose"]}},
    ]}
    resp = anonymous.put("/api/games/state/batch", json=payload)
    assert resp.status_code == 200
    # echo, no sync
    assert resp.json()["states"][0]["state"] == submitted
+329
View File
@@ -0,0 +1,329 @@
"""Publishing Desk Phase 1 — queue logic, top-up/dedup semantics, comparative LLM
ranking with deterministic fallback, verified handle resolution, status transitions."""
from datetime import datetime, timedelta, timezone
import pytest
from goodnews import publishing
from goodnews.db import connect, init_db
BASE = "https://ub.test"
def _ts(hours_ago: float) -> str:
    """Return the UTC time `hours_ago` hours before now as 'YYYY-MM-DD HH:MM:SS'.

    A negative `hours_ago` yields a timestamp in the future.
    """
    moment = datetime.now(timezone.utc) - timedelta(hours=hours_ago)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
@pytest.fixture
def conn():
    """Yield an in-memory database connection with the schema initialised.

    The connection is closed on teardown, and also if `init_db` itself raises,
    so a schema failure cannot leak the handle.
    """
    c = connect(":memory:")
    try:
        init_db(c)
        yield c
    finally:
        c.close()
def _src(c, sid, x_handle=None, paywall_override=None, content_visible=1):
    """Insert one `sources` row with id `sid`, a derived name/feed URL, and trust_score 5.

    The caller is responsible for committing.
    """
    sql = (
        "INSERT INTO sources (id,name,feed_url,trust_score,content_visible,x_handle,paywall_override) "
        "VALUES (?,?,?,?,?,?,?)"
    )
    name = f"Source {sid}"
    feed_url = f"http://s{sid}/feed"
    c.execute(sql, (sid, name, feed_url, 5, content_visible, x_handle, paywall_override))
def _article(c, aid, sid, *, accepted=1, dup=None, novelty=5, constructive=5, topic="science",
             url=None, image="http://img/x.jpg", hours_ago=1.0, complete=True):
    """Insert article `aid` for source `sid`, plus its score row and (optionally) a summary.

    `dup` sets `duplicate_of` when given; `complete=False` skips the
    `article_summaries` row. The caller is responsible for committing.
    """
    canonical = url or f"https://ex{aid}.com/a"
    c.execute(
        "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url,published_at) "
        "VALUES (?,?,?,?,?,?,?)",
        (aid, sid, canonical, f"Title {aid}", f"h{aid}", image, _ts(hours_ago)),
    )
    # The duplicate link is applied as a follow-up UPDATE, only when requested.
    if dup is not None:
        c.execute("UPDATE articles SET duplicate_of=? WHERE id=?", (dup, aid))

    score_row = (aid, accepted, novelty, constructive, topic)
    c.execute(
        "INSERT INTO article_scores (article_id,accepted,novelty_score,constructive_score,topic,reason_code) "
        "VALUES (?,?,?,?,?, 'ok')", score_row,
    )
    if not complete:
        return
    c.execute(
        "INSERT INTO article_summaries (article_id,summary,what_happened,why_matters,why_belongs) "
        "VALUES (?,?,?,?,?)", (aid, f"Summary {aid}", "wh", "wm", "wb"),
    )
def _seed_n(c, n):
    """Seed `n` articles, each under its own source (so diversity caps don't interfere).

    Article `i` gets novelty `10 - i` and its own topic `t{i}`; the batch is
    committed at the end.
    """
    for idx in range(1, n + 1):
        _src(c, idx)
        _article(c, idx, idx, novelty=10 - idx, topic=f"t{idx}")
    c.commit()
class FakeClient:
    """Ranking stub: always hands back the ranking it was constructed with."""

    def __init__(self, ranked):
        # Stored as-is (no copy), so the caller's object is what gets returned.
        self._canned = ranked

    def rank_for_social(self, candidates):
        """Ignore `candidates` and return the canned ranking."""
        return self._canned
class BoomClient:
    """Ranking stub whose ranking call always fails."""

    def rank_for_social(self, candidates):
        """Raise RuntimeError regardless of `candidates`."""
        raise RuntimeError("model down")
# --- handle resolution ----------------------------------------------------------
def test_handles_source_first_then_entities_deduped_capped(conn):
    """The source handle leads, then matched entities, capped at two suggestions."""
    publishing.add_entity_handle(conn, "Anthropic", "AnthropicAI", "https://x.com/AnthropicAI")
    publishing.add_entity_handle(conn, "NASA", "NASA")

    entities = ["Anthropic", "NASA", "Unknown Org"]
    suggestions = publishing.resolve_handles(conn, entities, source_handle="Phys_org")

    lead = suggestions[0]
    assert lead["via"] == "source"
    assert lead["handle"] == "@Phys_org"
    assert len(suggestions) == 2  # capped at 2
    # first matched entity wins the remaining slot; NASA is dropped by the cap
    assert suggestions[1]["handle"] == "@AnthropicAI"
    assert all(entry["handle"].startswith("@") for entry in suggestions)
def test_handles_aliases_resolve_consistently(conn):
    """Two entity names stored with the same handle both resolve to it, case-insensitively."""
    publishing.add_entity_handle(conn, "Johns Hopkins University", "HopkinsMedicine")
    # alias row, same handle
    publishing.add_entity_handle(conn, "Johns Hopkins", "HopkinsMedicine")

    full_name = publishing.resolve_handles(conn, ["Johns Hopkins University"])
    lowercase_alias = publishing.resolve_handles(conn, ["johns hopkins"])

    assert full_name
    assert lowercase_alias
    assert full_name[0]["handle"] == "@HopkinsMedicine"
    assert lowercase_alias[0]["handle"] == "@HopkinsMedicine"
def test_handles_unknown_entity_is_not_guessed(conn):
    """An entity with no stored handle yields no suggestion at all."""
    suggestions = publishing.resolve_handles(conn, ["Some Random Startup"])
    assert suggestions == []
def test_normalization_does_not_collide_identity_words(conn):
    """A handle stored for the SCHOOL must not be suggested for the STATE."""
    publishing.add_entity_handle(conn, "University of California", "UCBerkeley")

    # no false match on the shorter name
    state_hits = publishing.resolve_handles(conn, ["California"])
    assert state_hits == []

    # the exact name still resolves
    school_hits = publishing.resolve_handles(conn, ["University of California"])
    assert school_hits
    assert school_hits[0]["handle"] == "@UCBerkeley"
def test_normalization_preserves_the_and_strips_only_trailing_legal(conn):
    """'the' is never dropped, and legal suffixes are stripped only from the END."""
    expectations = [
        ("The Who", "the who"),              # not "who"
        ("Inc. Magazine", "inc magazine"),   # leading legal kept
        ("Apple Inc", "apple"),              # trailing legal stripped
    ]
    for raw, normalized in expectations:
        assert publishing.normalize_entity(raw) == normalized

    # so "The Who" and "WHO" resolve to their OWN handles, no cross-match
    publishing.add_entity_handle(conn, "The Who", "TheWho")
    publishing.add_entity_handle(conn, "WHO", "WHO")
    the_who_hits = publishing.resolve_handles(conn, ["The Who"])
    assert the_who_hits[0]["handle"] == "@TheWho"
    who_hits = publishing.resolve_handles(conn, ["WHO"])
    assert who_hits[0]["handle"] == "@WHO"
def test_invalid_handles_are_rejected_not_stored(conn):
    """Malformed handles fail validation and are refused on save; well-formed ones are canonicalised."""
    rejects = ("", "@", "not a handle", "https://x.com/NASA", "NASA!", "way_too_long_handle_x")
    for candidate in rejects:
        assert publishing.valid_handle(candidate) is None
        assert publishing.add_entity_handle(conn, "Some Org", candidate) is False

    # good ones: one leading @ is tolerated and the canonical form is stored
    assert publishing.valid_handle("@NASA") == "NASA"
    assert publishing.add_entity_handle(conn, "NASA", "@NASA") is True
    stored = publishing.resolve_handles(conn, ["NASA"])
    assert stored[0]["handle"] == "@NASA"

    # a junk source handle is never suggested either
    junk_source = publishing.resolve_handles(conn, [], source_handle="@bad handle!")
    assert junk_source == []
# --- eligibility ----------------------------------------------------------------
def test_eligibility_excludes_the_unfit(conn):
    """Of seven seeded articles, only the single fully-fit one is an eligible candidate."""
    # Source 1: one good article plus four each unfit for a different reason.
    _src(c=conn, sid=1)
    _article(conn, 1, 1)                       # eligible
    _article(conn, 2, 1, accepted=0)           # rejected
    _article(conn, 3, 1, dup=1)                # duplicate
    _article(conn, 4, 1, complete=False)       # no complete summary
    _article(conn, 5, 1, hours_ago=24 * 10)    # too old

    # Source 2 is hidden, so its article is excluded.
    _src(conn, 2, content_visible=0)
    _article(conn, 6, 2)

    # Source 3 is marked paywalled, so its article is excluded.
    _src(conn, 3, paywall_override="paywalled")
    _article(conn, 7, 3)
    conn.commit()

    eligible_ids = {candidate["id"] for candidate in publishing.eligible_candidates(conn)}
    assert eligible_ids == {1}
# --- build: deterministic fallback + top-up/dedup -------------------------------
def test_build_tops_up_to_target_and_dedups(conn):
    """Build fills to the target, adds nothing when full, and refills a freed slot with a new article."""
    _seed_n(conn, 6)

    first = publishing.build_queue(conn, BASE, client=None, target=3)
    assert first["added"] == 3
    assert first["ranked_by"] == "deterministic"

    queue = publishing.list_queue(conn)
    assert len(queue) == 3
    share_prefix = BASE + "/a/"
    assert all(item["share_url"].startswith(share_prefix) for item in queue)
    assert "utm_source=x" in queue[0]["share_url"]

    # Rebuilding at the same target finds the queue already full → no duplicates.
    rebuild = publishing.build_queue(conn, BASE, client=None, target=3)
    assert rebuild["added"] == 0

    # Posting one frees a slot; the next build tops up with a NEW article,
    # never the one that was posted.
    head = queue[0]
    posted_id = head["id"]
    posted_article = head["article_id"]
    publishing.set_status(conn, posted_id, "posted")
    refill = publishing.build_queue(conn, BASE, client=None, target=3)
    assert refill["added"] == 1
    active_articles = {item["article_id"] for item in publishing.list_queue(conn)}
    assert posted_article not in active_articles  # posted never re-queued
def test_build_preserves_saved_draft_on_requeue(conn):
    """A snoozed item that becomes eligible again must keep its draft text."""
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]
    publishing.save_draft(conn, share_id, "my carefully written blurb")

    # Force an EXPIRED snooze straight into the table (set_status rightly
    # refuses a past date).
    expired = _ts(1)
    conn.execute(
        "UPDATE outbound_shares SET status='snoozed', snooze_until=? WHERE id=?",
        (expired, share_id),
    )
    conn.commit()

    # This build re-queues the item.
    publishing.build_queue(conn, BASE, client=None, target=1)
    stored = conn.execute(
        "SELECT status, draft_text FROM outbound_shares WHERE id=?", (share_id,)
    ).fetchone()
    assert stored["status"] == "queued"
    assert stored["draft_text"] == "my carefully written blurb"
# --- build: comparative LLM ranking + fallback ----------------------------------
def test_build_uses_llm_ranking_and_attaches_fields(conn):
    """With a ranking client, the queue follows its order and carries its per-item fields."""
    _seed_n(conn, 3)
    publishing.add_entity_handle(conn, "NASA", "NASA")

    best = {"id": 3, "social_score": 9, "why": "wow", "talking_points": ["a", "b", "c"],
            "angle": "ask a question", "entities": ["NASA"]}
    runner_up = {"id": 1, "social_score": 4, "why": "ok", "talking_points": [], "angle": "", "entities": []}
    ranker = FakeClient([best, runner_up])

    outcome = publishing.build_queue(conn, BASE, client=ranker, target=2)
    assert outcome["ranked_by"] == "llm"
    assert outcome["added"] == 2

    top = publishing.list_queue(conn)[0]
    # LLM order wins
    assert top["article_id"] == 3
    assert top["social_score"] == 9
    assert top["talking_points"] == ["a", "b", "c"]
    assert top["angle"] == "ask a question"
    assert any(entry["handle"] == "@NASA" for entry in top["suggested_handles"])
def test_build_falls_back_when_llm_errors(conn):
    """A ranking client that raises must not break the build: model down ≠ broken Desk."""
    _seed_n(conn, 3)
    outcome = publishing.build_queue(conn, BASE, client=BoomClient(), target=2)
    assert outcome["ranked_by"] == "deterministic"
    assert outcome["added"] == 2
def test_deterministic_fallback_seeds_aids_but_leaves_score_and_angle_empty(conn):
    """Without a client the card still gets writing aids, but no score and no angle.

    Codex Fix-1: the rationale and talking points come from the already
    generated summary, while interest score and angle stay None on purpose,
    since those are LLM-only judgments and are never manufactured.
    """
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    card = publishing.list_queue(conn)[0]

    assert card["rationale"] == "Summary 1"               # seeded from the summary
    assert card["talking_points"] == ["wh", "wm", "wb"]   # seeded from the explanation
    # LLM-only fields are left empty
    assert card["social_score"] is None
    assert card["angle"] is None
# --- adversarial: malformed LLM output ------------------------------------------
def test_duplicate_llm_ids_do_not_inflate_the_queue(conn):
    """Repeated ids in the ranking must not inflate `added` or the queue.

    The model repeats id 1 while only 2 real articles exist, so the reported
    count has to reflect ACTUAL unique rows, not the number of ranking entries.
    """
    _seed_n(conn, 2)
    repeated = {"id": 1, "social_score": 9}
    ranking = [dict(repeated), dict(repeated), dict(repeated), {"id": 2, "social_score": 5}]

    outcome = publishing.build_queue(conn, BASE, client=FakeClient(ranking), target=5)
    queue = publishing.list_queue(conn)

    # not 5, not 3
    assert outcome["added"] == len(queue)
    assert len(queue) == 2
    unique_articles = {item["article_id"] for item in queue}
    assert len(unique_articles) == 2
def test_string_fields_do_not_become_char_arrays(conn):
    """Strings returned where lists are expected are stored as [], not ['f', 'a', ...]."""
    _seed_n(conn, 1)
    malformed = {"id": 1, "social_score": 7, "talking_points": "fact", "entities": "NASA"}
    publishing.build_queue(conn, BASE, client=FakeClient([malformed]), target=1)

    card = publishing.list_queue(conn)[0]
    assert card["talking_points"] == []
    assert card["entities"] == []
# --- lifecycle enforcement ------------------------------------------------------
def test_posted_is_terminal_and_cannot_be_requeued(conn):
    """Once posted, neither set_status nor restore can move the row again."""
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]

    assert publishing.set_status(conn, share_id, "posted") is True
    assert publishing.set_status(conn, share_id, "queued") is False   # no resurrection
    assert publishing.restore(conn, share_id) is False                # restore won't revive posted

    stored = conn.execute("SELECT status FROM outbound_shares WHERE id=?", (share_id,)).fetchone()
    assert stored["status"] == "posted"
def test_late_autosave_is_rejected_after_terminal(conn):
    """A draft save that lands AFTER the item is posted is refused and changes nothing.

    Codex Fix-2: a debounced autosave must not write to the terminal row and
    clobber what was actually published.
    """
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]

    assert publishing.save_draft(conn, share_id, "draft while active") is True
    publishing.set_status(conn, share_id, "posted")
    # no-op on the terminal row
    assert publishing.save_draft(conn, share_id, "late autosave") is False

    stored = conn.execute("SELECT draft_text FROM outbound_shares WHERE id=?", (share_id,)).fetchone()
    assert stored["draft_text"] == "draft while active"  # the late write was ignored
def test_posted_rows_never_appear_in_queue_or_archived_tray(conn):
    """Posted rows are excluded from both listings the UI uses.

    Codex Fix-4: posted history is terminal, so neither the working queue nor
    the archived tray ever grows with it.
    """
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]
    publishing.set_status(conn, share_id, "posted")

    working = publishing.list_queue(conn)
    assert working == []   # not in working queue
    archived = publishing.list_queue(conn, include_archived=True)
    assert archived == []  # not in archived tray
def test_snooze_requires_a_future_date(conn):
    """Snoozing needs a future `snooze_until`; restoring afterwards clears it."""
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]

    def snooze(until):
        return publishing.set_status(conn, share_id, "snoozed", snooze_until=until)

    assert snooze(None) is False       # null
    assert snooze(_ts(1)) is False     # past
    assert snooze(_ts(-48)) is True    # future

    # leaving snooze later (via restore) clears the date
    publishing.restore(conn, share_id)
    stored = conn.execute("SELECT snooze_until FROM outbound_shares WHERE id=?", (share_id,)).fetchone()
    assert stored["snooze_until"] is None
# --- status transitions + restore + snooze --------------------------------------
def test_skip_is_reversible_and_snooze_is_separate(conn):
    """A skipped item moves to the archived tray and can be restored; a snoozed one leaves the queue."""
    _seed_n(conn, 2)
    publishing.build_queue(conn, BASE, client=None, target=2)
    queue = publishing.list_queue(conn)
    skipped_id = queue[0]["id"]
    snoozed_id = queue[1]["id"]

    def working_ids():
        return {item["id"] for item in publishing.list_queue(conn)}

    publishing.set_status(conn, skipped_id, "skipped")
    assert skipped_id not in working_ids()  # gone from working queue
    archived_ids = {item["id"] for item in publishing.list_queue(conn, include_archived=True)}
    assert skipped_id in archived_ids       # but in the tray
    assert publishing.restore(conn, skipped_id) is True
    assert skipped_id in working_ids()      # restored

    # snooze: not in working queue, holds a snooze_until, restorable
    publishing.set_status(conn, snoozed_id, "snoozed", snooze_until=_ts(-24))  # 24h in the future
    stored = conn.execute(
        "SELECT status, snooze_until FROM outbound_shares WHERE id=?", (snoozed_id,)
    ).fetchone()
    assert stored["status"] == "snoozed"
    assert stored["snooze_until"]
    assert snoozed_id not in working_ids()
def test_inflight_build_does_not_clobber_a_freshly_extended_snooze(conn):
    """A snooze extended while the build is ranking must survive that build.

    The build snapshots eligibility and then ranks. If the user RE-SNOOZES into
    the future mid-rank, the finished build must NOT revive the item, because
    only EXPIRED snoozes are revived.
    """
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]

    # Start from an expired snooze so the item counts as eligible again.
    conn.execute(
        "UPDATE outbound_shares SET status='snoozed', snooze_until=? WHERE id=?",
        (_ts(1), share_id),
    )
    conn.commit()
    extended_until = _ts(-48)  # 48h ahead

    class RaceClient:
        def rank_for_social(self, candidates):
            # mid-build interleave: the user pushes the snooze into the future
            conn.execute(
                "UPDATE outbound_shares SET snooze_until=? WHERE id=?",
                (extended_until, share_id),
            )
            conn.commit()
            return [{"id": 1, "social_score": 9}]

    publishing.build_queue(conn, BASE, client=RaceClient(), target=1)

    stored = conn.execute(
        "SELECT status, snooze_until FROM outbound_shares WHERE id=?", (share_id,)
    ).fetchone()
    # left alone, not re-queued
    assert stored["status"] == "snoozed"
    assert stored["snooze_until"] == extended_until
+118
View File
@@ -0,0 +1,118 @@
"""Publishing Desk Phase 1 — admin API: gating, background build (deterministic
fallback), lifecycle enforcement, snooze validation, draft preservation, restore."""
from datetime import datetime, timedelta, timezone
import pytest
from fastapi.testclient import TestClient
from goodnews.db import connect, init_db
def _future(hours: int = 24) -> str:
    """Return the UTC time `hours` hours from now as 'YYYY-MM-DD HH:MM:SS'."""
    moment = datetime.now(timezone.utc) + timedelta(hours=hours)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
def _recent() -> str:
    """Return the UTC time one hour ago as 'YYYY-MM-DD HH:MM:SS'."""
    an_hour_ago = datetime.now(timezone.utc) - timedelta(hours=1)
    return an_hour_ago.strftime("%Y-%m-%d %H:%M:%S")
@pytest.fixture
def api_app(tmp_path, monkeypatch):
    """Build the API app against a throwaway database seeded with one eligible article.

    Environment variables are set before `goodnews.api` is reloaded. The seed
    connection is closed even when schema setup or an INSERT fails, so a broken
    seed cannot leave the database file held open.
    """
    db = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))
    # http (not https) so the session cookie isn't Secure-only — TestClient runs over http
    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "http://testserver")
    monkeypatch.setenv("GOODNEWS_ADMIN_EMAILS", "admin@b.com")
    monkeypatch.setenv("GOODNEWS_LLM_BASE_URL", "http://127.0.0.1:9")  # dead → deterministic fallback, fast
    import importlib
    import goodnews.api as api
    importlib.reload(api)
    c = connect(str(db))
    try:
        init_db(c)
        # one eligible article (accepted, visible, complete summary, recent, readable)
        c.execute("INSERT INTO sources (id,name,feed_url,trust_score,content_visible) VALUES (1,'S','http://s/f',5,1)")
        c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url,published_at) "
                  "VALUES (1,1,'https://ex.com/a','Title','h1','http://img/x.jpg',?)", (_recent(),))
        c.execute("INSERT INTO article_scores (article_id,accepted,novelty_score,constructive_score,topic,reason_code) "
                  "VALUES (1,1,7,7,'science','ok')")
        c.execute("INSERT INTO article_summaries (article_id,summary,what_happened,why_matters,why_belongs) "
                  "VALUES (1,'Sum','wh','wm','wb')")
        c.commit()
    finally:
        c.close()
    return api.create_app()
def _admin(app):
    """Return a TestClient that has run the email start/verify flow for admin@b.com.

    The magic-link sender is swapped for a stub that records the link, and is
    put back afterwards even if a request fails.
    """
    client = TestClient(app)
    captured = {}
    import goodnews.email_send as es

    real_sender = es.send_magic_link
    es.send_magic_link = lambda to, link: captured.update(link=link)
    try:
        client.post("/api/auth/email/start", json={"email": "admin@b.com"})
        # The token is whatever follows "token=" in the recorded link.
        token = captured["link"].split("token=")[1]
        client.post("/api/auth/email/verify", json={"token": token})
    finally:
        es.send_magic_link = real_sender
    return client
def test_admin_gating(api_app):
    """A client with no session gets 401 from both the queue and build endpoints."""
    visitor = TestClient(api_app)
    queue_resp = visitor.get("/api/admin/publishing/queue")
    assert queue_resp.status_code == 401
    build_resp = visitor.post("/api/admin/publishing/build")
    assert build_resp.status_code == 401
def test_build_then_queue_deterministic(api_app):
    """Build queues the one seeded article via the deterministic fallback; a rebuild adds nothing."""
    admin = _admin(api_app)
    build_url = "/api/admin/publishing/build"
    queue_url = "/api/admin/publishing/queue"

    assert admin.post(build_url).json() == {"building": True}

    # TestClient runs the background task before returning; LLM URL is dead → fallback.
    snapshot = admin.get(queue_url).json()
    assert snapshot["building"] is False
    assert snapshot["last"]["ranked_by"] == "deterministic"
    items = snapshot["items"]
    assert len(items) == 1
    assert items[0]["article_id"] == 1
    assert "utm_source=x" in items[0]["share_url"]

    # a second build is a no-op (already full) — never duplicates
    admin.post(build_url)
    after_rebuild = admin.get(queue_url).json()["items"]
    assert len(after_rebuild) == 1
def _one(tc):
    """Trigger a build with `tc` and return the id of the first queue item."""
    tc.post("/api/admin/publishing/build")
    queue = tc.get("/api/admin/publishing/queue").json()
    return queue["items"][0]["id"]
def test_invalid_transition_rejected(api_app):
    """Marking an item posted succeeds; moving it back to queued afterwards is a 400."""
    admin = _admin(api_app)
    share_id = _one(admin)
    status_url = f"/api/admin/publishing/{share_id}/status"

    posted = admin.post(status_url, json={"status": "posted"})
    assert posted.status_code == 200
    # posted is terminal — resurrection refused
    requeue = admin.post(status_url, json={"status": "queued"})
    assert requeue.status_code == 400
def test_snooze_validation(api_app):
    """Snooze is a 400 without a date or with a past one, and a 200 with a future one."""
    admin = _admin(api_app)
    share_id = _one(admin)
    status_url = f"/api/admin/publishing/{share_id}/status"

    missing_date = admin.post(status_url, json={"status": "snoozed"})
    assert missing_date.status_code == 400  # null

    past_date = admin.post(status_url, json={"status": "snoozed", "snooze_until": "2000-01-01 00:00:00"})
    assert past_date.status_code == 400  # past

    future_date = admin.post(status_url, json={"status": "snoozed", "snooze_until": _future()})
    assert future_date.status_code == 200
def test_draft_preserved_through_skip_and_restore(api_app):
    """A saved draft is still attached to the item after a skip followed by a restore."""
    admin = _admin(api_app)
    share_id = _one(admin)
    item_url = f"/api/admin/publishing/{share_id}"
    queue_url = "/api/admin/publishing/queue"

    saved = admin.post(f"{item_url}/draft", json={"draft_text": "my blurb"})
    assert saved.status_code == 200
    skipped = admin.post(f"{item_url}/status", json={"status": "skipped"})
    assert skipped.status_code == 200

    # the skipped item has left the queue
    queued_ids = {item["id"] for item in admin.get(queue_url).json()["items"]}
    assert share_id not in queued_ids

    restored = admin.post(f"{item_url}/restore")
    assert restored.status_code == 200
    items = admin.get(queue_url).json()["items"]
    back = next(item for item in items if item["id"] == share_id)
    assert back["draft_text"] == "my blurb"  # work survived skip→restore
def test_save_handle_validates(api_app):
    """The handles endpoint answers 400 for malformed handles and 200 for a well-formed one."""
    admin = _admin(api_app)
    cases = [
        ("@not a handle", 400),
        ("https://x.com/NASA", 400),
        ("@NASA", 200),
    ]
    for handle, expected_status in cases:
        resp = admin.post("/api/admin/publishing/handles",
                          json={"entity_name": "NASA", "handle": handle})
        assert resp.status_code == expected_status
+48 -1
View File
@@ -42,9 +42,56 @@ def test_share_page_missing_and_malformed(client):
assert tc.get("/a/999").status_code == 404 # unknown
assert tc.get("/a/not-a-number").status_code == 404 # malformed → calm 404
assert tc.get("/a/2").status_code == 404 # rejected article
assert tc.get("/a/3").status_code == 404 # duplicate
def test_share_page_duplicate_redirects_to_canonical(client):
    """/a/3 answers with a 301 to /a/1 instead of a 404.

    Article 3 is a duplicate of the live article 1. Its URL may already be
    indexed, so it consolidates onto the canonical rather than dropping out of
    Google.
    """
    resp = TestClient(client).get("/a/3", follow_redirects=False)
    assert resp.status_code == 301
    assert resp.headers["location"] == "/a/1"
def test_share_page_no_image_uses_summary_card(client, tmp_path, monkeypatch):
    """Article 1 has an image, so its share page advertises the large card.

    NOTE(review): despite the name, only the with-image case is asserted here,
    and `tmp_path` / `monkeypatch` are requested but unused — confirm whether a
    no-image case was intended.
    """
    page = TestClient(client).get("/a/1")
    assert 'summary_large_image' in page.text
def test_incomplete_page_is_not_cached(client):
    """The "generating" page is served with no-cache and kept out of the share cache.

    Article 1 has no summary/explanation, so the page must re-fetch once the
    summary lands.
    """
    import goodnews.api as api

    resp = TestClient(client).get("/a/1")
    assert resp.status_code == 200
    cache_header = resp.headers.get("cache-control")
    assert cache_header == "no-cache"
    assert 1 not in api._SHARE_CACHE
@pytest.fixture
def app_complete(tmp_path, monkeypatch):
    """Return the reloaded `goodnews.api` module, backed by a DB holding one fully summarised article.

    Note that this yields the module, not an app; callers build the app with
    `create_app()`. The seed connection is closed even when schema setup or an
    INSERT fails.
    """
    db = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))
    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")
    import importlib
    import goodnews.api as api
    importlib.reload(api)
    from goodnews.db import connect, init_db
    c = connect(str(db))
    try:
        init_db(c)
        c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'BBC','http://s/f',5)")
        c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url) "
                  "VALUES (1,1,'https://bbc.com/x','Water voles return','h1','https://img/v.jpg')")
        c.execute("INSERT INTO article_scores (article_id,accepted,reason_text) VALUES (1,1,'Hopeful.')")
        c.execute("INSERT INTO article_summaries (article_id,summary,what_happened,why_matters,why_belongs) "
                  "VALUES (1,'Voles are back.','They returned to the river.','Biodiversity rebound.','Quietly hopeful.')")
        c.commit()
    finally:
        c.close()
    return api
def test_complete_page_is_cached_and_served_from_cache(app_complete):
    """A finished page gets a public cache header, lands in the share cache, and repeats identically."""
    module = app_complete
    browser = TestClient(module.create_app())

    first = browser.get("/a/1")
    assert first.status_code == 200
    assert first.headers.get("cache-control") == "public, max-age=300"
    assert 1 in module._SHARE_CACHE  # finished page cached

    # second hit served from cache
    second = browser.get("/a/1")
    assert second.status_code == 200
    assert second.text == first.text
+79
View File
@@ -0,0 +1,79 @@
"""Share page /a/{id}: a duplicate article 301-redirects to its canonical twin
instead of 404ing. A hard 404 silently drops already-indexed URLs from Google and
tanked impressions when a newer duplicate retired an older, indexed page."""
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Build the API app over a DB seeded with canonical, duplicate, and rejected articles.

    The seed connection is closed even when schema setup or an INSERT fails, so
    a broken seed cannot leave the database file held open.
    """
    db = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))
    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")
    import importlib
    import goodnews.api as api
    importlib.reload(api)
    from goodnews.db import connect, init_db
    c = connect(str(db))
    try:
        init_db(c)
        c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'BBC','http://s/f',5)")

        def art(aid, *, accepted=1, dup=None, summary=True):
            """Insert article `aid` under source 1 with its score row and optional summary."""
            c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,published_at) "
                      "VALUES (?,1,?,?,?,'2026-06-05T08:00:00')",
                      (aid, f"https://bbc.com/{aid}", f"Story {aid}", f"h{aid}"))
            if dup is not None:
                c.execute("UPDATE articles SET duplicate_of=? WHERE id=?", (dup, aid))
            c.execute("INSERT INTO article_scores (article_id,accepted,reason_text) VALUES (?,?,'x')", (aid, accepted))
            if summary:
                c.execute("INSERT INTO article_summaries (article_id,summary) VALUES (?,?)", (aid, f"Summary {aid}."))

        art(10)                      # canonical, live -> 200
        art(11, dup=10)              # duplicate of a live canonical -> 301 /a/10
        art(12, accepted=0, dup=10)  # REJECTED follower of an accepted rep -> still 301 /a/10
        art(20, accepted=0)          # rejected canonical
        art(21, dup=20)              # dup of a REJECTED canonical -> 404 (genuinely gone)
        art(30, accepted=0)          # rejected, not a duplicate -> 404
        c.commit()
    finally:
        c.close()
    return api.create_app()
def test_canonical_serves_200(client):
    """The live canonical article renders with its title in the page."""
    resp = TestClient(client).get("/a/10")
    assert resp.status_code == 200
    assert "Story 10" in resp.text
def test_duplicate_301s_to_canonical(client):
    """A duplicate of a live canonical redirects permanently onto the survivor."""
    resp = TestClient(client).get("/a/11", follow_redirects=False)
    status, target = resp.status_code, resp.headers["location"]
    assert status == 301
    assert target == "/a/10"  # consolidates onto the survivor
def test_rejected_follower_of_accepted_rep_still_301s(client):
    """A rejected article pointing at an ACCEPTED representative 301s to it.

    Policy: the route resolves duplicate_of BEFORE the follower's own
    acceptance. That is intentional, since it sends the visitor/crawler to a
    serveable equivalent rather than a 404.
    """
    resp = TestClient(client).get("/a/12", follow_redirects=False)
    assert resp.status_code == 301
    assert resp.headers["location"] == "/a/10"
def test_duplicate_of_rejected_canonical_404s(client):
    """A duplicate whose canonical was rejected has nothing serveable to redirect to."""
    resp = TestClient(client).get("/a/21", follow_redirects=False)
    status = resp.status_code
    assert status == 404
def test_rejected_article_404s(client):
    """A rejected article that is not a duplicate answers 404."""
    resp = TestClient(client).get("/a/30")
    assert resp.status_code == 404
def test_missing_article_404s(client):
    """An id that was never seeded answers 404."""
    resp = TestClient(client).get("/a/9999")
    assert resp.status_code == 404
def test_head_matches_get_status(client):
    """HEAD returns the same status as GET for canonical, duplicate, and missing ids.

    It must not fall through to the static mount and 404; some crawlers and
    link-checkers probe with HEAD.
    """
    browser = TestClient(client)

    canonical = browser.head("/a/10")
    assert canonical.status_code == 200

    duplicate = browser.head("/a/11", follow_redirects=False)
    assert duplicate.status_code == 301
    assert duplicate.headers["location"] == "/a/10"

    missing = browser.head("/a/9999")
    assert missing.status_code == 404