Sync repo to deployed state: SEO recovery, Publishing Desk, Play games, emoji picker
The deploy pipeline runs from the working tree, so a wave of shipped features
had never been committed. This snapshots git to what's actually running.
SEO impression recovery (live + verified):
- Duplicate /a/{id} pages now 301-redirect to their canonical twin instead of returning 404
(a hard 404 silently dropped already-indexed URLs and tanked impressions).
- Dedup representative selection reworked: accepted/serveable -> established
rep (URL stability) -> quality score, so an accepted page never retires to a
rejected rep and an indexed canonical doesn't churn when a newer twin arrives.
- HEAD /a/{id} returns the same status as GET (api_route GET+HEAD) instead of
falling through to the static mount and 404ing.
- `dedup --force-recluster`: cycle-locked, model-free re-cluster to re-apply the
policy to the existing corpus (shared cycle_lock context manager).
- CLI honors GOODNEWS_DB for its default --db (was silently ignored).
Publishing Desk (admin tool to post highlights to X via Web Intents):
- publishing.py queue/rank/handle-resolution; admin UI; full searchable emoji
picker (bundled data, no CDN) for the blurb editor.
Play games + site:
- Bloom (word-wheel), Memory Match, daily ritual set, Zen Den (dev-gated).
- English-only language gate; source prospecting; paywall + dedup hardening.
Tests: full suite green (349). Ignores tightened (node_modules, data/*.db).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,134 @@
|
||||
"""Deep-preview accessibility check — content-level readable/paywalled/blocked/unknown,
|
||||
and the layered verdict (domain rule + sampled access, evidence over domain alone)."""
|
||||
import time
|
||||
|
||||
from goodnews import feeds
|
||||
from goodnews.paywall import check_article_access
|
||||
|
||||
READABLE = b"<html><body><article>" + (b"<p>Real article text here. </p>" * 80) + b"</article></body></html>"
|
||||
WALLED_SCHEMA = b'<html><head><script type="application/ld+json">{"isAccessibleForFree": false}</script></head><body><p>teaser</p></body></html>'
|
||||
WALLED_PHRASE = b"<html><body><p>Subscribe to continue reading this story.</p></body></html>"
|
||||
THIN = b"<html><body><p>hi</p></body></html>"
|
||||
|
||||
|
||||
def _fetcher(mapping):
|
||||
def f(url, timeout=8):
|
||||
if mapping.get(url) == "ERR":
|
||||
raise RuntimeError("boom")
|
||||
return mapping[url]
|
||||
return f
|
||||
|
||||
|
||||
def test_classifies_each_access_state():
    """Each canned page maps to its expected access state."""
    fetch = _fetcher({"r": READABLE, "s": WALLED_SCHEMA, "p": WALLED_PHRASE, "t": THIN, "b": "ERR"})
    expected = {
        "r": "readable",
        "s": "paywalled",  # schema.org isAccessibleForFree:false
        "p": "paywalled",  # explicit wall phrase
        "t": "unknown",    # too thin to tell
        "b": "blocked",    # fetch failed
    }
    for url, state in expected.items():
        assert check_article_access(url, fetch) == state
|
||||
|
||||
|
||||
def test_does_not_falseflag_a_readable_page():
    """A long article with a footer newsletter link still classifies as readable."""
    # a long article that merely links "subscribe to our newsletter" in the footer
    paragraphs = b"<p>Lots of real content. </p>" * 100
    page = (
        b"<html><body><article>"
        + paragraphs
        + b"<footer>Subscribe to our newsletter</footer></article></body></html>"
    )
    verdict = check_article_access("x", _fetcher({"x": page}))
    assert verdict == "readable"
|
||||
|
||||
|
||||
def _items(urls):
    """Wrap each URL in a ``feeds.FeedItem`` titled ``T<index>``."""
    items = []
    for idx, link in enumerate(urls):
        items.append(feeds.FeedItem(title=f"T{idx}", url=link, description="d", published_at=None))
    return items
|
||||
|
||||
|
||||
def test_preview_verdict_layers_domain_and_sample(monkeypatch):
    """A feed whose sampled articles all fetch as READABLE gets the 'fine' verdict."""
    # a non-paywall-domain feed whose sampled articles mostly read fine -> "fine"
    article_urls = [
        "https://good.example/a1",
        "https://good.example/a2",
        "https://good.example/a3",
    ]
    monkeypatch.setattr(feeds, "parse_feed", lambda raw: _items(article_urls))

    class FakeClient:
        model = "test"

        def classify(self, art):
            verdict = {"accepted": True, "topic": "science", "flavor": "discovery"}
            verdict.update({"cortisol_score": 1, "ragebait_score": 1, "pr_risk_score": 2})
            return verdict

    def fetcher(url, timeout=10):
        return READABLE  # every sampled article reads fine

    out = feeds.preview_feed(
        "https://good.example/feed", sample=8, client=FakeClient(), fetcher=fetcher
    )
    assert out["paywall_rule"] is False
    access = out["access"]
    assert access["readable"] >= 1 and access["paywalled"] == 0
    assert out["access_verdict"] == "fine"
|
||||
|
||||
|
||||
def test_mostly_blocked_is_review_not_fine(monkeypatch):
    """A feed whose sampled articles mostly fail to fetch gets the 'review' verdict."""
    # bot-blocked sites (readable in a browser, blocked to our fetcher) must NOT read
    # as 'fine' off one sample, nor as 'reject-ready' — they land in 'review'.
    article_urls = [f"https://blocky.example/a{i}" for i in range(6)]
    monkeypatch.setattr(feeds, "parse_feed", lambda raw: _items(article_urls))

    class FakeClient:
        model = "test"

        def classify(self, art):
            verdict = {"accepted": True, "topic": "science", "flavor": "discovery"}
            verdict.update({"cortisol_score": 1, "ragebait_score": 1, "pr_risk_score": 2})
            return verdict

    def fetcher(url, timeout=10):
        # the feed fetch + one readable article succeed; the rest block (bot-blocked)
        if not url.endswith(("/feed", "a0")):
            raise RuntimeError("403 blocked")
        return READABLE

    out = feeds.preview_feed(
        "https://blocky.example/feed", sample=8, client=FakeClient(), fetcher=fetcher
    )
    access = out["access"]
    assert access["blocked"] >= 4 and access["readable"] == 1
    # thin assessable evidence → not 'fine', not 'reject-ready'
    assert out["access_verdict"] == "review"
|
||||
|
||||
|
||||
def test_source_preview_endpoint_handles_null_rate(tmp_path, monkeypatch):
    """/api/source-preview answers 200 with a null ``acceptance_rate``."""
    # All-held (non-English) sample → acceptance_rate is None; the legacy
    # /api/source-preview must not 500 on it (SourcePreview.acceptance_rate is nullable).
    db_path = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db_path))
    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "http://testserver")

    # The API module is imported and reloaded only after the env vars are set.
    import importlib
    import goodnews.api as api
    importlib.reload(api)
    from goodnews.db import connect, init_db

    handle = connect(str(db_path))
    init_db(handle)
    handle.commit()
    handle.close()

    all_held = {
        "url": "http://x/feed",
        "sampled": 4,
        "classified": True,
        "accepted": 0,
        "non_english": 4,
        "acceptance_rate": None,
        "avg_cortisol": 0.0,
        "avg_ragebait": 0.0,
        "avg_pr_risk": 0.0,
        "newest_published": None,
        "recent_7d": 0,
        "topic_mix": {},
        "flavor_mix": {},
        "examples_accepted": [],
        "examples_rejected": [],
    }
    monkeypatch.setattr(feeds, "preview_feed", lambda *a, **k: all_held)

    from fastapi.testclient import TestClient
    response = TestClient(api.create_app()).get("/api/source-preview?url=http://x/feed")
    assert response.status_code == 200  # was 500: None rejected by float field
    assert response.json()["acceptance_rate"] is None
|
||||
|
||||
|
||||
def test_one_hung_fetch_does_not_stall_the_preview(monkeypatch):
    """One fetch sleeping past the access deadline must not delay the whole preview."""
    # Codex's wall-clock audit: one article that sleeps WAY past the deadline must
    # not pin Deep Preview — it returns at the cap, with the slow one left 'unknown'.
    monkeypatch.setattr(feeds, "_ACCESS_DEADLINE_S", 0.5)  # shrink the cap for the test
    article_urls = [f"https://mixed.example/a{i}" for i in range(6)]
    monkeypatch.setattr(feeds, "parse_feed", lambda raw: _items(article_urls))

    class FakeClient:
        model = "test"

        def classify(self, art):
            verdict = {"accepted": True, "topic": "science", "flavor": "discovery"}
            verdict.update({"cortisol_score": 1, "ragebait_score": 1, "pr_risk_score": 2})
            return verdict

    def fetcher(url, timeout=10):
        hangs = url.endswith("a0")
        if hangs:
            time.sleep(5)  # one ugly site hangs far past the 0.5s cap
        return READABLE

    t0 = time.monotonic()
    out = feeds.preview_feed(
        "https://mixed.example/feed", sample=8, client=FakeClient(), fetcher=fetcher
    )
    took = time.monotonic() - t0
    assert took < 2.5  # returned at the cap (~0.5s), NOT after the 5s sleep

    # the hung one is 'unknown' (unverified), the rest read fine
    examples = out["access"]["examples"]
    slow = next(ex for ex in examples if ex["url"].endswith("a0"))
    assert slow["access"] == "unknown"
    assert out["access"]["readable"] >= 4
|
||||
@@ -114,3 +114,27 @@ def test_brief_cache_boundary(client):
|
||||
assert "public" in client.get("/api/brief").headers.get("cache-control", "")
|
||||
assert client.get("/api/brief", params={"prefs": json.dumps({"mute_topics": ["health"]})}).headers.get("cache-control") == "private, no-store"
|
||||
assert client.get("/api/brief", params={"exclude": "3"}).headers.get("cache-control") == "private, no-store"
|
||||
|
||||
|
||||
def test_search_relevance_source_and_boundaries(client):
    """Search matches titles and source names and honours empty-query and prefs boundaries."""
    import json as _j
    import os
    import sqlite3

    # A distinctively-named source proves source-name matching (the NYT use case).
    seed_statements = (
        "INSERT INTO sources (id,name,feed_url,trust_score) VALUES (2,'Nature Digest','http://n/f',7)",
        "INSERT INTO articles (id,source_id,canonical_url,title,published_at,url_hash) "
        "VALUES (3,2,'http://n/3','Coral reefs rebound','2026-05-30T10:00:00+00:00','h3')",
        "INSERT INTO article_scores (article_id,accepted,topic,flavor) VALUES (3,1,'environment','hopeful')",
    )
    db = sqlite3.connect(os.environ["GOODNEWS_DB"])
    for statement in seed_statements:
        db.execute(statement)
    db.commit()
    db.close()

    # title match (index builds lazily on first search)
    by_title = client.get("/api/search?q=coral").json()
    assert by_title["items"][0]["id"] == 3

    # SOURCE-NAME match — searching the publication finds its articles (Codex's requirement)
    by_source = client.get("/api/search?q=nature").json()
    assert 3 in [hit["id"] for hit in by_source["items"]]

    # empty / junk query → empty, no error
    assert client.get("/api/search?q=").json()["count"] == 0
    assert client.get("/api/search?q=%20%21%21").json()["count"] == 0

    # boundary: a muted topic is excluded from search too (mirrors the visitor view)
    mute_prefs = _j.dumps({"mute_topics": ["environment"]})
    muted = client.get("/api/search", params={"q": "coral", "prefs": mute_prefs}).json()
    assert muted["count"] == 0

    # boundary: a hard avoid-term filters a textual match
    avoid_prefs = _j.dumps({"avoid_terms": ["election"]})
    avoided = client.get("/api/search", params={"q": "election", "prefs": avoid_prefs}).json()
    assert all(hit["id"] != 2 for hit in avoided["items"])
|
||||
|
||||
@@ -0,0 +1,255 @@
|
||||
"""Bloom — the daily word wheel. Locks the design/acceptance split:
|
||||
|
||||
• DESIGN (deterministic, stored): wheel + tiers + pangram + Full-Bloom target,
|
||||
from the COMMON list. The PERMANENT guardrail — Flourishing reachable with
|
||||
common words — still holds.
|
||||
• ACCEPTANCE (broad + dynamic): every valid word buildable from the wheel,
|
||||
computed live as broad dict ∪ {allow} − {block}; runtime admin overrides +
|
||||
player reports drive curation with no deploy.
|
||||
"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from goodnews import bloom, games
|
||||
from goodnews.db import connect, init_db
|
||||
|
||||
DATES = [f"2026-06-{d:02d}" for d in range(10, 25)] # 15 sample days
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def designs():
    """Build the puzzle for every sample date once per module, keyed by date."""
    built = {}
    for day in DATES:
        built[day] = bloom.build_puzzle(day)
    return built
|
||||
|
||||
|
||||
@pytest.fixture
def conn(tmp_path):
    """Yield an initialised throwaway DB connection seeded with one user.

    The database lives under ``tmp_path``. The connection is closed on
    teardown (the fixture previously returned it and never closed it).
    """
    c = connect(str(tmp_path / "t.db"))
    try:
        init_db(c)
        c.execute("INSERT INTO users (email) VALUES ('a@b.c')")
        c.commit()
        yield c
    finally:
        # Runs after the test body, and also if setup above fails.
        c.close()
|
||||
|
||||
|
||||
def _letters(p):
|
||||
return frozenset(p["center"]) | frozenset(p["outer"])
|
||||
|
||||
|
||||
def _commons_for(p):
    """COMMON words for a center-mode wheel (the designed puzzle)."""
    wheel = _letters(p)
    matches = []
    for word in bloom._COMMON:
        # keep words that contain the centre letter and use only wheel letters
        if p["center"] in word and frozenset(word) <= wheel:
            matches.append(word)
    return matches
|
||||
|
||||
|
||||
def _assert_no_answer_leak(resp):
|
||||
assert "words" not in resp
|
||||
assert resp["accepted"] and all(
|
||||
isinstance(h, str) and len(h) == 64 and set(h) <= set("0123456789abcdef")
|
||||
for h in resp["accepted"])
|
||||
|
||||
|
||||
# --- DESIGN (deterministic, common-based) --------------------------------------
|
||||
|
||||
def test_build_is_deterministic():
    """Building the same date twice yields equal puzzles."""
    first = bloom.build_puzzle("2026-06-15")
    second = bloom.build_puzzle("2026-06-15")
    assert first == second
|
||||
|
||||
|
||||
@pytest.mark.parametrize("date", DATES)
def test_design_shape(designs, date):
    """Each sample day's wheel has 7 letters, no 's', a bounded common count and a valid pangram."""
    p = designs[date]
    wheel = _letters(p)
    assert len(wheel) == 7 and "s" not in wheel
    assert p["center"] in wheel and len(p["outer"]) == 6
    common_count = len(_commons_for(p))
    assert bloom.MIN_COMMON_WORDS <= common_count <= bloom.MAX_COMMON_WORDS
    pangram = p["pangram"]
    assert frozenset(pangram) == wheel  # display pangram uses all 7
    assert pangram in bloom._COMMON and pangram not in bloom._AVOID
|
||||
|
||||
|
||||
@pytest.mark.parametrize("date", DATES)
def test_PERMANENT_top_tier_reachable_with_common_words(designs, date):
    """Flourishing reachable from COMMON words alone — never obscure-word hunting."""
    p = designs[date]
    threshold = next(tier["score"] for tier in p["tiers"] if tier["name"] == "Flourishing")
    common_score = bloom.score_words(p, _commons_for(p))
    assert common_score >= threshold
|
||||
|
||||
|
||||
def test_tiers_are_8_30_70_of_common_and_max_is_common_total():
    """Tier names are fixed, max_score equals the common total, Flourishing is int(70%) of it."""
    p = bloom.build_puzzle("2026-06-15")
    tier_names = [tier["name"] for tier in p["tiers"]]
    assert tier_names == ["Sprouting", "Budding", "Blooming", "Flourishing"]
    common_total = bloom.score_words(p, _commons_for(p))
    assert p["max_score"] == common_total  # Full Bloom = the designed puzzle
    flour = next(tier["score"] for tier in p["tiers"] if tier["name"] == "Flourishing")
    assert flour == int(0.70 * common_total) and flour <= p["max_score"]
|
||||
|
||||
|
||||
# --- ACCEPTANCE (broad + dynamic) ----------------------------------------------
|
||||
|
||||
def test_accept_is_broad_and_obeys_center_rule(conn):
    """Accepted words obey the wheel rules and strictly contain the common puzzle words."""
    p = bloom.build_puzzle("2026-06-15")
    acc = bloom.accepted_words(conn, p["center"], p["outer"], require_center=True)
    wheel = _letters(p)
    for word in acc:
        assert len(word) >= 4 and "s" not in word and frozenset(word) <= wheel and p["center"] in word
    # broad accept is a SUPERSET of the common puzzle (bonus words beyond design)
    assert set(_commons_for(p)) <= set(acc)
    assert len(acc) > len(_commons_for(p))
|
||||
|
||||
def test_arraign_class_words_auto_accepted():
    """The words 'arraign', 'feign' and 'crwth' are all present in ``bloom.ACCEPT``."""
    # broad dict includes real-but-rare words without any include-list
    accept = set(bloom.ACCEPT)  # built once instead of once per checked word
    for w in ("arraign", "feign", "crwth"):
        assert w in accept
|
||||
|
||||
def test_overrides_block_and_allow(conn):
    """Block removes a word, allow adds one, and clearing a block restores the word."""
    p = bloom.build_puzzle("2026-06-15")

    def live():
        # re-query every time so each check sees the current overrides
        return set(bloom.accepted_words(conn, p["center"], p["outer"], True))

    acc0 = live()
    victim = sorted(acc0)[0]
    bloom.set_override(conn, victim, "block", by="t")
    assert victim not in live()

    # allow a made-up letter-combo that fits the wheel + center
    fake = (p["center"] + "".join(p["outer"][:3]))[:5]
    if len(fake) >= 4 and "s" not in fake:
        bloom.set_override(conn, fake, "allow", by="t")
        assert fake in live()

    bloom.clear_override(conn, victim)
    assert victim in live()
|
||||
|
||||
def test_allow_override_rejects_inert_hard_rule_words(conn):
    """``set_override`` refuses an allow for 'cat' and 'roses' but accepts a block for 'roses'."""
    # an allow that could never count (too short / has 's') is rejected, not stored
    too_short = bloom.set_override(conn, "cat", "allow")
    assert too_short is False  # < 4 letters
    has_s = bloom.set_override(conn, "roses", "allow")
    assert has_s is False  # contains 's'
    valid = bloom.set_override(conn, "bloom", "allow")
    assert valid is True  # valid → stored
    allowed, _blocked = bloom.overrides(conn)
    assert allowed == {"bloom"}
    # block stays permissive (can block anything)
    assert bloom.set_override(conn, "roses", "block") is True
|
||||
|
||||
|
||||
def test_wild_accepts_words_without_center(conn):
    """With ``require_center=False`` some accepted words omit the centre; all stay on the wheel."""
    p = bloom.build_free("seed-w", "wild")
    acc = bloom.accepted_words(conn, p["center"], p["outer"], require_center=False)
    hub = p["center"]
    assert any(hub not in word for word in acc)  # Wild's defining trait
    assert all(frozenset(word) <= _letters(p) for word in acc)
|
||||
|
||||
|
||||
# --- responses + storage -------------------------------------------------------
|
||||
|
||||
def test_generate_is_idempotent_and_stored(conn):
    """Generating twice returns equal payloads that also equal the stored payload."""
    day = "2026-06-15"
    first = bloom.generate_bloom_puzzle(conn, day)
    again = bloom.generate_bloom_puzzle(conn, day)
    assert first == again
    stored = bloom.stored_payload(conn, day)
    assert again == stored
    assert "words" not in first  # design payload holds no answers
|
||||
|
||||
def test_response_no_leak_and_hash_roundtrip(conn):
    """The response lists hashes only; a real word's hash is present, a junk word's is not."""
    day = "2026-06-15"
    r = bloom.bloom_response(conn, day)
    _assert_no_answer_leak(r)
    p = bloom.stored_payload(conn, day)
    real = bloom.accepted_words(conn, p["center"], p["outer"], True)[0]
    real_hash = bloom.word_hash(day, real)
    assert real_hash in set(r["accepted"])
    junk_hash = bloom.word_hash(day, "zzzzq")
    assert junk_hash not in set(r["accepted"])
    assert r["max_score"] == p["max_score"]
|
||||
|
||||
def test_free_endpoint_resumes_and_leaks_nothing(api_app):
    """Re-requesting a free wild puzzle with its seed returns the same wheel; no answers leak."""
    http = TestClient(api_app)
    r1 = http.get("/api/puzzle/bloom/free?format=wild").json()
    seed = r1["seed"]
    assert r1["mode"] == "free" and r1["format"] == "wild" and seed
    r2 = http.get(f"/api/puzzle/bloom/free?format=wild&seed={seed}").json()
    same_center = r2["center"] == r1["center"]
    assert same_center and r2["outer"] == r1["outer"]
    _assert_no_answer_leak(r1)
|
||||
|
||||
|
||||
# --- server-side state ---------------------------------------------------------
|
||||
|
||||
def test_sanitize_drops_junk_recomputes_score_and_full(conn):
    """Sanitize keeps only valid found words, recomputes the score, and sets ``full`` when earned."""
    day = "2026-06-15"
    p = bloom.generate_bloom_puzzle(conn, day)
    acc = bloom.accepted_words(conn, p["center"], p["outer"], True)
    good = acc[:3]
    submitted = {"found": good + ["zzzz", "ab", good[0], 9], "score": 9999}
    clean = games.sanitize_game_state(conn, "bloom", "", day, submitted)
    assert sorted(clean["found"]) == sorted(set(good))
    assert clean["score"] == bloom.score_words(p, good)
    assert "full" not in clean

    # finding the whole common puzzle ⇒ Full Bloom (score ≥ max_score)
    everything = {"found": _commons_for(p)}
    full = games.sanitize_game_state(conn, "bloom", "", day, everything)
    assert full.get("full") is True
|
||||
|
||||
def test_merge_unions_found():
    """Merging two bloom states unions their ``found`` lists."""
    left = {"found": ["able", "bake"]}
    right = {"found": ["bake", "tale"]}
    merged = games.merge_game_state("bloom", left, right)
    assert sorted(merged["found"]) == ["able", "bake", "tale"]
|
||||
|
||||
def test_block_override_takes_effect_without_regen(conn):
    """A block override removes the word's hash from the next response."""
    # the live response reflects an override with no puzzle regeneration
    day = "2026-06-15"
    p = bloom.generate_bloom_puzzle(conn, day)
    victim = bloom.accepted_words(conn, p["center"], p["outer"], True)[0]
    before = set(bloom.bloom_response(conn, day)["accepted"])
    bloom.set_override(conn, victim, "block", by="t")
    after = set(bloom.bloom_response(conn, day)["accepted"])
    assert bloom.word_hash(day, victim) in before
    assert bloom.word_hash(day, victim) not in after
|
||||
|
||||
|
||||
# --- reports → admin queue → overrides -----------------------------------------
|
||||
|
||||
def test_report_then_approve_creates_allow_override(conn):
    """Two reports of one word leave one pending entry; approving it creates an allow override."""
    common_args = ("arraign", "2026-06-15", "daily", "center", "aceglnr")
    assert bloom.add_report(conn, *common_args, "not in the word list")
    assert bloom.add_report(conn, *common_args, "x")  # dedup pending
    pending = bloom.list_reports(conn, "pending")
    assert len(pending) == 1 and pending[0]["word"] == "arraign"
    assert bloom.resolve_report(conn, pending[0]["id"], "approve", by="admin")
    allowed, _blocked = bloom.overrides(conn)
    assert "arraign" in allowed
    assert not bloom.list_reports(conn, "pending")
    assert bloom.list_reports(conn, "approved")
|
||||
|
||||
def test_report_block_creates_block_override(conn):
    """Resolving a pending report with 'block' puts the word in the block overrides."""
    bloom.add_report(conn, "uglyword", None, "free", "wild", "abcdefg", "x")
    first_pending = bloom.list_reports(conn, "pending")[0]
    bloom.resolve_report(conn, first_pending["id"], "block", by="admin")
    _allowed, blocked = bloom.overrides(conn)
    assert "uglyword" in blocked
|
||||
|
||||
|
||||
# --- API: public report + admin endpoints --------------------------------------
|
||||
|
||||
@pytest.fixture
def api_app(tmp_path, monkeypatch):
    """Return a freshly created API app pointed at an initialised throwaway database."""
    db_path = tmp_path / "t.sqlite3"
    for key, value in (
        ("GOODNEWS_DB", str(db_path)),
        ("GOODNEWS_PUBLIC_BASE_URL", "http://testserver"),
        ("GOODNEWS_ADMIN_EMAILS", "admin@b.com"),
    ):
        monkeypatch.setenv(key, value)

    # The API module is imported and reloaded only after the env vars are set.
    import importlib
    import goodnews.api as api
    importlib.reload(api)

    handle = connect(str(db_path))
    init_db(handle)
    handle.commit()
    handle.close()
    return api.create_app()
|
||||
|
||||
|
||||
def _admin(app):
    """Return a TestClient that has run the email start/verify flow for admin@b.com.

    ``email_send.send_magic_link`` is replaced for the duration of the flow so
    the link can be captured, then restored even if a request fails.
    """
    client = TestClient(app)
    captured = {}
    import goodnews.email_send as es

    real_sender = es.send_magic_link
    es.send_magic_link = lambda to, link: captured.update(link=link)
    try:
        client.post("/api/auth/email/start", json={"email": "admin@b.com"})
        token = captured["link"].split("token=")[1]
        client.post("/api/auth/email/verify", json={"token": token})
    finally:
        es.send_magic_link = real_sender
    return client
|
||||
|
||||
|
||||
def test_public_report_then_admin_queue_flow(api_app):
    """A public report reaches the admin-only queue and approving it yields an allow override."""
    anon = TestClient(api_app)
    report = {
        "word": "arraign",
        "date": "2026-06-15",
        "mode": "daily",
        "format": "center",
        "letters": "aceglnr",
        "reason": "not in the word list",
    }
    assert anon.post("/api/bloom/report", json=report).json()["ok"]

    # admin-only queue
    unauthenticated = TestClient(api_app).get("/api/admin/bloom/reports")
    assert unauthenticated.status_code == 401

    admin = _admin(api_app)
    queue = admin.get("/api/admin/bloom/reports").json()
    assert len(queue["reports"]) == 1
    rid = queue["reports"][0]["id"]
    approval = admin.post(f"/api/admin/bloom/reports/{rid}", json={"action": "approve"})
    assert approval.json()["ok"]
    overrides = admin.get("/api/admin/bloom/reports").json()["overrides"]
    assert any(o["word"] == "arraign" and o["action"] == "allow" for o in overrides)
|
||||
@@ -5,6 +5,7 @@ from goodnews.sources import (
|
||||
list_candidates,
|
||||
promote_candidate,
|
||||
reject_candidate,
|
||||
restore_candidate,
|
||||
save_candidate,
|
||||
)
|
||||
|
||||
@@ -32,6 +33,18 @@ def test_re_preview_preserves_curator_status(conn):
|
||||
assert list_candidates(conn)[0]["status"] == "rejected"
|
||||
|
||||
|
||||
def test_restore_sends_rejected_back_to_staging(conn):
    """Restoring a rejected candidate sets it to 'suggested'; a second restore returns False."""
    def status():
        return list_candidates(conn)[0]["status"]

    save_candidate(conn, "http://x/feed")
    cid = list_candidates(conn)[0]["id"]
    reject_candidate(conn, cid)
    assert status() == "rejected"

    # restore → back to staging ('suggested'), re-enters the pending queue
    restored = restore_candidate(conn, cid)
    assert restored is True
    assert status() == "suggested"

    # restoring a non-rejected candidate is a no-op (only un-rejects)
    restored_again = restore_candidate(conn, cid)
    assert restored_again is False
|
||||
|
||||
|
||||
def test_promote_creates_inactive_source_and_marks_promoted(conn):
|
||||
cand = save_candidate(conn, "http://x/feed", name="Lovely Feed")
|
||||
source_id = promote_candidate(conn, cand["id"]) # inactive by default
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
"""CLI honors GOODNEWS_DB for its default --db, matching db.connect. Without this, a
|
||||
copy-DB maintenance run (e.g. `dedup --force-recluster`) silently targets production."""
|
||||
from pathlib import Path
|
||||
|
||||
from goodnews.cli import DEFAULT_DB, _default_db
|
||||
|
||||
|
||||
def test_default_db_honors_env(monkeypatch):
    """With GOODNEWS_DB set, ``_default_db()`` equals that path."""
    monkeypatch.setenv("GOODNEWS_DB", "/tmp/some-copy.sqlite3")
    expected = Path("/tmp/some-copy.sqlite3")
    assert _default_db() == expected
|
||||
|
||||
|
||||
def test_default_db_falls_back_to_bundled(monkeypatch):
    """With GOODNEWS_DB unset, ``_default_db()`` equals ``DEFAULT_DB``."""
    monkeypatch.delenv("GOODNEWS_DB", raising=False)
    resolved = _default_db()
    assert resolved == DEFAULT_DB
|
||||
+31
-3
@@ -35,7 +35,7 @@ def conn():
|
||||
c.close()
|
||||
|
||||
|
||||
def _add(conn, article_id, vector, constructive, when="2026-05-30T10:00:00+00:00"):
|
||||
def _add(conn, article_id, vector, constructive, when="2026-05-30T10:00:00+00:00", accepted=1):
|
||||
conn.execute(
|
||||
"INSERT INTO articles (id, source_id, canonical_url, title, published_at, url_hash) "
|
||||
"VALUES (?, 1, ?, ?, ?, ?)",
|
||||
@@ -44,8 +44,8 @@ def _add(conn, article_id, vector, constructive, when="2026-05-30T10:00:00+00:00
|
||||
conn.execute(
|
||||
"INSERT INTO article_scores (article_id, constructive_score, agency_score, "
|
||||
"human_benefit_score, cortisol_score, ragebait_score, pr_risk_score, accepted) "
|
||||
"VALUES (?, ?, 0, 0, 0, 0, 0, 1)",
|
||||
(article_id, constructive),
|
||||
"VALUES (?, ?, 0, 0, 0, 0, 0, ?)",
|
||||
(article_id, constructive, accepted),
|
||||
)
|
||||
conn.execute(
|
||||
"INSERT INTO article_embeddings (article_id, vector, dim, model) VALUES (?, ?, ?, 'test')",
|
||||
@@ -69,6 +69,34 @@ def test_near_duplicates_collapse_to_highest_ranked(conn):
|
||||
assert dup_of[3] is None # C stands alone
|
||||
|
||||
|
||||
def test_accepted_member_beats_a_higher_quality_rejected_one(conn):
    """The accepted article becomes the representative even though its rejected twin scores higher."""
    # The rep must be SERVEABLE: an accepted page may never be retired to a rejected
    # representative (that page would 404 with nothing to 301 to). Accepted wins even
    # though the rejected twin scores higher on quality.
    _add(conn, 1, [1.0, 0.0, 0.0, 0.0], constructive=9, accepted=0)   # higher quality, REJECTED
    _add(conn, 2, [0.99, 0.02, 0.0, 0.0], constructive=3, accepted=1)  # lower quality, accepted
    cluster_duplicates(conn, threshold=0.86, window_days=3)

    dup_of = {}
    for row in conn.execute("SELECT id, duplicate_of FROM articles"):
        dup_of[row["id"]] = row["duplicate_of"]
    assert dup_of[2] is None  # the accepted article is the representative (serves 200)
    assert dup_of[1] == 2     # the rejected one points at it
|
||||
|
||||
|
||||
def test_established_rep_stays_stable_when_a_better_twin_arrives(conn):
    """An existing representative stays canonical when a higher-scoring near-duplicate is added."""
    # An already-indexed canonical shouldn't churn just because a higher-quality near
    # duplicate shows up later. Establish 1 as rep (with follower 3), then a stronger 2
    # arrives — 1 must remain the representative for URL stability.
    _add(conn, 1, [1.0, 0.0, 0.0, 0.0], constructive=5)
    _add(conn, 3, [0.99, 0.01, 0.0, 0.0], constructive=1)
    cluster_duplicates(conn, threshold=0.86, window_days=3)  # run 1: 1 is rep (score 5 > 1)
    incumbent = conn.execute("SELECT duplicate_of FROM articles WHERE id=1").fetchone()
    assert incumbent[0] is None

    _add(conn, 2, [0.995, 0.01, 0.0, 0.0], constructive=9)  # higher quality newcomer
    cluster_duplicates(conn, threshold=0.86, window_days=3)  # run 2

    dup_of = {}
    for row in conn.execute("SELECT id, duplicate_of FROM articles"):
        dup_of[row["id"]] = row["duplicate_of"]
    assert dup_of[1] is None  # incumbent stays canonical despite 2's higher score
    assert dup_of[2] == 1 and dup_of[3] == 1
|
||||
|
||||
|
||||
def test_distinct_articles_are_not_clustered(conn):
|
||||
_add(conn, 1, [1.0, 0.0, 0.0, 0.0], constructive=5)
|
||||
_add(conn, 2, [0.0, 1.0, 0.0, 0.0], constructive=5)
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
"""English-only gate: non-English articles are HELD (reason_code='non_english'),
|
||||
preserved (not deleted) and distinct from calm-filter rejections, so they don't
|
||||
penalize a multilingual source and can be revisited when translation lands."""
|
||||
from goodnews import queries
|
||||
from goodnews.db import connect, init_db
|
||||
from goodnews.llm import normalize_scores, upsert_article_score
|
||||
|
||||
|
||||
def _data(**kw):
|
||||
base = {
|
||||
"constructive_score": 7, "cortisol_score": 1, "ragebait_score": 1, "agency_score": 5,
|
||||
"human_benefit_score": 6, "novelty_score": 4, "pr_risk_score": 2, "accepted": True,
|
||||
"topic": "science", "flavor": "discovery", "tags": [],
|
||||
"reason_code": "ok", "reason_text": "good",
|
||||
}
|
||||
base.update(kw)
|
||||
return base
|
||||
|
||||
|
||||
def test_english_passes_through():
    """An 'en' payload stays accepted with reason 'ok' and language 'en'."""
    scores = normalize_scores(_data(language="en"), "m")
    assert scores["accepted"] == 1
    assert scores["reason_code"] == "ok"
    assert scores["language"] == "en"
|
||||
|
||||
|
||||
def test_en_variants_count_as_english():
    """Each of 'en-US', 'EN', 'en_us' and 'en-GB' normalizes to accepted."""
    variants = ("en-US", "EN", "en_us", "en-GB")
    for code in variants:
        scores = normalize_scores(_data(language=code), "m")
        assert scores["accepted"] == 1
|
||||
|
||||
|
||||
def test_non_english_is_held_not_a_rejection():
    """A 'de' payload is not accepted and carries the 'non_english' reason code."""
    held = normalize_scores(_data(language="de"), "m")
    assert held["accepted"] == 0
    # distinct bucket, not a calm-filter reject
    assert held["reason_code"] == "non_english"
    assert held["language"] == "de"
    assert "non-English" in held["reason_text"]
|
||||
|
||||
|
||||
def test_missing_or_unknown_language_defaults_to_english():
    """An empty, 'und' or absent language still normalizes to accepted."""
    # a model hiccup must never silently drop genuine English content
    payloads = (
        _data(language=""),
        _data(language="und"),
        _data(),  # no language key at all
    )
    for payload in payloads:
        assert normalize_scores(payload, "m")["accepted"] == 1
|
||||
|
||||
|
||||
def test_non_english_buckets_even_a_content_reject():
    """A non-English payload already rejected as 'ragebait' is re-coded 'non_english'."""
    # a non-English item that was also content-rejected is still 'held', so source
    # metrics can separate language-holds from calm rejections cleanly
    payload = _data(language="es", accepted=False, reason_code="ragebait")
    scores = normalize_scores(payload, "m")
    assert scores["accepted"] == 0 and scores["reason_code"] == "non_english"
|
||||
|
||||
|
||||
def test_language_persisted_structurally_and_inspector_marks_held():
    """The language column is stored, and the article lists under 'held' but not 'rejected'."""
    db = connect(":memory:")
    init_db(db)
    db.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'S','http://s/f',5)")
    db.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash) VALUES (1,1,'http://x','T','h1')")
    db.commit()

    scores = normalize_scores(_data(language="de"), "m")
    upsert_article_score(db, 1, scores)
    row = db.execute("SELECT accepted, reason_code, language FROM article_scores WHERE article_id=1").fetchone()
    # structured, not parsed
    assert row["language"] == "de" and row["reason_code"] == "non_english" and row["accepted"] == 0

    # inspector: shows under 'held', flagged held=True, and NOT under 'rejected'
    held = queries.source_articles(db, 1, filter="held")
    assert len(held) == 1 and held[0]["held"] is True
    assert queries.source_articles(db, 1, filter="rejected") == []
|
||||
@@ -0,0 +1,134 @@
|
||||
"""Memory Match server state — light, durability-only (no anti-cheat; the board is
|
||||
deterministic and fully visible). Locks: malformed keys dropped, matched stored as
|
||||
deduped face KEYS, `done` DERIVED from the matched count vs the tier's target (never
|
||||
trusted from the client), cross-device merge unions matched, and the sync endpoint
|
||||
accepts only valid match variants."""
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from goodnews import games
|
||||
from goodnews.db import connect, init_db
|
||||
|
||||
|
||||
def _san(variant, state):
    """Sanitize a 'match' game state for ``variant`` on 2026-06-16, passing no DB connection."""
    no_conn = None
    day = "2026-06-16"
    return games.sanitize_game_state(no_conn, "match", variant, day, state)
|
||||
|
||||
|
||||
def test_sanitize_drops_junk_and_dedupes():
    """Sanitize keeps three valid keys in order, clamps moves to 0 and reports not done."""
    submitted = {
        "matched": ["leaf", "leaf", "color-rose", "banana", "BAD KEY!", 42, "x" * 40, "sun"],
        "moves": -5,
        "done": "yes",
    }
    cleaned = _san("standard-icons", submitted)
    # deduped + validated against the real face set ("banana"/junk dropped), order kept
    assert cleaned["matched"] == ["leaf", "color-rose", "sun"]
    assert cleaned["moves"] == 0      # clamped ≥ 0
    assert cleaned["done"] is False   # 3 < 8 faces — client's "yes" ignored
|
||||
|
||||
|
||||
def test_done_is_derived_from_matched_count_not_client_flag():
    """``done`` follows the matched count against the tier target, not the client's flag."""
    eight = ["leaf", "sun", "star", "moon", "cloud", "wave", "tree", "heart"]  # real face keys

    # Nothing matched, yet the client claims completion: reported as not done.
    claimed_empty = _san("standard-icons", {"matched": [], "done": True})
    assert claimed_empty["done"] is False

    # Eight matches meet the standard target even though the client sent done=False.
    full_board = _san("standard-icons", {"matched": eight, "done": False})
    assert full_board["done"] is True

    # Six matches complete the gentle tier but not the standard one.
    gentle = _san("gentle-icons", {"matched": eight[:6]})
    assert gentle["done"] is True
    standard = _san("standard-icons", {"matched": eight[:6]})
    assert standard["done"] is False
|
||||
|
||||
|
||||
def test_sanitize_caps_face_count():
    """Fifteen valid colour faces are truncated to twelve by sanitizing."""
    fifteen = [
        "color-rose", "color-coral", "color-amber",
        "color-gold", "color-lime", "color-green",
        "color-teal", "color-cyan", "color-sky",
        "color-blue", "color-indigo", "color-violet",
        "color-plum", "color-brown", "color-sand",
    ]
    cleaned = _san("expert-colors", {"matched": fifteen})
    kept = cleaned["matched"]
    assert len(kept) == 12  # the cap the original comment calls _MATCH_MAX_FACES
|
||||
|
||||
|
||||
def test_merge_unions_matched_and_keeps_moves_without_trusting_done():
    """Merging two device states unions ``matched``, keeps the larger ``moves``, omits ``done``."""
    device_one = {"matched": ["leaf", "sun"], "moves": 7, "done": False}
    device_two = {"matched": ["sun", "star"], "moves": 4, "done": True}

    merged = games.merge_game_state("match", device_one, device_two)

    # Every face seen on either device, with the shared one counted once.
    assert sorted(merged["matched"]) == ["leaf", "star", "sun"]
    # The higher of the two move counts wins.
    assert merged["moves"] == 7
    # The merge result carries no done flag at all; sanitizing derives it later.
    assert "done" not in merged
|
||||
|
||||
|
||||
@pytest.fixture
def api_app(tmp_path, monkeypatch):
    """Return a freshly created app backed by an initialised SQLite file in ``tmp_path``."""
    db_path = tmp_path / "t.sqlite3"
    environment = {
        "GOODNEWS_DB": str(db_path),
        "GOODNEWS_PUBLIC_BASE_URL": "http://testserver",
        "GOODNEWS_ADMIN_EMAILS": "admin@b.com",
    }
    for key, value in environment.items():
        monkeypatch.setenv(key, value)

    # The module is reloaded only after the environment has been patched.
    # NOTE(review): presumably so import-time configuration is re-read — confirm.
    import importlib
    import goodnews.api as api
    importlib.reload(api)

    # Create the schema up front and release the handle before the app is built.
    handle = connect(str(db_path))
    init_db(handle)
    handle.commit()
    handle.close()
    return api.create_app()
|
||||
|
||||
|
||||
def _signin(app, email="p@b.com"):
    """Return a ``TestClient`` that has gone through the magic-link flow as ``email``.

    ``email_send.send_magic_link`` is swapped for a recorder while the two auth
    requests run, and is always put back afterwards.
    """
    client = TestClient(app)
    captured = {}

    def _record(to, link):
        # Keep only the link; the token is parsed out of it below.
        captured.update(link=link)

    import goodnews.email_send as es
    real_sender = es.send_magic_link
    es.send_magic_link = _record
    try:
        client.post("/api/auth/email/start", json={"email": email})
        token = captured["link"].split("token=")[1]
        client.post("/api/auth/email/verify", json={"token": token})
    finally:
        es.send_magic_link = real_sender
    return client
|
||||
|
||||
|
||||
def _put(tc, variant, state):
    """PUT ``state`` for a Memory Match ``variant`` on the fixed test date; return the response."""
    payload = {"game": "match", "variant": variant, "date": "2026-06-16", "state": state}
    return tc.put("/api/games/state", json=payload)
|
||||
|
||||
|
||||
def test_sync_endpoint_flow(api_app):
    """Signed-in sync: states merge across PUTs, ``done`` is derived, unknown variants 404."""
    client = _signin(api_app)

    first = _put(client, "standard-icons", {"matched": ["leaf", "sun"], "moves": 3, "done": False})
    assert first.status_code == 200
    first_state = first.json()["state"]
    assert sorted(first_state["matched"]) == ["leaf", "sun"]
    assert first_state["done"] is False

    # A second submission adds one more face; the board is still partial.
    second = _put(client, "standard-icons", {"matched": ["star"], "moves": 1, "done": True})
    second_state = second.json()["state"]
    assert sorted(second_state["matched"]) == ["leaf", "star", "sun"]
    assert second_state["done"] is False  # three of eight; the client's flag is ignored

    # Sending all eight real faces finishes the board.
    all_faces = ["leaf", "sun", "star", "moon", "cloud", "wave", "tree", "heart"]
    third = _put(client, "standard-icons", {"matched": all_faces, "moves": 12})
    assert third.json()["state"]["done"] is True

    # A variant the server does not know is refused.
    unknown = _put(client, "huge-icons", {})
    assert unknown.status_code == 404
|
||||
|
||||
|
||||
def test_batch_endpoint_reconciles_many_and_drops_bad(api_app):
    """The batch endpoint syncs several games at once and silently drops invalid items."""
    client = _signin(api_app)
    items = [
        {"game": "match", "variant": "standard-icons", "state": {"matched": ["leaf", "sun"], "moves": 2}},
        {"game": "bloom", "variant": "", "state": {"found": []}},
        {"game": "match", "variant": "bogus-xyz", "state": {}},  # unknown variant
    ]
    response = client.put("/api/games/state/batch", json={"date": "2026-06-16", "items": items})
    assert response.status_code == 200

    states = response.json()["states"]
    seen = {(entry["game"], entry["variant"]) for entry in states}
    assert ("match", "standard-icons") in seen
    assert ("bloom", "") in seen
    # The bad item is left out rather than failing the whole request.
    assert ("match", "bogus-xyz") not in seen

    standard = next(entry for entry in states if entry["variant"] == "standard-icons")
    assert sorted(standard["state"]["matched"]) == ["leaf", "sun"]

    # A later batch for the same variant merges with what is already stored.
    follow_up = client.put("/api/games/state/batch", json={"date": "2026-06-16", "items": [
        {"game": "match", "variant": "standard-icons", "state": {"matched": ["star"], "moves": 5}}]})
    merged = follow_up.json()["states"][0]["state"]
    assert sorted(merged["matched"]) == ["leaf", "star", "sun"]
    assert merged["moves"] == 5
|
||||
|
||||
|
||||
def test_batch_endpoint_signed_out_echoes(api_app):
    """Without a session the batch endpoint answers 200 and echoes the submitted state.

    Uses the module-level ``TestClient`` import; the function-local re-import the
    test used to carry was redundant.
    """
    anonymous = TestClient(api_app)
    body = {"date": "2026-06-16", "items": [
        {"game": "match", "variant": "gentle-colors", "state": {"matched": ["color-rose"]}}]}
    r = anonymous.put("/api/games/state/batch", json=body)
    assert r.status_code == 200
    # The state comes back exactly as sent: nothing was merged or stored.
    assert r.json()["states"][0]["state"] == {"matched": ["color-rose"]}
|
||||
@@ -0,0 +1,329 @@
|
||||
"""Publishing Desk Phase 1 — queue logic, top-up/dedup semantics, comparative LLM
|
||||
ranking with deterministic fallback, verified handle resolution, status transitions."""
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import pytest
|
||||
|
||||
from goodnews import publishing
|
||||
from goodnews.db import connect, init_db
|
||||
|
||||
BASE = "https://ub.test"
|
||||
|
||||
|
||||
def _ts(hours_ago: float) -> str:
    """Format the UTC instant ``hours_ago`` hours before now as ``YYYY-MM-DD HH:MM:SS``.

    A negative ``hours_ago`` yields a timestamp in the future.
    """
    moment = datetime.now(timezone.utc) - timedelta(hours=hours_ago)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
|
||||
@pytest.fixture
def conn():
    """Yield an initialised in-memory database connection and close it afterwards."""
    database = connect(":memory:")
    init_db(database)
    yield database
    database.close()
|
||||
|
||||
|
||||
def _src(c, sid, x_handle=None, paywall_override=None, content_visible=1):
    """Insert a source row with id ``sid``; name and feed URL are derived from the id.

    ``trust_score`` is always 5. The caller is responsible for committing.
    """
    statement = (
        "INSERT INTO sources (id,name,feed_url,trust_score,content_visible,x_handle,paywall_override) "
        "VALUES (?,?,?,?,?,?,?)"
    )
    name = f"Source {sid}"
    feed_url = f"http://s{sid}/feed"
    c.execute(statement, (sid, name, feed_url, 5, content_visible, x_handle, paywall_override))
|
||||
|
||||
|
||||
def _article(c, aid, sid, *, accepted=1, dup=None, novelty=5, constructive=5, topic="science",
             url=None, image="http://img/x.jpg", hours_ago=1.0, complete=True):
    """Insert article ``aid`` for source ``sid`` together with its score row.

    ``dup`` marks the article as a duplicate of another id, ``hours_ago`` sets
    ``published_at`` relative to now, and ``complete`` controls whether a summary
    row (with all explanation fields filled) is written. No commit is issued.
    """
    canonical = url or f"https://ex{aid}.com/a"
    article_row = (aid, sid, canonical, f"Title {aid}", f"h{aid}", image, _ts(hours_ago))
    c.execute(
        "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url,published_at) "
        "VALUES (?,?,?,?,?,?,?)",
        article_row,
    )

    if dup is not None:
        c.execute("UPDATE articles SET duplicate_of=? WHERE id=?", (dup, aid))

    score_row = (aid, accepted, novelty, constructive, topic)
    c.execute(
        "INSERT INTO article_scores (article_id,accepted,novelty_score,constructive_score,topic,reason_code) "
        "VALUES (?,?,?,?,?, 'ok')",
        score_row,
    )

    if not complete:
        return
    summary_row = (aid, f"Summary {aid}", "wh", "wm", "wb")
    c.execute(
        "INSERT INTO article_summaries (article_id,summary,what_happened,why_matters,why_belongs) "
        "VALUES (?,?,?,?,?)",
        summary_row,
    )
|
||||
|
||||
|
||||
def _seed_n(c, n):
    """Seed ``n`` articles, one per source, then commit.

    Article ``i`` belongs to source ``i``, has novelty ``10 - i`` and its own
    topic ``t{i}``, so no two seeded articles share a source or a topic.
    """
    for number in range(1, n + 1):
        _src(c, number)
        _article(c, number, number, novelty=10 - number, topic=f"t{number}")
    c.commit()
|
||||
|
||||
|
||||
class FakeClient:
    """Ranking client stand-in that always answers with a canned list."""

    def __init__(self, ranked):
        # Stored as given and handed back untouched on every call.
        self._ranked = ranked

    def rank_for_social(self, candidates):
        """Ignore ``candidates`` and return the canned ranking."""
        return self._ranked
|
||||
|
||||
|
||||
class BoomClient:
    """Ranking client stand-in whose every call fails."""

    def rank_for_social(self, candidates):
        """Always raise ``RuntimeError`` regardless of ``candidates``."""
        raise RuntimeError("model down")
|
||||
|
||||
|
||||
# --- handle resolution ----------------------------------------------------------
|
||||
|
||||
def test_handles_source_first_then_entities_deduped_capped(conn):
    """The source handle leads, matched entities follow, and the result stops at two."""
    publishing.add_entity_handle(conn, "Anthropic", "AnthropicAI", "https://x.com/AnthropicAI")
    publishing.add_entity_handle(conn, "NASA", "NASA")

    entities = ["Anthropic", "NASA", "Unknown Org"]
    resolved = publishing.resolve_handles(conn, entities, source_handle="Phys_org")

    lead = resolved[0]
    assert lead["via"] == "source"
    assert lead["handle"] == "@Phys_org"
    assert len(resolved) == 2  # the cap
    # The first matching entity takes the remaining slot; NASA does not fit.
    assert resolved[1]["handle"] == "@AnthropicAI"
    for suggestion in resolved:
        assert suggestion["handle"].startswith("@")
|
||||
|
||||
|
||||
def test_handles_aliases_resolve_consistently(conn):
    """Two names stored with the same handle resolve to it, regardless of letter case."""
    for name in ("Johns Hopkins University", "Johns Hopkins"):  # second row is the alias
        publishing.add_entity_handle(conn, name, "HopkinsMedicine")

    full_name = publishing.resolve_handles(conn, ["Johns Hopkins University"])
    lower_alias = publishing.resolve_handles(conn, ["johns hopkins"])

    assert full_name
    assert lower_alias
    assert full_name[0]["handle"] == lower_alias[0]["handle"]
    assert lower_alias[0]["handle"] == "@HopkinsMedicine"
|
||||
|
||||
|
||||
def test_handles_unknown_entity_is_not_guessed(conn):
    """An entity with no stored handle yields no suggestion at all."""
    suggestions = publishing.resolve_handles(conn, ["Some Random Startup"])
    assert suggestions == []
|
||||
|
||||
|
||||
def test_normalization_does_not_collide_identity_words(conn):
    """A handle stored for the university is never offered for the bare state name."""
    publishing.add_entity_handle(conn, "University of California", "UCBerkeley")

    # The state alone must not pick up the school's handle.
    state_only = publishing.resolve_handles(conn, ["California"])
    assert state_only == []

    # The exact stored name still resolves.
    exact = publishing.resolve_handles(conn, ["University of California"])
    assert exact
    assert exact[0]["handle"] == "@UCBerkeley"
|
||||
|
||||
|
||||
def test_normalization_preserves_the_and_strips_only_trailing_legal(conn):
    """Normalizing keeps "the" and removes a legal suffix only at the end of a name."""
    expectations = (
        ("The Who", "the who"),              # "the" stays
        ("Inc. Magazine", "inc magazine"),   # a leading legal word stays
        ("Apple Inc", "apple"),              # a trailing legal word goes
    )
    for raw, normalized in expectations:
        assert publishing.normalize_entity(raw) == normalized

    # As a result "The Who" and "WHO" each keep their own handle.
    for name, handle in (("The Who", "TheWho"), ("WHO", "WHO")):
        publishing.add_entity_handle(conn, name, handle)
    band = publishing.resolve_handles(conn, ["The Who"])
    assert band[0]["handle"] == "@TheWho"
    agency = publishing.resolve_handles(conn, ["WHO"])
    assert agency[0]["handle"] == "@WHO"
|
||||
|
||||
|
||||
def test_invalid_handles_are_rejected_not_stored(conn):
    """Malformed handles fail validation and storage; a single leading ``@`` is tolerated."""
    rejects = ("", "@", "not a handle", "https://x.com/NASA", "NASA!", "way_too_long_handle_x")
    for candidate in rejects:
        assert publishing.valid_handle(candidate) is None
        assert publishing.add_entity_handle(conn, "Some Org", candidate) is False

    # A well-formed handle with one leading @ validates to its bare form.
    assert publishing.valid_handle("@NASA") == "NASA"
    assert publishing.add_entity_handle(conn, "NASA", "@NASA") is True
    stored = publishing.resolve_handles(conn, ["NASA"])
    assert stored[0]["handle"] == "@NASA"

    # A malformed source handle produces no suggestion either.
    from_bad_source = publishing.resolve_handles(conn, [], source_handle="@bad handle!")
    assert from_bad_source == []
|
||||
|
||||
|
||||
# --- eligibility ----------------------------------------------------------------
|
||||
|
||||
def test_eligibility_excludes_the_unfit(conn):
    """Only article 1 is eligible; every other seeded article fails exactly one criterion."""
    _src(c=conn, sid=1)
    _article(conn, 1, 1)                       # the one eligible article
    _article(conn, 2, 1, accepted=0)           # not accepted
    _article(conn, 3, 1, dup=1)                # marked as a duplicate
    _article(conn, 4, 1, complete=False)       # no summary row
    _article(conn, 5, 1, hours_ago=24 * 10)    # published ten days ago

    _src(conn, 2, content_visible=0)
    _article(conn, 6, 2)                       # its source is hidden

    _src(conn, 3, paywall_override="paywalled")
    _article(conn, 7, 3)                       # its source is paywalled
    conn.commit()

    eligible_ids = set()
    for candidate in publishing.eligible_candidates(conn):
        eligible_ids.add(candidate["id"])
    assert eligible_ids == {1}
|
||||
|
||||
|
||||
# --- build: deterministic fallback + top-up/dedup -------------------------------
|
||||
|
||||
def test_build_tops_up_to_target_and_dedups(conn):
    """Building fills the queue to ``target``, never duplicates, and never re-queues a posted item."""
    _seed_n(conn, 6)

    first = publishing.build_queue(conn, BASE, client=None, target=3)
    assert first["added"] == 3
    assert first["ranked_by"] == "deterministic"

    queue = publishing.list_queue(conn)
    assert len(queue) == 3
    share_prefix = BASE + "/a/"
    assert all(item["share_url"].startswith(share_prefix) for item in queue)
    assert "utm_source=x" in queue[0]["share_url"]

    # The queue is already full, so an identical rebuild adds nothing.
    repeat = publishing.build_queue(conn, BASE, client=None, target=3)
    assert repeat["added"] == 0

    # Posting one item frees a slot; the next build fills it with a different article.
    head = queue[0]
    posted_id, posted_article = head["id"], head["article_id"]
    publishing.set_status(conn, posted_id, "posted")
    topped_up = publishing.build_queue(conn, BASE, client=None, target=3)
    assert topped_up["added"] == 1
    active_articles = {item["article_id"] for item in publishing.list_queue(conn)}
    assert posted_article not in active_articles
|
||||
|
||||
|
||||
def test_build_preserves_saved_draft_on_requeue(conn):
    """An item whose snooze has lapsed returns to the queue with its draft text intact."""
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]
    publishing.save_draft(conn, share_id, "my carefully written blurb")

    # Write the lapsed snooze straight into the table: set_status rejects a past date.
    lapsed = _ts(1)
    conn.execute(
        "UPDATE outbound_shares SET status='snoozed', snooze_until=? WHERE id=?",
        (lapsed, share_id),
    )
    conn.commit()

    # This build is what puts the item back in the queue.
    publishing.build_queue(conn, BASE, client=None, target=1)

    row = conn.execute(
        "SELECT status, draft_text FROM outbound_shares WHERE id=?", (share_id,)
    ).fetchone()
    assert row["status"] == "queued"
    assert row["draft_text"] == "my carefully written blurb"
|
||||
|
||||
|
||||
# --- build: comparative LLM ranking + fallback ----------------------------------
|
||||
|
||||
def test_build_uses_llm_ranking_and_attaches_fields(conn):
    """With a ranking client, queue order and card fields come from its answer."""
    _seed_n(conn, 3)
    publishing.add_entity_handle(conn, "NASA", "NASA")

    best = {"id": 3, "social_score": 9, "why": "wow", "talking_points": ["a", "b", "c"],
            "angle": "ask a question", "entities": ["NASA"]}
    runner_up = {"id": 1, "social_score": 4, "why": "ok", "talking_points": [],
                 "angle": "", "entities": []}
    ranker = FakeClient([best, runner_up])

    outcome = publishing.build_queue(conn, BASE, client=ranker, target=2)
    assert outcome["ranked_by"] == "llm"
    assert outcome["added"] == 2

    first_card = publishing.list_queue(conn)[0]
    # The client's top pick leads the queue and carries its score.
    assert first_card["article_id"] == 3
    assert first_card["social_score"] == 9
    assert first_card["talking_points"] == ["a", "b", "c"]
    assert first_card["angle"] == "ask a question"
    # The named entity resolved to its stored handle.
    assert any(h["handle"] == "@NASA" for h in first_card["suggested_handles"])
|
||||
|
||||
|
||||
def test_build_falls_back_when_llm_errors(conn):
    """A ranking client that raises does not stop the build; ranking goes deterministic."""
    _seed_n(conn, 3)
    outcome = publishing.build_queue(conn, BASE, client=BoomClient(), target=2)
    assert outcome["ranked_by"] == "deterministic"
    assert outcome["added"] == 2
|
||||
|
||||
|
||||
def test_deterministic_fallback_seeds_aids_but_leaves_score_and_angle_empty(conn):
    """Without a ranking client the card gets summary-based aids but no score or angle.

    Rationale and talking points are filled from the stored summary and
    explanation fields; ``social_score`` and ``angle`` stay ``None`` because
    only a ranking client supplies them.
    """
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    card = publishing.list_queue(conn)[0]

    assert card["rationale"] == "Summary 1"
    assert card["talking_points"] == ["wh", "wm", "wb"]
    assert card["social_score"] is None
    assert card["angle"] is None
|
||||
|
||||
|
||||
# --- adversarial: malformed LLM output ------------------------------------------
|
||||
|
||||
def test_duplicate_llm_ids_do_not_inflate_the_queue(conn):
    """Repeated ids in the ranking count once: ``added`` equals the unique rows queued."""
    _seed_n(conn, 2)  # only two real articles exist
    ranked = [
        {"id": 1, "social_score": 9},
        {"id": 1, "social_score": 9},
        {"id": 1, "social_score": 9},
        {"id": 2, "social_score": 5},
    ]
    outcome = publishing.build_queue(conn, BASE, client=FakeClient(ranked), target=5)
    queue = publishing.list_queue(conn)

    # Neither the target (5) nor the count of distinct ranking entries decides this.
    assert outcome["added"] == len(queue)
    assert len(queue) == 2
    distinct_articles = {item["article_id"] for item in queue}
    assert len(distinct_articles) == 2
|
||||
|
||||
|
||||
def test_string_fields_do_not_become_char_arrays(conn):
    """A string where a list is expected is stored as ``[]``, not split into characters."""
    _seed_n(conn, 1)
    malformed = {"id": 1, "social_score": 7, "talking_points": "fact", "entities": "NASA"}
    publishing.build_queue(conn, BASE, client=FakeClient([malformed]), target=1)

    card = publishing.list_queue(conn)[0]
    assert card["talking_points"] == []
    assert card["entities"] == []
|
||||
|
||||
|
||||
# --- lifecycle enforcement ------------------------------------------------------
|
||||
|
||||
def test_posted_is_terminal_and_cannot_be_requeued(conn):
    """Once posted, an item can be neither re-queued nor restored."""
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]

    assert publishing.set_status(conn, share_id, "posted") is True
    # Both routes back to the queue are refused.
    assert publishing.set_status(conn, share_id, "queued") is False
    assert publishing.restore(conn, share_id) is False

    row = conn.execute("SELECT status FROM outbound_shares WHERE id=?", (share_id,)).fetchone()
    assert row["status"] == "posted"
|
||||
|
||||
|
||||
def test_late_autosave_is_rejected_after_terminal(conn):
    """A draft save that arrives after posting is refused and leaves the stored text alone."""
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]

    saved_while_active = publishing.save_draft(conn, share_id, "draft while active")
    assert saved_while_active is True

    publishing.set_status(conn, share_id, "posted")
    saved_after_post = publishing.save_draft(conn, share_id, "late autosave")
    assert saved_after_post is False

    row = conn.execute(
        "SELECT draft_text FROM outbound_shares WHERE id=?", (share_id,)
    ).fetchone()
    # The text saved before posting is still what is stored.
    assert row["draft_text"] == "draft while active"
|
||||
|
||||
|
||||
def test_posted_rows_never_appear_in_queue_or_archived_tray(conn):
    """A posted item is listed neither in the working queue nor with archived items included."""
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]
    publishing.set_status(conn, share_id, "posted")

    working = publishing.list_queue(conn)
    assert working == []
    with_archived = publishing.list_queue(conn, include_archived=True)
    assert with_archived == []
|
||||
|
||||
|
||||
def test_snooze_requires_a_future_date(conn):
    """Snoozing needs a future ``snooze_until``; restoring afterwards clears it."""
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]

    # No date at all is refused.
    assert publishing.set_status(conn, share_id, "snoozed", snooze_until=None) is False
    # A date one hour in the past is refused.
    assert publishing.set_status(conn, share_id, "snoozed", snooze_until=_ts(1)) is False
    # A date 48 hours ahead is accepted.
    assert publishing.set_status(conn, share_id, "snoozed", snooze_until=_ts(-48)) is True

    publishing.restore(conn, share_id)
    row = conn.execute(
        "SELECT snooze_until FROM outbound_shares WHERE id=?", (share_id,)
    ).fetchone()
    assert row["snooze_until"] is None
|
||||
|
||||
|
||||
# --- status transitions + restore + snooze --------------------------------------
|
||||
|
||||
def test_skip_is_reversible_and_snooze_is_separate(conn):
    """Skipping moves an item to the archived listing and restore brings it back; snoozing hides it too."""

    def listed_ids(**kwargs):
        # Ids currently returned by list_queue with the given options.
        return {item["id"] for item in publishing.list_queue(conn, **kwargs)}

    _seed_n(conn, 2)
    publishing.build_queue(conn, BASE, client=None, target=2)
    queue = publishing.list_queue(conn)
    skipped_id, snoozed_id = queue[0]["id"], queue[1]["id"]

    publishing.set_status(conn, skipped_id, "skipped")
    assert skipped_id not in listed_ids()                    # left the working queue
    assert skipped_id in listed_ids(include_archived=True)   # still listed with archived
    assert publishing.restore(conn, skipped_id) is True
    assert skipped_id in listed_ids()                        # back in the working queue

    # Snooze 24 hours ahead: the row records the date and drops out of the working queue.
    publishing.set_status(conn, snoozed_id, "snoozed", snooze_until=_ts(-24))
    row = conn.execute(
        "SELECT status, snooze_until FROM outbound_shares WHERE id=?", (snoozed_id,)
    ).fetchone()
    assert row["status"] == "snoozed"
    assert row["snooze_until"]
    assert snoozed_id not in listed_ids()
|
||||
|
||||
|
||||
def test_inflight_build_does_not_clobber_a_freshly_extended_snooze(conn):
    """A snooze pushed into the future while ranking is running survives the build.

    The item starts with a lapsed snooze. During ranking the snooze date is
    moved 48 hours ahead, and the finished build must leave the row snoozed
    with that new date.
    """
    _seed_n(conn, 1)
    publishing.build_queue(conn, BASE, client=None, target=1)
    share_id = publishing.list_queue(conn)[0]["id"]

    # Start from a snooze that lapsed an hour ago.
    conn.execute(
        "UPDATE outbound_shares SET status='snoozed', snooze_until=? WHERE id=?",
        (_ts(1), share_id),
    )
    conn.commit()
    extended_until = _ts(-48)

    class RaceClient:
        def rank_for_social(self, candidates):
            # Simulate the user extending the snooze while the build is mid-rank.
            conn.execute(
                "UPDATE outbound_shares SET snooze_until=? WHERE id=?",
                (extended_until, share_id),
            )
            conn.commit()
            return [{"id": 1, "social_score": 9}]

    publishing.build_queue(conn, BASE, client=RaceClient(), target=1)

    row = conn.execute(
        "SELECT status, snooze_until FROM outbound_shares WHERE id=?", (share_id,)
    ).fetchone()
    assert row["status"] == "snoozed"
    assert row["snooze_until"] == extended_until
|
||||
@@ -0,0 +1,118 @@
|
||||
"""Publishing Desk Phase 1 — admin API: gating, background build (deterministic
|
||||
fallback), lifecycle enforcement, snooze validation, draft preservation, restore."""
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from goodnews.db import connect, init_db
|
||||
|
||||
|
||||
def _future(hours: int = 24) -> str:
    """Format the UTC instant ``hours`` hours from now as ``YYYY-MM-DD HH:MM:SS``."""
    moment = datetime.now(timezone.utc) + timedelta(hours=hours)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
|
||||
def _recent() -> str:
    """Format the UTC instant one hour ago as ``YYYY-MM-DD HH:MM:SS``."""
    one_hour_back = datetime.now(timezone.utc) - timedelta(hours=1)
    return one_hour_back.strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
|
||||
@pytest.fixture
def api_app(tmp_path, monkeypatch):
    """Return a fresh app whose database holds exactly one article eligible for the Desk."""
    db_path = tmp_path / "t.sqlite3"
    environment = {
        "GOODNEWS_DB": str(db_path),
        # Plain http so the session cookie is not Secure-only; TestClient talks http.
        "GOODNEWS_PUBLIC_BASE_URL": "http://testserver",
        "GOODNEWS_ADMIN_EMAILS": "admin@b.com",
        # Unreachable model endpoint, so builds take the deterministic path quickly.
        "GOODNEWS_LLM_BASE_URL": "http://127.0.0.1:9",
    }
    for key, value in environment.items():
        monkeypatch.setenv(key, value)

    import importlib
    import goodnews.api as api
    importlib.reload(api)

    handle = connect(str(db_path))
    init_db(handle)
    # One source and one article: accepted, visible, recently published, summary complete.
    handle.execute(
        "INSERT INTO sources (id,name,feed_url,trust_score,content_visible) VALUES (1,'S','http://s/f',5,1)"
    )
    handle.execute(
        "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url,published_at) "
        "VALUES (1,1,'https://ex.com/a','Title','h1','http://img/x.jpg',?)",
        (_recent(),),
    )
    handle.execute(
        "INSERT INTO article_scores (article_id,accepted,novelty_score,constructive_score,topic,reason_code) "
        "VALUES (1,1,7,7,'science','ok')"
    )
    handle.execute(
        "INSERT INTO article_summaries (article_id,summary,what_happened,why_matters,why_belongs) "
        "VALUES (1,'Sum','wh','wm','wb')"
    )
    handle.commit()
    handle.close()
    return api.create_app()
|
||||
|
||||
|
||||
def _admin(app):
    """Return a ``TestClient`` signed in as ``admin@b.com`` through the magic-link flow.

    ``email_send.send_magic_link`` is replaced by a recorder for the duration
    of the two auth requests and restored afterwards, even on failure.
    """
    client = TestClient(app)
    captured = {}

    def _record(to, link):
        # Only the link matters; its token is extracted below.
        captured.update(link=link)

    import goodnews.email_send as es
    real_sender = es.send_magic_link
    es.send_magic_link = _record
    try:
        client.post("/api/auth/email/start", json={"email": "admin@b.com"})
        token = captured["link"].split("token=")[1]
        client.post("/api/auth/email/verify", json={"token": token})
    finally:
        es.send_magic_link = real_sender
    return client
|
||||
|
||||
|
||||
def test_admin_gating(api_app):
    """Both publishing endpoints answer 401 to a client with no session."""
    visitor = TestClient(api_app)
    queue_response = visitor.get("/api/admin/publishing/queue")
    assert queue_response.status_code == 401
    build_response = visitor.post("/api/admin/publishing/build")
    assert build_response.status_code == 401
|
||||
|
||||
|
||||
def test_build_then_queue_deterministic(api_app):
    """A build queues the single eligible article deterministically; rebuilding adds nothing."""
    tc = _admin(api_app)
    started = tc.post("/api/admin/publishing/build")
    assert started.json() == {"building": True}

    # TestClient finishes the background task before returning, and the model URL is dead.
    snapshot = tc.get("/api/admin/publishing/queue").json()
    assert snapshot["building"] is False
    assert snapshot["last"]["ranked_by"] == "deterministic"

    items = snapshot["items"]
    assert len(items) == 1
    only = items[0]
    assert only["article_id"] == 1
    assert "utm_source=x" in only["share_url"]

    # The queue is already full, so a second build leaves it at one item.
    tc.post("/api/admin/publishing/build")
    after_rebuild = tc.get("/api/admin/publishing/queue").json()["items"]
    assert len(after_rebuild) == 1
|
||||
|
||||
|
||||
def _one(tc):
    """Trigger a build, then return the id of the first item in the queue listing."""
    tc.post("/api/admin/publishing/build")
    listing = tc.get("/api/admin/publishing/queue").json()
    first_item = listing["items"][0]
    return first_item["id"]
|
||||
|
||||
|
||||
def test_invalid_transition_rejected(api_app):
    """Posting succeeds; asking to queue the posted item again returns 400."""
    tc = _admin(api_app)
    sid = _one(tc)
    status_url = f"/api/admin/publishing/{sid}/status"

    posted = tc.post(status_url, json={"status": "posted"})
    assert posted.status_code == 200
    # Posted is terminal, so the move back to queued is refused.
    requeued = tc.post(status_url, json={"status": "queued"})
    assert requeued.status_code == 400
|
||||
|
||||
|
||||
def test_snooze_validation(api_app):
    """Snoozing over the API needs a future date: missing and past dates return 400."""
    tc = _admin(api_app)
    sid = _one(tc)
    status_url = f"/api/admin/publishing/{sid}/status"

    without_date = tc.post(status_url, json={"status": "snoozed"})
    assert without_date.status_code == 400

    in_the_past = tc.post(status_url, json={"status": "snoozed", "snooze_until": "2000-01-01 00:00:00"})
    assert in_the_past.status_code == 400

    in_the_future = tc.post(status_url, json={"status": "snoozed", "snooze_until": _future()})
    assert in_the_future.status_code == 200
|
||||
|
||||
|
||||
def test_draft_preserved_through_skip_and_restore(api_app):
    """Draft text saved before a skip is still on the item after it is restored."""
    tc = _admin(api_app)
    sid = _one(tc)
    base = f"/api/admin/publishing/{sid}"

    saved = tc.post(f"{base}/draft", json={"draft_text": "my blurb"})
    assert saved.status_code == 200
    skipped = tc.post(f"{base}/status", json={"status": "skipped"})
    assert skipped.status_code == 200

    # The skipped item is gone from the queue listing.
    queued_ids = {item["id"] for item in tc.get("/api/admin/publishing/queue").json()["items"]}
    assert sid not in queued_ids

    restored = tc.post(f"{base}/restore")
    assert restored.status_code == 200

    items = tc.get("/api/admin/publishing/queue").json()["items"]
    back = next(item for item in items if item["id"] == sid)
    assert back["draft_text"] == "my blurb"
|
||||
|
||||
|
||||
def test_save_handle_validates(api_app):
    """The handles endpoint rejects malformed handles with 400 and accepts ``@NASA``."""
    tc = _admin(api_app)
    cases = (
        ("@not a handle", 400),
        ("https://x.com/NASA", 400),
        ("@NASA", 200),
    )
    for handle, expected_status in cases:
        response = tc.post("/api/admin/publishing/handles",
                           json={"entity_name": "NASA", "handle": handle})
        assert response.status_code == expected_status
|
||||
+48
-1
@@ -42,9 +42,56 @@ def test_share_page_missing_and_malformed(client):
|
||||
assert tc.get("/a/999").status_code == 404 # unknown
|
||||
assert tc.get("/a/not-a-number").status_code == 404 # malformed → calm 404
|
||||
assert tc.get("/a/2").status_code == 404 # rejected article
|
||||
assert tc.get("/a/3").status_code == 404 # duplicate
|
||||
|
||||
|
||||
def test_share_page_duplicate_redirects_to_canonical(client):
    """``/a/3`` answers with a 301 to ``/a/1`` instead of a 404.

    Article 3 duplicates the live article 1. Its URL may already be indexed,
    so it is redirected onto the canonical page rather than dropped.
    """
    response = TestClient(client).get("/a/3", follow_redirects=False)
    assert response.status_code == 301
    assert response.headers["location"] == "/a/1"
|
||||
|
||||
|
||||
def test_share_page_no_image_uses_summary_card(client, tmp_path, monkeypatch):
    """Article 1 has an image, so its share page advertises the large card type."""
    # NOTE(review): tmp_path and monkeypatch are requested but never used here, and only
    # the with-image case is asserted despite the test's name — confirm intent.
    page = TestClient(client).get("/a/1")
    assert 'summary_large_image' in page.text
|
||||
|
||||
|
||||
def test_incomplete_page_is_not_cached(client):
    """A share page without a summary is served ``no-cache`` and kept out of the share cache.

    Article 1 has no summary or explanation in this fixture, so the page is the
    interim one and must be re-fetched once the summary exists.
    """
    import goodnews.api as api

    response = TestClient(client).get("/a/1")
    assert response.status_code == 200
    cache_header = response.headers.get("cache-control")
    assert cache_header == "no-cache"
    assert 1 not in api._SHARE_CACHE
|
||||
|
||||
|
||||
@pytest.fixture
def app_complete(tmp_path, monkeypatch):
    """Return the reloaded ``goodnews.api`` module over a DB holding one fully summarised article.

    The module itself is returned, not an app, so a test can both call
    ``create_app()`` and inspect module-level state.
    """
    db_path = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db_path))
    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")

    import importlib
    import goodnews.api as api
    importlib.reload(api)

    from goodnews.db import connect, init_db
    handle = connect(str(db_path))
    init_db(handle)
    seed_statements = (
        "INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'BBC','http://s/f',5)",
        "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url) "
        "VALUES (1,1,'https://bbc.com/x','Water voles return','h1','https://img/v.jpg')",
        "INSERT INTO article_scores (article_id,accepted,reason_text) VALUES (1,1,'Hopeful.')",
        "INSERT INTO article_summaries (article_id,summary,what_happened,why_matters,why_belongs) "
        "VALUES (1,'Voles are back.','They returned to the river.','Biodiversity rebound.','Quietly hopeful.')",
    )
    for statement in seed_statements:
        handle.execute(statement)
    handle.commit()
    handle.close()
    return api
|
||||
|
||||
|
||||
def test_complete_page_is_cached_and_served_from_cache(app_complete):
    """A finished share page is publicly cacheable, stored in the share cache, and repeatable."""
    module = app_complete
    browser = TestClient(module.create_app())

    first = browser.get("/a/1")
    assert first.status_code == 200
    assert first.headers.get("cache-control") == "public, max-age=300"
    assert 1 in module._SHARE_CACHE  # the finished page was stored

    # The repeat request returns the same body.
    second = browser.get("/a/1")
    assert second.status_code == 200
    assert second.text == first.text
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
"""Share page /a/{id}: a duplicate article 301-redirects to its canonical twin
|
||||
instead of 404ing. A hard 404 silently drops already-indexed URLs from Google and
|
||||
tanked impressions when a newer duplicate retired an older, indexed page."""
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
|
||||
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Return an app over a DB seeded with canonical, duplicate and rejected articles."""
    db_path = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db_path))
    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")

    import importlib
    import goodnews.api as api
    importlib.reload(api)

    from goodnews.db import connect, init_db
    c = connect(str(db_path))
    init_db(c)
    c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'BBC','http://s/f',5)")

    def art(aid, *, accepted=1, dup=None, summary=True):
        # Insert one article for source 1, optionally a duplicate and/or without a summary.
        article_row = (aid, f"https://bbc.com/{aid}", f"Story {aid}", f"h{aid}")
        c.execute(
            "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,published_at) "
            "VALUES (?,1,?,?,?,'2026-06-05T08:00:00')",
            article_row,
        )
        if dup is not None:
            c.execute("UPDATE articles SET duplicate_of=? WHERE id=?", (dup, aid))
        c.execute(
            "INSERT INTO article_scores (article_id,accepted,reason_text) VALUES (?,?,'x')",
            (aid, accepted),
        )
        if summary:
            c.execute(
                "INSERT INTO article_summaries (article_id,summary) VALUES (?,?)",
                (aid, f"Summary {aid}."),
            )

    art(10)                      # live canonical: expected 200
    art(11, dup=10)              # duplicate of the live canonical: expected 301 to /a/10
    art(12, accepted=0, dup=10)  # rejected follower of an accepted rep: still 301 to /a/10
    art(20, accepted=0)          # rejected canonical
    art(21, dup=20)              # duplicate of a rejected canonical: expected 404
    art(30, accepted=0)          # rejected and not a duplicate: expected 404
    c.commit()
    c.close()
    return api.create_app()
|
||||
|
||||
|
||||
def test_canonical_serves_200(client):
    # Article 10 is the accepted canonical: its own page renders with its title.
    response = TestClient(client).get("/a/10")
    assert response.status_code == 200
    assert "Story 10" in response.text
|
||||
|
||||
|
||||
def test_duplicate_301s_to_canonical(client):
    # Article 11 is a duplicate of 10; redirects are not followed so the 301
    # itself can be inspected.
    http = TestClient(client)
    response = http.get("/a/11", follow_redirects=False)
    assert response.status_code == 301
    # The redirect consolidates onto the surviving canonical page.
    assert response.headers["location"] == "/a/10"
|
||||
|
||||
|
||||
def test_rejected_follower_of_accepted_rep_still_301s(client):
    # Policy: duplicate_of is resolved BEFORE the follower's own acceptance is
    # checked. A rejected article pointing at an ACCEPTED representative therefore
    # 301s to that representative instead of 404ing. This is intentional: the
    # visitor/crawler ends up on a serveable equivalent.
    http = TestClient(client)
    response = http.get("/a/12", follow_redirects=False)
    assert response.status_code == 301
    assert response.headers["location"] == "/a/10"
|
||||
|
||||
|
||||
def test_duplicate_of_rejected_canonical_404s(client):
    # Article 21 points at rejected canonical 20, so there is nothing serveable
    # to redirect to.
    http = TestClient(client)
    response = http.get("/a/21", follow_redirects=False)
    assert response.status_code == 404
|
||||
|
||||
|
||||
def test_rejected_article_404s(client):
    # Article 30 is rejected and is not a duplicate of anything.
    response = TestClient(client).get("/a/30")
    assert response.status_code == 404
|
||||
|
||||
|
||||
def test_missing_article_404s(client):
    # Id 9999 was never seeded by the fixture.
    response = TestClient(client).get("/a/9999")
    assert response.status_code == 404
|
||||
|
||||
|
||||
def test_head_matches_get_status(client):
    # HEAD must return the same status as GET rather than falling through to the
    # static mount and 404ing. Some crawlers/link-checkers probe with HEAD.
    http = TestClient(client)

    # Canonical article.
    canonical = http.head("/a/10")
    assert canonical.status_code == 200

    # Duplicate article: same permanent redirect as GET.
    duplicate = http.head("/a/11", follow_redirects=False)
    assert duplicate.status_code == 301
    assert duplicate.headers["location"] == "/a/10"

    # Missing article.
    missing = http.head("/a/9999")
    assert missing.status_code == 404
|
||||
Reference in New Issue
Block a user