Files
thejayman77 89c0fbe1f6 Sync repo to deployed state: SEO recovery, Publishing Desk, Play games, emoji picker
The deploy pipeline runs from the working tree, so a wave of shipped features
had never been committed. This snapshots git to what's actually running.

SEO impression recovery (live + verified):
- Duplicate /a/{id} pages now 301-redirect to their canonical twin instead of
  returning 404 (a hard 404 silently dropped already-indexed URLs and tanked
  impressions).
- Dedup representative selection reworked: accepted/serveable -> established
  rep (URL stability) -> quality score, so an accepted page never retires to a
  rejected rep and an indexed canonical doesn't churn when a newer twin arrives.
- HEAD /a/{id} returns the same status as GET (api_route GET+HEAD) instead of
  falling through to the static mount and 404ing.
- `dedup --force-recluster`: cycle-locked, model-free re-cluster to re-apply the
  policy to the existing corpus (shared cycle_lock context manager).
- CLI honors GOODNEWS_DB for its default --db (was silently ignored).

Publishing Desk (admin tool to post highlights to X via Web Intents):
- publishing.py queue/rank/handle-resolution; admin UI; full searchable emoji
  picker (bundled data, no CDN) for the blurb editor.

Play games + site:
- Bloom (word-wheel), Memory Match, daily ritual set, Zen Den (dev-gated).
- English-only language gate; source prospecting; paywall + dedup hardening.

Tests: full suite green (349). Ignores tightened (node_modules, data/*.db).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-18 11:32:27 -04:00

141 lines
6.5 KiB
Python

import json
import pytest
from fastapi.testclient import TestClient
from goodnews.db import connect, init_db
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Return a TestClient for an app backed by a freshly seeded temp database.

    Points ``GOODNEWS_DB`` at a SQLite file under ``tmp_path``, initialises the
    schema, and seeds one source, two articles with ``accepted=1`` score rows
    (science/discovery and health/breakthrough), and one daily brief that ranks
    both articles.
    """
    db = tmp_path / "test.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))
    conn = connect(db)

    def add(aid, topic, flavor, title):
        """Insert one article plus its score row for the given topic/flavor."""
        conn.execute(
            "INSERT INTO articles (id, source_id, canonical_url, title, published_at, url_hash) "
            "VALUES (?,1,?,?, '2026-05-30T10:00:00+00:00', ?)",
            (aid, f"http://s/{aid}", title, f"h{aid}"),
        )
        conn.execute(
            "INSERT INTO article_scores (article_id, constructive_score, agency_score, "
            "human_benefit_score, cortisol_score, ragebait_score, pr_risk_score, accepted, topic, flavor) "
            "VALUES (?, 7, 3, 4, 1, 0, 2, 1, ?, ?)",
            (aid, topic, flavor),
        )

    # Close the connection even when seeding raises, so a failing insert does
    # not leak the open handle.
    try:
        init_db(conn)
        conn.execute("INSERT INTO sources (id, name, feed_url, trust_score) VALUES (1,'S','http://s/f',7)")
        add(1, "science", "discovery", "A quiet science discovery")
        # This title contains "election", the term the avoid-term tests filter on.
        add(2, "health", "breakthrough", "Election season health update")
        conn.execute("INSERT INTO daily_briefs (id, brief_date, title) VALUES (1,'2026-05-30','Brief')")
        conn.execute("INSERT INTO daily_brief_items (brief_id, article_id, rank) VALUES (1,1,1),(1,2,2)")
        conn.commit()
    finally:
        conn.close()

    # Import after env is set so the app reads the temp DB.
    from goodnews.api import create_app

    return TestClient(create_app())
def _prefs(client, **kw):
    """GET ``/api/feed`` with the keyword arguments JSON-encoded as ``prefs``."""
    encoded = json.dumps(kw)
    query = {"prefs": encoded}
    return client.get("/api/feed", params=query)
def test_bad_prefs_returns_200_and_full_feed(client):
    """A ``prefs`` value that is not JSON still gets a 200 with a count of 2."""
    response = client.get("/api/feed", params={"prefs": "not json at all"})
    assert response.status_code == 200
    # Forgiving behaviour: the unparseable blob is ignored rather than rejected.
    payload = response.json()
    assert payload["count"] == 2
def test_mute_topic_affects_feed(client):
    """Muting "science" leaves only the "health" item in the feed."""
    items = _prefs(client, mute_topics=["science"]).json()["items"]
    remaining_topics = [entry["topic"] for entry in items]
    assert remaining_topics == ["health"]
def test_avoid_term_filters_feed(client):
    """An avoid-term of "election" leaves one item, and no title contains the term."""
    response = _prefs(client, avoid_terms=["election"])
    titles = [entry["title"] for entry in response.json()["items"]]
    assert not any("election" in title.lower() for title in titles)
    assert len(titles) == 1
def test_brief_filters_down_without_refill(client):
    """Muting a topic shrinks the brief to the surviving item instead of refilling it."""
    unfiltered = client.get("/api/brief").json()
    assert len(unfiltered["items"]) == 2

    prefs_blob = json.dumps({"mute_topics": ["health"]})
    filtered = client.get("/api/brief", params={"prefs": prefs_blob}).json()
    surviving_topics = [entry["topic"] for entry in filtered["items"]]
    assert surviving_topics == ["science"]
def test_category_counts_match_filtered_feed(client):
    """With "health" muted, no category-count entry reports the "health" topic."""
    prefs_blob = json.dumps({"mute_topics": ["health"]})
    response = client.get("/api/category-counts", params={"prefs": prefs_blob})
    entries = response.json()
    assert not any(entry["topic"] == "health" for entry in entries)
def test_feed_excludes_dismissed(client):
    """An id passed via ``exclude`` is absent from the returned feed items."""
    items = client.get("/api/feed", params={"exclude": "1"}).json()["items"]
    returned_ids = [entry["id"] for entry in items]
    assert 1 not in returned_ids
def test_families_endpoint(client):
    """``/api/families`` lists "Discovery & Wonder" and every entry has a ``tags`` list."""
    families = client.get("/api/families").json()
    family_names = [family["name"] for family in families]
    assert "Discovery & Wonder" in family_names
    for family in families:
        assert "tags" in family and isinstance(family["tags"], list)
def test_global_endpoints_are_edge_cacheable(client):
    """Each startup endpoint sends a Cache-Control with ``public`` and ``s-maxage``."""
    # These startup responses are identical for every visitor, so they should be
    # publicly cacheable: "Gathering the good news…" then resolves from the edge
    # rather than the origin.
    startup_paths = ("/api/moods", "/api/categories", "/api/lanes", "/api/families")
    for path in startup_paths:
        response = client.get(path)
        cc = response.headers.get("cache-control", "")
        assert "public" in cc and "s-maxage" in cc, f"{path}: {cc!r}"
def test_feed_cache_boundary(client):
    """URL-determined feeds are public; personalized feeds are ``private, no-store``."""
    # A feed determined only by its URL is shareable, hence public.
    default_cc = client.get("/api/feed").headers.get("cache-control", "")
    assert "public" in default_cc and "s-maxage" in default_cc

    # Browsing by topic/tag is the same for everyone, so it stays shareable.
    topic_response = client.get("/api/feed", params={"topic": "science"})
    assert "public" in topic_response.headers.get("cache-control", "")

    # Personal filters and the following feed must never be shared across users.
    personal_queries = (
        {"following": "true"},
        {"prefs": json.dumps({"mute_topics": ["science"]})},
        {"exclude": "1,2"},
    )
    for query in personal_queries:
        response = client.get("/api/feed", params=query)
        assert response.headers.get("cache-control") == "private, no-store"
def test_brief_cache_boundary(client):
    """The default brief is public; a brief with personal filters is ``private, no-store``."""
    # The default highlights are global, so they may be cached publicly.
    default_response = client.get("/api/brief")
    assert "public" in default_response.headers.get("cache-control", "")

    # Any personal filter makes the response private.
    personal_queries = (
        {"prefs": json.dumps({"mute_topics": ["health"]})},
        {"exclude": "3"},
    )
    for query in personal_queries:
        response = client.get("/api/brief", params=query)
        assert response.headers.get("cache-control") == "private, no-store"
def test_search_relevance_source_and_boundaries(client):
    """Search matches titles and source names, tolerates junk, and honors prefs.

    Seeds an extra source ("Nature Digest") with one accepted article straight
    into the database named by ``GOODNEWS_DB``, then checks title matching,
    source-name matching, empty/junk queries, and that ``mute_topics`` and
    ``avoid_terms`` preferences also filter search results.
    """
    import os
    import sqlite3

    # A distinctively-named source proves source-name matching (the NYT use case).
    seed = sqlite3.connect(os.environ["GOODNEWS_DB"])
    try:
        seed.execute(
            "INSERT INTO sources (id,name,feed_url,trust_score) VALUES (2,'Nature Digest','http://n/f',7)"
        )
        seed.execute(
            "INSERT INTO articles (id,source_id,canonical_url,title,published_at,url_hash) "
            "VALUES (3,2,'http://n/3','Coral reefs rebound','2026-05-30T10:00:00+00:00','h3')"
        )
        seed.execute(
            "INSERT INTO article_scores (article_id,accepted,topic,flavor) VALUES (3,1,'environment','hopeful')"
        )
        seed.commit()
    finally:
        # Release the handle even if one of the seed inserts fails.
        seed.close()

    # title match (index builds lazily on first search)
    assert client.get("/api/search?q=coral").json()["items"][0]["id"] == 3

    # SOURCE-NAME match: searching the publication finds its articles
    nature_hits = client.get("/api/search?q=nature").json()["items"]
    assert 3 in [hit["id"] for hit in nature_hits]

    # empty / junk query → empty, no error
    assert client.get("/api/search?q=").json()["count"] == 0
    assert client.get("/api/search?q=%20%21%21").json()["count"] == 0

    # boundary: a muted topic is excluded from search too (mirrors the visitor view)
    muted = client.get(
        "/api/search",
        params={"q": "coral", "prefs": json.dumps({"mute_topics": ["environment"]})},
    ).json()
    assert muted["count"] == 0

    # boundary: a hard avoid-term filters a textual match
    avoided = client.get(
        "/api/search",
        params={"q": "election", "prefs": json.dumps({"avoid_terms": ["election"]})},
    ).json()
    assert all(hit["id"] != 2 for hit in avoided["items"])