Files
upbeatBytes/tests/test_share_redirect.py
thejayman77 89c0fbe1f6 Sync repo to deployed state: SEO recovery, Publishing Desk, Play games, emoji picker
The deploy pipeline runs from the working tree, so a wave of shipped features
had never been committed. This snapshots git to what's actually running.

SEO impression recovery (live + verified):
- Duplicate /a/{id} pages now 301-redirect to their canonical twin instead of
  returning 404 (a hard 404 silently dropped already-indexed URLs and tanked
  impressions).
- Dedup representative selection reworked: accepted/serveable -> established
  rep (URL stability) -> quality score, so an accepted page never retires to a
  rejected rep and an indexed canonical doesn't churn when a newer twin arrives.
- HEAD /a/{id} returns the same status as GET (api_route GET+HEAD) instead of
  falling through to the static mount and 404ing.
- `dedup --force-recluster`: cycle-locked, model-free re-cluster to re-apply the
  policy to the existing corpus (shared cycle_lock context manager).
- CLI honors GOODNEWS_DB for its default --db (was silently ignored).

Publishing Desk (admin tool to post highlights to X via Web Intents):
- publishing.py queue/rank/handle-resolution; admin UI; full searchable emoji
  picker (bundled data, no CDN) for the blurb editor.

Play games + site:
- Bloom (word-wheel), Memory Match, daily ritual set, Zen Den (dev-gated).
- English-only language gate; source prospecting; paywall + dedup hardening.

Tests: full suite green (349). Ignores tightened (node_modules, data/*.db).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-18 11:32:27 -04:00

80 lines
3.6 KiB
Python

"""Share page /a/{id}: a duplicate article 301-redirects to its canonical twin
instead of 404ing. A hard 404 silently drops already-indexed URLs from Google and
tanked impressions when a newer duplicate retired an older, indexed page."""
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Build the app against a throwaway DB seeded with share-page cases.

    Seeded articles (all attached to source 1):

    * 10 -- accepted, not a duplicate
    * 11 -- accepted, duplicate_of=10
    * 12 -- rejected, duplicate_of=10
    * 20 -- rejected, not a duplicate
    * 21 -- accepted, duplicate_of=20
    * 30 -- rejected, not a duplicate

    Returns whatever ``api.create_app()`` produces (not a ``TestClient``);
    each test wraps it in a ``TestClient`` itself.
    """
    db = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))
    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")

    # Imported and reloaded only after the env vars are set.
    # NOTE(review): presumably goodnews.api reads them at import time -- confirm.
    import importlib
    import goodnews.api as api
    importlib.reload(api)
    from goodnews.db import connect, init_db

    c = connect(str(db))
    try:
        init_db(c)
        c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'BBC','http://s/f',5)")

        def art(aid, *, accepted=1, dup=None, summary=True):
            """Insert one article plus its score row and (optionally) a summary."""
            c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,published_at) "
                      "VALUES (?,1,?,?,?,'2026-06-05T08:00:00')",
                      (aid, f"https://bbc.com/{aid}", f"Story {aid}", f"h{aid}"))
            if dup is not None:
                c.execute("UPDATE articles SET duplicate_of=? WHERE id=?", (dup, aid))
            c.execute("INSERT INTO article_scores (article_id,accepted,reason_text) VALUES (?,?,'x')", (aid, accepted))
            if summary:
                c.execute("INSERT INTO article_summaries (article_id,summary) VALUES (?,?)", (aid, f"Summary {aid}."))

        art(10)                      # canonical, live -> 200
        art(11, dup=10)              # duplicate of a live canonical -> 301 /a/10
        art(12, accepted=0, dup=10)  # REJECTED follower of an accepted rep -> still 301 /a/10
        art(20, accepted=0)          # rejected canonical
        art(21, dup=20)              # dup of a REJECTED canonical -> 404 (genuinely gone)
        art(30, accepted=0)          # rejected, not a duplicate -> 404
        c.commit()
    finally:
        # Close even when seeding fails so the temp DB file is not left with an
        # open handle.
        c.close()
    return api.create_app()
def test_canonical_serves_200(client):
    """The accepted, non-duplicate article 10 renders its share page."""
    response = TestClient(client).get("/a/10")
    assert response.status_code == 200
    assert "Story 10" in response.text
def test_duplicate_301s_to_canonical(client):
    """Article 11 (duplicate of 10) permanently redirects to /a/10."""
    http = TestClient(client)
    response = http.get("/a/11", follow_redirects=False)
    # The redirect consolidates the duplicate onto the surviving canonical page.
    assert response.status_code == 301 and response.headers["location"] == "/a/10"
def test_rejected_follower_of_accepted_rep_still_301s(client):
    """A rejected duplicate of an accepted representative still redirects.

    Policy: the route resolves duplicate_of BEFORE the follower's own
    acceptance, so a rejected article that points at an ACCEPTED representative
    301s to it rather than 404ing. That is intentional: it sends the
    visitor/crawler to a serveable equivalent.
    """
    response = TestClient(client).get("/a/12", follow_redirects=False)
    assert response.status_code == 301
    assert response.headers["location"] == "/a/10"
def test_duplicate_of_rejected_canonical_404s(client):
    """Article 21 duplicates the rejected article 20, so there is nothing
    serveable to redirect to and the page is a 404."""
    http = TestClient(client)
    status = http.get("/a/21", follow_redirects=False).status_code
    assert status == 404
def test_rejected_article_404s(client):
    """Article 30 is rejected and not a duplicate, so its share page 404s."""
    response = TestClient(client).get("/a/30")
    assert response.status_code == 404
def test_missing_article_404s(client):
    """An id that was never seeded (9999) 404s."""
    response = TestClient(client).get("/a/9999")
    assert response.status_code == 404
def test_head_matches_get_status(client):
    """HEAD /a/{id} answers with the same status and redirect target as GET.

    HEAD must not fall through to the static mount and 404; some
    crawlers/link-checkers probe with HEAD. Each path is requested with both
    methods so the test fails if they ever diverge, and the expected status is
    pinned as well so both drifting together is still caught.
    """
    tc = TestClient(client)
    expected = {
        "/a/10": 200,    # canonical
        "/a/11": 301,    # duplicate
        "/a/9999": 404,  # missing
    }
    for path, status in expected.items():
        head = tc.head(path, follow_redirects=False)
        get = tc.get(path, follow_redirects=False)
        assert head.status_code == status, path
        assert head.status_code == get.status_code, path
        # Location is absent (None) on non-redirects for both methods.
        assert head.headers.get("location") == get.headers.get("location"), path

    # The duplicate's HEAD redirect must point at its canonical twin.
    r = tc.head("/a/11", follow_redirects=False)
    assert r.headers["location"] == "/a/10"