# upbeatBytes/tests/test_share_redirect.py

"""Share page /a/{id}: a duplicate article 301-redirects to its canonical twin
instead of 404ing. A hard 404 silently drops already-indexed URLs from Google and
tanked impressions when a newer duplicate retired an older, indexed page."""
import pytest
from fastapi.testclient import TestClient


@pytest.fixture
def client(tmp_path, monkeypatch):
    """Return a freshly created app backed by a throwaway, pre-seeded database.

    Despite the name, this returns the object produced by ``api.create_app()``;
    each test wraps it in its own ``TestClient``.

    Seeded articles (all attached to source 1), with the outcome the tests in
    this module expect for ``/a/{id}``:

    * 10 -- accepted, not a duplicate                -> 200
    * 11 -- accepted, duplicate of 10                -> 301 to /a/10
    * 12 -- rejected, duplicate of 10                -> 301 to /a/10
    * 20 -- rejected, not a duplicate
    * 21 -- accepted, duplicate of the rejected 20   -> 404
    * 30 -- rejected, not a duplicate                -> 404
    """
    import importlib

    db = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))
    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")

    # Reload after the environment is patched -- presumably goodnews.api reads
    # these variables at import time (TODO confirm against the module).
    import goodnews.api as api
    importlib.reload(api)
    from goodnews.db import connect, init_db

    conn = connect(str(db))

    def art(aid, *, accepted=1, dup=None, summary=True):
        """Insert one article plus its score row and (optionally) its summary."""
        conn.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,published_at) "
                     "VALUES (?,1,?,?,?,'2026-06-05T08:00:00')",
                     (aid, f"https://bbc.com/{aid}", f"Story {aid}", f"h{aid}"))
        if dup is not None:
            conn.execute("UPDATE articles SET duplicate_of=? WHERE id=?", (dup, aid))
        conn.execute("INSERT INTO article_scores (article_id,accepted,reason_text) VALUES (?,?,'x')", (aid, accepted))
        if summary:
            conn.execute("INSERT INTO article_summaries (article_id,summary) VALUES (?,?)", (aid, f"Summary {aid}."))

    # Close the connection even when schema creation or seeding fails, so a
    # broken fixture does not leave an open handle on the temp database.
    try:
        init_db(conn)
        conn.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'BBC','http://s/f',5)")

        art(10)                       # canonical, live  -> 200
        art(11, dup=10)               # duplicate of a live canonical -> 301 /a/10
        art(12, accepted=0, dup=10)   # REJECTED follower of an accepted rep -> still 301 /a/10
        art(20, accepted=0)           # rejected canonical
        art(21, dup=20)               # dup of a REJECTED canonical -> 404 (genuinely gone)
        art(30, accepted=0)           # rejected, not a duplicate -> 404
        conn.commit()
    finally:
        conn.close()

    return api.create_app()


def test_canonical_serves_200(client):
    """An accepted, non-duplicate article renders its share page."""
    response = TestClient(client).get("/a/10")
    assert response.status_code == 200
    assert "Story 10" in response.text


def test_duplicate_301s_to_canonical(client):
    """A duplicate's share URL permanently redirects to its canonical article."""
    http = TestClient(client)
    response = http.get("/a/11", follow_redirects=False)
    # The redirect target is the survivor, so the duplicate consolidates onto it.
    assert response.status_code == 301 and response.headers["location"] == "/a/10"


def test_rejected_follower_of_accepted_rep_still_301s(client):
    """A rejected duplicate still redirects when its representative is accepted.

    Policy: duplicate_of is resolved BEFORE the follower's own acceptance is
    considered. A rejected article pointing at an ACCEPTED representative thus
    301s to it instead of 404ing -- intentionally, so the visitor/crawler lands
    on a serveable equivalent.
    """
    response = TestClient(client).get("/a/12", follow_redirects=False)
    assert response.status_code == 301
    assert response.headers["location"] == "/a/10"


def test_duplicate_of_rejected_canonical_404s(client):
    """A duplicate whose canonical was rejected has nothing serveable to point at."""
    http = TestClient(client)
    status = http.get("/a/21", follow_redirects=False).status_code
    assert status == 404


def test_rejected_article_404s(client):
    """A rejected article that is not a duplicate is not served."""
    response = TestClient(client).get("/a/30")
    assert response.status_code == 404


def test_missing_article_404s(client):
    """An id with no article row yields a 404."""
    response = TestClient(client).get("/a/9999")
    assert response.status_code == 404


def test_head_matches_get_status(client):
    """HEAD on the share route mirrors the status GET returns.

    Some crawlers/link-checkers probe with HEAD, so it must not fall through to
    the static mount and 404.
    """
    http = TestClient(client)

    # Canonical article.
    canonical = http.head("/a/10")
    assert canonical.status_code == 200

    # Duplicate article: same permanent redirect as GET.
    duplicate = http.head("/a/11", follow_redirects=False)
    assert duplicate.status_code == 301
    assert duplicate.headers["location"] == "/a/10"

    # Missing article.
    missing = http.head("/a/9999")
    assert missing.status_code == 404