# upbeatBytes/tests/test_api.py

import json

import pytest
from fastapi.testclient import TestClient

from goodnews.db import connect, init_db


@pytest.fixture
def client(tmp_path, monkeypatch):
    """Return a ``TestClient`` for an app backed by a freshly seeded temp database.

    The database file lives under ``tmp_path`` and ``GOODNEWS_DB`` is pointed
    at it before the app module is imported. Seed data:

    * one source (id 1, trust score 7);
    * article 1 -- topic ``science``, flavor ``discovery``;
    * article 2 -- topic ``health``, flavor ``breakthrough``, with "Election"
      in its title so avoid-term filtering has something to remove;
    * one daily brief (2026-05-30) ranking article 1 first and article 2 second.
    """
    db = tmp_path / "test.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))

    conn = connect(db)
    # Close the connection even when seeding fails, so a broken schema or
    # insert does not leave an open handle on the temp database file.
    try:
        init_db(conn)
        conn.execute("INSERT INTO sources (id, name, feed_url, trust_score) VALUES (1,'S','http://s/f',7)")

        def add(aid, topic, flavor, title):
            # Insert one article for source 1 together with its score row
            # (accepted = 1) carrying the given topic and flavor.
            conn.execute(
                "INSERT INTO articles (id, source_id, canonical_url, title, published_at, url_hash) "
                "VALUES (?,1,?,?, '2026-05-30T10:00:00+00:00', ?)",
                (aid, f"http://s/{aid}", title, f"h{aid}"),
            )
            conn.execute(
                "INSERT INTO article_scores (article_id, constructive_score, agency_score, "
                "human_benefit_score, cortisol_score, ragebait_score, pr_risk_score, accepted, topic, flavor) "
                "VALUES (?, 7, 3, 4, 1, 0, 2, 1, ?, ?)",
                (aid, topic, flavor),
            )

        add(1, "science", "discovery", "A quiet science discovery")
        add(2, "health", "breakthrough", "Election season health update")  # has avoid-able term
        conn.execute("INSERT INTO daily_briefs (id, brief_date, title) VALUES (1,'2026-05-30','Brief')")
        conn.execute("INSERT INTO daily_brief_items (brief_id, article_id, rank) VALUES (1,1,1),(1,2,2)")
        conn.commit()
    finally:
        conn.close()

    # Import after env is set so the app reads the temp DB.
    from goodnews.api import create_app

    return TestClient(create_app())


def _prefs(client, **kw):
    """GET ``/api/feed`` with the keyword arguments JSON-encoded as ``prefs``."""
    encoded = json.dumps(kw)
    query = {"prefs": encoded}
    return client.get("/api/feed", params=query)


def test_bad_prefs_returns_200_and_full_feed(client):
    """An unparseable ``prefs`` value must not fail the request or filter the feed."""
    response = client.get("/api/feed", params={"prefs": "not json at all"})
    assert response.status_code == 200
    # Forgiving: the bad blob is ignored, so both seeded articles are counted.
    body = response.json()
    assert body["count"] == 2


def test_mute_topic_affects_feed(client):
    """Muting ``science`` leaves only the ``health`` article in the feed."""
    feed = _prefs(client, mute_topics=["science"]).json()
    remaining_topics = [entry["topic"] for entry in feed["items"]]
    assert remaining_topics == ["health"]


def test_avoid_term_filters_feed(client):
    """An avoid-term drops every article whose title contains it (case-insensitively)."""
    feed = _prefs(client, avoid_terms=["election"]).json()
    titles = [entry["title"] for entry in feed["items"]]
    for title in titles:
        assert "election" not in title.lower()
    # Exactly one of the two seeded articles survives the filter.
    assert len(titles) == 1


def test_brief_filters_down_without_refill(client):
    """Muting a topic shrinks the brief; the removed slot is not refilled."""
    unfiltered = client.get("/api/brief").json()
    assert len(unfiltered["items"]) == 2

    health_muted = json.dumps({"mute_topics": ["health"]})
    filtered = client.get("/api/brief", params={"prefs": health_muted}).json()
    shown_topics = [entry["topic"] for entry in filtered["items"]]
    assert shown_topics == ["science"]


def test_category_counts_match_filtered_feed(client):
    """Category counts must not list a topic the visitor has muted."""
    query = {"prefs": json.dumps({"mute_topics": ["health"]})}
    rows = client.get("/api/category-counts", params=query).json()
    for row in rows:
        assert row["topic"] != "health"


def test_feed_excludes_dismissed(client):
    """Article ids passed via ``exclude`` are left out of the feed."""
    feed = client.get("/api/feed", params={"exclude": "1"}).json()
    returned_ids = [entry["id"] for entry in feed["items"]]
    assert returned_ids.count(1) == 0


def test_families_endpoint(client):
    """``/api/families`` lists "Discovery & Wonder" and gives every family a ``tags`` list."""
    families = client.get("/api/families").json()
    family_names = [family["name"] for family in families]
    assert "Discovery & Wonder" in family_names
    for family in families:
        assert "tags" in family
        assert isinstance(family["tags"], list)


def test_global_endpoints_are_edge_cacheable(client):
    """Startup endpoints must advertise public, shared-cache caching.

    They are identical for every visitor, so "Gathering the good news…" can
    resolve from the edge rather than the origin.
    """
    startup_paths = ("/api/moods", "/api/categories", "/api/lanes", "/api/families")
    for path in startup_paths:
        response = client.get(path)
        cache_control = response.headers.get("cache-control", "")
        for directive in ("public", "s-maxage"):
            assert directive in cache_control, f"{path}: {cache_control!r}"


def test_feed_cache_boundary(client):
    """Shareable (URL-determined) feeds are public; personalized ones are private."""
    default_cc = client.get("/api/feed").headers.get("cache-control", "")
    assert "public" in default_cc
    assert "s-maxage" in default_cc

    # Topic/tag browse is still shareable (same for everyone).
    topic_response = client.get("/api/feed", params={"topic": "science"})
    assert "public" in topic_response.headers.get("cache-control", "")

    # Personal filters and the following feed must never be shared across users.
    personal_queries = (
        {"following": "true"},
        {"prefs": json.dumps({"mute_topics": ["science"]})},
        {"exclude": "1,2"},
    )
    for query in personal_queries:
        response = client.get("/api/feed", params=query)
        assert response.headers.get("cache-control") == "private, no-store"


def test_brief_cache_boundary(client):
    """The default brief is global and public; personal filters make it private."""
    default_response = client.get("/api/brief")
    assert "public" in default_response.headers.get("cache-control", "")

    personal_queries = (
        {"prefs": json.dumps({"mute_topics": ["health"]})},
        {"exclude": "3"},
    )
    for query in personal_queries:
        response = client.get("/api/brief", params=query)
        assert response.headers.get("cache-control") == "private, no-store"


def test_search_relevance_source_and_boundaries(client):
    """Search matches titles and source names and respects mute/avoid preferences.

    Adds a second source ("Nature Digest") with one accepted ``environment``
    article (id 3) directly to the database named by ``GOODNEWS_DB``, then
    checks title matching, source-name matching, empty/junk queries, and that
    muted topics and avoid-terms are applied to search results.
    """
    import os
    import sqlite3
    from contextlib import closing

    # A distinctively-named source proves source-name matching (the NYT use case).
    # ``closing`` guarantees the handle is released even if an insert fails;
    # a sqlite3 connection used directly as a context manager would not close.
    with closing(sqlite3.connect(os.environ["GOODNEWS_DB"])) as db:
        db.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (2,'Nature Digest','http://n/f',7)")
        db.execute(
            "INSERT INTO articles (id,source_id,canonical_url,title,published_at,url_hash) "
            "VALUES (3,2,'http://n/3','Coral reefs rebound','2026-05-30T10:00:00+00:00','h3')"
        )
        db.execute("INSERT INTO article_scores (article_id,accepted,topic,flavor) VALUES (3,1,'environment','hopeful')")
        db.commit()

    # title match (index builds lazily on first search)
    assert client.get("/api/search?q=coral").json()["items"][0]["id"] == 3
    # SOURCE-NAME match — searching the publication finds its articles (Codex's requirement)
    assert 3 in [it["id"] for it in client.get("/api/search?q=nature").json()["items"]]
    # empty / junk query → empty, no error
    assert client.get("/api/search?q=").json()["count"] == 0
    assert client.get("/api/search?q=%20%21%21").json()["count"] == 0
    # boundary: a muted topic is excluded from search too (mirrors the visitor view)
    muted = client.get(
        "/api/search",
        params={"q": "coral", "prefs": json.dumps({"mute_topics": ["environment"]})},
    ).json()
    assert muted["count"] == 0
    # boundary: a hard avoid-term filters a textual match
    avoided = client.get(
        "/api/search",
        params={"q": "election", "prefs": json.dumps({"avoid_terms": ["election"]})},
    ).json()
    assert all(it["id"] != 2 for it in avoided["items"])