a47a1504c8
Three-layer organization: primary topic (one per article, for ranking and brief balance) + grouping tags (1-4 per article from a controlled vocabulary, the organic "wandering" axis) + tonal flavor. - taxonomy: add technology + learning topics; 4 calm tag families (Discovery & Wonder, People & Kindness, Solutions & Progress, Mind & Craft) defined in code, not the DB; ALLOWED_TAGS union + coerce_tags validation. - db: article_tags(article_id, tag) join table + tag index. - llm: tags added to the classifier json_schema (enum-constrained, maxItems 4) and system prompt; normalize_scores coerces tags; upsert_article_score replaces a row's tags atomically on every (re)classification. - queries: feed gains a tag filter and exposes tags via group_concat; tag_counts. - api: Article.tags, feed tag param, and /api/families with per-tag counts. - tests: coerce/normalize/upsert/tag-filter/reclassify-replace/tag_counts + /api/families. 99 passing. Corpus reclassify (re-tag + new primary topics) runs separately against the local LLM. Frontend (B2) pairs with this; the live site is unchanged until then. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
90 lines
3.1 KiB
Python
90 lines
3.1 KiB
Python
import json
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
from goodnews.db import connect, init_db
|
|
|
|
|
|
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Return a ``TestClient`` for an app backed by a seeded temporary database.

    The database file is created under ``tmp_path`` and its path is exported
    through the ``GOODNEWS_DB`` environment variable before the app is built.
    Seed data: one source (id 1), two articles with ``accepted = 1`` score
    rows (topic "science" and topic "health"; the "health" title contains
    "Election"), and one daily brief listing both articles at ranks 1 and 2.
    """
    db = tmp_path / "test.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))

    conn = connect(db)
    try:
        init_db(conn)
        conn.execute("INSERT INTO sources (id, name, feed_url, trust_score) VALUES (1,'S','http://s/f',7)")

        def add(aid, topic, flavor, title):
            """Insert article ``aid`` for source 1 together with its score row."""
            conn.execute(
                "INSERT INTO articles (id, source_id, canonical_url, title, published_at, url_hash) "
                "VALUES (?,1,?,?, '2026-05-30T10:00:00+00:00', ?)",
                (aid, f"http://s/{aid}", title, f"h{aid}"),
            )
            conn.execute(
                "INSERT INTO article_scores (article_id, constructive_score, agency_score, "
                "human_benefit_score, cortisol_score, ragebait_score, pr_risk_score, accepted, topic, flavor) "
                "VALUES (?, 7, 3, 4, 1, 0, 2, 1, ?, ?)",
                (aid, topic, flavor),
            )

        add(1, "science", "discovery", "A quiet science discovery")
        add(2, "health", "breakthrough", "Election season health update")  # has avoid-able term
        conn.execute("INSERT INTO daily_briefs (id, brief_date, title) VALUES (1,'2026-05-30','Brief')")
        conn.execute("INSERT INTO daily_brief_items (brief_id, article_id, rank) VALUES (1,1,1),(1,2,2)")
        conn.commit()
    finally:
        # Close even when schema creation or seeding raises, so a broken
        # setup does not leak the connection.
        conn.close()

    # Import after env is set so the app reads the temp DB.
    from goodnews.api import create_app

    return TestClient(create_app())
|
|
|
|
|
|
def _prefs(client, **kw):
    """GET ``/api/feed`` with ``kw`` JSON-encoded as the ``prefs`` query parameter."""
    encoded = json.dumps(kw)
    query = {"prefs": encoded}
    return client.get("/api/feed", params=query)
|
|
|
|
|
|
def test_bad_prefs_returns_200_and_full_feed(client):
    """A ``prefs`` value that is not JSON still yields HTTP 200 and a count of 2."""
    response = client.get("/api/feed", params={"prefs": "not json at all"})
    assert response.status_code == 200
    payload = response.json()
    assert payload["count"] == 2  # forgiving: bad blob ignored
|
|
|
|
|
|
def test_mute_topic_affects_feed(client):
    """With "science" muted, the feed items carry only the "health" topic."""
    feed = _prefs(client, mute_topics=["science"]).json()
    remaining_topics = [item["topic"] for item in feed["items"]]
    assert remaining_topics == ["health"]
|
|
|
|
|
|
def test_avoid_term_filters_feed(client):
    """With "election" avoided, one item remains and no title contains the term."""
    response = _prefs(client, avoid_terms=["election"])
    kept_titles = [item["title"] for item in response.json()["items"]]
    # Case-insensitive check: the seeded title spells it "Election".
    assert not any("election" in title.lower() for title in kept_titles)
    assert len(kept_titles) == 1
|
|
|
|
|
|
def test_brief_filters_down_without_refill(client):
    """The brief has two items unfiltered and only "science" once "health" is muted."""
    unfiltered = client.get("/api/brief").json()
    assert len(unfiltered["items"]) == 2

    mute_health = json.dumps({"mute_topics": ["health"]})
    filtered = client.get("/api/brief", params={"prefs": mute_health}).json()
    brief_topics = [item["topic"] for item in filtered["items"]]
    assert brief_topics == ["science"]
|
|
|
|
|
|
def test_category_counts_match_filtered_feed(client):
    """Category counts requested with "health" muted contain no "health" entry."""
    prefs_blob = json.dumps({"mute_topics": ["health"]})
    counts = client.get("/api/category-counts", params={"prefs": prefs_blob}).json()
    assert not any(entry["topic"] == "health" for entry in counts)
|
|
|
|
|
|
def test_feed_excludes_dismissed(client):
    """Requesting the feed with ``exclude=1`` returns no item whose id is 1."""
    response = client.get("/api/feed", params={"exclude": "1"})
    returned_ids = [item["id"] for item in response.json()["items"]]
    assert all(article_id != 1 for article_id in returned_ids)
|
|
|
|
|
|
def test_families_endpoint(client):
    """``/api/families`` lists "Discovery & Wonder" and gives every family a ``tags`` list."""
    families = client.get("/api/families").json()
    family_names = [family["name"] for family in families]
    assert "Discovery & Wonder" in family_names
    for family in families:
        assert "tags" in family and isinstance(family["tags"], list)
|