"""Tests for the ``/api/events`` beacon and the admin stats derived from it."""
import importlib
from contextlib import closing

import pytest
from fastapi.testclient import TestClient

# The events beacon now drops known-bot User-Agents (queries.is_bot_ua), and the test
# client's default UA contains "python" → would be filtered. Send a real browser UA so
# these record like a genuine visitor; the bot case is covered explicitly below.
_BROWSER = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
}


@pytest.fixture
def app_db(tmp_path, monkeypatch):
    """Build a fresh app backed by an initialised SQLite file under ``tmp_path``.

    Returns:
        ``(app, db_path)`` -- the application from ``api.create_app()`` and the
        path of the database file it is configured (via env) to use.
    """
    db = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))
    monkeypatch.setenv("GOODNEWS_SESSION_SECRET", "test-secret")

    # Imported (and reloaded) only after the environment is patched.
    # NOTE(review): presumably goodnews.api reads these variables at import
    # time, which is why the reload is needed -- confirm against the module.
    import goodnews.api as api
    importlib.reload(api)
    from goodnews.db import connect, init_db

    # One connection is enough: init_db() is called explicitly, so there is no
    # need for a separate throwaway connect()/close() beforehand.
    with closing(connect(str(db))) as conn:
        init_db(conn)
    return api.create_app(), db


def _count(db, **where):
    """Return the number of rows in ``events`` matching every ``column=value`` filter.

    Args:
        db: Path of the SQLite database file.
        **where: Equality filters. Keyword names are interpolated into the SQL
            as column names (values are bound as parameters), so callers must
            pass trusted, literal column names only.

    Returns:
        The ``COUNT(*)`` of matching rows; all rows when no filter is given.
    """
    from goodnews.db import connect

    clause = " AND ".join(f"{k}=?" for k in where)
    sql = "SELECT COUNT(*) FROM events" + (f" WHERE {clause}" if where else "")
    # closing() releases the connection even if the query raises.
    with closing(connect(str(db))) as conn:
        return conn.execute(sql, tuple(where.values())).fetchone()[0]


def test_event_recorded_and_deduped(app_db):
    """Repeated identical events collapse to one row; a new visitor adds a row."""
    app, db = app_db
    tc = TestClient(app, headers=_BROWSER)
    for _ in range(3):  # same (kind, article, visitor, day) → one row
        resp = tc.post("/api/events", json={"kind": "open", "article_id": 5, "visitor": "tok"})
        assert resp.json() == {"ok": True}
    assert _count(db, kind="open", article_id=5) == 1
    # a different visitor is a distinct row
    tc.post("/api/events", json={"kind": "open", "article_id": 5, "visitor": "other"})
    assert _count(db, kind="open", article_id=5) == 2


def test_visitor_token_is_hashed_not_stored_raw(app_db):
    """The stored ``visitor_hash`` is a 64-char digest, never the raw token."""
    app, db = app_db
    TestClient(app, headers=_BROWSER).post(
        "/api/events", json={"kind": "visit", "visitor": "secret-token"}
    )
    from goodnews.db import connect

    with closing(connect(str(db))) as conn:
        vh = conn.execute("SELECT visitor_hash FROM events").fetchone()[0]
    assert vh and vh != "secret-token" and len(vh) == 64  # sha256 hex


def test_unknown_kind_is_ignored(app_db):
    """An unrecognised kind still answers ``{"ok": True}`` but stores nothing."""
    app, db = app_db
    resp = TestClient(app, headers=_BROWSER).post(
        "/api/events", json={"kind": "evil", "visitor": "x"}
    )
    assert resp.json() == {"ok": True}
    assert _count(db) == 0


def test_bot_user_agents_are_not_counted(app_db):
    """JS-capable crawlers fire this beacon too; honest bot UAs must not inflate counts.
    Response stays {ok:true} so a bot can't tell it was dropped."""
    app, db = app_db
    bot_uas = (
        "Mozilla/5.0 (compatible; GPTBot/1.0; +https://openai.com/gptbot)",
        "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)",
        "Mozilla/5.0 (X11; Linux x86_64) HeadlessChrome/120.0 Safari/537.36",
    )
    for bot_ua in bot_uas:
        tc = TestClient(app, headers={"user-agent": bot_ua})
        resp = tc.post("/api/events", json={"kind": "visit", "visitor": "b"})
        assert resp.json() == {"ok": True}
    assert _count(db, kind="visit") == 0  # none recorded
    # a real browser on the same beacon IS counted
    TestClient(app, headers=_BROWSER).post(
        "/api/events", json={"kind": "visit", "visitor": "human"}
    )
    assert _count(db, kind="visit") == 1


def test_game_event_kinds_are_allowed(app_db):
    """Per-game funnel kinds are recorded; an unlisted game kind is not."""
    app, db = app_db
    tc = TestClient(app, headers=_BROWSER)
    # the per-game funnel kinds (incl. the share-loop arrival) pass the allowlist
    for kind in ("word_started", "word_completed", "word_shared",
                 "word_arrival", "match_arrival"):
        resp = tc.post("/api/events", json={"kind": kind, "article_id": 0, "visitor": "t"})
        assert resp.json() == {"ok": True}
        assert _count(db, kind=kind) == 1
    # a bogus game kind is still rejected
    tc.post("/api/events", json={"kind": "chess_started", "visitor": "t"})
    assert _count(db, kind="chess_started") == 0


def test_admin_stats_games_funnel_aggregates(app_db):
    """``admin_stats(...)["games"]`` aggregates funnel events per game and in totals."""
    app, db = app_db
    tc = TestClient(app, headers=_BROWSER)
    # two visitors arrive at Daily Word via a shared link; one engages + shares;
    # a Match completes
    for v in ("a", "b"):
        tc.post("/api/events", json={"kind": "word_arrival", "article_id": 0, "visitor": v})
    tc.post("/api/events", json={"kind": "word_started", "article_id": 0, "visitor": "a"})
    tc.post("/api/events", json={"kind": "word_shared", "article_id": 0, "visitor": "a"})
    tc.post("/api/events", json={"kind": "match_completed", "article_id": 0, "visitor": "a"})

    from goodnews.db import connect
    from goodnews import queries

    with closing(connect(str(db))) as conn:
        games = queries.admin_stats(conn, days=30)["games"]
    assert games["by_game"]["word"] == {"arrival": 2, "started": 1, "completed": 0, "shared": 1}
    assert games["by_game"]["match"]["completed"] == 1
    assert games["totals"]["arrival"] == 2 and games["totals"]["shared"] == 1


def test_engaged_readers_metric(app_db):
    """Engaged readers counts the gesture-gated 'engaged' beacon + deliberate actions,
    NOT auto-fired visit/summary_viewed or a game-share arrival."""
    app, db = app_db
    tc = TestClient(app, headers=_BROWSER)
    tc.post("/api/events", json={"kind": "engaged", "visitor": "a"})  # gesture beacon
    tc.post("/api/events",
            json={"kind": "source_click", "article_id": 5, "visitor": "b"})  # deliberate
    tc.post("/api/events", json={"kind": "visit", "visitor": "c"})  # raw visit only
    tc.post("/api/events",
            json={"kind": "summary_viewed", "article_id": 5, "visitor": "c"})  # auto-fired
    tc.post("/api/events", json={"kind": "word_arrival", "visitor": "d"})  # share-loop landing

    from goodnews.db import connect
    from goodnews import queries

    with closing(connect(str(db))) as conn:
        v = queries.admin_stats(conn, days=30)["visitors"]
    assert v["engaged_today"] == 2  # a (engaged) + b (source_click)
    assert v["today"] == 1  # only c fired a raw visit