Admin: source Articles inspector (verify metrics against real evidence)

New per-row "Articles" button on the Sources table expands a read-only inline panel of the source's ACTUAL ingested articles — so the automated metrics (paywall/image/acceptance/duplicate) can be verified against evidence instead of trusted blindly. Distinct from "Check" (which re-samples the LIVE feed for would-pass quality); this shows what's already in the DB, which is what the table metrics are computed from.

- Backend: GET /api/admin/sources/{id}/articles?filter=&limit=&offset= (admin, read-only). queries.source_articles + source_articles_summary — per article: title, url, date, accepted, reason (the "why"), topic/flavor, paywalled (domain rule), has_image, duplicate. Summary = counts + source-level paywall rule.
- Frontend: expandable panel with a summary header ("27 ingested · 18 accepted · … · paywall rule: ON (domain)"), filter chips (All/Accepted/Rejected/No image/Duplicates), compact rows with title→link + badges + reason, Load more.

So "100% paywall" or "0% images" becomes clickable evidence: open two articles to tell a real paywall from a mis-flagged domain, or a true image gap from an enrichment failure.

Test: test_source_articles_inspector. 241 pytest + 11 vitest.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-12 21:37:51 -04:00
parent 64339aafb0
commit ddcfab3a11
9 changed files with 445 additions and 61 deletions
@@ -503,3 +503,18 @@ def test_wordsearch_theme_admin(tmp_path, monkeypatch):
    # remove
    left = tc.delete(f"/api/admin/wordsearch/themes/{tid}").json()
    assert not any(t["id"] == tid for t in left)
+
+
+def test_source_articles_inspector(tmp_path, monkeypatch):
+    app, api = _make(tmp_path, monkeypatch, admin_email="boss@x.com")
+    assert TestClient(app).get("/api/admin/sources/1/articles").status_code == 401  # gated
+    tc = _signin(app, api, "boss@x.com")
+    r = tc.get("/api/admin/sources/1/articles").json()
+    assert r["summary"]["total"] == 1 and r["summary"]["accepted"] == 1 and r["summary"]["no_image"] == 1
+    assert len(r["articles"]) == 1
+    a = r["articles"][0]
+    assert a["title"] == "t1" and a["accepted"] == 1 and a["has_image"] is False and a["paywalled"] is False
+    # filters resolve in SQL; rejected → none (the seeded article is accepted)
+    assert tc.get("/api/admin/sources/1/articles?filter=rejected").json()["articles"] == []
+    assert len(tc.get("/api/admin/sources/1/articles?filter=no_image").json()["articles"]) == 1
+    assert tc.get("/api/admin/sources/999/articles").status_code == 404  # unknown source
@@ -17,11 +17,13 @@ def conn(tmp_path):
 # --- merge logic (the audited core) ---

 def test_merge_wordsearch_unions_finds():
-    a = {"foundWords": [{"word": "CAT", "cells": [[0, 0]], "ci": 0}], "startTime": 100, "ms": 0}
-    b = {"foundWords": [{"word": "DOG", "cells": [[1, 1]], "ci": 1}], "startTime": 50, "ms": 0}
+    a = {"foundWords": [{"word": "CAT", "cells": [[0, 0]], "ci": 0}], "played": 9000, "ms": 0}
+    b = {"foundWords": [{"word": "DOG", "cells": [[1, 1]], "ci": 1}], "played": 4000, "ms": 0}
    m = games.merge_game_state("wordsearch", a, b)
    assert {f["word"] for f in m["foundWords"]} == {"CAT", "DOG"}  # union of finds
-    assert m["startTime"] == 50  # earliest start
+    # active-time clock: the device that banked the most play time is the truth —
+    # wall-clock gaps between sittings must never inflate the timer
+    assert m["played"] == 9000


 def test_merge_wordsearch_dedupes_and_keeps_best_time():
@@ -55,12 +57,12 @@ def _find(word, row):  # a shape-valid find: cells spelling the word along a row
 def test_save_converges_across_devices(conn):
    # No stored puzzle for this date → shape-only sanitize (words 4-12, cells match).
    games.save_game_state(conn, 1, "wordsearch", "small", "2026-06-12",
-                          {"foundWords": [_find("BEACH", 0)], "startTime": 100})
+                          {"foundWords": [_find("BEACH", 0)], "played": 100})
    merged = games.save_game_state(conn, 1, "wordsearch", "small", "2026-06-12",
-                                   {"foundWords": [_find("OCEAN", 1)], "startTime": 50})
+                                   {"foundWords": [_find("OCEAN", 1)], "played": 50})
    assert {f["word"] for f in merged["foundWords"]} == {"BEACH", "OCEAN"}
-    # stored state reflects the merge (order-independent)
-    assert games.load_game_state(conn, 1, "wordsearch", "small", "2026-06-12")["startTime"] == 50
+    # stored state reflects the merge (order-independent): most banked time wins
+    assert games.load_game_state(conn, 1, "wordsearch", "small", "2026-06-12")["played"] == 100


 # --- derived stats ---
@@ -96,15 +98,15 @@ def test_game_state_api_roundtrip(tmp_path, monkeypatch):
    # signed out → no sync, echoes the posted state and GET sees nothing stored
    anon = TestClient(app)
    body = {"game": "wordsearch", "variant": "small", "date": "2026-06-12",
-            "state": {"foundWords": [_find("BEACH", 0)], "startTime": 9}}
+            "state": {"foundWords": [_find("BEACH", 0)], "played": 9}}
    assert anon.put("/api/games/state", json=body).json()["state"]["foundWords"][0]["word"] == "BEACH"
    assert anon.get("/api/games/state?game=wordsearch&variant=small&date=2026-06-12").json()["state"] is None
    # signed in: push from "device A", then "device B" → server returns the union
    tc = _signin(app, api, "p@x.com")
    tc.put("/api/games/state", json=body)
-    bodyB = {**body, "state": {"foundWords": [_find("OCEAN", 1)], "startTime": 4}}
+    bodyB = {**body, "state": {"foundWords": [_find("OCEAN", 1)], "played": 4}}
    merged = tc.put("/api/games/state", json=bodyB).json()["state"]
-    assert {f["word"] for f in merged["foundWords"]} == {"BEACH", "OCEAN"} and merged["startTime"] == 4
+    assert {f["word"] for f in merged["foundWords"]} == {"BEACH", "OCEAN"} and merged["played"] == 9
    # GET returns the stored merge; unknown game → 404; bad date → 400
    got = tc.get("/api/games/state?game=wordsearch&variant=small&date=2026-06-12").json()["state"]
    assert {f["word"] for f in got["foundWords"]} == {"BEACH", "OCEAN"}
@@ -123,5 +125,9 @@ def test_sanitizers_reject_junk(conn):
        "foundWords": [_find("BEACH", 0),                         # ok
                       {"word": "CAT", "cells": [[0, 0], [0, 1], [0, 2]]},   # too short (<4)
                       {"word": "OCEAN", "cells": [[1, 0], [1, 1]]}],        # cells != len
-        "ms": 12345})
+        "ms": 12345, "played": -5})
    assert [f["word"] for f in ws["foundWords"]] == ["BEACH"] and ws["ms"] == 0
+    assert ws["played"] == 0  # negative junk clamped
+    # absurd active-time claims are capped at a day
+    capped = games._sanitize_wordsearch(conn, "small", "2026-06-12", {"foundWords": [], "played": 10**12})
+    assert capped["played"] == 86_400_000
@@ -0,0 +1,78 @@
+"""Locks the word-search placement qualities players actually feel:
+
+1. Every word gets placed (exhaustive candidate search — nothing silently dropped).
+2. Grids INTERLOCK like a real puzzle (the "clean isolated words" regression).
+3. Words SPREAD across the board (the "all clumped in one corner" regression).
+4. Same date/seed → same grid (cross-device players must see identical puzzles).
+
+Thresholds were calibrated against all curated themes × 12 seeds × 3 tiers
+(288 grids/tier): crossing fraction averaged ~0.7 (old algorithm: ~0.3, with a
+third of small grids having ZERO crossings), worst quadrant share 0.42, and all
+four quadrants always held word cells. Deterministic, so no flake margin needed.
+"""
+
+import random
+import statistics
+
+from goodnews.games import _WS_FALLBACKS, WS_TIERS, _WS_ORDER, _build_grid, _place_words, _zone
+
+
+def _tier_grids(tier):
+    """Yield (placements, size) for every curated theme × 12 seeds in a tier."""
+    t = WS_TIERS[tier]
+    for _, words in _WS_FALLBACKS:
+        for seed in range(12):
+            rng = random.Random(seed * 1000 + 7)
+            ws = list(words)
+            rng.shuffle(ws)
+            _, placements = _place_words(ws[: t["count"]], t["grid"], seed)
+            yield placements, t["grid"]
+
+
+def _cross_fraction(placements):
+    """Fraction of placed words sharing at least one cell with another word."""
+    owners: dict[tuple[int, int], list[str]] = {}
+    for w, cells in placements:
+        for cell in cells:
+            owners.setdefault(cell, []).append(w)
+    crossing = set()
+    for ws in owners.values():
+        if len(ws) > 1:
+            crossing.update(ws)
+    return len(crossing) / len(placements)
+
+
+def test_all_words_placed():
+    for tier in _WS_ORDER:
+        for placements, _ in _tier_grids(tier):
+            assert len(placements) == WS_TIERS[tier]["count"]
+
+
+def test_grids_interlock_without_clumping():
+    for tier in _WS_ORDER:
+        fracs = []
+        for placements, size in _tier_grids(tier):
+            fracs.append(_cross_fraction(placements))
+            # Spread: word cells must reach all four quadrants, and no quadrant
+            # may hoard more than half of them (perfectly even would be 0.25).
+            quad: dict[tuple[int, int], int] = {}
+            cells = {c for _, cs in placements for c in cs}
+            for r, c in cells:
+                quad[_zone(r, c, size)] = quad.get(_zone(r, c, size), 0) + 1
+            assert len(quad) == 4, f"{tier}: words confined to {len(quad)} quadrant(s)"
+            assert max(quad.values()) / len(cells) <= 0.5, f"{tier}: clumped in one quadrant"
+        # Interlock: every grid has some crossings; on average most words connect.
+        assert min(fracs) >= 0.3, f"{tier}: a grid came out as disconnected clean words"
+        assert 0.55 <= statistics.mean(fracs) <= 0.9, f"{tier}: avg crossing {statistics.mean(fracs):.2f}"
+
+
+def test_grid_deterministic_and_honest():
+    """Same inputs → byte-identical grid, and every reported word is really in it
+    (forward or reversed along some line — spot-checked via placements)."""
+    words = _WS_FALLBACKS[0][1][:9]
+    rows1, placed1 = _build_grid(words, 11, 42)
+    rows2, placed2 = _build_grid(words, 11, 42)
+    assert rows1 == rows2 and placed1 == placed2
+    _, placements = _place_words(words, 11, 42)
+    for word, cells in placements:
+        assert "".join(rows1[r][c] for r, c in cells) == word