Admin: source Articles inspector (verify metrics against real evidence)

New per-row "Articles" button on the Sources table expands a read-only inline
panel of the source's ACTUAL ingested articles — so the automated metrics
(paywall/image/acceptance/duplicate) can be verified against evidence instead of
being trusted blindly. Distinct from "Check" (which re-samples the LIVE feed for
would-pass quality); this shows what's already in the DB, which is what the table
metrics are computed from.

- Backend: GET /api/admin/sources/{id}/articles?filter=&limit=&offset= (admin,
  read-only). queries.source_articles + source_articles_summary — per article:
  title, url, date, accepted, reason (the "why"), topic/flavor, paywalled
  (domain rule), has_image, duplicate. Summary = counts + source-level paywall
  rule.
- Frontend: expandable panel with a summary header ("27 ingested · 18 accepted
  · … · paywall rule: ON (domain)"), filter chips (All/Accepted/Rejected/No
  image/Duplicates), compact rows with title→link + badges + reason, Load more.

So "100% paywall" or "0% images" becomes clickable evidence: open two articles
to tell a real paywall from a mis-flagged domain, or a true image gap from an
enrichment failure. Test: test_source_articles_inspector. 241 pytest + 11 vitest.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-12 21:37:51 -04:00
parent 64339aafb0
commit ddcfab3a11
9 changed files with 445 additions and 61 deletions
+15
View File
@@ -503,3 +503,18 @@ def test_wordsearch_theme_admin(tmp_path, monkeypatch):
# remove
left = tc.delete(f"/api/admin/wordsearch/themes/{tid}").json()
assert not any(t["id"] == tid for t in left)
def test_source_articles_inspector(tmp_path, monkeypatch):
    """Exercise the admin source-articles endpoint end to end.

    Covers, in order: the auth gate for an anonymous client, the summary
    counts and per-article flags returned to a signed-in admin, the
    `filter` query parameter, and the 404 for an unknown source id.
    """
    app, api = _make(tmp_path, monkeypatch, admin_email="boss@x.com")

    # gated: a client that never signed in must be refused
    anonymous = TestClient(app).get("/api/admin/sources/1/articles")
    assert anonymous.status_code == 401

    admin = _signin(app, api, "boss@x.com")
    payload = admin.get("/api/admin/sources/1/articles").json()

    # summary header: one ingested article, accepted, without an image
    summary = payload["summary"]
    assert summary["total"] == 1
    assert summary["accepted"] == 1
    assert summary["no_image"] == 1

    # the single row carries the same facts as per-article flags
    articles = payload["articles"]
    assert len(articles) == 1
    row = articles[0]
    assert row["title"] == "t1"
    assert row["accepted"] == 1
    assert row["has_image"] is False
    assert row["paywalled"] is False

    # filters resolve in SQL; rejected → none (the seeded article is accepted)
    rejected = admin.get("/api/admin/sources/1/articles?filter=rejected").json()
    assert rejected["articles"] == []
    no_image = admin.get("/api/admin/sources/1/articles?filter=no_image").json()
    assert len(no_image["articles"]) == 1

    # unknown source
    missing = admin.get("/api/admin/sources/999/articles")
    assert missing.status_code == 404
+17 -11
View File
@@ -17,11 +17,13 @@ def conn(tmp_path):
# --- merge logic (the audited core) ---
def test_merge_wordsearch_unions_finds():
a = {"foundWords": [{"word": "CAT", "cells": [[0, 0]], "ci": 0}], "startTime": 100, "ms": 0}
b = {"foundWords": [{"word": "DOG", "cells": [[1, 1]], "ci": 1}], "startTime": 50, "ms": 0}
a = {"foundWords": [{"word": "CAT", "cells": [[0, 0]], "ci": 0}], "played": 9000, "ms": 0}
b = {"foundWords": [{"word": "DOG", "cells": [[1, 1]], "ci": 1}], "played": 4000, "ms": 0}
m = games.merge_game_state("wordsearch", a, b)
assert {f["word"] for f in m["foundWords"]} == {"CAT", "DOG"} # union of finds
assert m["startTime"] == 50 # earliest start
# active-time clock: the device that banked the most play time is the truth —
# wall-clock gaps between sittings must never inflate the timer
assert m["played"] == 9000
def test_merge_wordsearch_dedupes_and_keeps_best_time():
@@ -55,12 +57,12 @@ def _find(word, row): # a shape-valid find: cells spelling the word along a row
def test_save_converges_across_devices(conn):
# No stored puzzle for this date → shape-only sanitize (words 4-12, cells match).
games.save_game_state(conn, 1, "wordsearch", "small", "2026-06-12",
{"foundWords": [_find("BEACH", 0)], "startTime": 100})
{"foundWords": [_find("BEACH", 0)], "played": 100})
merged = games.save_game_state(conn, 1, "wordsearch", "small", "2026-06-12",
{"foundWords": [_find("OCEAN", 1)], "startTime": 50})
{"foundWords": [_find("OCEAN", 1)], "played": 50})
assert {f["word"] for f in merged["foundWords"]} == {"BEACH", "OCEAN"}
# stored state reflects the merge (order-independent)
assert games.load_game_state(conn, 1, "wordsearch", "small", "2026-06-12")["startTime"] == 50
# stored state reflects the merge (order-independent): most banked time wins
assert games.load_game_state(conn, 1, "wordsearch", "small", "2026-06-12")["played"] == 100
# --- derived stats ---
@@ -96,15 +98,15 @@ def test_game_state_api_roundtrip(tmp_path, monkeypatch):
# signed out → no sync, echoes the posted state and GET sees nothing stored
anon = TestClient(app)
body = {"game": "wordsearch", "variant": "small", "date": "2026-06-12",
"state": {"foundWords": [_find("BEACH", 0)], "startTime": 9}}
"state": {"foundWords": [_find("BEACH", 0)], "played": 9}}
assert anon.put("/api/games/state", json=body).json()["state"]["foundWords"][0]["word"] == "BEACH"
assert anon.get("/api/games/state?game=wordsearch&variant=small&date=2026-06-12").json()["state"] is None
# signed in: push from "device A", then "device B" → server returns the union
tc = _signin(app, api, "p@x.com")
tc.put("/api/games/state", json=body)
bodyB = {**body, "state": {"foundWords": [_find("OCEAN", 1)], "startTime": 4}}
bodyB = {**body, "state": {"foundWords": [_find("OCEAN", 1)], "played": 4}}
merged = tc.put("/api/games/state", json=bodyB).json()["state"]
assert {f["word"] for f in merged["foundWords"]} == {"BEACH", "OCEAN"} and merged["startTime"] == 4
assert {f["word"] for f in merged["foundWords"]} == {"BEACH", "OCEAN"} and merged["played"] == 9
# GET returns the stored merge; unknown game → 404; bad date → 400
got = tc.get("/api/games/state?game=wordsearch&variant=small&date=2026-06-12").json()["state"]
assert {f["word"] for f in got["foundWords"]} == {"BEACH", "OCEAN"}
@@ -123,5 +125,9 @@ def test_sanitizers_reject_junk(conn):
"foundWords": [_find("BEACH", 0), # ok
{"word": "CAT", "cells": [[0, 0], [0, 1], [0, 2]]}, # too short (<4)
{"word": "OCEAN", "cells": [[1, 0], [1, 1]]}], # cells != len
"ms": 12345})
"ms": 12345, "played": -5})
assert [f["word"] for f in ws["foundWords"]] == ["BEACH"] and ws["ms"] == 0
assert ws["played"] == 0 # negative junk clamped
# absurd active-time claims are capped at a day
capped = games._sanitize_wordsearch(conn, "small", "2026-06-12", {"foundWords": [], "played": 10**12})
assert capped["played"] == 86_400_000
+78
View File
@@ -0,0 +1,78 @@
"""Locks the word-search placement qualities players actually feel:
1. Every word gets placed (exhaustive candidate search — nothing silently dropped).
2. Grids INTERLOCK like a real puzzle (the "clean isolated words" regression).
3. Words SPREAD across the board (the "all clumped in one corner" regression).
4. Same date/seed → same grid (cross-device players must see identical puzzles).
Thresholds were calibrated against all curated themes × 12 seeds × 3 tiers
(288 grids/tier): crossing fraction averaged ~0.7 (old algorithm: ~0.3, with a
third of small grids having ZERO crossings), worst quadrant share 0.42, and all
four quadrants always held word cells. Deterministic, so no flake margin needed.
"""
import random
import statistics
from goodnews.games import _WS_FALLBACKS, WS_TIERS, _WS_ORDER, _build_grid, _place_words, _zone
def _tier_grids(tier):
    """Generate one (placements, grid size) pair per curated word list and seed.

    For each entry of ``_WS_FALLBACKS`` and each seed in 0..11 the word list is
    copied, shuffled with its own ``random.Random(seed * 1000 + 7)``, cut down
    to the tier's ``count`` and handed to ``_place_words`` together with the
    tier's ``grid`` size and the same seed.
    """
    spec = WS_TIERS[tier]
    for _label, theme_words in _WS_FALLBACKS:
        for seed in range(12):
            shuffled = list(theme_words)
            # A fresh generator per (theme, seed) keeps every sample reproducible.
            random.Random(seed * 1000 + 7).shuffle(shuffled)
            chosen = shuffled[: spec["count"]]
            _unused, placements = _place_words(chosen, spec["grid"], seed)
            yield placements, spec["grid"]
def _cross_fraction(placements):
"""Fraction of placed words sharing at least one cell with another word."""
owners: dict[tuple[int, int], list[str]] = {}
for w, cells in placements:
for cell in cells:
owners.setdefault(cell, []).append(w)
crossing = set()
for ws in owners.values():
if len(ws) > 1:
crossing.update(ws)
return len(crossing) / len(placements)
def test_all_words_placed():
    """Every sampled grid in every tier places exactly the tier's word count."""
    for tier in _WS_ORDER:
        expected = WS_TIERS[tier]["count"]
        # Collect the placement counts that miss the target; none may remain.
        off_target = [
            len(placements)
            for placements, _size in _tier_grids(tier)
            if len(placements) != expected
        ]
        assert not off_target
def test_grids_interlock_without_clumping():
    """Check spread per grid and interlock per tier across the sampled grids.

    Per grid: word cells must land in four distinct ``_zone`` values and no
    single zone may hold more than half of the word cells.
    Per tier: the lowest crossing fraction must be at least 0.3 and the mean
    crossing fraction must lie in [0.55, 0.9].
    """
    for tier in _WS_ORDER:
        fracs = []
        for placements, size in _tier_grids(tier):
            fracs.append(_cross_fraction(placements))
            # Spread: word cells must reach all four quadrants, and no quadrant
            # may hoard more than half of them (perfectly even would be 0.25).
            quad: dict[tuple[int, int], int] = {}
            cells = {c for _, cs in placements for c in cs}
            for r, c in cells:
                zone = _zone(r, c, size)  # look the zone up once per cell
                quad[zone] = quad.get(zone, 0) + 1
            assert len(quad) == 4, f"{tier}: words confined to {len(quad)} quadrant(s)"
            assert max(quad.values()) / len(cells) <= 0.5, f"{tier}: clumped in one quadrant"
        # Without any grids, min()/mean() below would raise an unrelated error;
        # fail with a message that names the real problem instead.
        assert fracs, f"{tier}: no grids generated"
        # Interlock: every grid has some crossings; on average most words connect.
        avg = statistics.mean(fracs)
        assert min(fracs) >= 0.3, f"{tier}: a grid came out as disconnected clean words"
        assert 0.55 <= avg <= 0.9, f"{tier}: avg crossing {avg:.2f}"
def test_grid_deterministic_and_honest():
    """Identical arguments give identical output, and placements match the grid.

    Builds the grid twice with the same words, size and seed and requires both
    returned values to compare equal. Then, for every (word, cells) pair from
    ``_place_words`` with those same arguments, reading the first grid's
    letters in cell order must spell the word.
    """
    size, seed = 11, 42
    words = _WS_FALLBACKS[0][1][:9]

    rows_a, placed_a = _build_grid(words, size, seed)
    rows_b, placed_b = _build_grid(words, size, seed)
    assert rows_a == rows_b
    assert placed_a == placed_b

    _unused, placements = _place_words(words, size, seed)
    for word, cells in placements:
        spelled = "".join(rows_a[r][c] for r, c in cells)
        assert spelled == word