Admin: source Articles inspector (verify metrics against real evidence)
New per-row "Articles" button on the Sources table expands a read-only inline
panel of the source's ACTUAL ingested articles — so the automated metrics
(paywall/image/acceptance/duplicate) can be verified against evidence instead of
trusted blind. Distinct from "Check" (which re-samples the LIVE feed for
would-pass quality); this shows what's already in the DB, which is what the table
metrics are computed from.
- Backend: GET /api/admin/sources/{id}/articles?filter=&limit=&offset= (admin,
read-only). queries.source_articles + source_articles_summary — per article:
title, url, date, accepted, reason (the "why"), topic/flavor, paywalled
(domain rule), has_image, duplicate. Summary = counts + source-level paywall
rule.
- Frontend: expandable panel with a summary header ("27 ingested · 18 accepted
· … · paywall rule: ON (domain)"), filter chips (All/Accepted/Rejected/No
image/Duplicates), compact rows with title→link + badges + reason, Load more.
So "100% paywall" or "0% images" becomes clickable evidence: open two articles
to tell a real paywall from a mis-flagged domain, or a true image gap from an
enrichment failure. Test: test_source_articles_inspector. 241 pytest + 11 vitest.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1146,6 +1146,24 @@ def create_app() -> FastAPI:
|
||||
url = src["feed_url"]
|
||||
return _preview_or_502(url) # safe fetch, no DB connection held
|
||||
|
||||
@app.get("/api/admin/sources/{sid}/articles")
def admin_source_articles(sid: int, request: Request, filter: str = "all",
                          limit: int = 25, offset: int = 0) -> dict:
    """Read-only inspector: the REAL ingested articles behind a source's metrics,
    so paywall/image/acceptance/duplicate signals can be verified against evidence.

    Query parameters:
        filter: name of a row filter understood by ``queries.source_articles``.
        limit:  page size, clamped to 1..100.
        offset: rows to skip, clamped to >= 0.

    Returns a dict with ``articles`` (one page), ``summary`` (only on the first
    page, i.e. offset 0; otherwise None) and ``has_more``.

    Raises HTTPException(404) when no source has id ``sid``. Admin access is
    checked by ``_require_admin`` before anything is read.
    """
    limit = max(1, min(int(limit), 100))
    offset = max(0, int(offset))
    with get_conn() as conn:
        _require_admin(conn, request)
        if not conn.execute("SELECT 1 FROM sources WHERE id = ?", (sid,)).fetchone():
            raise HTTPException(status_code=404, detail="source not found")
        # Ask for one row beyond the page. Comparing len(page) == limit would
        # claim "more" when exactly `limit` rows remain, and the next
        # "Load more" would then come back empty.
        arts = queries.source_articles(conn, sid, filter, limit + 1, offset)
        has_more = len(arts) > limit
        return {
            "articles": arts[:limit],
            "summary": queries.source_articles_summary(conn, sid) if offset == 0 else None,
            "has_more": has_more,
        }
|
||||
|
||||
# --- Source candidates (supervised add-a-source pipeline) ----------------
|
||||
|
||||
def _candidate_dict(row) -> dict:
|
||||
|
||||
+79
-34
@@ -518,42 +518,79 @@ def generate_wordsearch_puzzle(conn: sqlite3.Connection, date: str, client=None)
|
||||
return json.loads(row["payload_json"])
|
||||
|
||||
|
||||
# Word-search interlock target: _place_words forces the next word to cross an
# existing one whenever crossings so far are below this fraction of the words
# already placed.
_WS_CROSS_TARGET = 0.5  # aim: about half the placements cross an existing word
|
||||
|
||||
|
||||
def _zone(r: int, c: int, size: int) -> tuple[int, int]:
|
||||
"""Which quadrant a cell falls in — coarse occupancy used to spread words."""
|
||||
return (r * 2 // size, c * 2 // size)
|
||||
|
||||
|
||||
def _place_words(words: list[str], size: int, seed: int) -> tuple[list[list[str | None]], list[tuple[str, list[tuple[int, int]]]]]:
    """Core placement (date-seeded, deterministic). Returns the letter grid (None
    where unfilled) and [(word, cells)] for every word genuinely placed.

    Interlock is a TARGET, not a side effect: each word either (a) must cross an
    already-placed word — when crossings are running below ~half of placements —
    or (b) anchors in open ground. Both modes steer toward the least crowded /
    least developed quadrant, so crossings attach to lonely words at the edges of
    structure rather than thickening one knot, and anchors spread across the
    board. All valid spots are enumerated (the grid is tiny) — earlier random
    sampling kept missing the rare crossing spots, which is why grids came out
    as disconnected "clean" words.

    Args:
        words: candidate words; those longer than ``size`` or with no legal
            spot are silently skipped.
        size: side length of the square grid.
        seed: seed for the RNG that picks among near-best placements.
    """
    rng = random.Random(seed)
    grid: list[list[str | None]] = [[None] * size for _ in range(size)]
    # Count of newly filled cells per quadrant, keyed like _zone()'s result.
    zone_fill = {(zr, zc): 0 for zr in (0, 1) for zc in (0, 1)}
    placements: list[tuple[str, list[tuple[int, int]]]] = []
    crossed = 0  # how many placed words overlapped at least one existing letter
    # Longest words first; sorted() is stable, so equal lengths keep input order.
    for word in sorted(words, key=len, reverse=True):
        n = len(word)
        if n > size:
            continue
        cands = []  # (overlap, cells) over every legal placement
        for dr, dc in _DIRS:
            for r0 in range(size):
                for c0 in range(size):
                    # The start cell is in range by construction; only the
                    # far end of the word needs a bounds check.
                    if not (0 <= r0 + dr * (n - 1) < size and 0 <= c0 + dc * (n - 1) < size):
                        continue
                    cells = [(r0 + dr * i, c0 + dc * i) for i in range(n)]
                    # Every cell must be empty or already hold the same letter.
                    if not all(grid[r][c] in (None, word[i]) for i, (r, c) in enumerate(cells)):
                        continue
                    cands.append((sum(1 for i, (r, c) in enumerate(cells) if grid[r][c] == word[i]), cells))
        if not cands:
            continue
        crossing = [t for t in cands if t[0] > 0]
        # Force a crossing only when one exists AND crossings are behind target.
        want_cross = bool(crossing) and crossed < _WS_CROSS_TARGET * len(placements)
        scored = []  # (score, overlap, cells)
        for overlap, cells in crossing if want_cross else cands:
            crowd = _neighbour_fill(grid, cells, size)
            # Average quadrant load over the word's cells (integer division).
            zload = sum(zone_fill[_zone(r, c, size)] for r, c in cells) // n
            # Crossing mode rewards extra overlaps; anchor mode is overlap-neutral
            # (crowding already steers it to open ground).
            scored.append(((overlap * 4 if want_cross else 0) - 2 * crowd - zload, overlap, cells))
        scored.sort(key=lambda t: t[0], reverse=True)
        top = [t for t in scored if t[0] >= scored[0][0] - 1]  # near-best: variety without losing intent
        # The only RNG draw per word; with enumeration order fixed, this keeps
        # the result reproducible for a given seed.
        _, overlap, cells = rng.choice(top)
        for i, (r, c) in enumerate(cells):
            # Shared (crossing) cells already hold the right letter; only new
            # cells are written and counted toward their quadrant.
            if grid[r][c] is None:
                grid[r][c] = word[i]
                zone_fill[_zone(r, c, size)] += 1
        placements.append((word, cells))
        if overlap:
            crossed += 1
    return grid, placements
|
||||
|
||||
|
||||
def _build_grid(words: list[str], size: int, seed: int) -> tuple[list[str], list[str]]:
    """Place words in a size×size grid (date-seeded, deterministic) and fill the
    rest. Returns (rows, placed_words). Every returned word is genuinely placed.

    Placement is delegated entirely to ``_place_words``; this function only
    fills the remaining empty cells with random capital letters and flattens
    the result. ``placed_words`` is taken from the placements that
    ``_place_words`` reports, so it always matches the returned grid.
    """
    grid, placements = _place_words(words, size, seed)
    # Filler letters come from their own RNG, seeded via _seed(str(seed), "fill"),
    # rather than reusing the placement seed directly.
    rng = random.Random(_seed(str(seed), "fill"))
    for r in range(size):
        for c in range(size):
            if grid[r][c] is None:
                grid[r][c] = chr(65 + rng.randrange(26))  # 65 == ord("A")
    return ["".join(row) for row in grid], [w for w, _ in placements]
|
||||
|
||||
|
||||
# --- Cross-device game state sync -------------------------------------------
|
||||
@@ -562,17 +599,18 @@ def _build_grid(words: list[str], size: int, seed: int) -> tuple[list[str], list
|
||||
|
||||
def _merge_wordsearch(a: dict, b: dict) -> dict:
    """Union the found words (a find is monotonic — you can't un-find one, so the
    union is always correct), credit the most ACTIVE play time either device has
    banked (max — the clock only runs while the puzzle is on screen, so wall-clock
    gaps between sittings never count), and keep the best (min) finish time.

    Args:
        a, b: two saved states for the same puzzle; the keys read are
            ``foundWords``, ``played`` and ``ms``.

    Returns:
        A dict with ``foundWords`` (first occurrence of each word, ``a`` before
        ``b``), ``played`` (the larger value) and ``ms`` (the smallest non-zero
        value, or 0 when neither side has one).
    """
    by_word = {}
    for fw in list(a.get("foundWords") or []) + list(b.get("foundWords") or []):
        # Entries that are not dicts, or have no word, are dropped.
        w = fw.get("word") if isinstance(fw, dict) else None
        if w and w not in by_word:
            by_word[w] = fw
    # Falsy ms (0 / None / missing) is skipped so it can never win the min().
    times = [m for m in (a.get("ms"), b.get("ms")) if m]
    return {
        "foundWords": list(by_word.values()),
        "played": max(_int(a.get("played")), _int(b.get("played"))),
        "ms": min(times) if times else 0,
    }
|
||||
|
||||
@@ -615,6 +653,13 @@ def _int(x) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
# Upper bound applied by _ms(): 86_400_000 ms is 24 hours.
_WS_MS_CAP = 86_400_000  # clamp client-sent timings to one day — beyond that is junk
|
||||
|
||||
|
||||
def _ms(x) -> int:
    """Coerce a client-sent millisecond value into the range 0.._WS_MS_CAP."""
    value = _int(x)
    if value < 0:
        return 0
    if value > _WS_MS_CAP:
        return _WS_MS_CAP
    return value
|
||||
|
||||
|
||||
def _sanitize_wordsearch(conn: sqlite3.Connection, variant: str, date: str, state: dict) -> dict:
|
||||
"""Trust only finds that are real for THIS puzzle: word in the day's list and
|
||||
cells that actually spell it in the grid (validated when the puzzle exists,
|
||||
@@ -656,8 +701,8 @@ def _sanitize_wordsearch(conn: sqlite3.Connection, variant: str, date: str, stat
|
||||
seen.add(w)
|
||||
clean.append({"word": w, "cells": cells, "ci": len(clean) % 10})
|
||||
done = bool(words) and len(clean) == len(words)
|
||||
return {"foundWords": clean, "startTime": _int(state.get("startTime")),
|
||||
"ms": _int(state.get("ms")) if done else 0}
|
||||
return {"foundWords": clean, "played": _ms(state.get("played")),
|
||||
"ms": _ms(state.get("ms")) if done else 0}
|
||||
|
||||
|
||||
# Closed set of colour labels. NOTE(review): presumably the per-letter feedback
# states accepted for the word-guessing game — confirm against the code that uses it.
_WORD_COLOURS = {"absent", "present", "correct"}
|
||||
|
||||
@@ -454,6 +454,75 @@ def _attention(content: dict, sources: list[dict], feedback_unread: int, now: da
|
||||
return items
|
||||
|
||||
|
||||
# --- Source article inspector: the real articles behind the source metrics -----
|
||||
|
||||
# Extra WHERE fragments for source_articles(), keyed by filter name. Each one is
# appended after "WHERE a.source_id = ?", where `a` is the articles row and `s`
# the LEFT-JOINed article_scores row. Any name not listed here (e.g. "all") adds
# no fragment. Articles with no score row have s.accepted NULL, so they match
# neither "accepted" nor "rejected".
_SRC_ART_FILTERS = {
    "accepted": "AND s.accepted = 1",
    "rejected": "AND s.accepted = 0",
    "no_image": "AND (a.image_url IS NULL OR a.image_url = '')",  # NULL and '' both count as missing
    "duplicates": "AND a.duplicate_of IS NOT NULL",
}
|
||||
|
||||
|
||||
def source_articles(conn: sqlite3.Connection, source_id: int, filter: str = "all",
                    limit: int = 25, offset: int = 0) -> list[dict]:
    """The actual ingested articles for a source, newest first — so admins can
    verify the metric (paywall/image/acceptance) against real evidence.

    Args:
        conn: open SQLite connection. Rows are read by column name, so this
            assumes a name-addressable row factory (e.g. sqlite3.Row) — confirm
            against get_conn.
        source_id: id of the source whose articles are listed.
        filter: a key of _SRC_ART_FILTERS; any other value applies no filter.
        limit / offset: page window, passed to SQL as bound parameters.

    Returns:
        One dict per article with id, title, url, published_at (falling back to
        discovered_at), accepted, reason, topic, flavor, paywalled, has_image
        and duplicate. ``accepted``, ``reason``, ``topic`` and ``flavor`` are
        None when the article has no article_scores row.
    """
    # Only fixed fragments from _SRC_ART_FILTERS are interpolated into the SQL;
    # the caller's string is used purely as a dict key, never as SQL text.
    where = _SRC_ART_FILTERS.get(filter, "")
    rows = conn.execute(
        f"""
        SELECT a.id, a.title, a.canonical_url, a.published_at, a.discovered_at,
               a.image_url, a.duplicate_of,
               s.accepted, s.reason_code, s.reason_text, s.topic, s.flavor
        FROM articles a
        LEFT JOIN article_scores s ON s.article_id = a.id
        WHERE a.source_id = ? {where}
        ORDER BY COALESCE(a.published_at, a.discovered_at) DESC, a.id DESC
        LIMIT ? OFFSET ?
        """,
        (source_id, limit, offset),
    ).fetchall()
    # The a.id tie-break above makes the order total: without it, rows sharing a
    # timestamp may be returned in any order, so successive LIMIT/OFFSET pages
    # could repeat or skip articles.
    return [
        {
            "id": r["id"],
            "title": r["title"],
            "url": r["canonical_url"],
            "published_at": r["published_at"] or r["discovered_at"],
            "accepted": r["accepted"],
            "reason": r["reason_text"] or r["reason_code"],  # the "why" behind accept/reject
            "topic": r["topic"],
            "flavor": r["flavor"],
            "paywalled": is_paywalled(r["canonical_url"]),  # domain rule — same for the source
            "has_image": bool(r["image_url"]),
            "duplicate": r["duplicate_of"] is not None,
        }
        for r in rows
    ]
|
||||
|
||||
|
||||
def source_articles_summary(conn: sqlite3.Connection, source_id: int) -> dict:
    """Aggregate counts for one source plus its source-level paywall flag.

    Feeds the inspector panel header, e.g.
    '120 · 96 accepted · 24 rejected · 3 no image · paywall: ON'. The paywall
    flag is taken by running is_paywalled on a single article URL of the source;
    it is False when the source has no articles.
    """
    counts = conn.execute(
        """
        SELECT COUNT(*) total,
               COALESCE(SUM(s.accepted = 1), 0) accepted,
               COALESCE(SUM(s.accepted = 0), 0) rejected,
               COALESCE(SUM(a.image_url IS NULL OR a.image_url = ''), 0) no_image,
               COALESCE(SUM(a.duplicate_of IS NOT NULL), 0) duplicates
        FROM articles a LEFT JOIN article_scores s ON s.article_id = a.id
        WHERE a.source_id = ?
        """,
        (source_id,),
    ).fetchone()
    sample = conn.execute(
        "SELECT canonical_url FROM articles WHERE source_id = ? LIMIT 1",
        (source_id,),
    ).fetchone()

    # Copy the aggregate columns across in the order the panel reads them.
    summary = {
        key: counts[key]
        for key in ("total", "accepted", "rejected", "no_image", "duplicates")
    }
    paywalled = False
    if sample:
        paywalled = is_paywalled(sample["canonical_url"])
    summary["paywalled"] = paywalled
    return summary
|
||||
|
||||
|
||||
def admin_stats(conn: sqlite3.Connection, days: int = 30) -> dict:
|
||||
"""Aggregate, non-personal usage stats for the admin dashboard."""
|
||||
since = f"-{days} days"
|
||||
|
||||
Reference in New Issue
Block a user