Admin CSV export (sources snapshot + audience time-series)

Per Codex v1 — boring-in-the-best-way: inspect/archive operational data outside
the app. Admin-gated, Python csv module, text/csv + attachment disposition.

* GET /api/admin/export/sources.csv — current-state snapshot per source: name,
  feed/homepage, status, visible, served/accepted/total, acceptance/duplicate/
  accepted-dup/image-coverage %, last success/error, retry-after, review.
* GET /api/admin/export/audience.csv?days= — summary block (visitors, returning,
  accounts, feedback, shares) + a blank line + the daily visits/opens series;
  range applies to audience, sources is a snapshot.
* source_health now also returns feed_url/homepage_url. Adds small download links
  on the Sources + Audience tabs.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-09 13:05:09 -04:00
parent 26014297f4
commit 1cd7f1d89a
4 changed files with 95 additions and 2 deletions
+5 -1
View File
@@ -434,7 +434,7 @@
</section>
{/if}
<h2>Sources</h2>
<h2>Sources <a class="exportlink" href="/api/admin/export/sources.csv" download>export CSV ↓</a></h2>
<p class="sub2">{healthy} healthy · {resting} resting · {flagged} flagged · {paused} paused · {retired} retired · {sources.length} total</p>
<div class="filterchips">
{#each [['all', 'All'], ['healthy', 'Healthy'], ['resting', 'Resting'], ['flagged', 'Flagged'], ['paused', 'Paused'], ['retired', 'Retired']] as [key, label] (key)}
@@ -490,6 +490,7 @@
<p class="legend2">“served” = accepted, non-duplicate articles live · accept/dup % is of all ingested · pausing stops polling but keeps existing articles live · times in your local zone</p>
{:else if section === 'audience'}
<p class="exporthdr"><a class="exportlink" href={'/api/admin/export/audience.csv?days=' + range} download>export audience CSV ({range}d) ↓</a></p>
<section>
<h2>Visitors</h2>
<div class="cards">
@@ -740,6 +741,9 @@
.legend .sw.opens { background: var(--accent); }
.sub2 { color: var(--muted); font-size: 0.84rem; margin: 0 0 12px; }
.exporthdr { margin: 0 0 14px; }
.exportlink { font-size: 0.78rem; font-weight: 400; color: var(--accent-deep); margin-left: 10px; }
.exportlink:hover { text-decoration: underline; }
.legend2 { color: var(--muted); font-size: 0.76rem; margin: 10px 0 0; font-style: italic; }
/* Analytics window picker */
+65
View File
@@ -13,8 +13,10 @@ so the API and CLI always read the same file.
from __future__ import annotations
import csv
import hashlib
import hmac
import io
import json
import os
import re
@@ -1010,6 +1012,69 @@ def create_app() -> FastAPI:
cand = conn.execute("SELECT * FROM source_candidates WHERE id = ?", (cid,)).fetchone()
return _candidate_dict(cand)
# --- CSV exports (admin-gated, for inspection / archiving) ---------------
def _csv_response(filename: str, write) -> Response:
    """Build a downloadable CSV response from a row-writing callback.

    Args:
        filename: Name placed in the ``Content-Disposition`` header.
        write: Callable invoked once with a ``csv.writer`` bound to an
            in-memory text buffer; it is expected to emit every row.

    Returns:
        A ``text/csv`` response, marked as an attachment, whose body is
        whatever ``write`` produced.
    """
    # The export is rendered fully in memory before the response is built.
    # NOTE(review): cells are written verbatim by the callback; if exports are
    # opened in a spreadsheet, consider whether leading "=", "+", "-" or "@"
    # in free-text cells needs neutralising.
    buffer = io.StringIO()
    writer = csv.writer(buffer)
    write(writer)
    disposition = f'attachment; filename="{filename}"'
    return Response(
        content=buffer.getvalue(),
        headers={"Content-Disposition": disposition},
        media_type="text/csv",
    )
@app.get("/api/admin/export/sources.csv")
def admin_export_sources(request: Request) -> Response:
    """Export the rows returned by ``queries.source_health`` as ``sources.csv``.

    This is a current-state snapshot of every source (active + paused +
    retired). The caller must pass ``_require_admin`` before any data is read.
    """
    header = [
        "name", "feed_url", "homepage", "status", "visible", "served", "accepted_total",
        "total_articles", "acceptance_pct", "duplicate_pct", "accepted_dup_pct",
        "image_coverage_pct", "last_success", "last_error", "retry_after", "review_flag", "review_reason",
    ]

    def yes_no(flag) -> str:
        # Truthy/falsy flags are rendered as "yes"/"no" in the export.
        return "yes" if flag else "no"

    with get_conn() as conn:
        _require_admin(conn, request)
        sources = queries.source_health(conn)

    def emit(writer):
        writer.writerow(header)
        for src in sources:
            # Cells are evaluated in column order; missing optional values
            # become empty strings.
            identity = [
                src["name"],
                src["feed_url"],
                src.get("homepage_url") or "",
                src.get("status") or "",
                yes_no(src.get("content_visible")),
            ]
            counts = [src["served"], src["accepted_total"], src["total_articles"]]
            rates = [
                src["acceptance_rate"],
                src["duplicate_rate"],
                src["accepted_dup_rate"],
                src["image_coverage"],
            ]
            polling = [
                src.get("last_success_at") or "",
                src.get("last_error") or "",
                src.get("retry_after_at") or "",
            ]
            review = [yes_no(src.get("review_flag")), src.get("review_reason") or ""]
            writer.writerow(identity + counts + rates + polling + review)

    return _csv_response("sources.csv", emit)
@app.get("/api/admin/export/audience.csv")
def admin_export_audience(request: Request, days: int = Query(30)) -> Response:
    """Export ``queries.admin_stats`` as ``audience.csv``.

    The file has two sections separated by one blank row: a ``metric,value``
    summary block (including one ``share_<kind>`` row per share kind), then a
    ``date,visitors,opens`` daily series.

    Args:
        request: Incoming request, checked by ``_require_admin``.
        days: Requested window; anything other than 7, 30 or 90 falls back
            to 30.
    """
    if days not in (7, 30, 90):
        days = 30

    with get_conn() as conn:
        _require_admin(conn, request)
        stats = queries.admin_stats(conn, days=days)

    def emit(writer):
        visitors, accounts = stats["visitors"], stats["accounts"]

        # Section 1: summary metrics.
        writer.writerow(["metric", "value"])
        summary = [
            ("window_days", stats["days"]),
            ("visitors_today", visitors["today"]),
            ("visitors_7d", visitors["d7"]),
            ("visitors_30d", visitors["d30"]),
            ("returning_30d", stats.get("returning", 0)),
            ("once_30d", stats.get("once", 0)),
            ("accounts_total", accounts["total"]),
            ("accounts_new_7d", accounts["new_7d"]),
            ("accounts_active_7d", accounts["active_7d"]),
            ("feedback_7d", stats.get("feedback_7d", 0)),
            ("feedback_unread", stats.get("feedback_unread", 0)),
        ]
        writer.writerows([metric, value] for metric, value in summary)

        shares = stats.get("shares") or {}
        writer.writerows([f"share_{kind}", count] for kind, count in shares.items())

        # Blank row separates the summary from the daily time series.
        writer.writerow([])

        # Section 2: one row per day.
        writer.writerow(["date", "visitors", "opens"])
        writer.writerows(
            [point["day"], point["visits"], point["opens"]]
            for point in stats.get("daily", [])
        )

    return _csv_response("audience.csv", emit)
@app.post("/api/admin/sources/{sid}/review")
def admin_source_review(sid: int, body: SourceReviewBody, request: Request) -> dict:
with get_conn() as conn:
+1 -1
View File
@@ -295,7 +295,7 @@ def source_health(conn: sqlite3.Connection) -> list[dict]:
rows = conn.execute(
"""
SELECT
s.id, s.name, s.default_category AS category, s.active,
s.id, s.name, s.feed_url, s.homepage_url, s.default_category AS category, s.active,
s.status, s.content_visible, s.retry_after_at,
s.consecutive_failures AS failures, s.review_flag, s.review_reason,
s.poll_interval_minutes AS interval_minutes,
+24
View File
@@ -160,3 +160,27 @@ def test_safe_fetch_feed_blocks_ssrf():
"http://169.254.169.254/latest", "ftp://x/y"):
with pytest.raises(RuntimeError):
safe_fetch_feed(bad, timeout=2)
def test_export_sources_csv(tmp_path, monkeypatch):
    """A signed-in admin downloads sources.csv; an anonymous client gets 401."""
    url = "/api/admin/export/sources.csv"
    app, api = _make(tmp_path, monkeypatch, admin_email="boss@x.com")
    admin = _signin(app, api, "boss@x.com")

    resp = admin.get(url)
    assert resp.status_code == 200
    assert resp.headers["content-type"].startswith("text/csv")
    assert 'attachment; filename="sources.csv"' in resp.headers["content-disposition"]

    rows = resp.text.splitlines()
    assert rows[0].startswith("name,feed_url,homepage,status,visible,served")
    # The seeded source row must appear somewhere after the header.
    assert any("http://s/f" in row for row in rows[1:])

    # The endpoint is gated: a fresh client without a session is rejected.
    anonymous = TestClient(app)
    assert anonymous.get(url).status_code == 401
def test_export_audience_csv(tmp_path, monkeypatch):
    """A signed-in admin downloads audience.csv; an anonymous client gets 401."""
    app, api = _make(tmp_path, monkeypatch, admin_email="boss@x.com")
    admin = _signin(app, api, "boss@x.com")
    # Record one visit event before exporting.
    admin.post("/api/events", json={"kind": "visit", "visitor": "v1"})

    resp = admin.get("/api/admin/export/audience.csv?days=7")
    assert resp.status_code == 200
    assert resp.headers["content-type"].startswith("text/csv")

    text = resp.text
    # Summary block, echoing the requested window.
    assert "metric,value" in text
    assert "window_days,7" in text
    # Daily time-series section header.
    assert "date,visitors,opens" in text

    # The endpoint is gated: a fresh client without a session is rejected.
    anonymous = TestClient(app)
    assert anonymous.get("/api/admin/export/audience.csv").status_code == 401