images/analytics: purge on policy revoke + engagement warm-up note (Codex close-out)
- newsimg.purge_source(): when a source leaves 'cache' (permission revoked / re-classified),
the admin image-policy endpoint now deletes that source's re-hosted copies immediately,
rather than leaving them inaccessible-but-on-disk. Endpoint returns {purged}.
- Admin "Engaged readers" now carries a warm-up note: tracking began 2026-06-30, so low
counts in the rolling windows are partly warm-up, not all bots (compare d7 only once a week
has elapsed, and each longer window only once its full span has elapsed). Guards against
misreading "6 engaged vs 135 visits" as 129 bots.
Tests: purge_source removes only the target source's copies; endpoint reports purged.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+5
-1
@@ -1437,7 +1437,11 @@ def create_app() -> FastAPI:
|
||||
if cur.rowcount == 0:
|
||||
raise HTTPException(status_code=404, detail="source not found")
|
||||
conn.commit()
|
||||
return {"ok": True, "policy": pol}
|
||||
# Leaving 'cache' (e.g. permission revoked) → take the re-hosted copies down now,
|
||||
# not just make them inaccessible. Setting TO 'cache' just flips the flag; the
|
||||
# cycle warms it.
|
||||
purged = newsimg.purge_source(conn, sid) if pol != "cache" else 0
|
||||
return {"ok": True, "policy": pol, "purged": purged}
|
||||
|
||||
# --- Source candidates (supervised add-a-source pipeline) ----------------
|
||||
|
||||
|
||||
@@ -216,6 +216,31 @@ def fetch_and_cache(url: str | None) -> Path | None:
|
||||
return dest
|
||||
|
||||
|
||||
def purge_source(conn, source_id: int) -> int:
    """Delete every cached file for a source's article image URLs.

    Called when a source leaves 'cache' policy (revoked permission /
    re-classified), so the re-hosted copies come down immediately rather than
    lingering inaccessible on disk.

    Args:
        conn: Open database connection; only read from here (one SELECT on
            ``articles``).
        source_id: Value of ``articles.source_id`` whose cached images are
            removed.

    Returns:
        Number of ``.webp`` files actually deleted. Matching ``.fail`` files
        are deleted as well but are not counted.
    """
    import logging  # local import: the purge is rare, and this keeps the block self-contained

    rows = conn.execute(
        "SELECT DISTINCT image_url FROM articles WHERE source_id = ? "
        "AND image_url IS NOT NULL AND image_url != ''",
        (source_id,),
    ).fetchall()
    # NOTE(review): the cache key is derived from the URL alone, so an image URL
    # that another source's article also uses loses its cached copy here too —
    # confirm that is acceptable for sources that are still cleared to cache.
    cdir = cache_dir()
    log = logging.getLogger(__name__)
    removed = 0
    for row in rows:
        key = _key(row[0])
        for suffix in (".webp", ".fail"):
            path = cdir / f"{key}{suffix}"
            # EAFP: unlink directly instead of exists()+unlink(), so a file
            # that vanishes between the check and the delete is not an error.
            try:
                path.unlink()
            except FileNotFoundError:
                continue  # nothing cached under this key/suffix
            except OSError:
                # Stay best-effort (one stubborn file must not abort the purge),
                # but leave a trace: a copy that could not be taken down is
                # exactly what this function exists to prevent.
                log.warning(
                    "purge_source: could not delete %s", path, exc_info=True
                )
                continue
            if suffix == ".webp":
                removed += 1
    return removed
|
||||
|
||||
|
||||
def warm(conn, limit: int = 200) -> int:
|
||||
"""Pre-fetch display copies for the newest ACCEPTED, CANONICAL articles whose SOURCE
|
||||
is cleared to cache (image_policy='cache'), so the API only ever serves cache hits.
|
||||
|
||||
Reference in New Issue
Block a user