Attention strip: richer source-health items (stale/reject/dup/thin/rate-limit)
Per Codex — make the Overview strip diagnostic without making the operator hunt through tables. Items are aggregated (one calm line per condition, with a count), volume-gated, and use conservative thresholds:
* Stale: active + visible source whose last success was more than 10 days ago (warn).
* High rejection: >= 20 ingested, acceptance < 25% (info).
* High duplicate: >= 10 accepted, accepted-dup > 50% (info).
* Thin images: >= 10 served, per-source image coverage < 25% (info).
* Long rate-limit: retry_after_at more than 12h out (info).
source_health gains a per-source images count + image_coverage. _attention takes an optional now (for tests). Existing site-wide items (global image coverage, thin brief, unread feedback) are unchanged.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+53
-2
@@ -8,6 +8,7 @@ the original source — never stored bodies.
|
||||
from __future__ import annotations
|
||||
|
||||
import sqlite3
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
from .feeds import MAX_BACKOFF_MINUTES
|
||||
|
||||
@@ -307,6 +308,9 @@ def source_health(conn: sqlite3.Connection) -> list[dict]:
|
||||
(SELECT COUNT(*) FROM articles a WHERE a.source_id = s.id AND a.duplicate_of IS NOT NULL) AS duplicates,
|
||||
(SELECT COUNT(*) FROM articles a JOIN article_scores sc ON sc.article_id = a.id
|
||||
WHERE a.source_id = s.id AND sc.accepted = 1 AND a.duplicate_of IS NULL) AS served,
|
||||
(SELECT COUNT(*) FROM articles a JOIN article_scores sc ON sc.article_id = a.id
|
||||
WHERE a.source_id = s.id AND sc.accepted = 1 AND a.duplicate_of IS NULL
|
||||
AND a.image_url IS NOT NULL AND a.image_url != '') AS images,
|
||||
datetime(
|
||||
(SELECT MAX(r.finished_at) FROM ingest_runs r
|
||||
WHERE r.source_id = s.id AND r.finished_at IS NOT NULL),
|
||||
@@ -327,6 +331,7 @@ def source_health(conn: sqlite3.Connection) -> list[dict]:
|
||||
# Curation quality: of what this source got ACCEPTED, how much was a
|
||||
# duplicate of content already served (accepted_total − served = accepted dupes).
|
||||
d["accepted_dup_rate"] = round(100 * (accepted - d["served"]) / accepted) if accepted else None
|
||||
d["image_coverage"] = round(100 * (d["images"] or 0) / d["served"]) if d["served"] else None
|
||||
# Match the REAL scheduler gate: due = the later of the streak-backoff time
|
||||
# and any retry_after_at rest (UTC strings sort chronologically).
|
||||
due_times = [t for t in (d["next_due_at"], d["retry_after_at"]) if t]
|
||||
@@ -335,20 +340,66 @@ def source_health(conn: sqlite3.Connection) -> list[dict]:
|
||||
return out
|
||||
|
||||
|
||||
def _attention(content: dict, sources: list[dict], feedback_unread: int) -> list[dict]:
|
||||
# Attention thresholds for the per-source items built by _attention(). They are
|
||||
# conservative + volume-gated, so the strip is a calm operator nudge rather than
|
||||
# a noisy scold. Items are aggregated (one line per condition with a count), not one line per offending source.
|
||||
_STALE_DAYS = 10  # last_success_at older than this many days counts as stale
|
||||
_REJECT_MIN_INGESTED = 20  # minimum total_articles before acceptance is judged
|
||||
_REJECT_RATE = 25 # acceptance_rate below this % (with enough volume) is flagged
|
||||
_DUP_MIN_ACCEPTED = 10  # minimum accepted_total before the duplicate rate is judged
|
||||
_DUP_RATE = 50 # accepted_dup_rate above this % is flagged
|
||||
_IMG_MIN_SERVED = 10  # minimum served articles before image coverage is judged
|
||||
_IMG_COVERAGE = 25 # per-source image_coverage below this % is flagged
|
||||
_LONG_REST_HOURS = 12  # retry_after_at further than this many hours ahead is a long rest
|
||||
|
||||
|
||||
def _attention(content: dict, sources: list[dict], feedback_unread: int, now: datetime | None = None) -> list[dict]:
|
||||
"""The 'Attention Needed' strip: what an operator should look at, soft-toned
|
||||
(warn = act soon, info = worth a glance). Derived from the same data shown
|
||||
elsewhere, so it never disagrees with the detail sections."""
|
||||
items: list[dict] = []
|
||||
n = lambda c: "" if c == 1 else "s" # noqa: E731 — tiny pluralizer
|
||||
now = now or datetime.now(UTC)
|
||||
active = [s for s in sources if (s.get("status") or ("active" if s.get("active") else "paused")) == "active"]
|
||||
|
||||
resting = [s for s in sources if s.get("active") and (s.get("failures") or 0) > 0]
|
||||
resting = [s for s in active if (s.get("failures") or 0) > 0]
|
||||
if resting:
|
||||
items.append({"level": "warn", "text": f"{len(resting)} source{n(len(resting))} backing off after failures"})
|
||||
flagged = [s for s in sources if s.get("review_flag")]
|
||||
if flagged:
|
||||
items.append({"level": "warn", "text": f"{len(flagged)} source{n(len(flagged))} flagged for review"})
|
||||
|
||||
# Stale: active, visible feeds whose last success is well in the past.
|
||||
stale_cutoff = (now - timedelta(days=_STALE_DAYS)).strftime("%Y-%m-%d %H:%M:%S")
|
||||
stale = [s for s in active if s.get("content_visible", 1) and s.get("last_success_at") and s["last_success_at"] < stale_cutoff]
|
||||
if stale:
|
||||
items.append({"level": "warn", "text": f"{len(stale)} source{n(len(stale))} haven't updated in over {_STALE_DAYS} days"})
|
||||
|
||||
# High rejection: enough ingested volume, low acceptance.
|
||||
rejecting = [s for s in active if (s.get("total_articles") or 0) >= _REJECT_MIN_INGESTED
|
||||
and s.get("acceptance_rate") is not None and s["acceptance_rate"] < _REJECT_RATE]
|
||||
if rejecting:
|
||||
items.append({"level": "info", "text": f"{len(rejecting)} source{n(len(rejecting))} accepting under {_REJECT_RATE}% of submissions"})
|
||||
|
||||
# High accepted-duplicate: enough accepted volume, mostly echoing others.
|
||||
duping = [s for s in active if (s.get("accepted_total") or 0) >= _DUP_MIN_ACCEPTED
|
||||
and s.get("accepted_dup_rate") is not None and s["accepted_dup_rate"] > _DUP_RATE]
|
||||
if duping:
|
||||
items.append({"level": "info", "text": f"{len(duping)} source{n(len(duping))} mostly duplicating other feeds"})
|
||||
|
||||
# Low image coverage (info, not a warning).
|
||||
thin = [s for s in active if (s.get("served") or 0) >= _IMG_MIN_SERVED
|
||||
and s.get("image_coverage") is not None and s["image_coverage"] < _IMG_COVERAGE]
|
||||
if thin:
|
||||
items.append({"level": "info", "text": f"{len(thin)} source{n(len(thin))} with thin image coverage (under {_IMG_COVERAGE}%)"})
|
||||
|
||||
# Long rate-limit rest (info).
|
||||
rest_cutoff = (now + timedelta(hours=_LONG_REST_HOURS)).strftime("%Y-%m-%d %H:%M:%S")
|
||||
long_rest = [s for s in sources if s.get("retry_after_at") and s["retry_after_at"] > rest_cutoff]
|
||||
if long_rest:
|
||||
items.append({"level": "info", "text": f"{len(long_rest)} source{n(len(long_rest))} rate-limited for {_LONG_REST_HOURS}h+"})
|
||||
|
||||
# Site-wide signals.
|
||||
served = content.get("served") or 0
|
||||
with_image = content.get("with_image") or 0
|
||||
if served and (with_image / served) < 0.70:
|
||||
|
||||
@@ -74,3 +74,43 @@ def test_attention_clear_when_healthy():
|
||||
content = {"served": 100, "with_image": 95, "latest_brief_size": 7}
|
||||
sources = [{"failures": 0, "review_flag": 0}]
|
||||
assert queries._attention(content, sources, feedback_unread=0) == []
|
||||
|
||||
|
||||
def test_attention_richer_items_fire_and_are_quiet_below_threshold():
    """Each per-source condition, pushed past its threshold, adds its strip line."""
    from datetime import UTC, datetime

    from goodnews import queries

    # Pinned clock so the stale and rate-limit cutoffs are deterministic.
    frozen_now = datetime(2026, 6, 9, 12, 0, 0, tzinfo=UTC)
    # Healthy site-wide numbers, so no site-wide items are expected.
    healthy_content = {"served": 100, "with_image": 95, "latest_brief_size": 7}

    def active_source(**fields):
        # Every fixture here is an active, visible source plus one offending trait.
        return {"status": "active", "content_visible": 1, **fields}

    offenders = [
        # Stale: last success 20 days before the pinned clock.
        active_source(last_success_at="2026-05-20 00:00:00"),
        # High rejection: 40 ingested, 10% acceptance.
        active_source(total_articles=40, acceptance_rate=10),
        # High duplicate: 30 accepted, 70% accepted-dup.
        active_source(accepted_total=30, accepted_dup_rate=70),
        # Thin images: 40 served, 5% coverage.
        active_source(served=40, image_coverage=5),
        # Long rate-limit rest: 2 days out.
        active_source(retry_after_at="2026-06-11 12:00:00"),
    ]

    strip = queries._attention(healthy_content, offenders, 0, now=frozen_now)
    rendered = " | ".join(entry["text"] for entry in strip)

    expected_fragments = (
        "haven't updated in over 10 days",
        "accepting under 25%",
        "mostly duplicating",
        "thin image coverage",
        "rate-limited for 12h+",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
||||
|
||||
|
||||
def test_attention_quiet_below_thresholds():
    """Sources under every threshold (or paused) must leave the strip empty.

    Covers all five per-source conditions: fresh last success, too little
    volume for the rejection / duplicate / image checks, a short rate-limit
    rest, and a paused source that would otherwise be stale.
    """
    from datetime import UTC, datetime

    from goodnews import queries

    # Pinned clock so the stale and rate-limit cutoffs are deterministic.
    now = datetime(2026, 6, 9, 12, 0, 0, tzinfo=UTC)
    # Healthy site-wide numbers, so any item would have to come from a source.
    content = {"served": 100, "with_image": 95, "latest_brief_size": 7}
    sources = [
        {"status": "active", "content_visible": 1, "last_success_at": "2026-06-08 00:00:00"},  # 1 day — fresh
        {"status": "active", "content_visible": 1, "total_articles": 5, "acceptance_rate": 10},  # too little volume
        {"status": "active", "content_visible": 1, "accepted_total": 3, "accepted_dup_rate": 90},  # too little volume
        {"status": "active", "content_visible": 1, "served": 5, "image_coverage": 0},  # too little volume
        {"status": "active", "content_visible": 1, "retry_after_at": "2026-06-09 18:00:00"},  # 6h rest — under the 12h cutoff
        {"status": "paused", "content_visible": 1, "last_success_at": "2020-01-01 00:00:00"},  # paused → ignored
    ]
    assert queries._attention(content, sources, 0, now=now) == []
|
||||
|
||||
Reference in New Issue
Block a user