Sources table: Media column (image coverage % + paywall marker)

Per Codex — make the table more decision-ready from data we already have.
Paywall is a domain-level hint, so it's a per-source flag (not a meaningful
rate): show image-coverage % plus a 🔒 marker for subscription domains in one
compact "Media" column (tooltip spells it out). source_health gains a
`paywalled` flag (is_paywalled on homepage/feed); also added to sources.csv.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-09 14:58:19 -04:00
parent 9ba9851f6d
commit eacf91225a
4 changed files with 29 additions and 2 deletions
+6 -1
View File
@@ -445,7 +445,7 @@
<table class="srctable"> <table class="srctable">
<thead> <thead>
<tr> <tr>
<th>Source</th><th class="num">Served</th><th class="num">Accept</th><th class="num">Dup</th> <th>Source</th><th class="num">Served</th><th class="num">Accept</th><th class="num">Dup</th><th class="num">Media</th>
<th>Last success</th><th>Next poll</th><th class="num">Fails</th><th>Status</th><th>Actions</th> <th>Last success</th><th>Next poll</th><th class="num">Fails</th><th>Status</th><th>Actions</th>
</tr> </tr>
</thead> </thead>
@@ -460,6 +460,9 @@
<td class="num">{s.served}</td> <td class="num">{s.served}</td>
<td class="num">{s.acceptance_rate != null ? s.acceptance_rate + '%' : '—'}</td> <td class="num">{s.acceptance_rate != null ? s.acceptance_rate + '%' : '—'}</td>
<td class="num" title={s.accepted_dup_rate != null ? `${s.accepted_dup_rate}% of accepted were duplicates` : ''}>{s.duplicate_rate != null ? s.duplicate_rate + '%' : '—'}</td> <td class="num" title={s.accepted_dup_rate != null ? `${s.accepted_dup_rate}% of accepted were duplicates` : ''}>{s.duplicate_rate != null ? s.duplicate_rate + '%' : '—'}</td>
<td class="num media" title={`${s.image_coverage != null ? s.image_coverage + '% of served have an image' : 'no served articles yet'}${s.paywalled ? ' · subscription / paywalled domain' : ''}`}>
{s.image_coverage != null ? s.image_coverage + '%' : '—'}{#if s.paywalled} <span class="pw">🔒</span>{/if}
</td>
<td class="dim">{s.last_success_at ? fwhen(s.last_success_at) : '—'}</td> <td class="dim">{s.last_success_at ? fwhen(s.last_success_at) : '—'}</td>
<td class="dim">{st === 'active' ? fwhen(s.next_due_at) : '—'}</td> <td class="dim">{st === 'active' ? fwhen(s.next_due_at) : '—'}</td>
<td class="num">{s.failures || ''}</td> <td class="num">{s.failures || ''}</td>
@@ -791,6 +794,8 @@
} }
.srctable td { padding: 8px 10px; border-bottom: 1px solid var(--line); vertical-align: baseline; } .srctable td { padding: 8px 10px; border-bottom: 1px solid var(--line); vertical-align: baseline; }
.srctable .num { text-align: right; font-variant-numeric: tabular-nums; } .srctable .num { text-align: right; font-variant-numeric: tabular-nums; }
.srctable .media { white-space: nowrap; }
.srctable .media .pw { font-size: 0.78rem; opacity: 0.75; }
.srctable .dim { color: var(--muted); white-space: nowrap; font-size: 0.82rem; } .srctable .dim { color: var(--muted); white-space: nowrap; font-size: 0.82rem; }
.srctable .sname { font-weight: 600; color: var(--ink); } .srctable .sname { font-weight: 600; color: var(--ink); }
.srctable .sname .cat { display: block; font-weight: 400; font-size: 0.72rem; color: var(--muted); text-transform: capitalize; } .srctable .sname .cat { display: block; font-weight: 400; font-size: 0.72rem; color: var(--muted); text-transform: capitalize; }
+2 -1
View File
@@ -1046,7 +1046,7 @@ def create_app() -> FastAPI:
row([ row([
"name", "feed_url", "homepage", "status", "visible", "served", "accepted_total", "name", "feed_url", "homepage", "status", "visible", "served", "accepted_total",
"total_articles", "acceptance_pct", "duplicate_pct", "accepted_dup_pct", "total_articles", "acceptance_pct", "duplicate_pct", "accepted_dup_pct",
"image_coverage_pct", "last_success", "last_error", "retry_after", "review_flag", "review_reason", "image_coverage_pct", "paywalled", "last_success", "last_error", "retry_after", "review_flag", "review_reason",
]) ])
for s in rows: for s in rows:
row([ row([
@@ -1054,6 +1054,7 @@ def create_app() -> FastAPI:
s.get("status") or "", "yes" if s.get("content_visible") else "no", s.get("status") or "", "yes" if s.get("content_visible") else "no",
s["served"], s["accepted_total"], s["total_articles"], s["served"], s["accepted_total"], s["total_articles"],
s["acceptance_rate"], s["duplicate_rate"], s["accepted_dup_rate"], s["image_coverage"], s["acceptance_rate"], s["duplicate_rate"], s["accepted_dup_rate"], s["image_coverage"],
"yes" if s.get("paywalled") else "no",
s.get("last_success_at") or "", s.get("last_error") or "", s.get("retry_after_at") or "", s.get("last_success_at") or "", s.get("last_error") or "", s.get("retry_after_at") or "",
"yes" if s.get("review_flag") else "no", s.get("review_reason") or "", "yes" if s.get("review_flag") else "no", s.get("review_reason") or "",
]) ])
+3
View File
@@ -11,6 +11,7 @@ import sqlite3
from datetime import UTC, datetime, timedelta from datetime import UTC, datetime, timedelta
from .feeds import MAX_BACKOFF_MINUTES from .feeds import MAX_BACKOFF_MINUTES
from .paywall import is_paywalled
# Composite ranking used everywhere a "best first" order is needed. Kept as one # Composite ranking used everywhere a "best first" order is needed. Kept as one
# expression so brief, category feeds, and the API all rank identically. # expression so brief, category feeds, and the API all rank identically.
@@ -332,6 +333,8 @@ def source_health(conn: sqlite3.Connection) -> list[dict]:
# duplicate of content already served (accepted_total - served = accepted dupes). # duplicate of content already served (accepted_total - served = accepted dupes).
d["accepted_dup_rate"] = round(100 * (accepted - d["served"]) / accepted) if accepted else None d["accepted_dup_rate"] = round(100 * (accepted - d["served"]) / accepted) if accepted else None
d["image_coverage"] = round(100 * (d["images"] or 0) / d["served"]) if d["served"] else None d["image_coverage"] = round(100 * (d["images"] or 0) / d["served"]) if d["served"] else None
# Paywall is a domain-level hint, so it's a per-source flag (not a rate).
d["paywalled"] = is_paywalled(d.get("homepage_url") or d.get("feed_url"))
# Match the REAL scheduler gate: due = the later of the streak-backoff time # Match the REAL scheduler gate: due = the later of the streak-backoff time
# and any retry_after_at rest (UTC strings sort chronologically). # and any retry_after_at rest (UTC strings sort chronologically).
due_times = [t for t in (d["next_due_at"], d["retry_after_at"]) if t] due_times = [t for t in (d["next_due_at"], d["retry_after_at"]) if t]
+18
View File
@@ -127,3 +127,21 @@ def test_attention_ignores_rate_limit_on_paused_or_retired():
] ]
# neither should nag about a rate-limit rest # neither should nag about a rate-limit rest
assert queries._attention(content, sources, 0, now=now) == [] assert queries._attention(content, sources, 0, now=now) == []
def test_source_health_paywall_and_image_coverage(tmp_path):
    """Check the per-source ``paywalled`` flag and ``image_coverage`` percentage.

    Builds a throwaway database under ``tmp_path`` with two sources, each
    owning one accepted article:

    * source 1 has homepage ``https://www.nature.com`` and an article with an
      image; it is expected to be flagged paywalled with 100% image coverage.
    * source 2 has homepage ``https://goodsite.org`` and an article without an
      image; it is expected to be unflagged with 0% image coverage.
    """
    from goodnews.db import connect, init_db
    from goodnews import queries

    conn = connect(str(tmp_path / "t.db"))
    try:
        init_db(conn)
        # a paywalled-domain source and a free one, each with a served article
        conn.execute("INSERT INTO sources (id,name,feed_url,homepage_url,active) VALUES (1,'NS','http://x/f','https://www.nature.com',1)")
        conn.execute("INSERT INTO sources (id,name,feed_url,homepage_url,active) VALUES (2,'Free','http://y/f','https://goodsite.org',1)")
        for aid, sid, img in [(1, 1, 'http://i/1.jpg'), (2, 2, None)]:
            conn.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url) VALUES (?,?,?,?,?,?)",
                         (aid, sid, f'http://u/{aid}', f't{aid}', f'h{aid}', img))
            conn.execute("INSERT INTO article_scores (article_id,accepted) VALUES (?,1)", (aid,))
        conn.commit()
        # Materialize the rows before closing so the assertions below do not
        # depend on a live connection.
        sh = {s["id"]: s for s in queries.source_health(conn)}
    finally:
        # Release the SQLite file handle so tmp_path cleanup is not blocked.
        conn.close()

    assert sh[1]["paywalled"] is True and sh[2]["paywalled"] is False
    assert sh[1]["image_coverage"] == 100 and sh[2]["image_coverage"] == 0