Hardening pass: scheduler backoff, FK cascade, a11y, test safety net

Pre-traffic cleanup from an audit:

* Scheduler: poll_due_sources now keys on the last *attempt* (success or
  failure), not the last success, and scales the wait by the consecutive-
  failure streak (capped at a day). A failing feed (e.g. Phys.org's HTTP 429s)
  used to be retried every cycle because it had no successful run; it now backs
  off and recovers on its own. Extracted due_source_rows() + tests.

* FK hygiene: deleting a daily_brief is supposed to cascade to its items, but
  SQLite enforces foreign keys per-connection — connect() already sets the
  pragma, so the cascade is correct going forward; added a regression test.
  (Orphaned items + Phys.org settings were cleaned directly on the live DB.)

* a11y: modal/drawer dialogs are now focusable (tabindex), close on Escape
  (window) and on backdrop click via a target check (dropping the inner
  stopPropagation handlers). Build is warning-free.

* tests: conftest points any un-mocked LLM client at a closed port with a 1s
  timeout, so an accidental real call fails fast instead of hanging the suite.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-06 19:18:18 +00:00
parent 978edc8f4a
commit 452e5a3fe4
8 changed files with 158 additions and 20 deletions
@@ -38,8 +38,10 @@
<svelte:window onkeydown={onkey} />
<div class="overlay" onclick={onclose} role="presentation">
<div class="sheet rise" role="dialog" aria-modal="true" aria-label="Send feedback" onclick={(e) => e.stopPropagation()}>
<!-- Backdrop: clicking it closes; keyboard users close with Escape (window). -->
<!-- svelte-ignore a11y_click_events_have_key_events -->
<div class="overlay" role="presentation" onclick={(e) => e.target === e.currentTarget && onclose?.()}>
<div class="sheet rise" role="dialog" aria-modal="true" aria-label="Send feedback" tabindex="-1">
<button class="x" onclick={onclose} aria-label="Close">×</button>
{#if status === 'sent'}
@@ -64,8 +64,10 @@
{#if inline}
<section class="panel rise">{@render body()}</section>
{:else}
<div class="overlay" onclick={onclose} role="presentation">
<div class="sheet rise" role="dialog" aria-modal="true" aria-label="Customize lanes" onclick={(e) => e.stopPropagation()}>
<!-- Backdrop: clicking it closes; keyboard users close with Escape (window). -->
<!-- svelte-ignore a11y_click_events_have_key_events -->
<div class="overlay" role="presentation" onclick={(e) => e.target === e.currentTarget && onclose?.()}>
<div class="sheet rise" role="dialog" aria-modal="true" aria-label="Customize lanes" tabindex="-1">
<button class="x" onclick={onclose} aria-label="Close">×</button>
{@render body()}
</div>
@@ -28,8 +28,10 @@
<svelte:window onkeydown={onkey} />
<div class="overlay" onclick={onclose} role="presentation">
<aside class="drawer" onclick={(e) => e.stopPropagation()} aria-label="Saved articles">
<!-- Backdrop: clicking it closes; keyboard users close with Escape (window). -->
<!-- svelte-ignore a11y_click_events_have_key_events -->
<div class="overlay" role="presentation" onclick={(e) => e.target === e.currentTarget && onclose?.()}>
<div class="drawer" role="dialog" aria-modal="true" aria-label="Saved articles" tabindex="-1">
<div class="head">
<h2>Saved</h2>
<button class="x" onclick={onclose} aria-label="Close">×</button>
@@ -53,7 +55,7 @@
{:else}
<p class="muted">Nothing saved yet — tap <strong>Save</strong> on a story to keep it here.</p>
{/if}
</aside>
</div>
</div>
<style>
+10 -2
View File
@@ -19,10 +19,18 @@
error = err?.message || 'Something went wrong — try again in a moment.';
}
}
function onkey(e) {
if (e.key === 'Escape') onclose?.();
}
</script>
<div class="overlay" onclick={onclose} role="presentation">
<div class="sheet rise" role="dialog" aria-modal="true" aria-label="Sign in" onclick={(e) => e.stopPropagation()}>
<svelte:window onkeydown={onkey} />
<!-- Backdrop: clicking it closes; keyboard users close with Escape (window). -->
<!-- svelte-ignore a11y_click_events_have_key_events -->
<div class="overlay" role="presentation" onclick={(e) => e.target === e.currentTarget && onclose?.()}>
<div class="sheet rise" role="dialog" aria-modal="true" aria-label="Sign in" tabindex="-1">
<button class="x" onclick={onclose} aria-label="Close">×</button>
{#if status === 'sent'}
+32 -11
View File
@@ -35,14 +35,32 @@ def poll_all_sources(conn: sqlite3.Connection, limit: int | None = None) -> dict
).fetchall(), limit)
def poll_due_sources(conn: sqlite3.Connection, limit: int | None = None) -> dict:
"""Poll only active sources whose last successful poll is older than their
poll_interval_minutes (or that have never been polled successfully).
# A failing source backs off so it isn't re-hit every scheduler cycle: the
# effective wait grows with the failure streak, capped at a day. This keeps a
# rate-limited feed (HTTP 429) resting instead of hammered, and lets it recover
# on its own once the limit lifts.
MAX_BACKOFF_MINUTES = 1440
This is what makes poll_interval_minutes meaningful and lets a scheduler run
frequently without re-hitting feeds that are not yet due.
def poll_due_sources(conn: sqlite3.Connection, limit: int | None = None) -> dict:
"""Poll only active sources whose last *attempt* (success OR failure) is
older than their effective interval, or that have never been polled.
Keying on the last attempt — not the last success — is what stops a
perpetually-failing feed from being retried every cycle. The effective
interval is poll_interval_minutes scaled up by the consecutive-failure
streak (capped at MAX_BACKOFF_MINUTES), so healthy feeds keep their cadence
while broken ones step down to occasional retries.
"""
rows = conn.execute(
return _poll_rows(conn, due_source_rows(conn), limit)
def due_source_rows(conn: sqlite3.Connection) -> list[sqlite3.Row]:
"""Active sources currently due to poll (see poll_due_sources for the rule).
Split out so the due/backoff decision can be tested without the network.
"""
return conn.execute(
"""
SELECT s.*
FROM sources s
@@ -50,17 +68,20 @@ def poll_due_sources(conn: sqlite3.Connection, limit: int | None = None) -> dict
AND (
NOT EXISTS (
SELECT 1 FROM ingest_runs r
WHERE r.source_id = s.id AND r.status = 'ok'
WHERE r.source_id = s.id AND r.finished_at IS NOT NULL
)
OR (
SELECT MAX(r.finished_at) FROM ingest_runs r
WHERE r.source_id = s.id AND r.status = 'ok'
) <= datetime('now', '-' || s.poll_interval_minutes || ' minutes')
WHERE r.source_id = s.id AND r.finished_at IS NOT NULL
) <= datetime(
'now',
'-' || MIN(?, s.poll_interval_minutes * (1 + s.consecutive_failures)) || ' minutes'
)
)
ORDER BY s.id
"""
""",
(MAX_BACKOFF_MINUTES,),
).fetchall()
return _poll_rows(conn, rows, limit)
def _poll_rows(conn: sqlite3.Connection, rows: list[sqlite3.Row], limit: int | None) -> dict:
+16
View File
@@ -0,0 +1,16 @@
import pytest
@pytest.fixture(autouse=True)
def _no_real_llm(monkeypatch):
"""Safety net: no test should reach the real arbiter.
If a test constructs an LLM client from env without mocking, point it at a
closed local port with a 1s timeout so the call fails fast instead of
hanging on the production 300s timeout (which looks like a stalled suite).
Tests that exercise the LLM patch it at the function level, which overrides
these env defaults.
"""
monkeypatch.setenv("GOODNEWS_LLM_BASE_URL", "http://127.0.0.1:1/v1")
monkeypatch.setenv("GOODNEWS_LLM_TIMEOUT", "1")
monkeypatch.delenv("GOODNEWS_LLM_API_KEY", raising=False)
+25
View File
@@ -0,0 +1,25 @@
from goodnews.db import connect, init_db
def test_deleting_a_brief_cascades_to_items(tmp_path):
"""The orphaned-items bug: a brief was deleted while its items survived,
because FK enforcement is per-connection in SQLite. connect() turns it on,
so ON DELETE CASCADE must now clear the items and leave no FK violations.
"""
c = connect(str(tmp_path / "t.db")); init_db(c)
assert c.execute("PRAGMA foreign_keys").fetchone()[0] == 1 # enforced
c.execute("INSERT INTO sources (id, name, feed_url) VALUES (1, 'S', 'http://s/f')")
c.execute(
"INSERT INTO articles (id, source_id, canonical_url, title, url_hash, published_at) "
"VALUES (1, 1, 'http://s/a', 'A', 'h1', '2026-01-01T00:00:00')"
)
c.execute("INSERT INTO daily_briefs (id, brief_date, title) VALUES (9, '2026-01-01', 'B')")
c.execute("INSERT INTO daily_brief_items (brief_id, article_id, rank) VALUES (9, 1, 1)")
c.commit()
c.execute("DELETE FROM daily_briefs WHERE id = 9")
c.commit()
assert c.execute("SELECT COUNT(*) FROM daily_brief_items").fetchone()[0] == 0
assert c.execute("PRAGMA foreign_key_check").fetchall() == []
+62
View File
@@ -0,0 +1,62 @@
from goodnews.db import connect, init_db
from goodnews.feeds import due_source_rows
def _src(c, sid, *, interval=120, failures=0, active=1):
c.execute(
"INSERT INTO sources (id, name, feed_url, poll_interval_minutes, consecutive_failures, active) "
"VALUES (?, ?, ?, ?, ?, ?)",
(sid, f"S{sid}", f"http://s/{sid}", interval, failures, active),
)
def _attempt(c, sid, *, minutes_ago, status="ok"):
c.execute(
"INSERT INTO ingest_runs (source_id, finished_at, status) "
"VALUES (?, datetime('now', ?), ?)",
(sid, f"-{minutes_ago} minutes", status),
)
def _due_ids(c):
return {r["id"] for r in due_source_rows(c)}
def test_never_attempted_is_due(tmp_path):
c = connect(str(tmp_path / "t.db")); init_db(c)
_src(c, 1)
assert _due_ids(c) == {1}
def test_recent_attempt_not_due_old_attempt_due(tmp_path):
c = connect(str(tmp_path / "t.db")); init_db(c)
_src(c, 1, interval=120); _attempt(c, 1, minutes_ago=5) # polled recently
_src(c, 2, interval=120); _attempt(c, 2, minutes_ago=200) # overdue
assert _due_ids(c) == {2}
def test_failing_source_backs_off(tmp_path):
# The regression: a perpetually-FAILING source (no 'ok' run) used to be due
# every cycle. With backoff it rests — effective wait = 60*(1+10)=660 min.
c = connect(str(tmp_path / "t.db")); init_db(c)
_src(c, 1, interval=60, failures=10)
_attempt(c, 1, minutes_ago=200, status="failed") # 200 < 660 → not due yet
assert _due_ids(c) == set()
# ...but once past the backed-off window it becomes due again.
c.execute("DELETE FROM ingest_runs WHERE source_id = 1")
_attempt(c, 1, minutes_ago=700, status="failed") # 700 > 660 → due
assert _due_ids(c) == {1}
def test_backoff_capped_at_a_day(tmp_path):
# A long failure streak can't push the wait past MAX_BACKOFF_MINUTES (1440).
c = connect(str(tmp_path / "t.db")); init_db(c)
_src(c, 1, interval=120, failures=999) # 120*1000 would be huge
_attempt(c, 1, minutes_ago=1500, status="failed") # >1440 → due despite streak
assert _due_ids(c) == {1}
def test_inactive_never_due(tmp_path):
c = connect(str(tmp_path / "t.db")); init_db(c)
_src(c, 1, active=0)
assert _due_ids(c) == set()