01de5a3ef0
Per Codex: the Next poll column computed only the streak-backoff time, so a rate-limited source could show an earlier Next poll than the real gate (which also requires retry_after_at <= now). Take the later of the two in the Python post-process so the admin table agrees with due_source_rows. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
77 lines
3.3 KiB
Python
77 lines
3.3 KiB
Python
from datetime import UTC, datetime, timedelta
|
|
|
|
from goodnews import feeds
|
|
from goodnews.db import connect, init_db
|
|
|
|
|
|
NOW = datetime(2026, 6, 9, 12, 0, 0, tzinfo=UTC)
|
|
|
|
|
|
def test_parse_delta_seconds():
    """A bare integer Retry-After value is read as seconds after ``now``."""
    parsed = feeds.parse_retry_after("120", now=NOW)
    # 120 seconds past NOW (12:00:00) is 12:02:00.
    assert parsed == "2026-06-09 12:02:00"
|
|
|
|
|
|
def test_parse_http_date():
    """An HTTP-date Retry-After value comes back as ``YYYY-MM-DD HH:MM:SS``."""
    header = "Tue, 09 Jun 2026 12:30:00 GMT"
    assert feeds.parse_retry_after(header, now=NOW) == "2026-06-09 12:30:00"
|
|
|
|
|
|
def test_parse_ignores_invalid_and_past():
    """Unusable Retry-After values make ``parse_retry_after`` return ``None``."""
    rejected = (
        "",  # empty header
        "soon",  # neither a number nor a date
        "-30",  # negative
        "Tue, 09 Jun 2020 00:00:00 GMT",  # past
    )
    for header in rejected:
        assert feeds.parse_retry_after(header, now=NOW) is None
|
|
|
|
|
|
def test_parse_caps_at_max_backoff():
    """A 30-day Retry-After is clamped to ``feeds.MAX_BACKOFF_MINUTES`` past ``now``."""
    thirty_days_in_seconds = 60 * 60 * 24 * 30  # 30 days
    ceiling = NOW + timedelta(minutes=feeds.MAX_BACKOFF_MINUTES)

    capped = feeds.parse_retry_after(str(thirty_days_in_seconds), now=NOW)

    assert capped == ceiling.strftime("%Y-%m-%d %H:%M:%S")
|
|
|
|
|
|
def _src(c):
    """Insert one active source row with id 1 and return it as read back.

    Args:
        c: Open database connection that already has a ``sources`` table.

    Returns:
        The ``id=1`` row as produced by ``fetchone()`` on the connection.
    """
    insert_sql = "INSERT INTO sources (id,name,feed_url,active) VALUES (1,'S','http://s/f',1)"
    c.execute(insert_sql)
    c.commit()
    cursor = c.execute("SELECT * FROM sources WHERE id=1")
    return cursor.fetchone()
|
|
|
|
|
|
def test_429_sets_retry_after_without_streak(monkeypatch):
    """A rate-limited poll stores ``retry_after_at`` without bumping the failure streak."""
    c = connect(":memory:")
    init_db(c)
    src = _src(c)

    def boom(url, timeout=20):
        # Stand-in for feeds.fetch_feed that always reports a rate limit.
        raise feeds.RateLimited("HTTP 429", retry_after_at="2030-01-01 00:00:00")

    monkeypatch.setattr(feeds, "fetch_feed", boom)

    res = feeds.poll_source(c, src)
    assert res["status"] == "rate_limited"

    row = c.execute("SELECT consecutive_failures, retry_after_at FROM sources WHERE id=1").fetchone()
    assert row["consecutive_failures"] == 0  # NOT inflated
    assert row["retry_after_at"] == "2030-01-01 00:00:00"
    # and it's not due while resting
    assert [s["id"] for s in feeds.due_source_rows(c)] == []
|
|
|
|
|
|
def test_success_clears_retry_after(monkeypatch):
    """A successful poll resets both ``retry_after_at`` and ``consecutive_failures``."""
    c = connect(":memory:")
    init_db(c)
    src = _src(c)

    # Start from a source that has a pending retry-after and a failure streak.
    c.execute("UPDATE sources SET retry_after_at='2030-01-01 00:00:00', consecutive_failures=2 WHERE id=1")
    c.commit()

    # Stub fetch_feed to return a minimal, empty RSS document.
    monkeypatch.setattr(feeds, "fetch_feed", lambda url, timeout=20: b"<rss><channel></channel></rss>")
    feeds.poll_source(c, src)

    row = c.execute("SELECT consecutive_failures, retry_after_at FROM sources WHERE id=1").fetchone()
    # Checked separately so a failure names the column that was not reset.
    assert row["retry_after_at"] is None
    assert row["consecutive_failures"] == 0
|
|
|
|
|
|
def test_non_429_failure_still_increments_streak(monkeypatch):
    """A non-rate-limit fetch error is reported as failed and counts toward the streak."""
    c = connect(":memory:")
    init_db(c)
    src = _src(c)

    def fail(url, timeout=20):
        # Stand-in for feeds.fetch_feed that always raises a generic error.
        raise RuntimeError("HTTP 500")

    monkeypatch.setattr(feeds, "fetch_feed", fail)

    res = feeds.poll_source(c, src)
    assert res["status"] == "failed"

    streak = c.execute("SELECT consecutive_failures FROM sources WHERE id=1").fetchone()[0]
    assert streak == 1
|
|
|
|
|
|
def test_source_health_next_due_uses_later_of_backoff_and_retry_after():
    """``source_health`` reports ``retry_after_at`` as ``next_due_at`` when it is the later time."""
    from goodnews import queries

    c = connect(":memory:")
    init_db(c)
    _src(c)

    # a recent attempt (streak due ~soon) but a far-future retry_after_at
    c.execute("INSERT INTO ingest_runs (source_id, finished_at, status) VALUES (1, datetime('now'), 'rate_limited')")
    c.execute("UPDATE sources SET retry_after_at = '2099-01-01 00:00:00' WHERE id = 1")
    c.commit()

    s = next(x for x in queries.source_health(c) if x["id"] == 1)
    # agrees with the real gate, not the streak time
    assert s["next_due_at"] == "2099-01-01 00:00:00"
|