1e190c5e88
- Add source health columns (last_success_at, last_error_at, last_error, consecutive_failures, review_flag, review_reason) via SCHEMA + migration. - poll_source maintains them: success resets the failure streak and records the success time; failure increments it and stores the latest error. - review_sources() flags active sources that are stale, repeatedly failing, low-acceptance, duplicate-heavy, or doom-skewed (high cortisol/ragebait) over a recent window. It is purely advisory: it sets review_flag/review_reason and never changes the active column (human stays in the loop), clearing the flag when a source recovers. - CLI review-sources; cycle runs it as a final step (--no-review to skip); source-report shows a review line for flagged feeds. - Tests: healthy/failing/stale/low-acceptance/recovery and never-deactivates. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
90 lines
2.9 KiB
Python
90 lines
2.9 KiB
Python
from datetime import datetime, timedelta, timezone
|
|
|
|
import pytest
|
|
|
|
from goodnews.db import connect, init_db
|
|
from goodnews.sources import review_sources
|
|
|
|
|
|
@pytest.fixture
def conn():
    """Yield an in-memory database connection that has been through ``init_db``.

    The connection is closed on teardown. The ``try``/``finally`` also covers
    ``init_db`` so the connection is released if schema setup itself raises.
    """
    c = connect(":memory:")
    try:
        init_db(c)
        yield c
    finally:
        c.close()
|
|
|
|
|
|
def _source(conn, sid, name, failures=0):
    """Insert one row into ``sources`` as test data.

    Args:
        conn: Database connection exposing ``execute``.
        sid: Value for the ``id`` column; also embedded in the feed URL.
        name: Value for the ``name`` column.
        failures: Value for ``consecutive_failures`` (defaults to 0).

    ``trust_score`` is fixed at 5 and ``feed_url`` is ``http://s<sid>/feed``.
    No commit is issued here; the calling test commits.
    """
    conn.execute(
        "INSERT INTO sources (id, name, feed_url, trust_score, consecutive_failures) VALUES (?,?,?,5,?)",
        (sid, name, f"http://s{sid}/feed", failures),
    )
|
|
|
|
|
|
def _article(conn, sid, aid, when, accepted=1, cortisol=1, ragebait=0, dup=None):
    """Insert one ``articles`` row plus its ``article_scores`` row as test data.

    Args:
        conn: Database connection exposing ``execute``.
        sid: ``source_id`` of the owning source; also embedded in the URL.
        aid: Article ``id``; also used to derive URL, title and ``url_hash``.
        when: Value stored in ``published_at`` (tests pass ISO-8601 strings).
        accepted: Value for ``article_scores.accepted``.
        cortisol: Value for ``article_scores.cortisol_score``.
        ragebait: Value for ``article_scores.ragebait_score``.
        dup: Value for ``articles.duplicate_of``; ``None`` stores NULL.

    No commit is issued here; the calling test commits.
    """
    conn.execute(
        "INSERT INTO articles (id, source_id, canonical_url, title, published_at, url_hash, duplicate_of) "
        "VALUES (?,?,?,?,?,?,?)",
        (aid, sid, f"http://s{sid}/{aid}", f"t{aid}", when, f"h{aid}", dup),
    )
    conn.execute(
        "INSERT INTO article_scores (article_id, cortisol_score, ragebait_score, accepted) VALUES (?,?,?,?)",
        (aid, cortisol, ragebait, accepted),
    )
|
|
|
|
|
|
def test_healthy_source_not_flagged(conn):
    """A source with 20 fresh, accepted, low-cortisol articles is not flagged."""
    _source(conn, 1, "Healthy")
    # One timestamp shared by every article; formatted once, outside the loop.
    now = datetime.now(timezone.utc).isoformat()
    for i in range(20):
        _article(conn, 1, i, now, accepted=1, cortisol=1, ragebait=0)
    conn.commit()

    assert review_sources(conn) == []
    row = conn.execute("SELECT review_flag FROM sources WHERE id=1").fetchone()
    assert row["review_flag"] == 0
|
|
|
|
|
|
def test_repeated_failures_flagged(conn):
    """A source with 4 consecutive failures is reported with a 'failing' reason."""
    _source(conn, 1, "Flaky", failures=4)
    conn.commit()

    flagged = review_sources(conn)

    # Separate asserts so a failure shows whether the count or the reason is off.
    assert len(flagged) == 1
    assert "failing" in flagged[0]["reason"]
|
|
|
|
|
|
def test_stale_source_flagged(conn):
    """A source whose only article is 40 days old is reported as stale."""
    _source(conn, 1, "Stale")
    old = (datetime.now(timezone.utc) - timedelta(days=40)).isoformat()
    _article(conn, 1, 1, old)
    conn.commit()

    flagged = review_sources(conn)

    # Guard the index so an empty result fails as an assertion, not IndexError.
    assert flagged, "expected the stale source to be flagged"
    assert "stale" in flagged[0]["reason"]
|
|
|
|
|
|
def test_low_acceptance_and_cortisol_flagged(conn):
    """20 rejected, cortisol-8 articles yield both low-acceptance and high-cortisol reasons."""
    _source(conn, 1, "Doomy")
    now = datetime.now(timezone.utc).isoformat()
    for i in range(20):
        _article(conn, 1, i, now, accepted=0, cortisol=8, ragebait=0)
    conn.commit()

    flagged = review_sources(conn)

    # Guard the index so an empty result fails as an assertion, not IndexError.
    assert flagged, "expected the doom-skewed source to be flagged"
    reason = flagged[0]["reason"]
    assert "low acceptance" in reason
    assert "high cortisol" in reason
|
|
|
|
|
|
def test_review_never_deactivates(conn):
    """Flagging a failing source must leave its ``active`` column at 1."""
    _source(conn, 1, "Flaky", failures=9)
    conn.commit()

    flagged = review_sources(conn)

    # Without this the test would pass vacuously if nothing was flagged at all.
    assert flagged, "expected the failing source to be flagged"
    row = conn.execute("SELECT active FROM sources WHERE id=1").fetchone()
    assert row["active"] == 1
|
|
|
|
|
|
def test_recovered_source_flag_cleared(conn):
    """A flagged source has its review flag cleared once it looks healthy again."""
    flag_query = "SELECT review_flag FROM sources WHERE id=1"

    _source(conn, 1, "Recovered", failures=5)
    conn.commit()
    review_sources(conn)
    # Check the precondition: the flag must be set, or "cleared" proves nothing.
    assert conn.execute(flag_query).fetchone()["review_flag"] == 1

    # Simulate recovery: failure streak reset plus a batch of fresh, accepted articles.
    conn.execute("UPDATE sources SET consecutive_failures=0 WHERE id=1")
    now = datetime.now(timezone.utc).isoformat()
    for i in range(20):
        _article(conn, 1, i, now, accepted=1, cortisol=1)
    conn.commit()

    review_sources(conn)
    assert conn.execute(flag_query).fetchone()["review_flag"] == 0
|