Advisory source health: review flags, never auto-deactivate

- Add source health columns (last_success_at, last_error_at, last_error, consecutive_failures, review_flag, review_reason) via SCHEMA + migration. - poll_source maintains them: success resets the failure streak and records the success time; failure increments it and stores the latest error. - review_sources() flags active sources that are stale, repeatedly failing, low-acceptance, duplicate-heavy, or doom-skewed (high cortisol/ragebait) over a recent window. It is purely advisory: it sets review_flag/review_reason and never changes the active column (human stays in the loop), clearing the flag when a source recovers. - CLI review-sources; cycle runs it as a final step (--no-review to skip); source-report shows a review line for flagged feeds. - Tests: healthy/failing/stale/low-acceptance/recovery and never-deactivates. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 20:28:18 +00:00
parent aa4125ddec
commit 1e190c5e88
6 changed files with 243 additions and 4 deletions
@@ -22,6 +22,7 @@ python3 -m goodnews suggest-source https://example.com/feed/ --name "Example" --
 python3 -m goodnews list-candidates
 python3 -m goodnews promote-candidate 1        # copies into sources (inactive by default)
 python3 -m goodnews reject-candidate 1
 python3 -m goodnews review-sources             # advisory health flags (never deactivates)
 python3 -m goodnews build-brief --date 2026-05-27 --replace
 python3 -m goodnews show-brief
 python3 -m goodnews list-recent --limit 10
@@ -161,7 +162,7 @@ often as you like — it only polls sources that are *due* (per each source's
 rebuilds the current day's brief:
 ```bash
-python3 -m goodnews cycle                 # poll due -> classify new -> dedup -> rebuild today's brief
+python3 -m goodnews cycle                 # poll due -> classify -> dedup -> brief -> review flags
 python3 -m goodnews cycle --force         # poll every active source regardless of interval
 python3 -m goodnews cycle --no-classify   # skip the LLM step (e.g. model box offline)
 ```
@@ -195,9 +196,11 @@ Still ahead:
   previews a feed and stages it in the `source_candidates` table (status
   suggested/quarantined/rejected/promoted); `promote-candidate` copies it into
   `sources` (inactive by default — active on approval); promotion is never
-   automatic. Still ahead: advisory auto-degrade of stale/rejecting feeds (flag
+   automatic. Advisory health is done too: `review-sources` (also run at the end
-   for review, never auto-block), and an authenticated POST surface so the website
+   of `cycle`) flags stale, failing, low-acceptance, duplicate-heavy, or
-   can accept public suggestions once accounts exist.
+   doom-skewed feeds for human review — it never deactivates anything. Still
   ahead: an authenticated POST surface so the website can accept public
   suggestions once accounts exist.
 2. **Learned "Less like this" weighting** — replace the interim flavor-pause with
   real preference down-ranking.
 3. **Corpus rebalancing** — add calm/feelgood sources (currently science-heavy).
@@ -24,6 +24,7 @@ from .sources import (
    load_sources,
    promote_candidate,
    reject_candidate,
    review_sources,
    save_candidate,
    upsert_sources,
 )
@@ -95,6 +96,11 @@ def main() -> None:
    reject_parser = subparsers.add_parser("reject-candidate", help="Mark a candidate as rejected")
    reject_parser.add_argument("id", type=int)
    review_parser = subparsers.add_parser(
        "review-sources", help="Recompute advisory review flags (never deactivates anything)"
    )
    review_parser.add_argument("--stale-days", type=int, default=14)
    runs_parser = subparsers.add_parser("list-runs", help="Show recent ingest runs")
    runs_parser.add_argument("--limit", type=int, default=20)
@@ -114,6 +120,7 @@ def main() -> None:
    cycle_parser.add_argument("--no-classify", action="store_true", help="Skip the LLM classify step")
    cycle_parser.add_argument("--no-dedup", action="store_true", help="Skip the embedding dedup step")
    cycle_parser.add_argument("--no-brief", action="store_true", help="Skip rebuilding today's brief")
    cycle_parser.add_argument("--no-review", action="store_true", help="Skip recomputing source review flags")
    cycle_parser.add_argument("--force", action="store_true", help="Poll all active sources, ignoring intervals")
    cycle_parser.add_argument("--base-url", help="OpenAI-compatible base URL for classify")
    cycle_parser.add_argument("--model", help="Local model name for classify")
@@ -218,6 +225,15 @@ def main() -> None:
        init_db(conn)
        ok = reject_candidate(conn, args.id)
        print(f"Rejected candidate #{args.id}." if ok else f"No candidate #{args.id}.")
    elif args.command == "review-sources":
        init_db(conn)
        flagged = review_sources(conn, stale_days=args.stale_days)
        if not flagged:
            print("All active sources look healthy.")
        else:
            print(f"{len(flagged)} source(s) flagged for review (advisory — none deactivated):")
            for f in flagged:
                print(f"  [{f['id']}] {f['name']}: {f['reason']}")
    elif args.command == "list-runs":
        list_runs(conn, limit=args.limit)
    elif args.command == "rescore":
@@ -430,6 +446,13 @@ def _run_cycle_locked(conn: sqlite3.Connection, args: argparse.Namespace) -> Non
        except Exception as exc:
            print(f"brief: skipped ({exc})")
    if not args.no_review:
        try:
            flagged = review_sources(conn)
            print(f"review: {len(flagged)} source(s) flagged for review (advisory)")
        except Exception as exc:
            print(f"review: skipped ({exc})")
 def serve(args: argparse.Namespace) -> None:
    try:
@@ -530,6 +553,9 @@ def source_report(conn: sqlite3.Connection) -> None:
            src.default_category,
            src.trust_score,
            src.pr_risk_score AS source_pr_risk,
            src.review_flag,
            src.review_reason,
            src.consecutive_failures,
            COUNT(a.id) AS articles,
            SUM(CASE WHEN s.accepted = 1 THEN 1 ELSE 0 END) AS accepted,
            ROUND(AVG(s.constructive_score), 1) AS avg_constructive,
@@ -558,6 +584,8 @@ def source_report(conn: sqlite3.Connection) -> None:
            f"avg_ragebait={row['avg_ragebait']}"
        )
        print(f"  newest={row['newest_article'] or 'none'}")
        if row["review_flag"]:
            print(f"  ⚑ review: {row['review_reason']}")
 def list_runs(conn: sqlite3.Connection, limit: int) -> None:
@@ -19,6 +19,12 @@ CREATE TABLE IF NOT EXISTS sources (
    active INTEGER NOT NULL DEFAULT 1,
    poll_interval_minutes INTEGER NOT NULL DEFAULT 60,
    notes TEXT,
    last_success_at TEXT,
    last_error_at TEXT,
    last_error TEXT,
    consecutive_failures INTEGER NOT NULL DEFAULT 0,
    review_flag INTEGER NOT NULL DEFAULT 0,
    review_reason TEXT,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
 );
@@ -148,3 +154,16 @@ def _migrate(conn: sqlite3.Connection) -> None:
        )
    # Created here (not in SCHEMA) so it runs after the column exists on upgrades.
    conn.execute("CREATE INDEX IF NOT EXISTS idx_articles_duplicate_of ON articles(duplicate_of)")
    source_cols = {row["name"] for row in conn.execute("PRAGMA table_info(sources)")}
    health_columns = {
        "last_success_at": "TEXT",
        "last_error_at": "TEXT",
        "last_error": "TEXT",
        "consecutive_failures": "INTEGER NOT NULL DEFAULT 0",
        "review_flag": "INTEGER NOT NULL DEFAULT 0",
        "review_reason": "TEXT",
    }
    for column, decl in health_columns.items():
        if column not in source_cols:
            conn.execute(f"ALTER TABLE sources ADD COLUMN {column} {decl}")
@@ -108,6 +108,15 @@ def poll_source(conn: sqlite3.Connection, source: sqlite3.Row) -> dict:
            """,
            (seen, inserted, duplicate, run_id),
        )
        # A clean poll resets the failure streak and records the success time.
        conn.execute(
            """
            UPDATE sources
            SET last_success_at = CURRENT_TIMESTAMP, consecutive_failures = 0
            WHERE id = ?
            """,
            (source["id"],),
        )
        conn.commit()
        return {"status": "ok", "seen": seen, "inserted": inserted, "duplicate": duplicate}
    except Exception as exc:
@@ -124,6 +133,17 @@ def poll_source(conn: sqlite3.Connection, source: sqlite3.Row) -> dict:
            """,
            (seen, inserted, duplicate, str(exc), run_id),
        )
        # Track the failure streak and latest error for advisory review flags.
        conn.execute(
            """
            UPDATE sources
            SET consecutive_failures = consecutive_failures + 1,
                last_error_at = CURRENT_TIMESTAMP,
                last_error = ?
            WHERE id = ?
            """,
            (str(exc), source["id"]),
        )
        conn.commit()
        return {
            "status": "failed",
@@ -3,6 +3,7 @@ from __future__ import annotations
 import json
 import sqlite3
 import tomllib
 from datetime import datetime, timezone
 from pathlib import Path
 from urllib.parse import urlsplit
@@ -153,3 +154,82 @@ def promote_candidate(
    row = conn.execute("SELECT id FROM sources WHERE feed_url = ?", (cand["feed_url"],)).fetchone()
    return int(row["id"])
 # --- Advisory source health: flag for review, never auto-deactivate -----------
 def review_sources(
    conn: sqlite3.Connection,
    stale_days: int = 14,
    min_recent: int = 15,
    recent_window: int = 40,
 ) -> list[dict]:
    """Recompute advisory review flags for active sources.
    Sets review_flag/review_reason but NEVER changes `active` — the human stays
    in the loop. Returns the list of newly-flagged sources.
    """
    now = datetime.now(timezone.utc)
    flagged = []
    sources = conn.execute(
        "SELECT id, name, consecutive_failures FROM sources WHERE active = 1"
    ).fetchall()
    for s in sources:
        reasons: list[str] = []
        if (s["consecutive_failures"] or 0) >= 3:
            reasons.append(f"failing ({s['consecutive_failures']} consecutive)")
        recent = conn.execute(
            """
            SELECT sc.accepted, sc.cortisol_score, sc.ragebait_score, a.duplicate_of,
                   COALESCE(a.published_at, a.discovered_at) AS dt
            FROM articles a
            JOIN article_scores sc ON sc.article_id = a.id
            WHERE a.source_id = ?
            ORDER BY COALESCE(a.published_at, a.discovered_at) DESC
            LIMIT ?
            """,
            (s["id"], recent_window),
        ).fetchall()
        n = len(recent)
        if n == 0:
            reasons.append("no articles yet")
        else:
            try:
                newest = datetime.fromisoformat(recent[0]["dt"])
                if newest.tzinfo is None:
                    newest = newest.replace(tzinfo=timezone.utc)
                age = (now - newest).days
                if age > stale_days:
                    reasons.append(f"stale (newest {age}d ago)")
            except (ValueError, TypeError):
                pass
            if n >= min_recent:
                acc = sum(r["accepted"] or 0 for r in recent) / n
                if acc < 0.10:
                    reasons.append(f"low acceptance ({acc * 100:.0f}%)")
                dup = sum(1 for r in recent if r["duplicate_of"] is not None) / n
                if dup > 0.5:
                    reasons.append(f"duplicate-heavy ({dup * 100:.0f}%)")
                avg_cort = sum(r["cortisol_score"] or 0 for r in recent) / n
                if avg_cort > 5:
                    reasons.append(f"high cortisol (avg {avg_cort:.1f})")
                avg_rage = sum(r["ragebait_score"] or 0 for r in recent) / n
                if avg_rage > 3:
                    reasons.append(f"high ragebait (avg {avg_rage:.1f})")
        flag = 1 if reasons else 0
        reason = "; ".join(reasons) if reasons else None
        conn.execute(
            "UPDATE sources SET review_flag = ?, review_reason = ? WHERE id = ?",
            (flag, reason, s["id"]),
        )
        if flag:
            flagged.append({"id": s["id"], "name": s["name"], "reason": reason})
    conn.commit()
    return flagged
@@ -0,0 +1,89 @@
 from datetime import datetime, timedelta, timezone
 import pytest
 from goodnews.db import connect, init_db
 from goodnews.sources import review_sources
@pytest.fixture
def conn():
    """Yield an initialised in-memory database and close it after the test."""
    database = connect(":memory:")
    init_db(database)
    yield database
    database.close()
 def _source(conn, sid, name, failures=0):
    """Insert a source row with trust_score 5 and the given failure streak."""
    params = (sid, name, f"http://s{sid}/feed", failures)
    conn.execute(
        "INSERT INTO sources (id, name, feed_url, trust_score, consecutive_failures) VALUES (?,?,?,5,?)",
        params,
    )
 def _article(conn, sid, aid, when, accepted=1, cortisol=1, ragebait=0, dup=None):
    """Insert article `aid` for source `sid` together with its article_scores row."""
    article_sql = (
        "INSERT INTO articles (id, source_id, canonical_url, title, published_at, url_hash, duplicate_of) "
        "VALUES (?,?,?,?,?,?,?)"
    )
    article_params = (aid, sid, f"http://s{sid}/{aid}", f"t{aid}", when, f"h{aid}", dup)
    conn.execute(article_sql, article_params)
    score_sql = (
        "INSERT INTO article_scores (article_id, cortisol_score, ragebait_score, accepted) VALUES (?,?,?,?)"
    )
    conn.execute(score_sql, (aid, cortisol, ragebait, accepted))
 def test_healthy_source_not_flagged(conn):
    """Twenty fresh, accepted, low-cortisol articles produce no flag."""
    _source(conn, 1, "Healthy")
    stamp = datetime.now(timezone.utc).isoformat()
    for article_id in range(20):
        _article(conn, 1, article_id, stamp, accepted=1, cortisol=1, ragebait=0)
    conn.commit()
    assert review_sources(conn) == []
    stored = conn.execute("SELECT review_flag FROM sources WHERE id=1").fetchone()
    assert stored["review_flag"] == 0
 def test_repeated_failures_flagged(conn):
    """A failure streak of four flags the source with a 'failing' reason."""
    _source(conn, 1, "Flaky", failures=4)
    conn.commit()
    results = review_sources(conn)
    assert len(results) == 1
    assert "failing" in results[0]["reason"]
 def test_stale_source_flagged(conn):
    """A source whose only article is 40 days old is reported as stale."""
    _source(conn, 1, "Stale")
    forty_days_ago = datetime.now(timezone.utc) - timedelta(days=40)
    _article(conn, 1, 1, forty_days_ago.isoformat())
    conn.commit()
    results = review_sources(conn)
    first_reason = results[0]["reason"]
    assert "stale" in first_reason
 def test_low_acceptance_and_cortisol_flagged(conn):
    """Twenty rejected, high-cortisol articles trigger both quality reasons."""
    _source(conn, 1, "Doomy")
    stamp = datetime.now(timezone.utc).isoformat()
    for article_id in range(20):
        _article(conn, 1, article_id, stamp, accepted=0, cortisol=8, ragebait=0)
    conn.commit()
    results = review_sources(conn)
    reason = results[0]["reason"]
    assert "low acceptance" in reason
    assert "high cortisol" in reason
 def test_review_never_deactivates(conn):
    """Even a long failure streak leaves the `active` column at 1."""
    _source(conn, 1, "Flaky", failures=9)
    conn.commit()
    review_sources(conn)
    stored = conn.execute("SELECT active FROM sources WHERE id=1").fetchone()
    assert stored["active"] == 1
 def test_recovered_source_flag_cleared(conn):
    """A flagged source is unflagged once its streak resets and healthy articles arrive."""
    _source(conn, 1, "Recovered", failures=5)
    conn.commit()
    review_sources(conn)  # first pass: flagged because of the failure streak
    conn.execute("UPDATE sources SET consecutive_failures=0 WHERE id=1")
    stamp = datetime.now(timezone.utc).isoformat()
    for article_id in range(20):
        _article(conn, 1, article_id, stamp, accepted=1, cortisol=1)
    conn.commit()
    review_sources(conn)  # second pass: nothing left to report, flag is cleared
    stored = conn.execute("SELECT review_flag FROM sources WHERE id=1").fetchone()
    assert stored["review_flag"] == 0