#!/usr/bin/env python3
"""PROTOTYPE substance audit (not production).

The classifier scores emotional TONE (cortisol/ragebait/constructive) but not
SUBSTANCE, so pleasant-but-empty filler (evergreen how-tos, B2B SEO, product
listicles, recipes) slips through. Before adding a `not_newsworthy` rejection
dimension to the live classifier, measure whether the model can reliably tell
genuine news from filler against Codex's rubric, and what the reject rate
would be.

Read-only over a sample; writes a scratch JSON + prints a report. Does NOT
change the classifier or reject anything.

Re-running resumes from the scratch JSON: ids that already have a verdict are
skipped, ids whose previous attempt errored are judged again.

    .venv/bin/python scripts/substance_audit.py --limit 250 --base-url http://127.0.0.1:8080/v1
"""
from __future__ import annotations

import argparse
import json
from collections import Counter
from pathlib import Path

from goodnews.cli import _default_db
from goodnews.db import connect
from goodnews.llm import LocalModelClient, parse_classifier_json

# Codex's rubric. KEEP = real news; the rest are "positive but not news" filler.
KINDS = (
    "news_event",
    "finding",
    "announcement",
    "feature_human_interest",
    "evergreen_advice",
    "marketing",
    "product_listicle",
    "opinion",
    "other",
)
FILLER = {"evergreen_advice", "marketing", "product_listicle"}

# Accepted values for the model's self-reported confidence.
_CONFIDENCES = ("high", "medium", "low")

# Spellings accepted when the model returns `newsworthy` as a string.
_TRUE_WORDS = frozenset({"true", "yes", "y", "1"})
_FALSE_WORDS = frozenset({"false", "no", "n", "0"})

SYSTEM = (
    "You judge whether a story is genuine NEWS or content-mill filler for a calm "
    "good-news site. GOOD (keep): a specific event or achievement, a recent "
    "development, a research finding, a credible announcement, or human/community/"
    "science/environmental uplift tied to something that actually happened. FILLER "
    "(not news): evergreen how-to/advice, marketing or B2B service explainers, generic "
    "'why X matters' SEO pieces, product round-ups/listicles, recipes. Judge SUBSTANCE, "
    "not tone — pleasant and non-negative is NOT the same as newsworthy. When genuinely "
    "unsure, lean KEEP (don't reject real good news). Reply with ONLY a JSON object."
)

# The template is generated from KINDS so the vocabulary the model is shown can
# never drift from the vocabulary judge() accepts.
INSTRUCT = (
    "Return JSON exactly like:\n"
    '{"kind": "one of: ' + " | ".join(KINDS) + '", '
    '"newsworthy": true or false, '
    '"confidence": "' + " | ".join(_CONFIDENCES) + '", '
    '"rationale": "one short sentence"}'
)


def fetch(conn, limit):
    """Return up to *limit* accepted, non-duplicate articles, newest first.

    Each row carries id, title, description, the source name (as ``source``)
    and the optional summary columns; the summary join is a LEFT JOIN, so
    those three columns may be NULL.
    """
    return conn.execute(
        """SELECT a.id, a.title, a.description, src.name AS source,
                  sm.summary, sm.what_happened, sm.why_matters
           FROM articles a
           JOIN sources src ON src.id = a.source_id
           JOIN article_scores s ON s.article_id = a.id
           LEFT JOIN article_summaries sm ON sm.article_id = a.id
           WHERE s.accepted = 1 AND a.duplicate_of IS NULL
           ORDER BY a.discovered_at DESC
           LIMIT ?""",
        (limit,),
    ).fetchall()


def text(r):
    """Render one article row as the labelled plain-text block shown to the model.

    SOURCE and TITLE are always emitted; the remaining sections are emitted
    only when the corresponding column is non-empty.
    """
    parts = [f"SOURCE: {r['source']}", f"TITLE: {r['title']}"]
    for lbl, k in (
        ("SUMMARY", "summary"),
        ("WHAT HAPPENED", "what_happened"),
        ("WHY IT MATTERS", "why_matters"),
        ("BLURB", "description"),
    ):
        if r[k]:
            parts.append(f"{lbl}: {r[k]}")
    return "\n".join(parts)


def _as_bool(value, default=True):
    """Coerce a model-supplied flag to bool, falling back to *default*.

    ``bool("false")`` is True, so strings are matched against known spellings
    instead. Missing, null or unrecognised values return *default*.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        word = value.strip().lower()
        if word in _TRUE_WORDS:
            return True
        if word in _FALSE_WORDS:
            return False
        return default
    if isinstance(value, (int, float)):
        return bool(value)
    return default


def _pick_label(value, allowed, fallback):
    """Return *value* (stripped, lower-cased) if it is in *allowed*, else *fallback*."""
    if not isinstance(value, str):
        return fallback
    label = value.strip().lower()
    return label if label in allowed else fallback


def judge(client, r):
    """Ask the model to classify one article row and normalise its reply.

    Returns a dict with ``kind`` (one of KINDS, unknown values become
    "other"), ``newsworthy`` (bool; anything ambiguous counts as True because
    the rubric says to lean KEEP), ``confidence`` ("high"/"medium"/"low",
    unknown values become "low") and ``rationale`` (at most 200 characters,
    or None when empty).

    Exceptions from the client or the JSON parser propagate to the caller.
    """
    reply = client.chat_text([
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": text(r) + "\n\n" + INSTRUCT},
    ])
    data = parse_classifier_json(reply)
    return {
        "kind": _pick_label(data.get("kind"), KINDS, "other"),
        "newsworthy": _as_bool(data.get("newsworthy"), default=True),
        "confidence": _pick_label(data.get("confidence"), _CONFIDENCES, "low"),
        "rationale": (str(data.get("rationale") or "")[:200]) or None,
    }


def _save(out, res):
    """Write the verdict map to the scratch file as JSON."""
    out.write_text(json.dumps(res, indent=1), encoding="utf-8")


def _report(res, by_id):
    """Print the kind breakdown, would-reject rate, per-source tally and a sample.

    *res* maps article id (str) to a verdict dict; *by_id* maps article id to
    the row fetched in this run. Ids present in *res* but absent from *by_id*
    (left over from an earlier run) count in the totals but cannot be
    attributed to a source or shown in the sample.
    """
    n = len(res) or 1  # avoid dividing by zero on an empty sample
    kinds = Counter(v["kind"] for v in res.values())
    filler = [rid for rid, v in res.items()
              if (not v["newsworthy"]) or v["kind"] in FILLER]
    errors = sum(1 for v in res.values() if v.get("error"))

    print(f"\n===== SUBSTANCE AUDIT (n={len(res)}) =====")
    print("Kind:")
    for k in KINDS:
        print(f" {k:<24} {kinds.get(k, 0):>4} {100 * kinds.get(k, 0) / n:.0f}%")
    print(f"\nWould-reject as filler: {len(filler)} ({100 * len(filler) / n:.0f}%)")
    print("Confidence:", dict(Counter(v["confidence"] for v in res.values())))
    if errors:
        # Errored rows are stored as kind "other" / newsworthy, which skews
        # the numbers above; make that visible instead of silent.
        print(f"Errors: {errors} (counted as 'other'/keep above; re-run to retry)")

    # by source — which feeds are filler-heavy
    src = Counter(by_id[rid]["source"] for rid in filler if rid in by_id)
    print("\nFiller by source (top 12):")
    for s, c in src.most_common(12):
        print(f" {c:>3} {s}")

    print("\n--- sample WOULD-REJECT (eyeball for false positives) ---")
    # Pick the first 18 that can actually be displayed, so stale ids do not
    # eat into the sample.
    for rid in [rid for rid in filler if rid in by_id][:18]:
        v = res[rid]
        r = by_id[rid]
        source = (r["source"] or "")[:16]
        title = (r["title"] or "")[:52]
        print(f" [{source:16}] {v['kind']:<18} {v['confidence']:<6} | {title}")
        print(f" {v['rationale'] or ''}")


def main():
    """CLI entry point: judge the sample, checkpoint verdicts, print the report."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--db", default=None)
    ap.add_argument("--limit", type=int, default=250)
    ap.add_argument("--out", default="data/substance_audit.json")
    ap.add_argument("--base-url", default=None)
    ap.add_argument("--model", default=None)
    args = ap.parse_args()

    out = Path(args.out)
    res = json.loads(out.read_text(encoding="utf-8")) if out.exists() else {}
    # Create the scratch directory now rather than failing on the first
    # checkpoint, 25 model calls in.
    out.parent.mkdir(parents=True, exist_ok=True)

    conn = connect(args.db or str(_default_db()))
    try:
        client = LocalModelClient.from_env()
        if args.base_url:
            client.base_url = args.base_url.rstrip("/")
        if args.model:
            client.model = args.model

        rows = fetch(conn, args.limit)
        by_id = {str(r["id"]): r for r in rows}

        done = 0
        for r in rows:
            rid = str(r["id"])
            previous = res.get(rid)
            # Keep real verdicts; retry rows whose last attempt errored so a
            # transient model outage does not stay in the sample forever.
            if previous is not None and not previous.get("error"):
                continue
            try:
                res[rid] = judge(client, r)
            except Exception as exc:  # noqa: BLE001 — prototype
                res[rid] = {"kind": "other", "newsworthy": True,
                            "confidence": "low",
                            "rationale": f"ERR {type(exc).__name__}",
                            "error": True}
            done += 1
            if done % 25 == 0:
                _save(out, res)
                print(f" ...{done}")
        _save(out, res)
    finally:
        conn.close()

    _report(res, by_id)


if __name__ == "__main__":
    main()