#!/usr/bin/env python3
"""PROTOTYPE substance audit (not production).

The classifier scores emotional TONE (cortisol/ragebait/constructive) but not
SUBSTANCE, so pleasant-but-empty filler (evergreen how-tos, B2B SEO, product
listicles, recipes) slips through. Before adding a `not_newsworthy` rejection
dimension to the live classifier, measure whether the model can reliably tell
genuine news from filler against Codex's rubric, and what the reject rate would be.

Read-only over a sample; writes a scratch JSON + prints a report. Does NOT change
the classifier or reject anything.

  .venv/bin/python scripts/substance_audit.py --limit 250 --base-url http://127.0.0.1:8080/v1
"""
from __future__ import annotations

import argparse
import json
from collections import Counter
from pathlib import Path

from goodnews.cli import _default_db
from goodnews.db import connect
from goodnews.llm import LocalModelClient, parse_classifier_json

# Codex's rubric. KEEP = real news; the rest are "positive but not news" filler.
# KINDS is the single source of truth for the allowed `kind` labels: judge()
# validates the model's answer against it, main() iterates it for the report,
# and INSTRUCT below is generated from it so the prompt can never drift from
# the labels the code accepts.
KINDS = ("news_event", "finding", "announcement", "feature_human_interest",
         "evergreen_advice", "marketing", "product_listicle", "opinion", "other")
# Kinds that count as would-reject filler even if the model says newsworthy=true.
FILLER = {"evergreen_advice", "marketing", "product_listicle"}

# System message: defines the news-vs-filler rubric and the "lean KEEP" tie-break.
SYSTEM = (
    "You judge whether a story is genuine NEWS or content-mill filler for a calm "
    "good-news site. GOOD (keep): a specific event or achievement, a recent "
    "development, a research finding, a credible announcement, or human/community/"
    "science/environmental uplift tied to something that actually happened. FILLER "
    "(not news): evergreen how-to/advice, marketing or B2B service explainers, generic "
    "'why X matters' SEO pieces, product round-ups/listicles, recipes. Judge SUBSTANCE, "
    "not tone — pleasant and non-negative is NOT the same as newsworthy. When genuinely "
    "unsure, lean KEEP (don't reject real good news). Reply with ONLY a JSON object."
)
# Output-format instruction appended to every user message. The kind
# alternatives are joined from KINDS rather than typed out a second time.
INSTRUCT = (
    "Return JSON exactly like:\n"
    '{"kind": "<' + "|".join(KINDS) + '>", "newsworthy": <true|false>, '
    '"confidence": "<high|medium|low>", "rationale": "<one short clause>"}'
)


def fetch(conn, limit):
    """Return up to *limit* accepted, non-duplicate articles, newest first.

    Each row carries the article id, title and description, the source name
    (aliased ``source``) and the summary fields. The summary columns come from
    a LEFT JOIN, so they are NULL for articles that have no summary row.
    Ordering is by ``discovered_at`` descending.
    """
    query = """SELECT a.id, a.title, a.description, src.name AS source,
                  sm.summary, sm.what_happened, sm.why_matters
           FROM articles a
           JOIN sources src ON src.id = a.source_id
           JOIN article_scores s ON s.article_id = a.id
           LEFT JOIN article_summaries sm ON sm.article_id = a.id
           WHERE s.accepted = 1 AND a.duplicate_of IS NULL
           ORDER BY a.discovered_at DESC LIMIT ?"""
    cursor = conn.execute(query, (limit,))
    return cursor.fetchall()


def text(r):
    """Render one article row as the labelled plain-text block sent to the model.

    SOURCE and TITLE lines are always emitted. SUMMARY, WHAT HAPPENED,
    WHY IT MATTERS and BLURB follow in that order, each only when the
    corresponding field of *r* is truthy. Lines are joined with newlines.
    """
    optional_fields = (
        ("SUMMARY", "summary"),
        ("WHAT HAPPENED", "what_happened"),
        ("WHY IT MATTERS", "why_matters"),
        ("BLURB", "description"),
    )
    header = [f"SOURCE: {r['source']}", f"TITLE: {r['title']}"]
    extras = [f"{label}: {r[key]}" for label, key in optional_fields if r[key]]
    return "\n".join(header + extras)


def _as_bool(value, default=True):
    """Coerce a model-supplied JSON value to a bool, else return *default*.

    Plain ``bool(value)`` treats the string "false" as True, so string
    spellings are interpreted explicitly. ``None`` and anything unrecognised
    fall back to *default*.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        token = value.strip().lower()
        if token in ("true", "yes", "y", "1"):
            return True
        if token in ("false", "no", "n", "0"):
            return False
        return default
    if isinstance(value, (int, float)):
        return bool(value)
    return default


def _label(value, allowed, fallback):
    """Return *value* trimmed and lower-cased if it is in *allowed*, else *fallback*."""
    if isinstance(value, str):
        value = value.strip().lower()
        if value in allowed:
            return value
    return fallback


def judge(client, r):
    """Ask the model whether article row *r* is news or filler; return a verdict.

    Sends SYSTEM as the system message and the rendered article plus INSTRUCT
    as the user message, then parses the reply with ``parse_classifier_json``
    (assumed to return a dict; anything else raises here and is handled by
    the caller).

    Returns a dict with:
      kind        one of KINDS; unknown or missing values become "other"
      newsworthy  bool; missing, null or unrecognised values default to True,
                  matching the rubric's "when unsure, lean KEEP"
      confidence  "high" | "medium" | "low"; anything else becomes "low"
      rationale   the model's rationale truncated to 200 chars, or None
    """
    data = parse_classifier_json(client.chat_text([
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": text(r) + "\n\n" + INSTRUCT},
    ]))
    # Models often vary case/whitespace ("News_Event", " high "), so labels
    # are normalised before being checked against the allowed sets.
    return {"kind": _label(data.get("kind"), KINDS, "other"),
            "newsworthy": _as_bool(data.get("newsworthy"), default=True),
            "confidence": _label(data.get("confidence"), ("high", "medium", "low"), "low"),
            "rationale": (str(data.get("rationale") or "")[:200]) or None}


def _print_report(res, by_id):
    """Print the audit report for the verdicts in *res*.

    *res* maps article id (str) to a verdict dict; *by_id* maps the ids of the
    currently fetched sample to their rows. Verdicts flagged ``error`` are
    placeholders for failed model calls, so they are reported separately and
    excluded from every percentage rather than being counted as kept news.
    """
    judged = {rid: v for rid, v in res.items() if not v.get("error")}
    errors = len(res) - len(judged)
    n = len(judged) or 1  # avoid ZeroDivisionError when nothing was judged
    kinds = Counter(v["kind"] for v in judged.values())
    filler = [rid for rid, v in judged.items()
              if (not v["newsworthy"]) or v["kind"] in FILLER]

    print(f"\n===== SUBSTANCE AUDIT (n={len(judged)}) =====")
    if errors:
        print(f"Errors (excluded from stats, retried on next run): {errors}")
    print("Kind:")
    for k in KINDS:
        count = kinds[k]
        print(f"  {k:<24} {count:>4}  {100*count/n:.0f}%")
    print(f"\nWould-reject as filler: {len(filler)} ({100*len(filler)/n:.0f}%)")
    print("Confidence:", dict(Counter(v["confidence"] for v in judged.values())))

    # by source — which feeds are filler-heavy. Ids that are in the scratch
    # file but not in the current sample have no row, so they are skipped.
    src = Counter(by_id[rid]["source"] for rid in filler if rid in by_id)
    print("\nFiller by source (top 12):")
    for s, c in src.most_common(12):
        print(f"  {c:>3}  {s}")

    print("\n--- sample WOULD-REJECT (eyeball for false positives) ---")
    # Filter to known rows before slicing so the sample really has up to 18.
    for rid in [rid for rid in filler if rid in by_id][:18]:
        v = res[rid]
        r = by_id[rid]
        source = (r["source"] or "")[:16]
        title = (r["title"] or "")[:52]  # title may be NULL in the DB
        print(f"  [{source:16}] {v['kind']:<18} {v['confidence']:<6} | {title}")
        print(f"        {v['rationale'] or ''}")


def main():
    """Judge a sample of accepted articles and print a substance report.

    Flags: --db (database path), --limit (sample size), --out (scratch JSON
    used for resume), --base-url / --model (override the client from env).

    Verdicts already in the scratch file are reused; entries recorded as
    errors are retried. Progress is checkpointed every 25 judgements and
    again when the loop exits, including on Ctrl-C.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--db", default=None)
    ap.add_argument("--limit", type=int, default=250)
    ap.add_argument("--out", default="data/substance_audit.json")
    ap.add_argument("--base-url", default=None)
    ap.add_argument("--model", default=None)
    args = ap.parse_args()

    conn = connect(args.db or str(_default_db()))
    try:
        # fetch() materialises the rows with fetchall(), so the connection
        # is only needed for this call.
        rows = fetch(conn, args.limit)
    finally:
        conn.close()

    client = LocalModelClient.from_env()
    if args.base_url:
        client.base_url = args.base_url.rstrip("/")
    if args.model:
        client.model = args.model

    out = Path(args.out)
    # The default lives under data/, which may not exist yet.
    out.parent.mkdir(parents=True, exist_ok=True)
    res = json.loads(out.read_text()) if out.exists() else {}
    by_id = {str(r["id"]): r for r in rows}

    done = 0
    try:
        for r in rows:
            rid = str(r["id"])
            prev = res.get(rid)
            # Reuse real verdicts; retry placeholders left by failed calls.
            if prev is not None and not prev.get("error"):
                continue
            try:
                res[rid] = judge(client, r)
            except Exception as exc:  # noqa: BLE001 — prototype
                res[rid] = {"kind": "other", "newsworthy": True, "confidence": "low",
                            "rationale": f"ERR {type(exc).__name__}", "error": True}
            done += 1
            if done % 25 == 0:
                out.write_text(json.dumps(res, indent=1))
                print(f"  ...{done}")
    finally:
        # Persist whatever was judged, even if the run is interrupted.
        out.write_text(json.dumps(res, indent=1))

    _print_report(res, by_id)


# Run the audit only when executed as a script, not when imported.
if __name__ == "__main__":
    main()