Local-first Brief: the landing leads with good news from your home

Per the owner's call (overrides the earlier "Brief sacred" stance): when a home is set, the homepage opens with local good news first, not global. This is the hook — you land and see awesome stories from YOUR corner first.

- queries.home_brief: local-first highlights (high/medium-confidence near, blended out to country then world so it's always a full, strong set), preferring already-summarized stories so the calm read stays rich. Recent window, ranked within tier.
- /api/brief gains a `home` param: private/no-store when set; over-fetches + caps so dismissal/boundary filtering never thins it; falls back to global top-up if needed.
- Landing UI: a Local <-> Global toggle ("📍 Near you / 🌍 Everywhere") when a home is set, the calm picker invite when not (dismissible), and Change. Default leads local; one tap back to the global brief.

No home set => exactly today's behavior. Backend + frontend tests green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-19 21:36:18 -04:00
parent 2239549799
commit d2a6293a13
7 changed files with 1784 additions and 16 deletions
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+"""PROTOTYPE substance audit (not production).
+
+The classifier scores emotional TONE (cortisol/ragebait/constructive) but not
+SUBSTANCE, so pleasant-but-empty filler (evergreen how-tos, B2B SEO, product
+listicles, recipes) slips through. Before adding a `not_newsworthy` rejection
+dimension to the live classifier, measure whether the model can reliably tell
+genuine news from filler against Codex's rubric, and what the reject rate would be.
+
+Read-only over a sample; writes a scratch JSON + prints a report. Does NOT change
+the classifier or reject anything.
+
+  .venv/bin/python scripts/substance_audit.py --limit 250 --base-url http://127.0.0.1:8080/v1
+"""
+from __future__ import annotations
+
+import argparse
+import json
+from collections import Counter
+from pathlib import Path
+
+from goodnews.cli import _default_db
+from goodnews.db import connect
+from goodnews.llm import LocalModelClient, parse_classifier_json
+
+# Codex's rubric. KEEP = real news; the rest are "positive but not news" filler.
+# KINDS is the closed set of labels accepted from the model: judge() maps any
+# other value to "other", and the report prints one row per entry, in this order.
+KINDS = ("news_event", "finding", "announcement", "feature_human_interest",
+         "evergreen_advice", "marketing", "product_listicle", "opinion", "other")
+# Kinds the report counts as would-reject filler regardless of the model's own
+# "newsworthy" flag. "opinion" and "other" are not listed here, so they only
+# count as filler when the model itself answers newsworthy=false.
+FILLER = {"evergreen_advice", "marketing", "product_listicle"}
+
+# System prompt: states the keep-vs-filler rubric and tells the model to lean
+# KEEP when unsure and to answer with a JSON object only.
+SYSTEM = (
+    "You judge whether a story is genuine NEWS or content-mill filler for a calm "
+    "good-news site. GOOD (keep): a specific event or achievement, a recent "
+    "development, a research finding, a credible announcement, or human/community/"
+    "science/environmental uplift tied to something that actually happened. FILLER "
+    "(not news): evergreen how-to/advice, marketing or B2B service explainers, generic "
+    "'why X matters' SEO pieces, product round-ups/listicles, recipes. Judge SUBSTANCE, "
+    "not tone — pleasant and non-negative is NOT the same as newsworthy. When genuinely "
+    "unsure, lean KEEP (don't reject real good news). Reply with ONLY a JSON object."
+)
+# Appended to the user message after the story text. It spells out the reply
+# shape; the "kind" alternatives listed here mirror KINDS and must be kept in
+# sync with it by hand.
+INSTRUCT = (
+    "Return JSON exactly like:\n"
+    '{"kind": "<news_event|finding|announcement|feature_human_interest|evergreen_advice|'
+    'marketing|product_listicle|opinion|other>", "newsworthy": <true|false>, '
+    '"confidence": "<high|medium|low>", "rationale": "<one short clause>"}'
+)
+
+
+def fetch(conn, limit):
+    return conn.execute(
+        """SELECT a.id, a.title, a.description, src.name AS source,
+                  sm.summary, sm.what_happened, sm.why_matters
+           FROM articles a
+           JOIN sources src ON src.id = a.source_id
+           JOIN article_scores s ON s.article_id = a.id
+           LEFT JOIN article_summaries sm ON sm.article_id = a.id
+           WHERE s.accepted = 1 AND a.duplicate_of IS NULL
+           ORDER BY a.discovered_at DESC LIMIT ?""", (limit,)).fetchall()
+
+
+def text(r):
+    parts = [f"SOURCE: {r['source']}", f"TITLE: {r['title']}"]
+    for lbl, k in (("SUMMARY", "summary"), ("WHAT HAPPENED", "what_happened"),
+                   ("WHY IT MATTERS", "why_matters"), ("BLURB", "description")):
+        if r[k]:
+            parts.append(f"{lbl}: {r[k]}")
+    return "\n".join(parts)
+
+
+def judge(client, r):
+    data = parse_classifier_json(client.chat_text([
+        {"role": "system", "content": SYSTEM},
+        {"role": "user", "content": text(r) + "\n\n" + INSTRUCT},
+    ]))
+    kind = data.get("kind") if data.get("kind") in KINDS else "other"
+    return {"kind": kind,
+            "newsworthy": bool(data.get("newsworthy", True)),
+            "confidence": data.get("confidence") if data.get("confidence") in ("high", "medium", "low") else "low",
+            "rationale": (str(data.get("rationale") or "")[:200]) or None}
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--db", default=None)
+    ap.add_argument("--limit", type=int, default=250)
+    ap.add_argument("--out", default="data/substance_audit.json")
+    ap.add_argument("--base-url", default=None)
+    ap.add_argument("--model", default=None)
+    args = ap.parse_args()
+
+    conn = connect(args.db or str(_default_db()))
+    client = LocalModelClient.from_env()
+    if args.base_url:
+        client.base_url = args.base_url.rstrip("/")
+    if args.model:
+        client.model = args.model
+
+    out = Path(args.out)
+    res = json.loads(out.read_text()) if out.exists() else {}
+    rows = fetch(conn, args.limit)
+    by_id = {str(r["id"]): r for r in rows}
+    done = 0
+    for r in rows:
+        rid = str(r["id"])
+        if rid in res:
+            continue
+        try:
+            res[rid] = judge(client, r)
+        except Exception as exc:  # noqa: BLE001 — prototype
+            res[rid] = {"kind": "other", "newsworthy": True, "confidence": "low",
+                        "rationale": f"ERR {type(exc).__name__}", "error": True}
+        done += 1
+        if done % 25 == 0:
+            out.write_text(json.dumps(res, indent=1)); print(f"  ...{done}")
+    out.write_text(json.dumps(res, indent=1))
+    conn.close()
+
+    n = len(res) or 1
+    kinds = Counter(v["kind"] for v in res.values())
+    filler = [rid for rid, v in res.items() if (not v["newsworthy"]) or v["kind"] in FILLER]
+    print(f"\n===== SUBSTANCE AUDIT (n={len(res)}) =====")
+    print("Kind:")
+    for k in KINDS:
+        print(f"  {k:<24} {kinds.get(k,0):>4}  {100*kinds.get(k,0)/n:.0f}%")
+    print(f"\nWould-reject as filler: {len(filler)} ({100*len(filler)/n:.0f}%)")
+    print("Confidence:", dict(Counter(v["confidence"] for v in res.values())))
+    # by source — which feeds are filler-heavy
+    src = Counter(by_id[rid]["source"] for rid in filler if rid in by_id)
+    print("\nFiller by source (top 12):")
+    for s, c in src.most_common(12):
+        print(f"  {c:>3}  {s}")
+    print("\n--- sample WOULD-REJECT (eyeball for false positives) ---")
+    for rid in filler[:18]:
+        v = res[rid]; r = by_id.get(rid)
+        if r:
+            print(f"  [{r['source'][:16]:16}] {v['kind']:<18} {v['confidence']:<6} | {r['title'][:52]}")
+            print(f"        {v['rationale'] or ''}")
+
+
+if __name__ == "__main__":
+    main()