Local-first Brief: the landing leads with good news from your home
Per the owner's call (overrides the earlier "Brief sacred" stance): when a home is
set, the homepage opens with local good news first, not global. This is the hook —
you land and see awesome stories from YOUR corner first.
- queries.home_brief: local-first highlights (high/medium-confidence near, blended
out to country then world so it's always a full, strong set), preferring already-
summarized stories so the calm read stays rich. Recent window, ranked within tier.
- /api/brief gains a `home` param: private/no-store when set; over-fetches + caps so
dismissal/boundary filtering never thins it; falls back to global top-up if needed.
- Landing UI: a Local <-> Global toggle ("📍 Near you / 🌍 Everywhere") when a home
is set, the calm picker invite when not (dismissible), and Change. Default leads
local; one tap back to the global brief. No home set => exactly today's behavior.
Backend + frontend tests green.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
"""PROTOTYPE substance audit (not production).
|
||||
|
||||
The classifier scores emotional TONE (cortisol/ragebait/constructive) but not
|
||||
SUBSTANCE, so pleasant-but-empty filler (evergreen how-tos, B2B SEO, product
|
||||
listicles, recipes) slips through. Before adding a `not_newsworthy` rejection
|
||||
dimension to the live classifier, measure whether the model can reliably tell
|
||||
genuine news from filler against Codex's rubric, and what the reject rate would be.
|
||||
|
||||
Read-only over a sample; writes a scratch JSON + prints a report. Does NOT change
|
||||
the classifier or reject anything.
|
||||
|
||||
.venv/bin/python scripts/substance_audit.py --limit 250 --base-url http://127.0.0.1:8080/v1
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
from goodnews.cli import _default_db
|
||||
from goodnews.db import connect
|
||||
from goodnews.llm import LocalModelClient, parse_classifier_json
|
||||
|
||||
# Story kinds the judge may answer with (Codex's rubric). Any other answer is
# folded into "other" by the caller.
KINDS = (
    "news_event",
    "finding",
    "announcement",
    "feature_human_interest",
    "evergreen_advice",
    "marketing",
    "product_listicle",
    "opinion",
    "other",
)

# The "positive but not news" kinds: the report counts a story of one of these
# kinds as would-reject filler. Every other kind is kept on kind alone.
FILLER = {
    "evergreen_advice",
    "marketing",
    "product_listicle",
}
|
||||
|
||||
# System prompt: defines news vs. filler and tells the model to lean KEEP when
# unsure. Wrapped one sentence per group; the concatenated text is unchanged.
SYSTEM = (
    "You judge whether a story is genuine NEWS or content-mill filler for a calm "
    "good-news site. "
    "GOOD (keep): a specific event or achievement, a recent development, "
    "a research finding, a credible announcement, or "
    "human/community/science/environmental uplift tied to something that "
    "actually happened. "
    "FILLER (not news): evergreen how-to/advice, marketing or B2B service "
    "explainers, generic 'why X matters' SEO pieces, product round-ups/listicles, "
    "recipes. "
    "Judge SUBSTANCE, not tone — pleasant and non-negative is NOT the same as "
    "newsworthy. "
    "When genuinely unsure, lean KEEP (don't reject real good news). "
    "Reply with ONLY a JSON object."
)

# Appended to every user message: the exact JSON shape the reply must have.
INSTRUCT = (
    "Return JSON exactly like:\n"
    '{"kind": "<news_event|finding|announcement|feature_human_interest|'
    'evergreen_advice|marketing|product_listicle|opinion|other>", '
    '"newsworthy": <true|false>, '
    '"confidence": "<high|medium|low>", '
    '"rationale": "<one short clause>"}'
)
|
||||
|
||||
|
||||
def fetch(conn, limit):
    """Load the newest accepted, non-duplicate articles for the audit.

    Args:
        conn: open database connection exposing ``execute(sql, params)``.
        limit: maximum number of rows to return.

    Returns:
        The ``fetchall()`` result, newest ``discovered_at`` first. Each row
        carries ``id``, ``title``, ``description``, ``source`` (the source
        name) and the ``summary`` / ``what_happened`` / ``why_matters``
        columns, which are NULL when the article has no summary row
        (LEFT JOIN).
    """
    query = """SELECT a.id, a.title, a.description, src.name AS source,
                      sm.summary, sm.what_happened, sm.why_matters
               FROM articles a
               JOIN sources src ON src.id = a.source_id
               JOIN article_scores s ON s.article_id = a.id
               LEFT JOIN article_summaries sm ON sm.article_id = a.id
               WHERE s.accepted = 1 AND a.duplicate_of IS NULL
               ORDER BY a.discovered_at DESC LIMIT ?"""
    cursor = conn.execute(query, (limit,))
    return cursor.fetchall()
|
||||
|
||||
|
||||
def text(r):
    """Render one article row as the labelled plain-text block shown to the model.

    Args:
        r: mapping-style row with ``source``, ``title``, ``summary``,
            ``what_happened``, ``why_matters`` and ``description`` keys.

    Returns:
        Newline-joined ``LABEL: value`` lines. SOURCE and TITLE are always
        present; the remaining fields appear only when their value is truthy.
    """
    optional_fields = (
        ("SUMMARY", "summary"),
        ("WHAT HAPPENED", "what_happened"),
        ("WHY IT MATTERS", "why_matters"),
        ("BLURB", "description"),
    )
    lines = [f"SOURCE: {r['source']}", f"TITLE: {r['title']}"]
    lines.extend(
        f"{label}: {r[column]}" for label, column in optional_fields if r[column]
    )
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _as_bool(value, default=True):
    """Coerce a model-supplied flag to ``bool``.

    Models sometimes quote booleans, and ``bool("false")`` is ``True``, so
    strings are matched by word. Missing, null, empty or unrecognized values
    fall back to ``default`` (keep), matching the prompt's lean-KEEP rule.
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        word = value.strip().lower()
        if word in ("true", "yes", "y", "1"):
            return True
        if word in ("false", "no", "n", "0"):
            return False
        return default
    if value is None:
        return default
    return bool(value)


def _as_choice(value, allowed, fallback):
    """Return ``value`` lower-cased and stripped if it is in ``allowed``, else ``fallback``."""
    if isinstance(value, str):
        word = value.strip().lower()
        if word in allowed:
            return word
    return fallback


def judge(client, r):
    """Ask the model whether one article is news or filler and normalize its reply.

    Sends ``SYSTEM`` as the system message and ``text(r)`` followed by
    ``INSTRUCT`` as the user message through ``client.chat_text``, then parses
    the reply with ``parse_classifier_json``.

    Args:
        client: model client exposing ``chat_text(messages)``.
        r: article row accepted by ``text``.

    Returns:
        A dict with:
            kind: one of ``KINDS``; anything else becomes ``"other"``.
            newsworthy: bool; quoted booleans are honored, unusable values
                default to ``True``.
            confidence: ``"high"``, ``"medium"`` or ``"low"``; anything else
                becomes ``"low"``.
            rationale: the reply's rationale cut to 200 characters, or
                ``None`` when empty.

    Raises:
        Whatever ``client.chat_text`` or ``parse_classifier_json`` raise; the
        caller is expected to handle failures.
    """
    reply = client.chat_text([
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": text(r) + "\n\n" + INSTRUCT},
    ])
    # NOTE(review): presumably returns a dict of the reply's JSON fields — confirm.
    data = parse_classifier_json(reply)
    return {
        "kind": _as_choice(data.get("kind"), KINDS, "other"),
        "newsworthy": _as_bool(data.get("newsworthy"), default=True),
        "confidence": _as_choice(data.get("confidence"), ("high", "medium", "low"), "low"),
        "rationale": (str(data.get("rationale") or "")[:200]) or None,
    }
|
||||
|
||||
|
||||
def _judge_pending(client, rows, res, out):
    """Judge every row that has no usable verdict in ``res``, checkpointing to ``out``.

    A cached verdict flagged ``"error"`` is a placeholder from a failed call,
    so it is judged again rather than skipped. ``res`` is updated in place and
    written to ``out`` after every 25 judged rows.
    """
    done = 0
    for r in rows:
        rid = str(r["id"])
        cached = res.get(rid)
        if cached is not None and not cached.get("error"):
            continue
        try:
            res[rid] = judge(client, r)
        except Exception as exc:  # noqa: BLE001 — prototype
            # Keep-by-default placeholder so one bad call never aborts the run.
            res[rid] = {"kind": "other", "newsworthy": True, "confidence": "low",
                        "rationale": f"ERR {type(exc).__name__}", "error": True}
        done += 1
        if done % 25 == 0:
            out.write_text(json.dumps(res, indent=1))
            print(f" ...{done}")


def _print_report(res, by_id):
    """Print kind/confidence totals, the would-reject rate and a sample to eyeball.

    ``res`` maps article id to verdict and may hold ids from earlier runs;
    ``by_id`` maps the ids of the current sample to their rows, so the
    per-source and sample sections only cover ids present in ``by_id``.
    """
    n = len(res) or 1  # keeps the percentages defined when nothing was judged
    kinds = Counter(v["kind"] for v in res.values())
    filler = [rid for rid, v in res.items() if (not v["newsworthy"]) or v["kind"] in FILLER]
    print(f"\n===== SUBSTANCE AUDIT (n={len(res)}) =====")
    print("Kind:")
    for k in KINDS:
        print(f" {k:<24} {kinds.get(k,0):>4} {100*kinds.get(k,0)/n:.0f}%")
    print(f"\nWould-reject as filler: {len(filler)} ({100*len(filler)/n:.0f}%)")
    print("Confidence:", dict(Counter(v["confidence"] for v in res.values())))
    errors = sum(1 for v in res.values() if v.get("error"))
    if errors:
        # Failed calls are stored as kind "other"; say so, so the totals are not misread.
        print(f"Errors (counted above as other/keep): {errors}")
    # by source — which feeds are filler-heavy
    in_sample = [rid for rid in filler if rid in by_id]
    src = Counter(by_id[rid]["source"] for rid in in_sample)
    print("\nFiller by source (top 12):")
    for s, c in src.most_common(12):
        print(f" {c:>3} {s}")
    print("\n--- sample WOULD-REJECT (eyeball for false positives) ---")
    # Slice after dropping stale ids so up to 18 rows are actually shown.
    for rid in in_sample[:18]:
        v = res[rid]
        r = by_id[rid]
        source = (r["source"] or "")[:16]
        title = (r["title"] or "")[:52]  # a missing title must not crash the report
        print(f" [{source:16}] {v['kind']:<18} {v['confidence']:<6} | {title}")
        print(f" {v['rationale'] or ''}")


def main():
    """Run the substance audit from the command line and print the report.

    Flags:
        --db        database path; ``_default_db()`` is used when omitted.
        --limit     number of most recent accepted articles to sample (default 250).
        --out       scratch JSON of verdicts keyed by article id
                    (default ``data/substance_audit.json``). An existing file is
                    loaded first, so a re-run only judges articles that are
                    missing or whose earlier attempt failed.
        --base-url  overrides the model client's ``base_url`` (trailing ``/`` removed).
        --model     overrides the model client's ``model``.

    The scratch file's directory is created if needed. Verdicts gathered so far
    are written out and the connection is closed even when the run is
    interrupted.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--db", default=None)
    ap.add_argument("--limit", type=int, default=250)
    ap.add_argument("--out", default="data/substance_audit.json")
    ap.add_argument("--base-url", default=None)
    ap.add_argument("--model", default=None)
    args = ap.parse_args()

    conn = connect(args.db or str(_default_db()))
    try:
        client = LocalModelClient.from_env()
        if args.base_url:
            client.base_url = args.base_url.rstrip("/")
        if args.model:
            client.model = args.model

        out = Path(args.out)
        # Create the folder up front; otherwise the first checkpoint write can
        # fail only after 25 model calls have already been spent.
        out.parent.mkdir(parents=True, exist_ok=True)
        res = json.loads(out.read_text()) if out.exists() else {}
        rows = fetch(conn, args.limit)
        by_id = {str(r["id"]): r for r in rows}
        try:
            _judge_pending(client, rows, res, out)
        finally:
            # Persist partial progress even on Ctrl-C or an unexpected failure.
            out.write_text(json.dumps(res, indent=1))
    finally:
        conn.close()

    _print_report(res, by_id)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user