Local-first Brief: the landing leads with good news from your home
Per the owner's call (overrides the earlier "Brief sacred" stance): when a home is
set, the homepage opens with local good news first, not global. This is the hook —
you land and see awesome stories from YOUR corner first.
- queries.home_brief: local-first highlights (high/medium-confidence near, blended
out to country then world so it's always a full, strong set), preferring already-
summarized stories so the calm read stays rich. Recent window, ranked within tier.
- /api/brief gains a `home` param: private/no-store when set; over-fetches + caps so
dismissal/boundary filtering never thins it; falls back to global top-up if needed.
- Landing UI: a Local <-> Global toggle ("📍 Near you / 🌍 Everywhere") when a home
is set, the calm picker invite when not (dismissible), and Change. Default leads
local; one tap back to the global brief. No home set => exactly today's behavior.
Backend + frontend tests green.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -69,7 +69,8 @@
|
|||||||
let homeValue = $state('');
|
let homeValue = $state('');
|
||||||
let homePromptDismissed = $state(false);
|
let homePromptDismissed = $state(false);
|
||||||
let feedNextOffset = $state(null);
|
let feedNextOffset = $state(null);
|
||||||
const homeActive = () => selected === 'browse' && !!homeValue;
|
let showGlobalBrief = $state(false); // toggle: see the global brief even with a home set
|
||||||
|
const homeActive = () => selected === 'latest' && !!homeValue;
|
||||||
let showSignIn = $state(false);
|
let showSignIn = $state(false);
|
||||||
let showSaved = $state(false); // Saved flyout
|
let showSaved = $state(false); // Saved flyout
|
||||||
let loading = $state(true);
|
let loading = $state(true);
|
||||||
@@ -248,15 +249,17 @@
|
|||||||
// Instant-paint and the merge only reuse a saved brief when this still matches,
|
// Instant-paint and the merge only reuse a saved brief when this still matches,
|
||||||
// so a boundary change can never briefly resurface content it should now hide.
|
// so a boundary change can never briefly resurface content it should now hide.
|
||||||
/**
 * Build the signature string a saved brief must match before it is reused.
 *
 * The signature combines three inputs: the serialized preferences, the sorted
 * list of dismissed ids, and the home value that is currently leading the
 * landing (empty when no home is set or the global brief is toggled on).
 *
 * @returns {string} `<prefs>|<dismissed ids, comma-joined>|h:<home or empty>`
 */
function briefSig() {
  // The home only counts while the local brief is the one being shown.
  const leadingHome = showGlobalBrief ? '' : homeValue || '';
  const prefsPart = P.param(prefs.data);
  // Sorted so the signature does not depend on dismissal order.
  const dismissedPart = [...dismissed].sort().join(',');
  return `${prefsPart}|${dismissedPart}|h:${leadingHome}`;
}
|
||||||
|
|
||||||
async function loadToday(fresh) {
|
async function loadToday(fresh) {
|
||||||
const q = P.param(prefs.data);
|
const q = P.param(prefs.data);
|
||||||
const ex = Array.from(dismissed).join(',');
|
const ex = Array.from(dismissed).join(',');
|
||||||
let fetched;
|
let fetched;
|
||||||
|
const homeq = homeValue && !showGlobalBrief ? `&home=${encodeURIComponent(homeValue)}` : '';
|
||||||
try {
|
try {
|
||||||
fetched = await getJSON(`/api/brief?limit=7${q ? '&' + q : ''}${ex ? '&exclude=' + ex : ''}`);
|
fetched = await getJSON(`/api/brief?limit=7${homeq}${q ? '&' + q : ''}${ex ? '&exclude=' + ex : ''}`);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
if (brief) return; // already showing a saved brief — a failed background refresh stays invisible
|
if (brief) return; // already showing a saved brief — a failed background refresh stays invisible
|
||||||
throw e; // true first load with nothing painted → let the caller surface the error
|
throw e; // true first load with nothing painted → let the caller surface the error
|
||||||
@@ -295,7 +298,9 @@
|
|||||||
}
|
}
|
||||||
if (key === 'latest') {
|
if (key === 'latest') {
|
||||||
const q = P.param(prefs.data);
|
const q = P.param(prefs.data);
|
||||||
return `/api/feed?sort=latest&limit=${PAGE}&offset=${offset}${q ? '&' + q : ''}${exq}`;
|
// Closer to Home lives on the all-news browse lane (Latest).
|
||||||
|
const homeq = homeValue ? `&home=${encodeURIComponent(homeValue)}` : '';
|
||||||
|
return `/api/feed?sort=latest&limit=${PAGE}&offset=${offset}${homeq}${q ? '&' + q : ''}${exq}`;
|
||||||
}
|
}
|
||||||
if (key === 'following') {
|
if (key === 'following') {
|
||||||
const q = P.param(prefs.data);
|
const q = P.param(prefs.data);
|
||||||
@@ -312,8 +317,7 @@
|
|||||||
return `/api/feed?source_id=${encodeURIComponent(key.slice(7))}&sort=latest&limit=${PAGE}&offset=${offset}${q ? '&' + q : ''}${exq}`;
|
return `/api/feed?source_id=${encodeURIComponent(key.slice(7))}&sort=latest&limit=${PAGE}&offset=${offset}${q ? '&' + q : ''}${exq}`;
|
||||||
}
|
}
|
||||||
const q = P.param(P.merge(prefs.data, viewFilter(key)));
|
const q = P.param(P.merge(prefs.data, viewFilter(key)));
|
||||||
const homeq = homeValue ? `&home=${encodeURIComponent(homeValue)}` : '';
|
return `/api/feed?limit=${PAGE}&offset=${offset}${q ? '&' + q : ''}${exq}`;
|
||||||
return `/api/feed?limit=${PAGE}&offset=${offset}${homeq}${q ? '&' + q : ''}${exq}`;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// All navigation goes through the URL (goto), so browser Back/Forward and the
|
// All navigation goes through the URL (goto), so browser Back/Forward and the
|
||||||
@@ -354,7 +358,7 @@
|
|||||||
feed = items;
|
feed = items;
|
||||||
feedNextOffset = resp.next_offset ?? null;
|
feedNextOffset = resp.next_offset ?? null;
|
||||||
// Home lane pages by the API's world cursor; other lanes by simple length.
|
// Home lane pages by the API's world cursor; other lanes by simple length.
|
||||||
feedDone = (key === 'browse' && homeValue) ? feedNextOffset == null : items.length < PAGE;
|
feedDone = (key === 'latest' && homeValue) ? feedNextOffset == null : items.length < PAGE;
|
||||||
markDisplayed(feed);
|
markDisplayed(feed);
|
||||||
if (key.startsWith('source:') && items[0]) {
|
if (key.startsWith('source:') && items[0]) {
|
||||||
sourceNames = { ...sourceNames, [key.slice(7)]: items[0].source };
|
sourceNames = { ...sourceNames, [key.slice(7)]: items[0].source };
|
||||||
@@ -499,8 +503,15 @@
|
|||||||
// --- Closer to Home: opt-in, localStorage-only, easy to clear ---
|
// --- Closer to Home: opt-in, localStorage-only, easy to clear ---
|
||||||
/**
 * Set (or clear, when `v` is falsy) the reader's home and refresh the lane
 * that depends on it.
 *
 * @param {string} v - home value to store; a falsy value removes the stored home.
 */
function setHome(v) {
  homeValue = v || '';
  // A fresh home choice leads with local again.
  showGlobalBrief = false;

  try {
    if (v) {
      localStorage.setItem('goodnews:home', v);
    } else {
      localStorage.removeItem('goodnews:home');
    }
  } catch {
    /* ignore */
  }

  switch (selected) {
    case 'today':
      // Re-lead the landing with local.
      loadToday(true);
      break;
    case 'latest':
      loadView('latest', true);
      break;
    default:
      break;
  }
}

/**
 * The landing's Local <-> Global toggle (only meaningful with a home set).
 *
 * @param {boolean} global - true to show the global brief, false for local-first.
 */
function setBriefScope(global) {
  showGlobalBrief = global;
  if (selected !== 'today') return;
  loadToday(true);
}
|
||||||
function clearHome() { setHome(''); }
|
function clearHome() { setHome(''); }
|
||||||
function dismissHomePrompt() {
|
function dismissHomePrompt() {
|
||||||
@@ -675,6 +686,32 @@
|
|||||||
{/if}
|
{/if}
|
||||||
{/if}
|
{/if}
|
||||||
{#if brief?.items?.length}
|
{#if brief?.items?.length}
|
||||||
|
{#if homeEditing || (!homeValue && !homePromptDismissed)}
|
||||||
|
<div class="homecard rise">
|
||||||
|
{#if !homeValue}<p class="homecopy">Want your good news closer to home?</p>{/if}
|
||||||
|
<div class="homepick">
|
||||||
|
<select bind:value={pickCountry} aria-label="Country">
|
||||||
|
<option value="">Pick a country…</option>
|
||||||
|
{#each HOME_COUNTRIES as [code, label] (code)}<option value={code}>{label}</option>{/each}
|
||||||
|
</select>
|
||||||
|
{#if pickCountry === 'US'}
|
||||||
|
<select bind:value={pickState} aria-label="State">
|
||||||
|
<option value="">All of the US</option>
|
||||||
|
{#each US_STATES as [code, label] (code)}<option value={code}>{label}</option>{/each}
|
||||||
|
</select>
|
||||||
|
{/if}
|
||||||
|
<button class="hset" onclick={applyHomePick} disabled={!pickCountry}>Show local first</button>
|
||||||
|
{#if homeValue}<button class="linkish" onclick={() => (homeEditing = false)}>Cancel</button>
|
||||||
|
{:else}<button class="linkish" onclick={dismissHomePrompt}>Not now</button>{/if}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{:else if homeValue}
|
||||||
|
<div class="briefscope rise">
|
||||||
|
<button class="bs-btn" class:on={!showGlobalBrief} onclick={() => setBriefScope(false)}>📍 Near you</button>
|
||||||
|
<button class="bs-btn" class:on={showGlobalBrief} onclick={() => setBriefScope(true)}>🌍 Everywhere</button>
|
||||||
|
<button class="linkish bs-change" onclick={openHomeEditor}>Change</button>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
<section class="rise">
|
<section class="rise">
|
||||||
<ArticleCard article={heroArticle} hero onaction={applyAction} onreplace={replaceArticle} ontag={(t) => drill('tag:' + t)} onsource={(id, name) => drill('source:' + id, { id, name })} onview={record} onimageerror={heroImageFailed} />
|
<ArticleCard article={heroArticle} hero onaction={applyAction} onreplace={replaceArticle} ontag={(t) => drill('tag:' + t)} onsource={(id, name) => drill('source:' + id, { id, name })} onview={record} onimageerror={heroImageFailed} />
|
||||||
{#if restArticles.length}
|
{#if restArticles.length}
|
||||||
@@ -715,7 +752,7 @@
|
|||||||
<p class="muted center pad">No highlights yet today — try a calmer filter, or check back soon.</p>
|
<p class="muted center pad">No highlights yet today — try a calmer filter, or check back soon.</p>
|
||||||
{/if}
|
{/if}
|
||||||
{:else if feed.length}
|
{:else if feed.length}
|
||||||
{#if selected === 'browse'}
|
{#if selected === 'latest'}
|
||||||
{#if homeEditing || (!homeValue && !homePromptDismissed)}
|
{#if homeEditing || (!homeValue && !homePromptDismissed)}
|
||||||
<div class="homecard rise">
|
<div class="homecard rise">
|
||||||
<p class="homecopy">Want good news closer to home?</p>
|
<p class="homecopy">Want good news closer to home?</p>
|
||||||
@@ -971,6 +1008,11 @@
|
|||||||
.linkish { background: none; border: none; color: var(--accent-deep); font: inherit; font-size: 0.86rem;
|
.linkish { background: none; border: none; color: var(--accent-deep); font: inherit; font-size: 0.86rem;
|
||||||
cursor: pointer; text-decoration: underline; padding: 0; }
|
cursor: pointer; text-decoration: underline; padding: 0; }
|
||||||
.homebar { font-size: 0.86rem; color: var(--muted); margin: 0 0 16px; }
|
.homebar { font-size: 0.86rem; color: var(--muted); margin: 0 0 16px; }
|
||||||
|
.briefscope { display: flex; gap: 8px; align-items: center; margin: 0 0 16px; }
|
||||||
|
.bs-btn { font: inherit; font-size: 0.88rem; font-weight: 600; padding: 7px 16px; border: 1px solid var(--line);
|
||||||
|
border-radius: 999px; background: var(--bg); color: var(--ink); cursor: pointer; }
|
||||||
|
.bs-btn.on { border-color: var(--accent); background: var(--accent-soft); color: var(--accent-deep); }
|
||||||
|
.bs-change { margin-left: auto; }
|
||||||
.feed-section { grid-column: 1 / -1; margin: 8px 0 2px; font-family: var(--label); font-size: 0.78rem;
|
.feed-section { grid-column: 1 / -1; margin: 8px 0 2px; font-family: var(--label); font-size: 0.78rem;
|
||||||
text-transform: uppercase; letter-spacing: 0.06em; color: var(--muted); }
|
text-transform: uppercase; letter-spacing: 0.06em; color: var(--muted); }
|
||||||
.grid > .feed-section:first-child { margin-top: 0; }
|
.grid > .feed-section:first-child { margin-top: 0; }
|
||||||
|
|||||||
+22
-5
@@ -2186,25 +2186,42 @@ def create_app() -> FastAPI:
|
|||||||
limit: int = Query(10, ge=1, le=50),
|
limit: int = Query(10, ge=1, le=50),
|
||||||
prefs: str | None = Query(None),
|
prefs: str | None = Query(None),
|
||||||
exclude: str = Query("", description="comma-separated article ids the reader has dismissed"),
|
exclude: str = Query("", description="comma-separated article ids the reader has dismissed"),
|
||||||
|
home: str | None = Query(None, max_length=8, description="local-first highlights: 'US' or 'US-NY'"),
|
||||||
) -> BriefResponse:
|
) -> BriefResponse:
|
||||||
# The default highlights are global (date-keyed, no session) → edge-cacheable
|
# The default highlights are global (date-keyed, no session) → edge-cacheable
|
||||||
# so a new visitor's "Gathering the good news…" resolves from their POP, not
|
# so a new visitor's "Gathering the good news…" resolves from their POP, not
|
||||||
# a pull to the residential origin. Personal filters stay private.
|
# a pull to the residential origin. Personal filters (incl. a home) stay private.
|
||||||
shareable = not prefs and not exclude.strip()
|
home_country = home_state = None
|
||||||
|
if home:
|
||||||
|
parts = home.upper().split("-", 1)
|
||||||
|
home_country = parts[0][:2] or None
|
||||||
|
if home_country == "US" and len(parts) > 1:
|
||||||
|
home_state = parts[1][:2] or None
|
||||||
|
shareable = not prefs and not exclude.strip() and not home_country
|
||||||
response.headers["Cache-Control"] = _EDGE_FEED if shareable else _PRIVATE
|
response.headers["Cache-Control"] = _EDGE_FEED if shareable else _PRIVATE
|
||||||
fp = prefs_from_json(prefs)
|
fp = prefs_from_json(prefs)
|
||||||
now = datetime.now(timezone.utc)
|
now = datetime.now(timezone.utc)
|
||||||
excl = {int(x) for x in exclude.split(",") if x.strip().lstrip("-").isdigit()}
|
excl = {int(x) for x in exclude.split(",") if x.strip().lstrip("-").isdigit()}
|
||||||
with get_conn() as conn:
|
with get_conn() as conn:
|
||||||
data = queries.brief(conn, brief_date=date, limit=limit)
|
if home_country:
|
||||||
|
# The reader's home leads the landing: local good news first, blended out
|
||||||
|
# to country/world so it's always a full, sexy set. Over-fetch to survive
|
||||||
|
# dismissal/boundary filtering, then cap to limit.
|
||||||
|
meta = queries.brief(conn, brief_date=date, limit=1)
|
||||||
|
data = {"brief_date": meta["brief_date"], "title": "Close to home", "created_at": meta.get("created_at")}
|
||||||
|
pool = queries.home_brief(conn, home_country, home_state, limit=limit + 12)
|
||||||
|
else:
|
||||||
|
data = queries.brief(conn, brief_date=date, limit=limit)
|
||||||
|
pool = data["items"]
|
||||||
# Drop dismissed (replaced-away) items and anything the reader's
|
# Drop dismissed (replaced-away) items and anything the reader's
|
||||||
# boundaries hide; avoid-terms take precedence over curation.
|
# boundaries hide; avoid-terms take precedence over curation.
|
||||||
items = [a for a in data["items"] if a["id"] not in excl]
|
items = [a for a in pool if a["id"] not in excl]
|
||||||
if not fp.is_empty():
|
if not fp.is_empty():
|
||||||
items = filter_articles(items, fp, now)
|
items = filter_articles(items, fp, now)
|
||||||
|
items = items[:limit] # home mode over-fetches to survive filtering; cap here
|
||||||
# Keep the highlights full: if a boundary or a dismissal removed a
|
# Keep the highlights full: if a boundary or a dismissal removed a
|
||||||
# story, top up with other readable, boundary-respecting good news
|
# story, top up with other readable, boundary-respecting good news
|
||||||
# rather than show fewer.
|
# rather than show fewer. (Home mode's home_brief already blends to world.)
|
||||||
if len(items) < limit:
|
if len(items) < limit:
|
||||||
have = {a["id"] for a in items} | excl
|
have = {a["id"] for a in items} | excl
|
||||||
pool = queries.feed(
|
pool = queries.feed(
|
||||||
|
|||||||
+8
-2
@@ -213,10 +213,16 @@ def tag_articles(conn: sqlite3.Connection, client: LocalModelClient, limit: int
|
|||||||
for r in rows:
|
for r in rows:
|
||||||
try:
|
try:
|
||||||
store_geo(conn, r["id"], classify_geo(client, r))
|
store_geo(conn, r["id"], classify_geo(client, r))
|
||||||
|
# Keep live auth/admin writes healthy while the scheduled cycle runs.
|
||||||
|
# Geo classification calls the LLM per article; if we batch commits, the
|
||||||
|
# first stored article opens a write transaction that can stay open while
|
||||||
|
# the next several LLM calls run. That starves login/session writes long
|
||||||
|
# enough to trip SQLite's busy timeout. Commit each successful article so
|
||||||
|
# the writer lock is held for milliseconds, not minutes.
|
||||||
|
conn.commit()
|
||||||
tagged += 1
|
tagged += 1
|
||||||
except Exception: # noqa: BLE001 — non-fatal, like other cycle steps
|
except Exception: # noqa: BLE001 — non-fatal, like other cycle steps
|
||||||
|
conn.rollback()
|
||||||
errors += 1
|
errors += 1
|
||||||
if (tagged + errors) % 25 == 0:
|
|
||||||
conn.commit()
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
return {"candidates": len(rows), "tagged": tagged, "errors": errors}
|
return {"candidates": len(rows), "tagged": tagged, "errors": errors}
|
||||||
|
|||||||
@@ -259,6 +259,51 @@ def reindex_search(conn: sqlite3.Connection) -> int:
|
|||||||
return conn.execute("SELECT COUNT(*) FROM article_search").fetchone()[0]
|
return conn.execute("SELECT COUNT(*) FROM article_search").fetchone()[0]
|
||||||
|
|
||||||
|
|
||||||
|
def home_brief(conn: sqlite3.Connection, home_country: str, home_state: str | None = None,
               limit: int = 7, window_days: int = 3) -> list[dict]:
    """Return local-first landing highlights for a reader's home.

    Every accepted, non-duplicate article from a visible source that was
    discovered within the last ``window_days`` days is assigned a tier:

    * 0 / "near"    -- geo confidence is high or medium AND the article has a
      place in the home state (when ``home_state`` is given) or in the home
      country (when it is not);
    * 1 / "country" -- only when ``home_state`` is given: any place in the
      home country;
    * 2 / "world"   -- everything else.

    Rows are ordered by tier, then summarized-before-unsummarized, then
    ``rank_score`` descending, then most recent first, and capped at ``limit``.

    Returns a list of dicts: the article columns plus ``summary``,
    ``has_summary`` and ``__section`` ("near" | "country" | "world").
    """
    if home_state:
        near_sql = ("(g.confidence IN ('high','medium') AND EXISTS (SELECT 1 FROM article_places p "
                    "WHERE p.article_id = a.id AND p.country_code = ? AND p.state_code = ?))")
        country_sql = "EXISTS (SELECT 1 FROM article_places p WHERE p.article_id = a.id AND p.country_code = ?)"
        tier_sql = f"CASE WHEN {near_sql} THEN 0 WHEN {country_sql} THEN 1 ELSE 2 END"
        tier_args = [home_country, home_state, home_country]
    else:
        # Without a state the whole country is "near", so there is no middle tier.
        near_sql = ("(g.confidence IN ('high','medium') AND EXISTS (SELECT 1 FROM article_places p "
                    "WHERE p.article_id = a.id AND p.country_code = ?))")
        tier_sql = f"CASE WHEN {near_sql} THEN 0 ELSE 2 END"
        tier_args = [home_country]

    query = f"""
        SELECT {_ARTICLE_COLUMNS},
               sm.summary AS summary,
               {tier_sql} AS section_rank,
               (sm.summary IS NOT NULL) AS has_summary
        FROM articles a
        JOIN sources src ON src.id = a.source_id
        JOIN article_scores s ON s.article_id = a.id
        LEFT JOIN article_geo g ON g.article_id = a.id
        LEFT JOIN article_summaries sm ON sm.article_id = a.id
        WHERE a.duplicate_of IS NULL AND src.content_visible = 1 AND s.accepted = 1
          AND a.discovered_at >= datetime('now', ?)
        ORDER BY section_rank ASC, has_summary DESC, rank_score DESC,
                 COALESCE(a.published_at, a.discovered_at) DESC
        LIMIT ?
        """
    # Placeholder order: tier CASE first (it sits in the SELECT list), then window, then cap.
    bound = [*tier_args, f"-{window_days} days", limit]
    fetched = conn.execute(query, bound).fetchall()

    section_names = {0: "near", 1: "country", 2: "world"}

    def _as_item(row) -> dict:
        # Swap the numeric tier for its section label; unknown tiers read as "world".
        item = dict(row)
        item["__section"] = section_names.get(item.pop("section_rank", 2), "world")
        return item

    return [_as_item(row) for row in fetched]
|
||||||
|
|
||||||
|
|
||||||
def brief(conn: sqlite3.Connection, brief_date: str | None = None, limit: int = 10) -> dict:
|
def brief(conn: sqlite3.Connection, brief_date: str | None = None, limit: int = 10) -> dict:
|
||||||
"""Return a stored daily brief (latest if no date) with its ranked items."""
|
"""Return a stored daily brief (latest if no date) with its ranked items."""
|
||||||
target_date = brief_date or _latest_brief_date(conn)
|
target_date = brief_date or _latest_brief_date(conn)
|
||||||
|
|||||||
@@ -0,0 +1,141 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""PROTOTYPE substance audit (not production).
|
||||||
|
|
||||||
|
The classifier scores emotional TONE (cortisol/ragebait/constructive) but not
|
||||||
|
SUBSTANCE, so pleasant-but-empty filler (evergreen how-tos, B2B SEO, product
|
||||||
|
listicles, recipes) slips through. Before adding a `not_newsworthy` rejection
|
||||||
|
dimension to the live classifier, measure whether the model can reliably tell
|
||||||
|
genuine news from filler against Codex's rubric, and what the reject rate would be.
|
||||||
|
|
||||||
|
Read-only over a sample; writes a scratch JSON + prints a report. Does NOT change
|
||||||
|
the classifier or reject anything.
|
||||||
|
|
||||||
|
.venv/bin/python scripts/substance_audit.py --limit 250 --base-url http://127.0.0.1:8080/v1
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
from collections import Counter
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from goodnews.cli import _default_db
|
||||||
|
from goodnews.db import connect
|
||||||
|
from goodnews.llm import LocalModelClient, parse_classifier_json
|
||||||
|
|
||||||
|
# Codex's rubric. KEEP = real news; the rest are "positive but not news" filler.
|
||||||
|
KINDS = ("news_event", "finding", "announcement", "feature_human_interest",
|
||||||
|
"evergreen_advice", "marketing", "product_listicle", "opinion", "other")
|
||||||
|
FILLER = {"evergreen_advice", "marketing", "product_listicle"}
|
||||||
|
|
||||||
|
SYSTEM = (
|
||||||
|
"You judge whether a story is genuine NEWS or content-mill filler for a calm "
|
||||||
|
"good-news site. GOOD (keep): a specific event or achievement, a recent "
|
||||||
|
"development, a research finding, a credible announcement, or human/community/"
|
||||||
|
"science/environmental uplift tied to something that actually happened. FILLER "
|
||||||
|
"(not news): evergreen how-to/advice, marketing or B2B service explainers, generic "
|
||||||
|
"'why X matters' SEO pieces, product round-ups/listicles, recipes. Judge SUBSTANCE, "
|
||||||
|
"not tone — pleasant and non-negative is NOT the same as newsworthy. When genuinely "
|
||||||
|
"unsure, lean KEEP (don't reject real good news). Reply with ONLY a JSON object."
|
||||||
|
)
|
||||||
|
INSTRUCT = (
|
||||||
|
"Return JSON exactly like:\n"
|
||||||
|
'{"kind": "<news_event|finding|announcement|feature_human_interest|evergreen_advice|'
|
||||||
|
'marketing|product_listicle|opinion|other>", "newsworthy": <true|false>, '
|
||||||
|
'"confidence": "<high|medium|low>", "rationale": "<one short clause>"}'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch(conn, limit):
    """Return up to ``limit`` accepted, non-duplicate articles, newest-discovered first.

    Each row holds id, title, description, the source name (as ``source``) and
    the summary fields (summary, what_happened, why_matters); the summary
    fields are NULL for articles without a summary row (LEFT JOIN).
    """
    sample_sql = """SELECT a.id, a.title, a.description, src.name AS source,
                           sm.summary, sm.what_happened, sm.why_matters
                    FROM articles a
                    JOIN sources src ON src.id = a.source_id
                    JOIN article_scores s ON s.article_id = a.id
                    LEFT JOIN article_summaries sm ON sm.article_id = a.id
                    WHERE s.accepted = 1 AND a.duplicate_of IS NULL
                    ORDER BY a.discovered_at DESC LIMIT ?"""
    cursor = conn.execute(sample_sql, (limit,))
    return cursor.fetchall()
|
||||||
|
|
||||||
|
|
||||||
|
def text(r):
    """Render an article row as the labelled plain-text prompt shown to the model.

    Always emits SOURCE and TITLE; SUMMARY, WHAT HAPPENED, WHY IT MATTERS and
    BLURB follow in that order, each only when its field is truthy.
    """
    optional_fields = (
        ("SUMMARY", "summary"),
        ("WHAT HAPPENED", "what_happened"),
        ("WHY IT MATTERS", "why_matters"),
        ("BLURB", "description"),
    )
    header = [f"SOURCE: {r['source']}", f"TITLE: {r['title']}"]
    details = [f"{label}: {r[field]}" for label, field in optional_fields if r[field]]
    return "\n".join(header + details)
|
||||||
|
|
||||||
|
|
||||||
|
def judge(client, r):
    """Ask the model to classify one article row and normalize its reply.

    Sends the SYSTEM rubric, then the rendered article (``text(r)``) followed
    by the INSTRUCT reply format, through ``client.chat_text`` and parses the
    reply with ``parse_classifier_json``.

    Returns a dict with:
      kind       -- the model's kind if it is one of KINDS, else "other"
      newsworthy -- bool; see the coercion note below
      confidence -- "high" | "medium" | "low" ("low" for any other value)
      rationale  -- the rationale as a string cut to 200 chars, or None if empty

    Exceptions raised by the client or the parser are not caught here.
    """
    data = parse_classifier_json(client.chat_text([
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": text(r) + "\n\n" + INSTRUCT},
    ]))

    kind = data.get("kind")
    if kind not in KINDS:
        kind = "other"

    # Models sometimes quote the boolean ("false") or send null. A plain bool()
    # would read "false" as True and null as False, so coerce explicitly and
    # treat a missing/unknown verdict as KEEP, matching the rubric's
    # "when genuinely unsure, lean KEEP".
    verdict = data.get("newsworthy")
    if isinstance(verdict, str):
        newsworthy = verdict.strip().lower() not in ("false", "no", "n", "0")
    elif verdict is None:
        newsworthy = True
    else:
        newsworthy = bool(verdict)

    confidence = data.get("confidence")
    if confidence not in ("high", "medium", "low"):
        confidence = "low"

    return {"kind": kind,
            "newsworthy": newsworthy,
            "confidence": confidence,
            "rationale": (str(data.get("rationale") or "")[:200]) or None}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the substance audit over a recent sample and print a report.

    Command-line options: --db, --limit, --out, --base-url, --model.
    Verdicts are cached in the --out JSON file keyed by article id, so a
    re-run only judges articles that have no usable verdict yet. Entries that
    were recorded as errors are retried rather than kept forever.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--db", default=None)
    ap.add_argument("--limit", type=int, default=250)
    ap.add_argument("--out", default="data/substance_audit.json")
    ap.add_argument("--base-url", default=None)
    ap.add_argument("--model", default=None)
    args = ap.parse_args()

    conn = connect(args.db or str(_default_db()))
    try:
        client = LocalModelClient.from_env()
        if args.base_url:
            client.base_url = args.base_url.rstrip("/")
        if args.model:
            client.model = args.model

        out = Path(args.out)
        # Create the output directory up front so a missing folder fails (or is
        # fixed) before any model calls are spent, not at the first checkpoint.
        out.parent.mkdir(parents=True, exist_ok=True)
        res = json.loads(out.read_text()) if out.exists() else {}
        rows = fetch(conn, args.limit)
        by_id = {str(r["id"]): r for r in rows}
        done = 0
        for r in rows:
            rid = str(r["id"])
            cached = res.get(rid)
            # Skip only real verdicts; an earlier error placeholder gets another try.
            if cached is not None and not cached.get("error"):
                continue
            try:
                res[rid] = judge(client, r)
            except Exception as exc:  # noqa: BLE001 — prototype
                res[rid] = {"kind": "other", "newsworthy": True, "confidence": "low",
                            "rationale": f"ERR {type(exc).__name__}", "error": True}
            done += 1
            if done % 25 == 0:
                # Checkpoint so an interrupted run keeps its progress.
                out.write_text(json.dumps(res, indent=1))
                print(f" ...{done}")
        out.write_text(json.dumps(res, indent=1))
    finally:
        # Release the database even when the sample or a checkpoint write fails.
        conn.close()

    n = len(res) or 1  # avoid dividing by zero on an empty result set
    kinds = Counter(v["kind"] for v in res.values())
    filler = [rid for rid, v in res.items() if (not v["newsworthy"]) or v["kind"] in FILLER]
    print(f"\n===== SUBSTANCE AUDIT (n={len(res)}) =====")
    print("Kind:")
    for k in KINDS:
        print(f" {k:<24} {kinds.get(k,0):>4} {100*kinds.get(k,0)/n:.0f}%")
    print(f"\nWould-reject as filler: {len(filler)} ({100*len(filler)/n:.0f}%)")
    print("Confidence:", dict(Counter(v["confidence"] for v in res.values())))
    # by source — which feeds are filler-heavy (only ids present in this sample)
    src = Counter(by_id[rid]["source"] for rid in filler if rid in by_id)
    print("\nFiller by source (top 12):")
    for s, c in src.most_common(12):
        print(f" {c:>3} {s}")
    print("\n--- sample WOULD-REJECT (eyeball for false positives) ---")
    for rid in filler[:18]:
        v = res[rid]
        r = by_id.get(rid)
        if r:
            print(f" [{r['source'][:16]:16}] {v['kind']:<18} {v['confidence']:<6} | {r['title'][:52]}")
            print(f" {v['rationale'] or ''}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -81,6 +81,21 @@ def test_country_only_home_gates_near_on_confidence(app_db):
|
|||||||
assert a5["section"] == "world" # low-conf home-country -> world, not vanished
|
assert a5["section"] == "world" # low-conf home-country -> world, not vanished
|
||||||
|
|
||||||
|
|
||||||
|
def test_home_brief_leads_with_local(app_db):
    """/api/brief?home=US-NY is titled "Close to home" and leads with local stories.

    The "near" section must be exactly the high-confidence NY stories (ids 1-4);
    the low-confidence NY story (#5) must not be elevated into it. When both
    near and world items are present, every near item precedes every world item.
    """
    payload = TestClient(app_db).get("/api/brief?home=US-NY&limit=10").json()
    assert payload["title"] == "Close to home"

    items = payload["items"]
    near_ids = {item["id"] for item in items if item["section"] == "near"}
    assert near_ids == {1, 2, 3, 4}  # the high-conf NY stories

    # Local leads: the last near item sits before the first world item.
    sections = [item["section"] for item in items]
    near_positions = [pos for pos, name in enumerate(sections) if name == "near"]
    world_positions = [pos for pos, name in enumerate(sections) if name == "world"]
    if near_positions and world_positions:
        assert near_positions[-1] < world_positions[0]
|
||||||
|
|
||||||
|
|
||||||
def test_no_home_is_unchanged_and_unsectioned(app_db):
|
def test_no_home_is_unchanged_and_unsectioned(app_db):
|
||||||
r = TestClient(app_db).get("/api/feed?limit=50").json()
|
r = TestClient(app_db).get("/api/feed?limit=50").json()
|
||||||
assert all(it["section"] is None for it in r["items"])
|
assert all(it["section"] is None for it in r["items"])
|
||||||
|
|||||||
Reference in New Issue
Block a user