Files
upbeatBytes/goodnews/briefs.py
T
thejayman77 068073423f Initial commit: goodNews constructive-news ingestion prototype
Local-first RSS/Atom ingestion pipeline with metadata-only storage,
heuristic + local-LLM scoring, and daily brief builder.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 00:48:26 +00:00

168 lines
5.2 KiB
Python

from __future__ import annotations
import sqlite3
from datetime import date
def build_daily_brief(
    conn: sqlite3.Connection,
    brief_date: str | None = None,
    limit: int = 5,
    replace: bool = False,
) -> int:
    """Build the daily brief for one date and return its ``daily_briefs.id``.

    If a brief already exists for the date it is returned untouched unless
    ``replace`` is true, in which case the old brief *and its items* are
    deleted and a new brief is built from the current candidate articles.

    Args:
        conn: Open SQLite connection. Rows are read by column name, so the
            connection must use ``sqlite3.Row`` (or another row factory that
            supports ``row["column"]``).
        brief_date: Date of the brief, in the same format as
            ``date.today().isoformat()``. Defaults to today's local date.
        limit: Maximum number of articles placed in the brief. Zero or a
            negative value produces a brief without items.
        replace: Rebuild the brief when one already exists for the date.

    Returns:
        The id of the existing or newly inserted brief.
    """
    target_date = brief_date or date.today().isoformat()
    existing = conn.execute(
        "SELECT id FROM daily_briefs WHERE brief_date = ?", (target_date,)
    ).fetchone()
    if existing and not replace:
        return int(existing["id"])

    insert_item_sql = """
INSERT INTO daily_brief_items (brief_id, article_id, rank, selection_reason)
VALUES (?, ?, ?, ?)
"""

    # The connection context manager commits when the body succeeds and rolls
    # back when it raises, so a failure cannot leave a half-built brief behind.
    with conn:
        if existing:
            # Remove the old items explicitly instead of relying on
            # ON DELETE CASCADE (which needs PRAGMA foreign_keys = ON).
            # SQLite may hand the freed rowid to the new brief, and stale
            # items would then show up inside it.
            conn.execute(
                "DELETE FROM daily_brief_items WHERE brief_id = ?",
                (existing["id"],),
            )
            conn.execute("DELETE FROM daily_briefs WHERE id = ?", (existing["id"],))

        # NOTE(review): the title says "Five" even when ``limit`` is not 5.
        brief_id = conn.execute(
            "INSERT INTO daily_briefs (brief_date, title) VALUES (?, ?)",
            (target_date, f"Five Good Things Today - {target_date}"),
        ).lastrowid

        rows = _candidate_articles(conn, target_date)
        # Nothing can be selected for a non-positive limit; skip the selector.
        selected = _select_diverse(rows, limit) if limit > 0 else []
        conn.executemany(
            insert_item_sql,
            [
                (brief_id, row["id"], rank, _selection_reason(row))
                for rank, row in enumerate(selected, start=1)
            ],
        )
    return int(brief_id)
def show_brief(conn: sqlite3.Connection, brief_date: str | None = None, limit: int = 10) -> list[sqlite3.Row]:
    """Return the items of one daily brief, ordered by rank.

    Each result row combines the brief item (rank, selection reason) with the
    article, its source and its scores. Scores come from a LEFT JOIN, so the
    score columns are NULL for an article without an ``article_scores`` row.

    Args:
        conn: Open SQLite connection.
        brief_date: Date of the brief to show. When empty or ``None`` the most
            recent brief date found by ``_latest_brief_date`` is used.
        limit: Maximum number of items to return.

    Returns:
        The matching rows, or an empty list when no brief date is available.
    """
    wanted_date = brief_date if brief_date else _latest_brief_date(conn)
    if wanted_date:
        query = """
SELECT
b.brief_date,
bi.rank,
bi.selection_reason,
a.title,
a.description,
a.canonical_url,
a.published_at,
src.name AS source_name,
src.default_category,
s.constructive_score,
s.cortisol_score,
s.ragebait_score,
s.agency_score,
s.human_benefit_score,
s.reason_code,
s.reason_text,
s.model_name
FROM daily_briefs b
JOIN daily_brief_items bi ON bi.brief_id = b.id
JOIN articles a ON a.id = bi.article_id
JOIN sources src ON src.id = a.source_id
LEFT JOIN article_scores s ON s.article_id = a.id
WHERE b.brief_date = ?
ORDER BY bi.rank
LIMIT ?
"""
        cursor = conn.execute(query, (wanted_date, limit))
        return cursor.fetchall()
    # No explicit date and no stored brief: nothing to show.
    return []
def _candidate_articles(conn: sqlite3.Connection, target_date: str) -> list[sqlite3.Row]:
    """Fetch the ranked pool of accepted articles for one calendar date.

    An article qualifies when its score row has ``accepted = 1`` and the date
    part of ``published_at`` (or of ``discovered_at`` when ``published_at`` is
    NULL) equals the date part of ``target_date``.

    Ordering: highest composite score first, where the composite adds the
    constructive, agency, human-benefit and source-trust scores and subtracts
    the cortisol, ragebait and PR-risk scores; ties go to the newer article.
    At most 50 rows are returned.

    Args:
        conn: Open SQLite connection.
        target_date: Date to match, passed to SQLite's ``date()`` function.

    Returns:
        Candidate rows with article, source and score columns.
    """
    query = """
SELECT
a.id,
a.title,
a.description,
a.canonical_url,
a.published_at,
a.discovered_at,
src.name AS source_name,
src.default_category,
src.trust_score,
s.constructive_score,
s.cortisol_score,
s.ragebait_score,
s.agency_score,
s.human_benefit_score,
s.novelty_score,
s.pr_risk_score,
s.reason_code,
s.reason_text,
s.model_name
FROM articles a
JOIN sources src ON src.id = a.source_id
JOIN article_scores s ON s.article_id = a.id
WHERE s.accepted = 1
AND date(COALESCE(a.published_at, a.discovered_at)) = date(?)
ORDER BY
(s.constructive_score + s.agency_score + s.human_benefit_score + src.trust_score
- s.cortisol_score - s.ragebait_score - s.pr_risk_score) DESC,
COALESCE(a.published_at, a.discovered_at) DESC
LIMIT 50
"""
    params = (target_date,)
    cursor = conn.execute(query, params)
    return cursor.fetchall()
def _select_diverse(rows: list[sqlite3.Row], limit: int) -> list[sqlite3.Row]:
    """Pick up to ``limit`` rows, preferring distinct sources and categories.

    ``rows`` must already be in ranking order (best first). Selection runs in
    three passes:

    1. Take rows in order, skipping a row whose source was already taken, but
       only when there are more candidates than ``limit`` (otherwise every
       row is needed anyway).
    2. If that left free slots, fill them in order with the skipped rows.
    3. If every selected row shares one category and spare candidates exist,
       replace the lowest-ranked pick with the best unselected row from a
       different category.

    Args:
        rows: Candidate rows exposing ``id``, ``source_name`` and
            ``default_category`` by key.
        limit: Maximum number of rows to return; zero or less yields ``[]``.

    Returns:
        The selected rows, at most ``limit`` of them.
    """
    if limit <= 0 or not rows:
        return []

    has_surplus = len(rows) > limit
    selected = []
    selected_ids = set()
    seen_sources = set()
    seen_categories = set()

    # Pass 1: one article per source while there are candidates to spare.
    for row in rows:
        if len(selected) >= limit:
            break
        if has_surplus and row["source_name"] in seen_sources:
            continue
        selected.append(row)
        selected_ids.add(row["id"])
        seen_sources.add(row["source_name"])
        seen_categories.add(row["default_category"])

    # Pass 2: back-fill free slots with rows skipped for their source.
    if len(selected) < limit:
        for row in rows:
            if len(selected) >= limit:
                break
            if row["id"] in selected_ids:
                continue
            selected.append(row)
            selected_ids.add(row["id"])
            # Keep the bookkeeping current so pass 3 sees the real category mix.
            seen_sources.add(row["source_name"])
            seen_categories.add(row["default_category"])

    # Pass 3: category swap. Needs at least two picks, since a single item
    # cannot span two categories and swapping it would only demote the top
    # article.
    if has_surplus and len(selected) >= 2 and len(seen_categories) < 2:
        for row in rows:
            if row["id"] in selected_ids:
                continue
            if row["default_category"] not in seen_categories:
                selected[-1] = row
                break
    return selected
def _selection_reason(row: sqlite3.Row) -> str:
    """Format a one-line explanation of why an article was selected.

    The text starts with the row's ``reason_code`` and then lists the
    constructive, agency, human-benefit and cortisol scores and the source
    name as ``key=value`` pairs.
    """
    segments = [
        f"{row['reason_code']}; constructive={row['constructive_score']}, ",
        f"agency={row['agency_score']}, human_benefit={row['human_benefit_score']}, ",
        f"cortisol={row['cortisol_score']}, source={row['source_name']}",
    ]
    return "".join(segments)
def _latest_brief_date(conn: sqlite3.Connection) -> str | None:
    """Return the greatest ``brief_date`` stored in ``daily_briefs``.

    Returns ``None`` when the table holds no briefs. The row is read by
    column name, so the connection must use a mapping-style row factory such
    as ``sqlite3.Row``.
    """
    newest = conn.execute("SELECT brief_date FROM daily_briefs ORDER BY brief_date DESC LIMIT 1").fetchone()
    if not newest:
        return None
    return newest["brief_date"]