"""Build and read daily briefs stored in a SQLite database."""

from __future__ import annotations

import sqlite3
from datetime import date


def build_daily_brief(
    conn: sqlite3.Connection,
    brief_date: str | None = None,
    limit: int = 5,
    replace: bool = False,
    window_days: int = 3,
) -> int:
    """Create (or rebuild) the brief for one date and return its id.

    Rows are accessed by column name, so ``conn`` must use a row factory
    that supports that (e.g. ``sqlite3.Row``).

    Args:
        conn: Open SQLite connection.
        brief_date: ISO date of the brief; defaults to today.
        limit: Maximum number of articles to put in the brief.
        replace: When a brief already exists for the date, delete it and
            build a fresh one instead of returning the existing id.
        window_days: How many days back candidate articles may be dated.

    Returns:
        The id of the existing or newly inserted ``daily_briefs`` row.
    """
    target_date = brief_date or date.today().isoformat()
    existing = conn.execute(
        "SELECT id FROM daily_briefs WHERE brief_date = ?", (target_date,)
    ).fetchone()
    if existing and not replace:
        return int(existing["id"])

    # `with conn` commits on success and rolls back if anything raises, so a
    # failed rebuild cannot leave a half-written brief in the open transaction.
    with conn:
        if existing:
            # Remove the old items explicitly: without an enforced
            # ON DELETE CASCADE they would be orphaned, and because SQLite may
            # reuse the freed rowid they could attach to the rebuilt brief.
            conn.execute(
                "DELETE FROM daily_brief_items WHERE brief_id = ?",
                (existing["id"],),
            )
            conn.execute("DELETE FROM daily_briefs WHERE id = ?", (existing["id"],))

        brief_id = conn.execute(
            "INSERT INTO daily_briefs (brief_date, title) VALUES (?, ?)",
            (target_date, f"Five Good Things Today - {target_date}"),
        ).lastrowid

        rows = _candidate_articles(conn, target_date, window_days)
        selected = _select_diverse(rows, limit)
        conn.executemany(
            """
            INSERT INTO daily_brief_items (brief_id, article_id, rank, selection_reason)
            VALUES (?, ?, ?, ?)
            """,
            [
                (brief_id, row["id"], rank, _selection_reason(row))
                for rank, row in enumerate(selected, start=1)
            ],
        )
    return int(brief_id)


def show_brief(
    conn: sqlite3.Connection, brief_date: str | None = None, limit: int = 10
) -> list[sqlite3.Row]:
    """Return the items of a brief, ordered by rank.

    Args:
        conn: Open SQLite connection.
        brief_date: Date of the brief to show; defaults to the most recent
            brief date in ``daily_briefs``.
        limit: Maximum number of rows to return.

    Returns:
        Joined brief/article/source/score rows, or an empty list when no
        brief exists.
    """
    target_date = brief_date or _latest_brief_date(conn)
    if not target_date:
        return []
    return conn.execute(
        """
        SELECT
            b.brief_date,
            bi.rank,
            bi.selection_reason,
            a.title,
            a.description,
            a.canonical_url,
            a.published_at,
            src.name AS source_name,
            src.default_category,
            s.constructive_score,
            s.cortisol_score,
            s.ragebait_score,
            s.agency_score,
            s.human_benefit_score,
            s.reason_code,
            s.reason_text,
            s.model_name
        FROM daily_briefs b
        JOIN daily_brief_items bi ON bi.brief_id = b.id
        JOIN articles a ON a.id = bi.article_id
        JOIN sources src ON src.id = a.source_id
        LEFT JOIN article_scores s ON s.article_id = a.id
        WHERE b.brief_date = ?
        ORDER BY bi.rank
        LIMIT ?
        """,
        (target_date, limit),
    ).fetchall()


def _candidate_articles(
    conn: sqlite3.Connection, target_date: str, window_days: int = 3
) -> list[sqlite3.Row]:
    """Brief candidates, sparse-day-proof.

    Prefers articles dated on target_date, but widens to the preceding
    `window_days` so the brief still fills on slow news days. Anything
    already featured in a brief within the last 7 days (other than this
    same date, which is being rebuilt) is excluded so backfilled stories
    cannot linger across consecutive days.

    Returns at most 50 accepted articles, same-day ones first, then by the
    combined score expression in the ORDER BY, then newest first.
    """
    return conn.execute(
        """
        SELECT
            a.id,
            a.title,
            a.description,
            a.canonical_url,
            a.published_at,
            a.discovered_at,
            src.name AS source_name,
            src.default_category,
            src.trust_score,
            s.constructive_score,
            s.cortisol_score,
            s.ragebait_score,
            s.agency_score,
            s.human_benefit_score,
            s.novelty_score,
            s.pr_risk_score,
            s.reason_code,
            s.reason_text,
            s.model_name,
            CASE
                WHEN date(COALESCE(a.published_at, a.discovered_at)) = date(?) THEN 1
                ELSE 0
            END AS is_today
        FROM articles a
        JOIN sources src ON src.id = a.source_id
        JOIN article_scores s ON s.article_id = a.id
        WHERE s.accepted = 1
          AND date(COALESCE(a.published_at, a.discovered_at)) <= date(?)
          AND date(COALESCE(a.published_at, a.discovered_at)) > date(?, '-' || ? || ' days')
          AND a.id NOT IN (
              SELECT bi.article_id
              FROM daily_brief_items bi
              JOIN daily_briefs b ON b.id = bi.brief_id
              WHERE b.brief_date <> ?
                AND b.brief_date <= date(?)
                AND b.brief_date > date(?, '-7 days')
          )
        ORDER BY
            is_today DESC,
            (s.constructive_score + s.agency_score + s.human_benefit_score + src.trust_score
             - s.cortisol_score - s.ragebait_score - s.pr_risk_score) DESC,
            COALESCE(a.published_at, a.discovered_at) DESC
        LIMIT 50
        """,
        # One value per `?`, in order of appearance in the statement.
        (
            target_date,
            target_date,
            target_date,
            window_days,
            target_date,
            target_date,
            target_date,
        ),
    ).fetchall()


def _select_diverse(rows: list[sqlite3.Row], limit: int) -> list[sqlite3.Row]:
    """Pick up to `limit` rows, favouring distinct sources and categories.

    `rows` is expected in priority order (best first). Each row must expose
    the keys ``id``, ``source_name`` and ``default_category``.

    Returns an empty list when `limit` is not positive or `rows` is empty.
    """
    if limit <= 0 or not rows:
        return []

    # Only bother de-duplicating sources when there is a surplus to choose from.
    has_surplus = len(rows) > limit
    selected: list[sqlite3.Row] = []
    selected_ids = set()
    seen_sources = set()
    seen_categories = set()

    def take(row: sqlite3.Row) -> None:
        selected.append(row)
        selected_ids.add(row["id"])
        seen_sources.add(row["source_name"])
        seen_categories.add(row["default_category"])

    # Pass 1: best rows first, at most one per source while there is a surplus.
    for row in rows:
        if len(selected) >= limit:
            break
        if has_surplus and row["source_name"] in seen_sources:
            continue
        take(row)

    # Pass 2: backfill with the rows skipped above if the brief is still short.
    for row in rows:
        if len(selected) >= limit:
            break
        if row["id"] not in selected_ids:
            take(row)

    # Pass 3: if everything chosen shares one category, trade the lowest-ranked
    # pick for the best remaining row from another category. Needs at least two
    # slots, otherwise the swap would just discard the top-ranked pick.
    if has_surplus and len(selected) > 1 and len(seen_categories) < 2:
        for row in rows:
            if row["id"] in selected_ids:
                continue
            if row["default_category"] not in seen_categories:
                selected[-1] = row
                break

    return selected


def _selection_reason(row: sqlite3.Row) -> str:
    """Format the reason code and key scores of `row` as a one-line summary."""
    return (
        f"{row['reason_code']}; constructive={row['constructive_score']}, "
        f"agency={row['agency_score']}, human_benefit={row['human_benefit_score']}, "
        f"cortisol={row['cortisol_score']}, source={row['source_name']}"
    )


def _latest_brief_date(conn: sqlite3.Connection) -> str | None:
    """Return the greatest `brief_date` in `daily_briefs`, or None if empty."""
    row = conn.execute(
        "SELECT brief_date FROM daily_briefs ORDER BY brief_date DESC LIMIT 1"
    ).fetchone()
    return row["brief_date"] if row else None