from __future__ import annotations import sqlite3 from .localtime import local_today from .paywall import is_paywalled, is_paywalled_for_source from .queries import paywalled_source_ids def build_daily_brief( conn: sqlite3.Connection, brief_date: str | None = None, limit: int = 7, replace: bool = False, window_days: int = 3, ) -> int: target_date = brief_date or local_today() # Compose the selection first so we can tell whether anything actually changed. # A calm daily brief never hands the reader a locked door: paywalled-source # candidates are excluded in _candidate_articles (before LIMIT) — no unreadable news. rows = _candidate_articles(conn, target_date, window_days) selected = _select_diverse(rows, limit) selected_ids = [row["id"] for row in selected] existing = conn.execute("SELECT id FROM daily_briefs WHERE brief_date = ?", (target_date,)).fetchone() if existing: existing_ids = [ r["article_id"] for r in conn.execute( "SELECT article_id FROM daily_brief_items WHERE brief_id = ? ORDER BY rank", (existing["id"],), ) ] # Idempotent: if the selection is unchanged, leave the brief (and its # created_at freshness stamp) alone — a 15-minute rebuild with no new # data is a no-op, so a reader's pinned view holds. if existing_ids == selected_ids or not replace: return int(existing["id"]) conn.execute("DELETE FROM daily_briefs WHERE id = ?", (existing["id"],)) brief_id = conn.execute( "INSERT INTO daily_briefs (brief_date, title) VALUES (?, ?)", (target_date, f"Highlights from Today - {target_date}"), ).lastrowid for index, row in enumerate(selected, start=1): conn.execute( """ INSERT INTO daily_brief_items (brief_id, article_id, rank, selection_reason) VALUES (?, ?, ?, ?) """, ( brief_id, row["id"], index, _selection_reason(row), ), ) conn.commit() return int(brief_id) def show_brief(conn: sqlite3.Connection, brief_date: str | None = None, limit: int = 10) -> list[sqlite3.Row]: target_date = brief_date or _latest_brief_date(conn) if not target_date: return [] return conn.execute( """ SELECT b.brief_date, bi.rank, bi.selection_reason, a.title, a.description, a.canonical_url, a.published_at, src.name AS source_name, src.default_category, s.constructive_score, s.cortisol_score, s.ragebait_score, s.agency_score, s.human_benefit_score, s.reason_code, s.reason_text, s.model_name FROM daily_briefs b JOIN daily_brief_items bi ON bi.brief_id = b.id JOIN articles a ON a.id = bi.article_id JOIN sources src ON src.id = a.source_id LEFT JOIN article_scores s ON s.article_id = a.id WHERE b.brief_date = ? ORDER BY bi.rank LIMIT ? """, (target_date, limit), ).fetchall() def _candidate_articles( conn: sqlite3.Connection, target_date: str, window_days: int = 3 ) -> list[sqlite3.Row]: """Brief candidates, sparse-day-proof. Prefers articles dated on target_date, but widens to the preceding `window_days` so the brief still fills on slow news days. Anything already featured in a brief within the last 7 days (other than this same date, which is being rebuilt) is excluded so backfilled stories cannot linger across consecutive days. Paywalled sources are excluded here (before LIMIT) so they can't consume candidate slots and leave an otherwise-full brief thin. """ pwx = paywalled_source_ids(conn) pw_clause = f"AND a.source_id NOT IN ({','.join('?' * len(pwx))})" if pwx else "" return conn.execute( f""" SELECT a.id, a.title, a.description, a.canonical_url, a.published_at, a.discovered_at, src.name AS source_name, src.default_category, src.trust_score, src.paywall_override AS paywall_override, s.constructive_score, s.cortisol_score, s.ragebait_score, s.agency_score, s.human_benefit_score, s.novelty_score, s.pr_risk_score, s.reason_code, s.reason_text, s.model_name, s.topic, s.flavor, CASE WHEN date(COALESCE(a.published_at, a.discovered_at)) = date(?) THEN 1 ELSE 0 END AS is_today FROM articles a JOIN sources src ON src.id = a.source_id JOIN article_scores s ON s.article_id = a.id WHERE s.accepted = 1 AND src.content_visible = 1 AND a.duplicate_of IS NULL AND date(COALESCE(a.published_at, a.discovered_at)) <= date(?) AND date(COALESCE(a.published_at, a.discovered_at)) > date(?, '-' || ? || ' days') AND a.id NOT IN ( SELECT bi.article_id FROM daily_brief_items bi JOIN daily_briefs b ON b.id = bi.brief_id WHERE b.brief_date <> ? AND b.brief_date <= date(?) AND b.brief_date > date(?, '-7 days') ) {pw_clause} ORDER BY is_today DESC, (s.constructive_score + s.agency_score + s.human_benefit_score + src.trust_score - s.cortisol_score - s.ragebait_score - s.pr_risk_score) DESC, COALESCE(a.published_at, a.discovered_at) DESC LIMIT 50 """, (target_date, target_date, target_date, window_days, target_date, target_date, target_date, *pwx), ).fetchall() def _select_diverse(rows: list[sqlite3.Row], limit: int) -> list[sqlite3.Row]: """Pick up to `limit` items for the daily brief (rows ranked best-first). The daily five should feel like *good news*, not a research digest, so the emotional mix is guarded — not just topic count: - at most 1 health item, - at most 2 science+health items combined, - at most 2 of any single topic, - distinct sources. Because science/health are capped at 2 combined, at least three of the five are community/culture/animals/environment whenever those exist — so the page leads with breadth, not clustered medical/science breakthroughs. Caps are relaxed (topic first, then source) only as needed to still fill the count on thin days; we never return fewer when candidates exist. """ selected: list[sqlite3.Row] = [] selected_ids: set = set() seen_sources: set = set() topic_count: dict = {} def add(row: sqlite3.Row) -> None: selected.append(row) selected_ids.add(row["id"]) seen_sources.add(row["source_name"]) topic_count[row["topic"]] = topic_count.get(row["topic"], 0) + 1 def emotional_mix_ok(row: sqlite3.Row) -> bool: topic = row["topic"] health = topic_count.get("health", 0) science = topic_count.get("science", 0) if topic == "health" and health >= 1: return False if topic in ("science", "health") and (science + health) >= 2: return False return topic_count.get(topic, 0) < 2 def fill(enforce_mix: bool, enforce_source: bool) -> None: for row in rows: if len(selected) >= limit: return if row["id"] in selected_ids: continue if enforce_source and row["source_name"] in seen_sources: continue if enforce_mix and not emotional_mix_ok(row): continue add(row) fill(enforce_mix=True, enforce_source=True) # balanced mix, distinct sources fill(enforce_mix=False, enforce_source=True) # relax the mix caps to fill fill(enforce_mix=False, enforce_source=False) # relax source too, last resort return selected def _selection_reason(row: sqlite3.Row) -> str: return ( f"{row['reason_code']}; constructive={row['constructive_score']}, " f"agency={row['agency_score']}, human_benefit={row['human_benefit_score']}, " f"cortisol={row['cortisol_score']}, source={row['source_name']}" ) def _latest_brief_date(conn: sqlite3.Connection) -> str | None: row = conn.execute("SELECT brief_date FROM daily_briefs ORDER BY brief_date DESC LIMIT 1").fetchone() return row["brief_date"] if row else None