"""Read-only query helpers over the goodNews database.

Pure stdlib and framework-agnostic: returns plain dicts so the same functions
back both the CLI and the JSON API. All article output is metadata + a link to
the original source — never stored bodies.

Every helper converts rows with ``dict(row)`` or reads them by column name, so
the connection passed in must use a mapping-style row factory such as
``sqlite3.Row``.
"""

from __future__ import annotations

import sqlite3

# Composite ranking used everywhere a "best first" order is needed. Kept as one
# expression so brief, category feeds, and the API all rank identically.
RANK_SCORE_SQL = (
    "(s.constructive_score + s.agency_score + s.human_benefit_score + src.trust_score "
    "- s.cortisol_score - s.ragebait_score - s.pr_risk_score)"
)

# Shared SELECT list. It assumes the query aliases articles as ``a``, sources
# as ``src`` and article_scores as ``s``.
_ARTICLE_COLUMNS = f"""
    a.id, a.title, a.description, a.canonical_url, a.published_at, a.image_url,
    src.name AS source_name,
    s.topic, s.flavor, s.accepted,
    s.constructive_score, s.cortisol_score, s.ragebait_score,
    s.agency_score, s.human_benefit_score, s.pr_risk_score,
    s.reason_code, s.reason_text, s.model_name,
    {RANK_SCORE_SQL} AS rank_score
"""


def feed(
    conn: sqlite3.Connection,
    topic: str | None = None,
    flavor: str | None = None,
    accepted_only: bool = True,
    limit: int = 30,
    offset: int = 0,
    include_topics: list[str] | None = None,
    include_flavors: list[str] | None = None,
    mute_topics: list[str] | None = None,
    mute_flavors: list[str] | None = None,
    max_cortisol: int | None = None,
    max_ragebait: int | None = None,
) -> list[dict]:
    """Return ranked articles with categorical filters applied in SQL.

    Categorical filters (topic/flavor include & mute, cortisol/ragebait
    ceilings) must be applied here, not after ranking — otherwise
    low-ranked-but-matching items (e.g. 'discovery' for a Wonder lane) fall
    outside any over-fetch window. Word-boundary avoid-terms remain a Python
    pass on the caller side.

    Args:
        conn: Open connection whose row factory yields mapping-style rows.
        topic: Exact topic to match; lowercased before comparison.
        flavor: Exact flavor to match; lowercased before comparison.
        accepted_only: When true, keep only rows with ``s.accepted = 1``.
        limit: Maximum number of rows to return.
        offset: Number of ranked rows to skip before the first returned row.
        include_topics: Keep only rows whose topic is in this list.
        include_flavors: Keep only rows whose flavor is in this list.
        mute_topics: Drop rows whose topic is in this list.
        mute_flavors: Drop rows whose flavor is in this list.
        max_cortisol: Inclusive ceiling on ``cortisol_score`` (NULL counts as 0).
        max_ragebait: Inclusive ceiling on ``ragebait_score`` (NULL counts as 0).

    Returns:
        One dict per article, best rank first. Articles marked as duplicates
        (``duplicate_of`` set) are never returned.
    """
    clauses = ["a.duplicate_of IS NULL"]
    params: list = []

    if accepted_only:
        clauses.append("s.accepted = 1")
    if topic:
        clauses.append("s.topic = ?")
        params.append(topic.lower())
    if flavor:
        clauses.append("s.flavor = ?")
        params.append(flavor.lower())

    def _in(column: str, values: list[str], negate: bool = False) -> None:
        """Append a parameterized (NOT) IN test on *column* for *values*."""
        vals = [v.lower() for v in values]
        placeholders = ",".join("?" * len(vals))
        op = "NOT IN" if negate else "IN"
        # COALESCE keeps NULL-category rows from being dropped by NOT IN.
        clauses.append(f"COALESCE({column}, '') {op} ({placeholders})")
        params.extend(vals)

    # Empty or missing lists add no clause at all.
    if include_topics:
        _in("s.topic", include_topics)
    if include_flavors:
        _in("s.flavor", include_flavors)
    if mute_topics:
        _in("s.topic", mute_topics, negate=True)
    if mute_flavors:
        _in("s.flavor", mute_flavors, negate=True)

    if max_cortisol is not None:
        clauses.append("COALESCE(s.cortisol_score, 0) <= ?")
        params.append(max_cortisol)
    if max_ragebait is not None:
        clauses.append("COALESCE(s.ragebait_score, 0) <= ?")
        params.append(max_ragebait)

    where = "WHERE " + " AND ".join(clauses)
    params.extend([limit, offset])

    # The trailing a.id key makes the ordering total. Without it, rows that tie
    # on rank and timestamp come back in unspecified order, so consecutive
    # LIMIT/OFFSET pages could repeat or skip an article.
    rows = conn.execute(
        f"""
        SELECT {_ARTICLE_COLUMNS}
        FROM articles a
        JOIN sources src ON src.id = a.source_id
        JOIN article_scores s ON s.article_id = a.id
        {where}
        ORDER BY rank_score DESC,
                 COALESCE(a.published_at, a.discovered_at) DESC,
                 a.id DESC
        LIMIT ? OFFSET ?
        """,
        params,
    ).fetchall()
    return [dict(row) for row in rows]


def brief(conn: sqlite3.Connection, brief_date: str | None = None, limit: int = 10) -> dict:
    """Return a stored daily brief (latest if no date) with its ranked items.

    Args:
        conn: Open connection whose row factory yields mapping-style rows.
        brief_date: Value to match against ``daily_briefs.brief_date``; when
            omitted, the greatest stored ``brief_date`` is used.
        limit: Maximum number of brief items to return.

    Returns:
        A dict with ``brief_date``, ``title`` and ``items``. ``title`` is None
        and ``items`` is empty when no brief exists for the date; all three are
        empty/None when no briefs are stored at all.
    """
    target_date = brief_date or _latest_brief_date(conn)
    if not target_date:
        return {"brief_date": None, "title": None, "items": []}

    header = conn.execute(
        "SELECT brief_date, title FROM daily_briefs WHERE brief_date = ?",
        (target_date,),
    ).fetchone()
    if not header:
        return {"brief_date": target_date, "title": None, "items": []}

    # LEFT JOIN on scores: a brief item whose article has no score row is still
    # listed, with NULL score columns and a NULL rank_score.
    rows = conn.execute(
        f"""
        SELECT bi.rank, bi.selection_reason, {_ARTICLE_COLUMNS}
        FROM daily_briefs b
        JOIN daily_brief_items bi ON bi.brief_id = b.id
        JOIN articles a ON a.id = bi.article_id
        JOIN sources src ON src.id = a.source_id
        LEFT JOIN article_scores s ON s.article_id = a.id
        WHERE b.brief_date = ?
        ORDER BY bi.rank
        LIMIT ?
        """,
        (target_date, limit),
    ).fetchall()
    return {
        "brief_date": header["brief_date"],
        "title": header["title"],
        "items": [dict(row) for row in rows],
    }


def category_counts(conn: sqlite3.Connection, accepted_only: bool = True) -> list[dict]:
    """Return per topic/flavor article counts for building browse UIs.

    Joins articles and excludes duplicates so the counts match exactly what
    the feed endpoint will actually return for each topic/flavor. The sources
    join is part of that contract: the feed inner-joins sources, so an article
    without a matching source row must not be counted here either.

    Args:
        conn: Open connection whose row factory yields mapping-style rows.
        accepted_only: When true, count only rows with ``s.accepted = 1``;
            otherwise count every row that has a non-NULL topic.

    Returns:
        Dicts with ``topic``, ``flavor`` and ``count``, ordered by topic then
        flavor.
    """
    clauses = ["a.duplicate_of IS NULL"]
    clauses.append("s.accepted = 1" if accepted_only else "s.topic IS NOT NULL")
    where = " AND ".join(clauses)

    rows = conn.execute(
        f"""
        SELECT s.topic, s.flavor, COUNT(*) AS count
        FROM article_scores s
        JOIN articles a ON a.id = s.article_id
        JOIN sources src ON src.id = a.source_id
        WHERE {where}
        GROUP BY s.topic, s.flavor
        ORDER BY s.topic, s.flavor
        """
    ).fetchall()
    return [dict(row) for row in rows]


def available_dates(conn: sqlite3.Connection, limit: int = 30) -> list[str]:
    """Return up to *limit* stored ``brief_date`` values, greatest first."""
    rows = conn.execute(
        "SELECT brief_date FROM daily_briefs ORDER BY brief_date DESC LIMIT ?",
        (limit,),
    ).fetchall()
    return [row["brief_date"] for row in rows]


def _latest_brief_date(conn: sqlite3.Connection) -> str | None:
    """Return the greatest stored ``brief_date``, or None when none exist."""
    row = conn.execute(
        "SELECT brief_date FROM daily_briefs ORDER BY brief_date DESC LIMIT 1"
    ).fetchone()
    return row["brief_date"] if row else None