68a401eed6
Per the agreed model: the brief is server-authoritative and a client Replace is a soft override that yields when genuinely new data arrives. - build_daily_brief is now idempotent: if the composed selection is unchanged it leaves the brief (and its created_at) alone, so the timer's 15-min rebuilds are no-ops when no new data landed. - /api/brief exposes generated_at (the brief's created_at = a content-change stamp). The client pins its view against generated_at and keeps it across plain refreshes, but drops it and shows the fresh server brief when generated_at advances. Missed stories remain in the mood feeds. Tests: idempotent rebuild (no-op vs content change) — 93 total. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
185 lines
6.0 KiB
Python
185 lines
6.0 KiB
Python
"""Read-only query helpers over the goodNews database.
|
|
|
|
Pure stdlib and framework-agnostic: returns plain dicts so the same functions
|
|
back both the CLI and the JSON API. All article output is metadata + a link to
|
|
the original source — never stored bodies.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import sqlite3
|
|
|
|
# Single source of truth for "best first" ordering: the sum of the positive
# signals (constructive, agency, human benefit, source trust) minus the sum of
# the negative ones (cortisol, ragebait, PR risk). Defined once so the brief,
# the category feeds, and the API cannot drift apart in how they rank.
# Expects the query to alias article_scores as `s` and sources as `src`.
RANK_SCORE_SQL = (
    "(s.constructive_score + s.agency_score"
    " + s.human_benefit_score + src.trust_score "
    "- s.cortisol_score - s.ragebait_score"
    " - s.pr_risk_score)"
)

# Shared SELECT list for every article-returning query. Expects the aliases
# `a` (articles), `src` (sources) and `s` (article_scores) to be in scope, and
# exposes the composite ranking expression as the `rank_score` column.
_ARTICLE_COLUMNS = f"""
    a.id,
    a.title,
    a.description,
    a.canonical_url,
    a.published_at,
    a.image_url,
    src.name AS source_name,
    s.topic,
    s.flavor,
    s.accepted,
    s.constructive_score,
    s.cortisol_score,
    s.ragebait_score,
    s.agency_score,
    s.human_benefit_score,
    s.pr_risk_score,
    s.reason_code,
    s.reason_text,
    s.model_name,
    {RANK_SCORE_SQL} AS rank_score
"""
|
|
|
|
|
|
def feed(
    conn: sqlite3.Connection,
    topic: str | None = None,
    flavor: str | None = None,
    accepted_only: bool = True,
    limit: int = 30,
    offset: int = 0,
    include_topics: list[str] | None = None,
    include_flavors: list[str] | None = None,
    mute_topics: list[str] | None = None,
    mute_flavors: list[str] | None = None,
    max_cortisol: int | None = None,
    max_ragebait: int | None = None,
) -> list[dict]:
    """Return one page of ranked, de-duplicated articles as plain dicts.

    Every categorical filter is pushed into the SQL WHERE clause rather than
    applied after ranking: a post-filter would only see the top of the ranked
    window, so low-ranked-but-matching items (e.g. 'discovery' for a Wonder
    lane) would never surface no matter how far the caller over-fetched.
    Word-boundary avoid-terms are not handled here; they remain a Python pass
    on the caller side.

    Args:
        conn: Open database connection. Rows are converted with ``dict(row)``,
            so this assumes a mapping-style row factory such as
            ``sqlite3.Row`` -- TODO confirm against the connection setup.
        topic: Exact topic to match (lower-cased before comparison).
        flavor: Exact flavor to match (lower-cased before comparison).
        accepted_only: When true, keep only rows with ``s.accepted = 1``.
        limit: Page size, bound to ``LIMIT``.
        offset: Page start, bound to ``OFFSET``.
        include_topics: Keep only these topics (lower-cased).
        include_flavors: Keep only these flavors (lower-cased).
        mute_topics: Drop these topics (lower-cased).
        mute_flavors: Drop these flavors (lower-cased).
        max_cortisol: Inclusive ceiling on the cortisol score; a NULL score
            counts as 0.
        max_ragebait: Inclusive ceiling on the ragebait score; a NULL score
            counts as 0.

    Returns:
        Article dicts ordered by ``rank_score`` descending, ties broken by the
        most recent ``published_at`` (falling back to ``discovered_at``).
    """
    conditions: list[str] = ["a.duplicate_of IS NULL"]
    bindings: list = []

    def add(condition: str, *values) -> None:
        # Keep each SQL fragment and its bound values in lockstep.
        conditions.append(condition)
        bindings.extend(values)

    if accepted_only:
        add("s.accepted = 1")
    if topic:
        add("s.topic = ?", topic.lower())
    if flavor:
        add("s.flavor = ?", flavor.lower())

    # (column, requested values, SQL operator); order fixes placeholder order.
    membership_filters = (
        ("s.topic", include_topics, "IN"),
        ("s.flavor", include_flavors, "IN"),
        ("s.topic", mute_topics, "NOT IN"),
        ("s.flavor", mute_flavors, "NOT IN"),
    )
    for column, wanted, operator in membership_filters:
        if not wanted:
            continue
        lowered = [value.lower() for value in wanted]
        marks = ",".join("?" for _ in lowered)
        # COALESCE keeps NULL-category rows from being dropped by NOT IN.
        add(f"COALESCE({column}, '') {operator} ({marks})", *lowered)

    score_ceilings = (
        ("s.cortisol_score", max_cortisol),
        ("s.ragebait_score", max_ragebait),
    )
    for column, ceiling in score_ceilings:
        if ceiling is not None:
            add(f"COALESCE({column}, 0) <= ?", ceiling)

    where = "WHERE " + " AND ".join(conditions)
    query = f"""
        SELECT {_ARTICLE_COLUMNS}
        FROM articles a
        JOIN sources src ON src.id = a.source_id
        JOIN article_scores s ON s.article_id = a.id
        {where}
        ORDER BY rank_score DESC, COALESCE(a.published_at, a.discovered_at) DESC
        LIMIT ? OFFSET ?
        """
    cursor = conn.execute(query, [*bindings, limit, offset])
    return [dict(record) for record in cursor.fetchall()]
|
|
|
|
|
|
def brief(conn: sqlite3.Connection, brief_date: str | None = None, limit: int = 10) -> dict:
    """Return a stored daily brief (latest if no date) with its ranked items.

    Args:
        conn: Open database connection. Rows are read by column name and
            converted with ``dict(row)``, so this assumes a mapping-style row
            factory such as ``sqlite3.Row`` -- TODO confirm against the
            connection setup.
        brief_date: Date key of the brief to load. When omitted (or empty),
            the most recent ``brief_date`` in ``daily_briefs`` is used.
        limit: Maximum number of brief items to return, in rank order.

    Returns:
        A dict that always has the keys ``brief_date``, ``title``,
        ``created_at`` and ``items``. When no brief exists (none stored at
        all, or none for the requested date) ``title`` and ``created_at`` are
        ``None`` and ``items`` is empty. Each item carries ``rank``,
        ``selection_reason`` and the shared article columns; score columns
        (and therefore ``rank_score``) are ``None`` for an article that has no
        ``article_scores`` row, because scores are LEFT JOINed.
    """
    target_date = brief_date or _latest_brief_date(conn)
    if not target_date:
        # No briefs stored yet. Return the same key set as every other path so
        # callers can read result["created_at"] without special-casing an
        # empty database.
        return {"brief_date": None, "title": None, "created_at": None, "items": []}

    header = conn.execute(
        "SELECT brief_date, title, created_at FROM daily_briefs WHERE brief_date = ?",
        (target_date,),
    ).fetchone()
    if not header:
        # A date was requested (or resolved) but no brief is stored for it.
        return {"brief_date": target_date, "title": None, "created_at": None, "items": []}

    rows = conn.execute(
        f"""
        SELECT bi.rank, bi.selection_reason, {_ARTICLE_COLUMNS}
        FROM daily_briefs b
        JOIN daily_brief_items bi ON bi.brief_id = b.id
        JOIN articles a ON a.id = bi.article_id
        JOIN sources src ON src.id = a.source_id
        LEFT JOIN article_scores s ON s.article_id = a.id
        WHERE b.brief_date = ?
        ORDER BY bi.rank
        LIMIT ?
        """,
        (target_date, limit),
    ).fetchall()
    return {
        "brief_date": header["brief_date"],
        "title": header["title"],
        "created_at": header["created_at"],
        "items": [dict(row) for row in rows],
    }
|
|
|
|
|
|
def category_counts(conn: sqlite3.Connection, accepted_only: bool = True) -> list[dict]:
    """Count articles per (topic, flavor) pair for building browse UIs.

    Duplicate articles are excluded via a join on ``articles`` so the numbers
    line up with what the feed query returns for the same topic/flavor.

    Args:
        conn: Open database connection. Rows are converted with ``dict(row)``,
            so this assumes a mapping-style row factory such as
            ``sqlite3.Row`` -- TODO confirm against the connection setup.
        accepted_only: When true, count only rows with ``s.accepted = 1``;
            otherwise count every scored row that has a non-NULL topic.

    Returns:
        Dicts with ``topic``, ``flavor`` and ``count`` keys, ordered by topic
        and then flavor.
    """
    if accepted_only:
        score_filter = "s.accepted = 1"
    else:
        score_filter = "s.topic IS NOT NULL"
    where_sql = " AND ".join(("a.duplicate_of IS NULL", score_filter))

    query = f"""
        SELECT s.topic, s.flavor, COUNT(*) AS count
        FROM article_scores s
        JOIN articles a ON a.id = s.article_id
        WHERE {where_sql}
        GROUP BY s.topic, s.flavor
        ORDER BY s.topic, s.flavor
        """
    cursor = conn.execute(query)
    return [dict(record) for record in cursor.fetchall()]
|
|
|
|
|
|
def available_dates(conn: sqlite3.Connection, limit: int = 30) -> list[str]:
    """List the dates that have a stored daily brief, newest first.

    Args:
        conn: Open database connection. Rows are read by column name, so this
            assumes a mapping-style row factory such as ``sqlite3.Row`` --
            TODO confirm against the connection setup.
        limit: Maximum number of dates to return.

    Returns:
        ``brief_date`` values from ``daily_briefs`` in descending order.
    """
    query = "SELECT brief_date FROM daily_briefs ORDER BY brief_date DESC LIMIT ?"
    records = conn.execute(query, (limit,)).fetchall()
    dates = []
    for record in records:
        dates.append(record["brief_date"])
    return dates
|
|
|
|
|
|
def _latest_brief_date(conn: sqlite3.Connection) -> str | None:
|
|
row = conn.execute(
|
|
"SELECT brief_date FROM daily_briefs ORDER BY brief_date DESC LIMIT 1"
|
|
).fetchone()
|
|
return row["brief_date"] if row else None
|