337dc3f901
Per Codex — make /a/<id> feel like Upbeat Bytes has editorial judgment, not just a summary wrapper. Trust-building, short, not an essay. * article_summaries gains what_happened / why_matters / why_belongs (+ migration). * summarize.explain_article: a separate, fallback-able LLM pass producing three short notes (parsed from a labelled WHAT/MATTERS/BELONGS format). generate_summary now stores them alongside the summary, and tops up older summaries on next view. get_explanation returns them only when all three are present. * API: share_page + /api/summary expose the explanation. * share.py: renders the three-part section (accent rule) when complete; otherwise the single "Why it's here" reason line is the calm fallback. The page polls and swaps in both the summary and the section as they cache. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
375 lines
15 KiB
Python
375 lines
15 KiB
Python
from __future__ import annotations
|
|
|
|
import sqlite3
|
|
from pathlib import Path
|
|
|
|
|
|
# Complete DDL for the application database, executed as one script.
# Every CREATE uses IF NOT EXISTS, so re-running it against an existing
# database is harmless; it also means an existing table is never altered, which
# is why columns added after a table first shipped are handled in _migrate().
SCHEMA = """
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS sources (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE,
    homepage_url TEXT,
    feed_url TEXT NOT NULL UNIQUE,
    source_type TEXT NOT NULL DEFAULT 'rss',
    default_category TEXT,
    trust_score INTEGER NOT NULL DEFAULT 5,
    pr_risk_score INTEGER NOT NULL DEFAULT 3,
    active INTEGER NOT NULL DEFAULT 1,
    status TEXT NOT NULL DEFAULT 'active',
    content_visible INTEGER NOT NULL DEFAULT 1,
    poll_interval_minutes INTEGER NOT NULL DEFAULT 60,
    notes TEXT,
    last_success_at TEXT,
    last_error_at TEXT,
    last_error TEXT,
    consecutive_failures INTEGER NOT NULL DEFAULT 0,
    retry_after_at TEXT,
    review_flag INTEGER NOT NULL DEFAULT 0,
    review_reason TEXT,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS articles (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
    canonical_url TEXT NOT NULL,
    title TEXT NOT NULL,
    description TEXT,
    author TEXT,
    published_at TEXT,
    discovered_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    image_url TEXT,
    language TEXT,
    raw_guid TEXT,
    url_hash TEXT NOT NULL UNIQUE,
    title_hash TEXT,
    duplicate_of INTEGER REFERENCES articles(id) ON DELETE SET NULL,
    image_checked_at TEXT,
    FOREIGN KEY (source_id) REFERENCES sources(id)
);

CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles(published_at);
CREATE INDEX IF NOT EXISTS idx_articles_source_id ON articles(source_id);
CREATE INDEX IF NOT EXISTS idx_articles_title_hash ON articles(title_hash);

CREATE TABLE IF NOT EXISTS article_scores (
    article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
    constructive_score INTEGER,
    cortisol_score INTEGER,
    ragebait_score INTEGER,
    agency_score INTEGER,
    human_benefit_score INTEGER,
    novelty_score INTEGER,
    pr_risk_score INTEGER,
    accepted INTEGER,
    reason_code TEXT,
    reason_text TEXT,
    topic TEXT,
    flavor TEXT,
    model_name TEXT,
    scored_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS article_tags (
    article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
    tag TEXT NOT NULL,
    PRIMARY KEY (article_id, tag)
);

CREATE INDEX IF NOT EXISTS idx_article_tags_tag ON article_tags(tag);

CREATE TABLE IF NOT EXISTS article_embeddings (
    article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
    vector BLOB NOT NULL,
    dim INTEGER NOT NULL,
    model TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS ingest_runs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    source_id INTEGER REFERENCES sources(id) ON DELETE SET NULL,
    started_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    finished_at TEXT,
    status TEXT NOT NULL DEFAULT 'running',
    items_seen INTEGER NOT NULL DEFAULT 0,
    items_inserted INTEGER NOT NULL DEFAULT 0,
    items_duplicate INTEGER NOT NULL DEFAULT 0,
    error TEXT
);

CREATE TABLE IF NOT EXISTS source_candidates (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    feed_url TEXT NOT NULL UNIQUE,
    homepage_url TEXT,
    name TEXT,
    status TEXT NOT NULL DEFAULT 'suggested',
    preview_json TEXT,
    notes TEXT,
    last_previewed_at TEXT,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS daily_briefs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    brief_date TEXT NOT NULL UNIQUE,
    title TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    notes TEXT
);

CREATE TABLE IF NOT EXISTS daily_brief_items (
    brief_id INTEGER NOT NULL REFERENCES daily_briefs(id) ON DELETE CASCADE,
    article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
    rank INTEGER NOT NULL,
    selection_reason TEXT,
    PRIMARY KEY (brief_id, article_id),
    UNIQUE (brief_id, rank)
);

-- ---- Accounts ----------------------------------------------------------------
-- Self-hosted, minimal-PII. The host ingestion owns the content tables above;
-- the API owns these (writes happen via the API, so the DB runs in WAL mode).

CREATE TABLE IF NOT EXISTS users (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    email TEXT NOT NULL UNIQUE,
    display_name TEXT,
    avatar_url TEXT,
    is_admin INTEGER NOT NULL DEFAULT 0,
    digest_enabled INTEGER NOT NULL DEFAULT 0,
    digest_unsub_token TEXT,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- One row per sign-in method linked to a user; lets Google + magic-link
-- (same verified email) resolve to a single account.
CREATE TABLE IF NOT EXISTS identities (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    provider TEXT NOT NULL, -- 'email' | 'google' | 'apple'
    provider_subject TEXT NOT NULL, -- email address, or the provider's stable user id
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (provider, provider_subject)
);
CREATE INDEX IF NOT EXISTS idx_identities_user ON identities(user_id);

-- Single-use, short-lived magic-link tokens (stored hashed).
CREATE TABLE IF NOT EXISTS login_tokens (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    email TEXT NOT NULL,
    token_hash TEXT NOT NULL UNIQUE,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    expires_at TEXT NOT NULL,
    consumed_at TEXT
);
CREATE INDEX IF NOT EXISTS idx_login_tokens_email ON login_tokens(email);

-- Active sessions (opaque token stored hashed); validated for cookie or bearer.
CREATE TABLE IF NOT EXISTS sessions (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    token_hash TEXT NOT NULL UNIQUE,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    last_seen_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    expires_at TEXT NOT NULL,
    user_agent TEXT
);
CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions(user_id);

CREATE TABLE IF NOT EXISTS saved_articles (
    user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
    saved_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (user_id, article_id)
);

CREATE TABLE IF NOT EXISTS user_history (
    user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
    event TEXT NOT NULL DEFAULT 'seen', -- 'seen' | 'dismissed'
    at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (user_id, article_id, event)
);

CREATE TABLE IF NOT EXISTS user_prefs (
    user_id INTEGER PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
    prefs_json TEXT NOT NULL DEFAULT '{}',
    updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Our OWN short summary of an article (generated on demand, cached forever).
-- We store only our derived summary text — never the publisher's article body.
CREATE TABLE IF NOT EXISTS article_summaries (
    article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
    summary TEXT NOT NULL,
    what_happened TEXT,
    why_matters TEXT,
    why_belongs TEXT,
    model TEXT,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Privacy-respecting, first-party analytics. NO IP / user-agent / referrer / raw
-- URL. visitor_hash is a hash of a random localStorage token (never email/IP).
-- The UNIQUE key dedups to one row per (kind, article, visitor, day) — that both
-- caps volume and makes counts mean "distinct visitor-days". Groupings are derived
-- from article_id at query time, never stored here.
CREATE TABLE IF NOT EXISTS events (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    kind TEXT NOT NULL,
    article_id INTEGER NOT NULL DEFAULT 0,
    visitor_hash TEXT NOT NULL DEFAULT '',
    day TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (kind, article_id, visitor_hash, day)
);
CREATE INDEX IF NOT EXISTS idx_events_day ON events(day);
CREATE INDEX IF NOT EXISTS idx_events_kind ON events(kind);
CREATE INDEX IF NOT EXISTS idx_events_article ON events(article_id);

-- User feedback (idea / concern / bug / praise). Anonymous-friendly; optional
-- contact email only if the person wants a reply. visitor_hash is for rate-limit
-- only (the same hashed anonymous token used by analytics).
CREATE TABLE IF NOT EXISTS feedback (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    category TEXT NOT NULL DEFAULT 'other',
    message TEXT NOT NULL,
    contact_email TEXT,
    user_id INTEGER REFERENCES users(id) ON DELETE SET NULL,
    visitor_hash TEXT NOT NULL DEFAULT '',
    day TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    read_at TEXT
);
CREATE INDEX IF NOT EXISTS idx_feedback_created ON feedback(created_at);

CREATE TABLE IF NOT EXISTS feedback_replies (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    feedback_id INTEGER NOT NULL REFERENCES feedback(id) ON DELETE CASCADE,
    user_id INTEGER REFERENCES users(id) ON DELETE SET NULL,
    message TEXT NOT NULL,
    message_html TEXT,
    sent_to TEXT NOT NULL,
    sent_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_feedback_replies_fid ON feedback_replies(feedback_id);

CREATE TABLE IF NOT EXISTS user_follows (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    kind TEXT NOT NULL, -- 'source' | 'tag'
    value TEXT NOT NULL, -- source id (as text) or tag key
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (user_id, kind, value)
);

CREATE TABLE IF NOT EXISTS digest_sends (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    brief_date TEXT NOT NULL,
    item_count INTEGER NOT NULL DEFAULT 0,
    sent_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    UNIQUE (user_id, brief_date)
);
"""
|
|
|
|
|
|
def connect(db_path: Path | str) -> sqlite3.Connection:
    """Open the SQLite database at *db_path* and apply the standard settings.

    The parent directory of *db_path* is created if it is missing. The
    returned connection:

    * may be used from threads other than the one that created it
      (``check_same_thread=False``);
    * yields ``sqlite3.Row`` rows, so columns can be read by name;
    * enforces foreign keys and waits up to 5000 ms on a locked database;
    * uses WAL journaling with ``synchronous = NORMAL`` for every path other
      than ``":memory:"``.

    If any of the setup PRAGMAs fails, the connection is closed before the
    error propagates, so a bad database file does not leak an open handle.

    Raises:
        OSError: the parent directory could not be created.
        sqlite3.Error: the database could not be opened or configured.
    """
    path = Path(db_path)
    path.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(path, check_same_thread=False)
    try:
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA foreign_keys = ON")
        # WAL lets the API write account data while the ingestion cycle writes
        # content concurrently (readers never block the writer). busy_timeout
        # rides out the brief moments the single writer lock is held. Both are
        # no-ops if already set.
        conn.execute("PRAGMA busy_timeout = 5000")
        if str(path) != ":memory:":
            conn.execute("PRAGMA journal_mode = WAL")
            conn.execute("PRAGMA synchronous = NORMAL")
    except sqlite3.Error:
        # Do not leave a half-configured connection open; the caller never
        # receives it and so could not close it.
        conn.close()
        raise
    return conn
|
|
|
|
|
|
def init_db(conn: sqlite3.Connection) -> None:
    """Bring the database behind *conn* up to the current schema and commit.

    Three steps, in order: run the SCHEMA script (creates whatever tables and
    indexes are missing), run _migrate() to add columns that were introduced
    after a table first shipped, then commit the connection.
    """
    conn.executescript(SCHEMA)
    _migrate(conn)
    conn.commit()
|
|
|
|
|
|
def _table_columns(conn: sqlite3.Connection, table: str) -> set[str]:
    """Return the column names of *table*; empty when the table does not exist.

    Rows are read by name, so *conn* must use a row factory that supports
    ``row["name"]`` (for example ``sqlite3.Row``).
    """
    return {row["name"] for row in conn.execute(f"PRAGMA table_info({table})")}


def _ensure_columns(
    conn: sqlite3.Connection,
    table: str,
    existing: set[str],
    columns: dict[str, str],
) -> None:
    """ALTER *table* to add each column in *columns* that is not in *existing*.

    *columns* maps column name to its SQL declaration; columns are added in
    mapping order. *existing* is not updated by this call.
    """
    for name, decl in columns.items():
        if name not in existing:
            conn.execute(f"ALTER TABLE {table} ADD COLUMN {name} {decl}")


def _migrate(conn: sqlite3.Connection) -> None:
    """Add columns introduced after the initial schema to existing databases.

    CREATE TABLE IF NOT EXISTS never alters an existing table, so new columns
    need an explicit, idempotent ALTER guarded by the current column set.

    article_scores, articles and sources are expected to exist already (an
    ALTER against a missing one raises sqlite3.OperationalError). The users,
    feedback, feedback_replies and article_summaries tables are skipped
    silently when absent. Nothing is committed here.
    """
    _ensure_columns(
        conn,
        "article_scores",
        _table_columns(conn, "article_scores"),
        {"topic": "TEXT", "flavor": "TEXT"},
    )

    # users.avatar_url (Google pictures) + is_admin (admin dashboard) and the
    # digest settings were added later.
    user_cols = _table_columns(conn, "users")
    if user_cols:
        _ensure_columns(
            conn,
            "users",
            user_cols,
            {
                "avatar_url": "TEXT",
                "is_admin": "INTEGER NOT NULL DEFAULT 0",
                "digest_enabled": "INTEGER NOT NULL DEFAULT 0",
                "digest_unsub_token": "TEXT",
            },
        )

    _ensure_columns(
        conn,
        "articles",
        _table_columns(conn, "articles"),
        {
            # Same declaration as in SCHEMA. Without ON DELETE SET NULL an
            # upgraded database would refuse to delete an article that other
            # rows point to as their original, while a fresh one would not.
            "duplicate_of": "INTEGER REFERENCES articles(id) ON DELETE SET NULL",
            "image_checked_at": "TEXT",
        },
    )
    # Created here (not in SCHEMA) so it runs after the column exists on upgrades.
    conn.execute("CREATE INDEX IF NOT EXISTS idx_articles_duplicate_of ON articles(duplicate_of)")

    source_cols = _table_columns(conn, "sources")
    _ensure_columns(
        conn,
        "sources",
        source_cols,
        {
            "last_success_at": "TEXT",
            "last_error_at": "TEXT",
            "last_error": "TEXT",
            "consecutive_failures": "INTEGER NOT NULL DEFAULT 0",
            "review_flag": "INTEGER NOT NULL DEFAULT 0",
            "review_reason": "TEXT",
        },
    )

    # Lifecycle: status (active/paused/retired) + content_visible. `active` is
    # kept as a synced mirror so legacy code (scheduler/CLI) keeps working.
    if "status" not in source_cols:
        conn.execute("ALTER TABLE sources ADD COLUMN status TEXT NOT NULL DEFAULT 'active'")
        # Backfill once, at the moment the column is introduced.
        conn.execute("UPDATE sources SET status = CASE WHEN active = 1 THEN 'active' ELSE 'paused' END")
    _ensure_columns(
        conn,
        "sources",
        source_cols,
        {
            "content_visible": "INTEGER NOT NULL DEFAULT 1",
            "retry_after_at": "TEXT",
        },
    )

    # feedback.read_at (admin inbox read/unread), feedback_replies.message_html
    # (rendered Markdown subset) and the structured "Why it belongs" fields on
    # article_summaries were all added later.
    later_additions = (
        ("feedback", {"read_at": "TEXT"}),
        ("feedback_replies", {"message_html": "TEXT"}),
        (
            "article_summaries",
            {"what_happened": "TEXT", "why_matters": "TEXT", "why_belongs": "TEXT"},
        ),
    )
    for table, columns in later_additions:
        existing = _table_columns(conn, table)
        if existing:
            _ensure_columns(conn, table, existing, columns)
|