Files
upbeatBytes/goodnews/db.py
T
thejayman77 1c05554a28 Geo Stage 1-2: subject-geography model + classifier + pipeline wiring
"Closer to Home" foundation (audit greenlit by Codex). Durable geography, kept
decoupled from volatile scoring.

- Schema: article_geo (breadth/confidence/rationale/geo_version) + article_places
  (0..N ISO-coded places), separate from article_scores so re-runs/audits never
  disturb scoring or acceptance. "local" is never stored — it's relative to the
  reader; the UI computes "Near you" later.
- geo.py: LLM proposes place NAMES, code disposes to ISO codes (country alpha-2,
  US state 2-letter); region words like "Europe" can never become a country.
  'global'/placeless is first-class, not failure. Confidence calibrated so 'high'
  needs an explicit location. Geo is its OWN LLM pass, not merged into the scoring
  prompt (durable metadata, re-runnable, keeps the sensitive prompt untouched).
- store_geo replaces places (geo is re-derivable, unlike scores). tag_articles is
  idempotent by geo_version, only touches accepted non-duplicate articles.
- CLI `geo` command (cycle-locked, --limit/--reclassify) for backfill, plus a
  bounded geo step in the cycle (--geo-limit 60, --no-geo). scripts/geo_audit.py
  is the prototype audit tool.

360 tests green; live smoke tagged real articles correctly (Gaza->PS, London->GB,
placeless science->global). No UI / SEO pages yet — ranking/personalization only.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-19 16:56:49 -04:00

533 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import sqlite3
from pathlib import Path
# Full DDL for the SQLite database, executed as a single script by init_db().
# Every CREATE statement uses IF NOT EXISTS, so running the script against an
# existing database is safe; it never alters a table that already exists, which
# is why columns introduced later are added separately in _migrate().
# NOTE(review): sources.paywall_override and idx_articles_duplicate_of are not
# declared here; they are created only by _migrate().
SCHEMA = """
PRAGMA foreign_keys = ON;
CREATE TABLE IF NOT EXISTS sources (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
homepage_url TEXT,
feed_url TEXT NOT NULL UNIQUE,
source_type TEXT NOT NULL DEFAULT 'rss',
default_category TEXT,
trust_score INTEGER NOT NULL DEFAULT 5,
pr_risk_score INTEGER NOT NULL DEFAULT 3,
active INTEGER NOT NULL DEFAULT 1,
status TEXT NOT NULL DEFAULT 'active',
content_visible INTEGER NOT NULL DEFAULT 1,
poll_interval_minutes INTEGER NOT NULL DEFAULT 60,
notes TEXT,
last_success_at TEXT,
last_error_at TEXT,
last_error TEXT,
consecutive_failures INTEGER NOT NULL DEFAULT 0,
retry_after_at TEXT,
review_flag INTEGER NOT NULL DEFAULT 0,
review_reason TEXT,
x_handle TEXT, -- the source's own verified X handle, if known
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS articles (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
canonical_url TEXT NOT NULL,
title TEXT NOT NULL,
description TEXT,
author TEXT,
published_at TEXT,
discovered_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
image_url TEXT,
language TEXT,
raw_guid TEXT,
url_hash TEXT NOT NULL UNIQUE,
title_hash TEXT,
duplicate_of INTEGER REFERENCES articles(id) ON DELETE SET NULL,
image_checked_at TEXT,
FOREIGN KEY (source_id) REFERENCES sources(id)
);
CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles(published_at);
CREATE INDEX IF NOT EXISTS idx_articles_source_id ON articles(source_id);
CREATE INDEX IF NOT EXISTS idx_articles_title_hash ON articles(title_hash);
CREATE TABLE IF NOT EXISTS article_scores (
article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
constructive_score INTEGER,
cortisol_score INTEGER,
ragebait_score INTEGER,
agency_score INTEGER,
human_benefit_score INTEGER,
novelty_score INTEGER,
pr_risk_score INTEGER,
accepted INTEGER,
reason_code TEXT,
reason_text TEXT,
topic TEXT,
flavor TEXT,
language TEXT,
model_name TEXT,
scored_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS article_tags (
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
tag TEXT NOT NULL,
PRIMARY KEY (article_id, tag)
);
CREATE INDEX IF NOT EXISTS idx_article_tags_tag ON article_tags(tag);
CREATE TABLE IF NOT EXISTS article_embeddings (
article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
vector BLOB NOT NULL,
dim INTEGER NOT NULL,
model TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS ingest_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id INTEGER REFERENCES sources(id) ON DELETE SET NULL,
started_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
finished_at TEXT,
status TEXT NOT NULL DEFAULT 'running',
items_seen INTEGER NOT NULL DEFAULT 0,
items_inserted INTEGER NOT NULL DEFAULT 0,
items_duplicate INTEGER NOT NULL DEFAULT 0,
error TEXT
);
CREATE TABLE IF NOT EXISTS source_candidates (
id INTEGER PRIMARY KEY AUTOINCREMENT,
feed_url TEXT NOT NULL UNIQUE,
homepage_url TEXT,
name TEXT,
status TEXT NOT NULL DEFAULT 'suggested',
preview_json TEXT,
notes TEXT,
last_previewed_at TEXT,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS daily_briefs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
brief_date TEXT NOT NULL UNIQUE,
title TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
notes TEXT
);
CREATE TABLE IF NOT EXISTS daily_brief_items (
brief_id INTEGER NOT NULL REFERENCES daily_briefs(id) ON DELETE CASCADE,
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
rank INTEGER NOT NULL,
selection_reason TEXT,
PRIMARY KEY (brief_id, article_id),
UNIQUE (brief_id, rank)
);
-- ---- Accounts ----------------------------------------------------------------
-- Self-hosted, minimal-PII. The host ingestion owns the content tables above;
-- the API owns these (writes happen via the API, so the DB runs in WAL mode).
CREATE TABLE IF NOT EXISTS users (
id INTEGER PRIMARY KEY AUTOINCREMENT,
email TEXT NOT NULL UNIQUE,
display_name TEXT,
avatar_url TEXT,
is_admin INTEGER NOT NULL DEFAULT 0,
digest_enabled INTEGER NOT NULL DEFAULT 0,
digest_unsub_token TEXT,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- One row per sign-in method linked to a user; lets Google + magic-link
-- (same verified email) resolve to a single account.
CREATE TABLE IF NOT EXISTS identities (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
provider TEXT NOT NULL, -- 'email' | 'google' | 'apple'
provider_subject TEXT NOT NULL, -- email address, or the provider's stable user id
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE (provider, provider_subject)
);
CREATE INDEX IF NOT EXISTS idx_identities_user ON identities(user_id);
-- Single-use, short-lived magic-link tokens (stored hashed).
CREATE TABLE IF NOT EXISTS login_tokens (
id INTEGER PRIMARY KEY AUTOINCREMENT,
email TEXT NOT NULL,
token_hash TEXT NOT NULL UNIQUE,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
expires_at TEXT NOT NULL,
consumed_at TEXT
);
CREATE INDEX IF NOT EXISTS idx_login_tokens_email ON login_tokens(email);
-- Active sessions (opaque token stored hashed); validated for cookie or bearer.
CREATE TABLE IF NOT EXISTS sessions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
token_hash TEXT NOT NULL UNIQUE,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_seen_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
expires_at TEXT NOT NULL,
user_agent TEXT
);
CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions(user_id);
CREATE TABLE IF NOT EXISTS saved_articles (
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
saved_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (user_id, article_id)
);
CREATE TABLE IF NOT EXISTS user_history (
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
event TEXT NOT NULL DEFAULT 'seen', -- 'seen' | 'dismissed'
at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (user_id, article_id, event)
);
CREATE TABLE IF NOT EXISTS user_prefs (
user_id INTEGER PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
prefs_json TEXT NOT NULL DEFAULT '{}',
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- Our OWN short summary of an article (generated on demand, cached forever).
-- We store only our derived summary text — never the publisher's article body.
CREATE TABLE IF NOT EXISTS article_summaries (
article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
summary TEXT NOT NULL,
what_happened TEXT,
why_matters TEXT,
why_belongs TEXT,
model TEXT,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- Where a story is ABOUT (subject geography), kept SEPARATE from article_scores so
-- durable geography isn't coupled to volatile scoring/acceptance. "local" is never
-- stored here — it's relative to the reader; the UI computes "Near you" by comparing
-- these places to the visitor's chosen home. geo_version lets us re-backfill cleanly
-- when the prompt/taxonomy changes. 'global' is a real category, not a failure.
CREATE TABLE IF NOT EXISTS article_geo (
article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
breadth TEXT NOT NULL DEFAULT 'unknown', -- locality|regional|national|multinational|global|unknown
confidence TEXT NOT NULL DEFAULT 'low', -- high|medium|low
rationale TEXT,
geo_version TEXT,
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- 0..N normalized places per article (a story can span regions). Codes are ISO
-- (country = alpha-2, state = US 2-letter / ISO-3166-2 subdivision), normalized in
-- code — never trusting the model's free text.
CREATE TABLE IF NOT EXISTS article_places (
id INTEGER PRIMARY KEY AUTOINCREMENT,
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
country_code TEXT,
state_code TEXT,
locality TEXT,
ord INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_article_places_article ON article_places(article_id);
CREATE INDEX IF NOT EXISTS idx_article_places_country ON article_places(country_code);
CREATE INDEX IF NOT EXISTS idx_article_geo_breadth ON article_geo(breadth);
-- Privacy-respecting, first-party analytics. NO IP / user-agent / referrer / raw
-- URL. visitor_hash is a hash of a random localStorage token (never email/IP).
-- The UNIQUE key dedups to one row per (kind, article, visitor, day) — that both
-- caps volume and makes counts mean "distinct visitor-days". Groupings are derived
-- from article_id at query time, never stored here.
CREATE TABLE IF NOT EXISTS events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
kind TEXT NOT NULL,
article_id INTEGER NOT NULL DEFAULT 0,
visitor_hash TEXT NOT NULL DEFAULT '',
day TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE (kind, article_id, visitor_hash, day)
);
CREATE INDEX IF NOT EXISTS idx_events_day ON events(day);
CREATE INDEX IF NOT EXISTS idx_events_kind ON events(kind);
CREATE INDEX IF NOT EXISTS idx_events_article ON events(article_id);
-- User feedback (idea / concern / bug / praise). Anonymous-friendly; optional
-- contact email only if the person wants a reply. visitor_hash is for rate-limit
-- only (the same hashed anonymous token used by analytics).
CREATE TABLE IF NOT EXISTS feedback (
id INTEGER PRIMARY KEY AUTOINCREMENT,
category TEXT NOT NULL DEFAULT 'other',
message TEXT NOT NULL,
contact_email TEXT,
user_id INTEGER REFERENCES users(id) ON DELETE SET NULL,
visitor_hash TEXT NOT NULL DEFAULT '',
day TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
read_at TEXT
);
CREATE INDEX IF NOT EXISTS idx_feedback_created ON feedback(created_at);
CREATE TABLE IF NOT EXISTS feedback_replies (
id INTEGER PRIMARY KEY AUTOINCREMENT,
feedback_id INTEGER NOT NULL REFERENCES feedback(id) ON DELETE CASCADE,
user_id INTEGER REFERENCES users(id) ON DELETE SET NULL,
message TEXT NOT NULL,
message_html TEXT,
sent_to TEXT NOT NULL,
sent_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_feedback_replies_fid ON feedback_replies(feedback_id);
CREATE TABLE IF NOT EXISTS wordsearch_themes (
id INTEGER PRIMARY KEY AUTOINCREMENT,
theme TEXT NOT NULL,
words_json TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS client_errors (
id INTEGER PRIMARY KEY AUTOINCREMENT,
reason TEXT NOT NULL DEFAULT '',
path TEXT NOT NULL DEFAULT '',
user_agent TEXT NOT NULL DEFAULT '',
app_version TEXT NOT NULL DEFAULT '',
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS word_pool (
word TEXT NOT NULL,
variant TEXT NOT NULL, -- '5' | '6'
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (variant, word)
);
CREATE TABLE IF NOT EXISTS word_pool_removed (
word TEXT NOT NULL,
variant TEXT NOT NULL, -- '5' | '6'
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (variant, word)
);
CREATE TABLE IF NOT EXISTS daily_puzzles (
id INTEGER PRIMARY KEY AUTOINCREMENT,
puzzle_date TEXT NOT NULL,
game TEXT NOT NULL, -- 'word' | 'wordsearch'
variant TEXT NOT NULL DEFAULT '',
payload_json TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE (puzzle_date, game, variant)
);
-- Full-text search over the PUBLIC article corpus (title/description/source/tags).
-- Standalone FTS5 (not external-content) since the searchable text spans tables;
-- rebuilt from the accepted, non-duplicate set on each ingest cycle (+ lazily).
CREATE VIRTUAL TABLE IF NOT EXISTS article_search USING fts5(
article_id UNINDEXED, title, body, source_name, tags
);
CREATE TABLE IF NOT EXISTS game_state (
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
game TEXT NOT NULL, -- 'word' | 'wordsearch'
variant TEXT NOT NULL, -- '5'|'6' | 'small'|'med'|'large'
puzzle_date TEXT NOT NULL,
state_json TEXT NOT NULL, -- per-puzzle progress; merged server-side on save
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (user_id, game, variant, puzzle_date)
);
-- Bloom runtime word curation (no deploy needed). The accepted set is computed
-- live as: broad dictionary {allow} {block}. Admin-managed; one row per word.
CREATE TABLE IF NOT EXISTS bloom_word_overrides (
word TEXT PRIMARY KEY, -- lowercase
action TEXT NOT NULL, -- 'allow' | 'block'
reason TEXT,
created_by TEXT,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- Player "this should count" reports → admin queue (approve→allow / block / dismiss).
CREATE TABLE IF NOT EXISTS bloom_word_reports (
id INTEGER PRIMARY KEY AUTOINCREMENT,
word TEXT NOT NULL, -- lowercase
puzzle_date TEXT,
mode TEXT, -- 'daily' | 'free'
format TEXT, -- 'center' | 'wild'
letters TEXT, -- the wheel's 7 letters (for context)
reason TEXT, -- why it was rejected (e.g. 'not in the word list')
status TEXT NOT NULL DEFAULT 'pending', -- 'pending' | 'approved' | 'blocked' | 'dismissed'
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_bloom_reports_status ON bloom_word_reports(status, created_at);
CREATE TABLE IF NOT EXISTS user_follows (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
kind TEXT NOT NULL, -- 'source' | 'tag'
value TEXT NOT NULL, -- source id (as text) or tag key
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE (user_id, kind, value)
);
CREATE TABLE IF NOT EXISTS digest_sends (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
brief_date TEXT NOT NULL,
item_count INTEGER NOT NULL DEFAULT 0,
sent_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE (user_id, brief_date)
);
-- Publishing Desk: a platform-NEUTRAL outbound-share record (X first; Bluesky /
-- Threads / newsletter later reuse this). One row per (article, platform); the
-- queue tops up without ever overwriting saved text/handles. opened != posted —
-- Web Intents can't confirm a post, so the human confirms the terminal state.
CREATE TABLE IF NOT EXISTS outbound_shares (
id INTEGER PRIMARY KEY AUTOINCREMENT,
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
platform TEXT NOT NULL DEFAULT 'x',
status TEXT NOT NULL DEFAULT 'queued', -- queued|drafting|opened|posted|skipped|snoozed
social_score INTEGER, -- LLM "stop-scrolling" interest (0-10)
rationale TEXT, -- why someone would stop scrolling
talking_points TEXT, -- JSON array of factual points
angle TEXT, -- a suggested conversational angle
entities TEXT, -- JSON array of raw named entities (LLM-extracted)
suggested_handles TEXT, -- JSON array of {handle, profile_url, via}
draft_text TEXT, -- autosaved in-progress blurb (the human writes it)
final_text TEXT, -- what was actually posted (teaches voice later)
share_url TEXT, -- the exact /a/{id}?utm... link used
post_url TEXT, -- the resulting tweet URL, if captured
snooze_until TEXT, -- 'not right now' (re-eligible after this)
opened_at TEXT,
posted_at TEXT,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE (article_id, platform)
);
CREATE INDEX IF NOT EXISTS idx_outbound_shares_status ON outbound_shares(platform, status);
-- Verified handle directory — the LLM only ever proposes NAMES; the @handle comes
-- only from here (or a source's own x_handle). Aliases resolve consistently by each
-- having its own row pointing at the same handle (e.g. "Johns Hopkins University"
-- and "Johns Hopkins").
CREATE TABLE IF NOT EXISTS entity_handles (
id INTEGER PRIMARY KEY AUTOINCREMENT,
entity_name TEXT NOT NULL, -- display name as entered
normalized_name TEXT NOT NULL, -- lowercased/stripped match key
platform TEXT NOT NULL DEFAULT 'x',
handle TEXT NOT NULL, -- e.g. @AnthropicAI
profile_url TEXT,
verified_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
UNIQUE (normalized_name, platform)
);
"""
def connect(db_path: Path | str) -> sqlite3.Connection:
    """Open (creating if necessary) the SQLite database at *db_path*.

    The parent directory is created when missing. The returned connection
    yields ``sqlite3.Row`` rows, enforces foreign keys, waits up to five
    seconds on a locked database, and may be used from threads other than
    the one that created it (``check_same_thread=False``).

    File-backed databases are additionally switched to WAL journaling with
    ``synchronous = NORMAL``; the special ``":memory:"`` path skips both.
    """
    location = Path(db_path)
    location.parent.mkdir(parents=True, exist_ok=True)

    connection = sqlite3.connect(location, check_same_thread=False)
    connection.row_factory = sqlite3.Row

    # busy_timeout rides out the brief moments the single writer lock is held.
    pragmas = ["PRAGMA foreign_keys = ON", "PRAGMA busy_timeout = 5000"]
    if str(location) != ":memory:":
        # WAL lets the API write account data while the ingestion cycle writes
        # content concurrently (readers never block the writer). Re-applying
        # these settings to an already-configured database changes nothing.
        pragmas += ["PRAGMA journal_mode = WAL", "PRAGMA synchronous = NORMAL"]

    for statement in pragmas:
        connection.execute(statement)
    return connection
def init_db(conn: sqlite3.Connection) -> None:
    """Bring *conn*'s database up to the current schema and commit.

    Runs the ``SCHEMA`` script first (creating whatever is missing), then
    ``_migrate`` to add columns that older databases lack.
    """
    # Step 1: create any missing tables / indexes.
    conn.executescript(SCHEMA)
    # Step 2: patch tables that pre-date newer columns.
    _migrate(conn)
    # Step 3: persist everything done above.
    conn.commit()
def _table_columns(conn: sqlite3.Connection, table: str) -> set[str]:
    """Return the column names of *table* (an empty set if it has none / is absent).

    ``PRAGMA table_info`` rows are read positionally (index 1 is the column
    name), so this works whether or not the connection's row factory is
    ``sqlite3.Row``.
    """
    return {row[1] for row in conn.execute(f"PRAGMA table_info({table})")}


def _migrate(conn: sqlite3.Connection) -> None:
    """Add columns introduced after the initial schema to existing databases.

    CREATE TABLE IF NOT EXISTS never alters an existing table, so new columns
    need an explicit, idempotent ALTER guarded by the current column set.
    Running this repeatedly is safe: every ALTER is skipped once its column
    exists. Table and column names interpolated into SQL below are fixed
    literals from this function, never external input.

    Args:
        conn: Open connection to the database being upgraded. The caller is
            responsible for committing.
    """
    score_cols = _table_columns(conn, "article_scores")
    for column in ("topic", "flavor", "language"):
        if column not in score_cols:
            conn.execute(f"ALTER TABLE article_scores ADD COLUMN {column} TEXT")

    # users.avatar_url (Google pictures) + is_admin (admin dashboard) and the
    # digest columns were added later. Skipped entirely if the table is absent.
    user_cols = _table_columns(conn, "users")
    user_additions = {
        "avatar_url": "TEXT",
        "is_admin": "INTEGER NOT NULL DEFAULT 0",
        "digest_enabled": "INTEGER NOT NULL DEFAULT 0",
        "digest_unsub_token": "TEXT",
    }
    if user_cols:
        for column, decl in user_additions.items():
            if column not in user_cols:
                conn.execute(f"ALTER TABLE users ADD COLUMN {column} {decl}")

    article_cols = _table_columns(conn, "articles")
    if "duplicate_of" not in article_cols:
        # Match the SCHEMA declaration (ON DELETE SET NULL) so that deleting an
        # article with duplicates behaves the same on upgraded and fresh
        # databases. Databases that already received the column from an earlier
        # version of this migration are not retrofitted here.
        conn.execute(
            "ALTER TABLE articles ADD COLUMN duplicate_of INTEGER "
            "REFERENCES articles(id) ON DELETE SET NULL"
        )
    if "image_checked_at" not in article_cols:
        conn.execute("ALTER TABLE articles ADD COLUMN image_checked_at TEXT")
    # Created here (not in SCHEMA) so it runs after the column exists on upgrades.
    conn.execute("CREATE INDEX IF NOT EXISTS idx_articles_duplicate_of ON articles(duplicate_of)")

    source_cols = _table_columns(conn, "sources")
    health_columns = {
        "last_success_at": "TEXT",
        "last_error_at": "TEXT",
        "last_error": "TEXT",
        "consecutive_failures": "INTEGER NOT NULL DEFAULT 0",
        "review_flag": "INTEGER NOT NULL DEFAULT 0",
        "review_reason": "TEXT",
        "paywall_override": "TEXT",  # NULL = use domain rule · 'free' · 'paywalled'
    }
    for column, decl in health_columns.items():
        if column not in source_cols:
            conn.execute(f"ALTER TABLE sources ADD COLUMN {column} {decl}")
    # Publishing Desk: the source's own verified X handle (suggested when sharing).
    if "x_handle" not in source_cols:
        conn.execute("ALTER TABLE sources ADD COLUMN x_handle TEXT")
    # Lifecycle: status (active/paused/retired) + content_visible. `active` is
    # kept as a synced mirror so legacy code (scheduler/CLI) keeps working.
    if "status" not in source_cols:
        conn.execute("ALTER TABLE sources ADD COLUMN status TEXT NOT NULL DEFAULT 'active'")
        # Backfill once, from the legacy flag, at the moment the column appears.
        conn.execute("UPDATE sources SET status = CASE WHEN active = 1 THEN 'active' ELSE 'paused' END")
    if "content_visible" not in source_cols:
        conn.execute("ALTER TABLE sources ADD COLUMN content_visible INTEGER NOT NULL DEFAULT 1")
    if "retry_after_at" not in source_cols:
        conn.execute("ALTER TABLE sources ADD COLUMN retry_after_at TEXT")

    # feedback.read_at (admin inbox read/unread) added later.
    fb_cols = _table_columns(conn, "feedback")
    if fb_cols and "read_at" not in fb_cols:
        conn.execute("ALTER TABLE feedback ADD COLUMN read_at TEXT")

    # feedback_replies.message_html (rendered Markdown subset) added later.
    rep_cols = _table_columns(conn, "feedback_replies")
    if rep_cols and "message_html" not in rep_cols:
        conn.execute("ALTER TABLE feedback_replies ADD COLUMN message_html TEXT")

    # article_summaries: structured "Why it belongs" fields added later.
    sum_cols = _table_columns(conn, "article_summaries")
    for column in ("what_happened", "why_matters", "why_belongs"):
        if sum_cols and column not in sum_cols:
            conn.execute(f"ALTER TABLE article_summaries ADD COLUMN {column} TEXT")