ed814c97b9
- daily_art gains blurb + palette columns (idempotent migration). - art._palette: Pillow median-cut to ~5 hex colors from the cached image (best-effort → [] on any failure). art._blurb: a warm 2-3 sentence "what you're looking at" note grounded in the Met catalogue (title/artist/bio/date/medium/ classification/culture/tags). Prompt leans on context/significance and the title+tags for subject — explicitly NOT asserting literal composition (figure counts/poses) it can't see, since the model can't view the image. Markdown stripped from the output. - pick_daily generates both (client optional → blurb skipped when absent); cycle + art CLI pass an LLM client. /api/art/today exposes blurb + palette. - Backfilled the last 3 days on host (Veteran / Magnolia Vase / Bierstadt). - scripts/art_blurb_palette_backfill.py for in-place backfill (no re-pick). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
672 lines
29 KiB
Python
672 lines
29 KiB
Python
from __future__ import annotations
|
||
|
||
import sqlite3
|
||
from pathlib import Path
|
||
|
||
|
||
SCHEMA = """
|
||
PRAGMA foreign_keys = ON;
|
||
|
||
CREATE TABLE IF NOT EXISTS sources (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
name TEXT NOT NULL UNIQUE,
|
||
homepage_url TEXT,
|
||
feed_url TEXT NOT NULL UNIQUE,
|
||
source_type TEXT NOT NULL DEFAULT 'rss',
|
||
default_category TEXT,
|
||
trust_score INTEGER NOT NULL DEFAULT 5,
|
||
pr_risk_score INTEGER NOT NULL DEFAULT 3,
|
||
active INTEGER NOT NULL DEFAULT 1,
|
||
status TEXT NOT NULL DEFAULT 'active',
|
||
content_visible INTEGER NOT NULL DEFAULT 1,
|
||
poll_interval_minutes INTEGER NOT NULL DEFAULT 60,
|
||
notes TEXT,
|
||
last_success_at TEXT,
|
||
last_error_at TEXT,
|
||
last_error TEXT,
|
||
consecutive_failures INTEGER NOT NULL DEFAULT 0,
|
||
retry_after_at TEXT,
|
||
review_flag INTEGER NOT NULL DEFAULT 0,
|
||
review_reason TEXT,
|
||
x_handle TEXT, -- the source's own verified X handle, if known
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS articles (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
|
||
canonical_url TEXT NOT NULL,
|
||
title TEXT NOT NULL,
|
||
description TEXT,
|
||
author TEXT,
|
||
published_at TEXT,
|
||
discovered_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
image_url TEXT,
|
||
language TEXT,
|
||
raw_guid TEXT,
|
||
url_hash TEXT NOT NULL UNIQUE,
|
||
title_hash TEXT,
|
||
duplicate_of INTEGER REFERENCES articles(id) ON DELETE SET NULL,
|
||
image_checked_at TEXT,
|
||
source_words INTEGER, -- full-article word count (metadata only; never the body)
|
||
read_checked_at TEXT, -- when we last tried to count words (retry guard)
|
||
FOREIGN KEY (source_id) REFERENCES sources(id)
|
||
);
|
||
|
||
CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles(published_at);
|
||
CREATE INDEX IF NOT EXISTS idx_articles_source_id ON articles(source_id);
|
||
CREATE INDEX IF NOT EXISTS idx_articles_title_hash ON articles(title_hash);
|
||
|
||
CREATE TABLE IF NOT EXISTS article_scores (
|
||
article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
|
||
constructive_score INTEGER,
|
||
cortisol_score INTEGER,
|
||
ragebait_score INTEGER,
|
||
agency_score INTEGER,
|
||
human_benefit_score INTEGER,
|
||
novelty_score INTEGER,
|
||
pr_risk_score INTEGER,
|
||
accepted INTEGER,
|
||
reason_code TEXT,
|
||
reason_text TEXT,
|
||
topic TEXT,
|
||
flavor TEXT,
|
||
language TEXT,
|
||
model_name TEXT,
|
||
scored_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS article_tags (
|
||
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
|
||
tag TEXT NOT NULL,
|
||
PRIMARY KEY (article_id, tag)
|
||
);
|
||
|
||
CREATE INDEX IF NOT EXISTS idx_article_tags_tag ON article_tags(tag);
|
||
|
||
CREATE TABLE IF NOT EXISTS article_embeddings (
|
||
article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
|
||
vector BLOB NOT NULL,
|
||
dim INTEGER NOT NULL,
|
||
model TEXT NOT NULL,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS ingest_runs (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
source_id INTEGER REFERENCES sources(id) ON DELETE SET NULL,
|
||
started_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
finished_at TEXT,
|
||
status TEXT NOT NULL DEFAULT 'running',
|
||
items_seen INTEGER NOT NULL DEFAULT 0,
|
||
items_inserted INTEGER NOT NULL DEFAULT 0,
|
||
items_duplicate INTEGER NOT NULL DEFAULT 0,
|
||
error TEXT
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS source_candidates (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
feed_url TEXT NOT NULL UNIQUE,
|
||
homepage_url TEXT,
|
||
name TEXT,
|
||
status TEXT NOT NULL DEFAULT 'suggested',
|
||
preview_json TEXT,
|
||
notes TEXT,
|
||
last_previewed_at TEXT,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS daily_briefs (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
brief_date TEXT NOT NULL UNIQUE,
|
||
title TEXT NOT NULL,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
notes TEXT
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS daily_brief_items (
|
||
brief_id INTEGER NOT NULL REFERENCES daily_briefs(id) ON DELETE CASCADE,
|
||
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
|
||
rank INTEGER NOT NULL,
|
||
selection_reason TEXT,
|
||
PRIMARY KEY (brief_id, article_id),
|
||
UNIQUE (brief_id, rank)
|
||
);
|
||
|
||
-- ---- Accounts ----------------------------------------------------------------
|
||
-- Self-hosted, minimal-PII. The host ingestion owns the content tables above;
|
||
-- the API owns these (writes happen via the API, so the DB runs in WAL mode).
|
||
|
||
CREATE TABLE IF NOT EXISTS users (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
email TEXT NOT NULL UNIQUE,
|
||
display_name TEXT,
|
||
avatar_url TEXT,
|
||
is_admin INTEGER NOT NULL DEFAULT 0,
|
||
digest_enabled INTEGER NOT NULL DEFAULT 0,
|
||
digest_unsub_token TEXT,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
-- One row per sign-in method linked to a user; lets Google + magic-link
|
||
-- (same verified email) resolve to a single account.
|
||
CREATE TABLE IF NOT EXISTS identities (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
||
provider TEXT NOT NULL, -- 'email' | 'google' | 'apple'
|
||
provider_subject TEXT NOT NULL, -- email address, or the provider's stable user id
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
UNIQUE (provider, provider_subject)
|
||
);
|
||
CREATE INDEX IF NOT EXISTS idx_identities_user ON identities(user_id);
|
||
|
||
-- Single-use, short-lived magic-link tokens (stored hashed).
|
||
CREATE TABLE IF NOT EXISTS login_tokens (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
email TEXT NOT NULL,
|
||
token_hash TEXT NOT NULL UNIQUE,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
expires_at TEXT NOT NULL,
|
||
consumed_at TEXT
|
||
);
|
||
CREATE INDEX IF NOT EXISTS idx_login_tokens_email ON login_tokens(email);
|
||
|
||
-- Active sessions (opaque token stored hashed); validated for cookie or bearer.
|
||
CREATE TABLE IF NOT EXISTS sessions (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
||
token_hash TEXT NOT NULL UNIQUE,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
last_seen_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
expires_at TEXT NOT NULL,
|
||
user_agent TEXT
|
||
);
|
||
CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions(user_id);
|
||
|
||
CREATE TABLE IF NOT EXISTS saved_articles (
|
||
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
||
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
|
||
saved_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
PRIMARY KEY (user_id, article_id)
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS user_history (
|
||
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
||
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
|
||
event TEXT NOT NULL DEFAULT 'seen', -- 'seen' | 'dismissed'
|
||
at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
PRIMARY KEY (user_id, article_id, event)
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS user_prefs (
|
||
user_id INTEGER PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
|
||
prefs_json TEXT NOT NULL DEFAULT '{}',
|
||
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
-- Our OWN short summary of an article (generated on demand, cached forever).
|
||
-- We store only our derived summary text — never the publisher's article body.
|
||
CREATE TABLE IF NOT EXISTS article_summaries (
|
||
article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
|
||
summary TEXT NOT NULL,
|
||
what_happened TEXT,
|
||
why_matters TEXT,
|
||
why_belongs TEXT,
|
||
model TEXT,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
-- Where a story is ABOUT (subject geography), kept SEPARATE from article_scores so
|
||
-- durable geography isn't coupled to volatile scoring/acceptance. "local" is never
|
||
-- stored here — it's relative to the reader; the UI computes "Near you" by comparing
|
||
-- these places to the visitor's chosen home. geo_version lets us re-backfill cleanly
|
||
-- when the prompt/taxonomy changes. 'global' is a real category, not a failure.
|
||
CREATE TABLE IF NOT EXISTS article_geo (
|
||
article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
|
||
breadth TEXT NOT NULL DEFAULT 'unknown', -- locality|regional|national|multinational|global|unknown
|
||
confidence TEXT NOT NULL DEFAULT 'low', -- high|medium|low
|
||
rationale TEXT,
|
||
geo_version TEXT,
|
||
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
-- 0..N normalized places per article (a story can span regions). Codes are ISO
|
||
-- (country = alpha-2, state = US 2-letter / ISO-3166-2 subdivision), normalized in
|
||
-- code — never trusting the model's free text.
|
||
CREATE TABLE IF NOT EXISTS article_places (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
|
||
country_code TEXT,
|
||
state_code TEXT,
|
||
locality TEXT,
|
||
ord INTEGER NOT NULL DEFAULT 0
|
||
);
|
||
CREATE INDEX IF NOT EXISTS idx_article_places_article ON article_places(article_id);
|
||
CREATE INDEX IF NOT EXISTS idx_article_places_country ON article_places(country_code);
|
||
CREATE INDEX IF NOT EXISTS idx_article_geo_breadth ON article_geo(breadth);
|
||
|
||
-- Daily Art (the /art room). art_pool = a curated set of public-domain, highlighted
|
||
-- museum object IDs (so the daily pick never hits a potsherd). daily_art = one cached
|
||
-- piece per day (metadata + a locally-cached image), so the homepage never waits on or
|
||
-- hotlinks the museum. shown_at lets us avoid repeating a piece too soon.
|
||
CREATE TABLE IF NOT EXISTS art_pool (
|
||
object_id INTEGER NOT NULL,
|
||
source TEXT NOT NULL DEFAULT 'met',
|
||
shown_at TEXT,
|
||
blocked INTEGER NOT NULL DEFAULT 0, -- manual lever: skip an odd/unsuitable piece
|
||
added_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
PRIMARY KEY (source, object_id)
|
||
);
|
||
CREATE TABLE IF NOT EXISTS daily_art (
|
||
art_date TEXT PRIMARY KEY,
|
||
source TEXT NOT NULL DEFAULT 'met',
|
||
object_id INTEGER NOT NULL,
|
||
title TEXT,
|
||
artist TEXT,
|
||
date_text TEXT,
|
||
medium TEXT,
|
||
department TEXT,
|
||
credit TEXT, -- museum credit line
|
||
source_url TEXT, -- canonical museum object page
|
||
image_file TEXT, -- our cached (web-large) image
|
||
image_url_full TEXT, -- source full-res URL, for a later richer /art view
|
||
is_public_domain INTEGER, -- license marker (CC0/public domain), stored for citizenship
|
||
blurb TEXT, -- LLM "museum guide" note: what you're looking at (cached)
|
||
palette TEXT, -- JSON array of hex colors extracted from the image
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
-- "Small joys" daily features. On This Day: a good/neutral thing that happened on
|
||
-- today's calendar date, harvested + tone-filtered into a pool, then one picked per day.
|
||
CREATE TABLE IF NOT EXISTS onthisday_pool (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
source TEXT NOT NULL DEFAULT 'wikimedia', -- multi-source ready (wikimedia | admin | ...)
|
||
md TEXT NOT NULL, -- 'MM-DD'
|
||
year INTEGER,
|
||
ckey TEXT NOT NULL UNIQUE, -- dedup hash so re-harvest never duplicates
|
||
text TEXT NOT NULL,
|
||
summary TEXT,
|
||
image_url TEXT,
|
||
page_url TEXT,
|
||
shown_at TEXT, -- last date this was the pick (no-soon-repeat)
|
||
blocked INTEGER NOT NULL DEFAULT 0, -- admin lever: never pick this
|
||
featured INTEGER NOT NULL DEFAULT 0, -- admin lever: prefer this for its date
|
||
added_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
CREATE TABLE IF NOT EXISTS daily_onthisday (
|
||
feature_date TEXT PRIMARY KEY, -- 'YYYY-MM-DD'
|
||
pool_id INTEGER NOT NULL,
|
||
source TEXT NOT NULL DEFAULT 'wikimedia',
|
||
md TEXT NOT NULL,
|
||
year INTEGER,
|
||
text TEXT,
|
||
summary TEXT,
|
||
image_url TEXT,
|
||
page_url TEXT,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
-- Quote of the Day: curated public-domain quotes; one picked per day. `meaning` is an
|
||
-- AI explainer of the (real) quote, filled lazily the first time it's shown.
|
||
CREATE TABLE IF NOT EXISTS quote_pool (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
source TEXT NOT NULL DEFAULT 'curated',
|
||
ckey TEXT NOT NULL UNIQUE,
|
||
text TEXT NOT NULL,
|
||
author TEXT,
|
||
work TEXT,
|
||
year TEXT,
|
||
meaning TEXT,
|
||
shown_at TEXT,
|
||
blocked INTEGER NOT NULL DEFAULT 0,
|
||
featured INTEGER NOT NULL DEFAULT 0,
|
||
added_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
CREATE TABLE IF NOT EXISTS daily_quote (
|
||
feature_date TEXT PRIMARY KEY,
|
||
pool_id INTEGER NOT NULL,
|
||
source TEXT NOT NULL DEFAULT 'curated',
|
||
text TEXT, author TEXT, work TEXT, year TEXT, meaning TEXT,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
-- Word of the Day: LLM-proposed positive words, validated/enriched against a real
|
||
-- dictionary (definition, IPA, examples, cached audio clip); one picked per day.
|
||
CREATE TABLE IF NOT EXISTS wotd_pool (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
source TEXT NOT NULL DEFAULT 'llm',
|
||
word TEXT NOT NULL UNIQUE,
|
||
part_of_speech TEXT,
|
||
phonetic TEXT, -- IPA
|
||
audio_file TEXT, -- our cached pronunciation clip (or null → browser TTS)
|
||
audio_url TEXT, -- source clip URL
|
||
definition TEXT NOT NULL, -- raw dictionary gloss (anchor / ground truth)
|
||
examples TEXT, -- JSON array of raw dictionary example sentences (anchor)
|
||
gloss TEXT, -- LLM plain-language rewrite of the definition (for display)
|
||
usage TEXT, -- JSON array of LLM everyday example sentences (for display)
|
||
shown_at TEXT,
|
||
blocked INTEGER NOT NULL DEFAULT 0,
|
||
featured INTEGER NOT NULL DEFAULT 0,
|
||
added_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
CREATE TABLE IF NOT EXISTS daily_wotd (
|
||
feature_date TEXT PRIMARY KEY,
|
||
pool_id INTEGER NOT NULL,
|
||
word TEXT, part_of_speech TEXT, phonetic TEXT, audio_file TEXT,
|
||
definition TEXT, examples TEXT, gloss TEXT, usage TEXT,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
-- Privacy-respecting, first-party analytics. NO IP / user-agent / referrer / raw
|
||
-- URL. visitor_hash is a hash of a random localStorage token (never email/IP).
|
||
-- The UNIQUE key dedups to one row per (kind, article, visitor, day) — that both
|
||
-- caps volume and makes counts mean "distinct visitor-days". Groupings are derived
|
||
-- from article_id at query time, never stored here.
|
||
CREATE TABLE IF NOT EXISTS events (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
kind TEXT NOT NULL,
|
||
article_id INTEGER NOT NULL DEFAULT 0,
|
||
visitor_hash TEXT NOT NULL DEFAULT '',
|
||
day TEXT NOT NULL,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
UNIQUE (kind, article_id, visitor_hash, day)
|
||
);
|
||
CREATE INDEX IF NOT EXISTS idx_events_day ON events(day);
|
||
CREATE INDEX IF NOT EXISTS idx_events_kind ON events(kind);
|
||
CREATE INDEX IF NOT EXISTS idx_events_article ON events(article_id);
|
||
|
||
-- User feedback (idea / concern / bug / praise). Anonymous-friendly; optional
|
||
-- contact email only if the person wants a reply. visitor_hash is for rate-limit
|
||
-- only (the same hashed anonymous token used by analytics).
|
||
CREATE TABLE IF NOT EXISTS feedback (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
category TEXT NOT NULL DEFAULT 'other',
|
||
message TEXT NOT NULL,
|
||
contact_email TEXT,
|
||
user_id INTEGER REFERENCES users(id) ON DELETE SET NULL,
|
||
visitor_hash TEXT NOT NULL DEFAULT '',
|
||
day TEXT NOT NULL,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
read_at TEXT
|
||
);
|
||
CREATE INDEX IF NOT EXISTS idx_feedback_created ON feedback(created_at);
|
||
|
||
CREATE TABLE IF NOT EXISTS feedback_replies (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
feedback_id INTEGER NOT NULL REFERENCES feedback(id) ON DELETE CASCADE,
|
||
user_id INTEGER REFERENCES users(id) ON DELETE SET NULL,
|
||
message TEXT NOT NULL,
|
||
message_html TEXT,
|
||
sent_to TEXT NOT NULL,
|
||
sent_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
CREATE INDEX IF NOT EXISTS idx_feedback_replies_fid ON feedback_replies(feedback_id);
|
||
|
||
CREATE TABLE IF NOT EXISTS wordsearch_themes (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
theme TEXT NOT NULL,
|
||
words_json TEXT NOT NULL,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS client_errors (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
reason TEXT NOT NULL DEFAULT '',
|
||
path TEXT NOT NULL DEFAULT '',
|
||
user_agent TEXT NOT NULL DEFAULT '',
|
||
app_version TEXT NOT NULL DEFAULT '',
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS word_pool (
|
||
word TEXT NOT NULL,
|
||
variant TEXT NOT NULL, -- '5' | '6'
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
PRIMARY KEY (variant, word)
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS word_pool_removed (
|
||
word TEXT NOT NULL,
|
||
variant TEXT NOT NULL, -- '5' | '6'
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
PRIMARY KEY (variant, word)
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS daily_puzzles (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
puzzle_date TEXT NOT NULL,
|
||
game TEXT NOT NULL, -- 'word' | 'wordsearch'
|
||
variant TEXT NOT NULL DEFAULT '',
|
||
payload_json TEXT NOT NULL,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
UNIQUE (puzzle_date, game, variant)
|
||
);
|
||
|
||
-- Full-text search over the PUBLIC article corpus (title/description/source/tags).
|
||
-- Standalone FTS5 (not external-content) since the searchable text spans tables;
|
||
-- rebuilt from the accepted, non-duplicate set on each ingest cycle (+ lazily).
|
||
CREATE VIRTUAL TABLE IF NOT EXISTS article_search USING fts5(
|
||
article_id UNINDEXED, title, body, source_name, tags
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS game_state (
|
||
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
||
game TEXT NOT NULL, -- 'word' | 'wordsearch'
|
||
variant TEXT NOT NULL, -- '5'|'6' | 'small'|'med'|'large'
|
||
puzzle_date TEXT NOT NULL,
|
||
state_json TEXT NOT NULL, -- per-puzzle progress; merged server-side on save
|
||
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
PRIMARY KEY (user_id, game, variant, puzzle_date)
|
||
);
|
||
|
||
-- Bloom runtime word curation (no deploy needed). The accepted set is computed
|
||
-- live as: broad dictionary ∪ {allow} − {block}. Admin-managed; one row per word.
|
||
CREATE TABLE IF NOT EXISTS bloom_word_overrides (
|
||
word TEXT PRIMARY KEY, -- lowercase
|
||
action TEXT NOT NULL, -- 'allow' | 'block'
|
||
reason TEXT,
|
||
created_by TEXT,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
|
||
-- Player "this should count" reports → admin queue (approve→allow / block / dismiss).
|
||
CREATE TABLE IF NOT EXISTS bloom_word_reports (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
word TEXT NOT NULL, -- lowercase
|
||
puzzle_date TEXT,
|
||
mode TEXT, -- 'daily' | 'free'
|
||
format TEXT, -- 'center' | 'wild'
|
||
letters TEXT, -- the wheel's 7 letters (for context)
|
||
reason TEXT, -- why it was rejected (e.g. 'not in the word list')
|
||
status TEXT NOT NULL DEFAULT 'pending', -- 'pending' | 'approved' | 'blocked' | 'dismissed'
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||
);
|
||
CREATE INDEX IF NOT EXISTS idx_bloom_reports_status ON bloom_word_reports(status, created_at);
|
||
|
||
CREATE TABLE IF NOT EXISTS user_follows (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
||
kind TEXT NOT NULL, -- 'source' | 'tag'
|
||
value TEXT NOT NULL, -- source id (as text) or tag key
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
UNIQUE (user_id, kind, value)
|
||
);
|
||
|
||
CREATE TABLE IF NOT EXISTS digest_sends (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
|
||
brief_date TEXT NOT NULL,
|
||
item_count INTEGER NOT NULL DEFAULT 0,
|
||
sent_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
UNIQUE (user_id, brief_date)
|
||
);
|
||
|
||
-- Publishing Desk: a platform-NEUTRAL outbound-share record (X first; Bluesky /
|
||
-- Threads / newsletter later reuse this). One row per (article, platform); the
|
||
-- queue tops up without ever overwriting saved text/handles. opened != posted —
|
||
-- Web Intents can't confirm a post, so the human confirms the terminal state.
|
||
CREATE TABLE IF NOT EXISTS outbound_shares (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
|
||
platform TEXT NOT NULL DEFAULT 'x',
|
||
status TEXT NOT NULL DEFAULT 'queued', -- queued|drafting|opened|posted|skipped|snoozed
|
||
social_score INTEGER, -- LLM "stop-scrolling" interest (0-10)
|
||
rationale TEXT, -- why someone would stop scrolling
|
||
talking_points TEXT, -- JSON array of factual points
|
||
angle TEXT, -- a suggested conversational angle
|
||
entities TEXT, -- JSON array of raw named entities (LLM-extracted)
|
||
suggested_handles TEXT, -- JSON array of {handle, profile_url, via}
|
||
draft_text TEXT, -- autosaved in-progress blurb (the human writes it)
|
||
final_text TEXT, -- what was actually posted (teaches voice later)
|
||
share_url TEXT, -- the exact /a/{id}?utm... link used
|
||
post_url TEXT, -- the resulting tweet URL, if captured
|
||
snooze_until TEXT, -- 'not right now' (re-eligible after this)
|
||
opened_at TEXT,
|
||
posted_at TEXT,
|
||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
UNIQUE (article_id, platform)
|
||
);
|
||
CREATE INDEX IF NOT EXISTS idx_outbound_shares_status ON outbound_shares(platform, status);
|
||
|
||
-- Verified handle directory — the LLM only ever proposes NAMES; the @handle comes
|
||
-- only from here (or a source's own x_handle). Aliases resolve consistently by each
|
||
-- having its own row pointing at the same handle (e.g. "Johns Hopkins University"
|
||
-- and "Johns Hopkins").
|
||
CREATE TABLE IF NOT EXISTS entity_handles (
|
||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||
entity_name TEXT NOT NULL, -- display name as entered
|
||
normalized_name TEXT NOT NULL, -- lowercased/stripped match key
|
||
platform TEXT NOT NULL DEFAULT 'x',
|
||
handle TEXT NOT NULL, -- e.g. @AnthropicAI
|
||
profile_url TEXT,
|
||
verified_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||
UNIQUE (normalized_name, platform)
|
||
);
|
||
"""
|
||
|
||
|
||
def connect(db_path: Path | str) -> sqlite3.Connection:
    """Open the SQLite database at *db_path* and apply per-connection settings.

    The parent directory is created when missing. The returned connection
    yields ``sqlite3.Row`` rows, may be used from threads other than the one
    that created it, enforces foreign keys and waits up to 5000 ms on a
    locked database. Every path except ``":memory:"`` is switched to WAL
    journaling.
    """
    db_file = Path(db_path)
    db_file.parent.mkdir(parents=True, exist_ok=True)

    connection = sqlite3.connect(db_file, check_same_thread=False)
    connection.row_factory = sqlite3.Row

    # WAL lets the API write account data while the ingestion cycle writes
    # content concurrently (readers never block the writer); busy_timeout rides
    # out the brief moments the single writer lock is held. Both are no-ops if
    # already set. The statements run in exactly this order.
    pragmas = ["PRAGMA foreign_keys = ON", "PRAGMA busy_timeout = 5000"]
    if str(db_file) != ":memory:":
        pragmas.append("PRAGMA journal_mode = WAL")
    pragmas.append("PRAGMA synchronous = NORMAL")

    for statement in pragmas:
        connection.execute(statement)
    return connection
|
||
|
||
|
||
def init_db(conn: sqlite3.Connection) -> None:
    """Create the schema on *conn*, apply column migrations, then commit.

    Runs the ``SCHEMA`` script first so every table exists, then ``_migrate``
    to add columns that older databases lack, and finally commits the
    connection.
    """
    # Creates any missing tables/indexes; existing ones are left untouched.
    conn.executescript(SCHEMA)
    # Must follow the script: it ALTERs tables the script has just ensured exist.
    _migrate(conn)
    conn.commit()
|
||
|
||
|
||
def _migrate(conn: sqlite3.Connection) -> None:
|
||
"""Add columns introduced after the initial schema to existing databases.
|
||
|
||
CREATE TABLE IF NOT EXISTS never alters an existing table, so new columns
|
||
need an explicit, idempotent ALTER guarded by the current column set.
|
||
"""
|
||
score_cols = {row["name"] for row in conn.execute("PRAGMA table_info(article_scores)")}
|
||
for column in ("topic", "flavor", "language"):
|
||
if column not in score_cols:
|
||
conn.execute(f"ALTER TABLE article_scores ADD COLUMN {column} TEXT")
|
||
|
||
# users.avatar_url (Google pictures) + is_admin (admin dashboard) added later.
|
||
user_tbl = {row["name"] for row in conn.execute("PRAGMA table_info(users)")}
|
||
if user_tbl and "avatar_url" not in user_tbl:
|
||
conn.execute("ALTER TABLE users ADD COLUMN avatar_url TEXT")
|
||
if user_tbl and "is_admin" not in user_tbl:
|
||
conn.execute("ALTER TABLE users ADD COLUMN is_admin INTEGER NOT NULL DEFAULT 0")
|
||
if user_tbl and "digest_enabled" not in user_tbl:
|
||
conn.execute("ALTER TABLE users ADD COLUMN digest_enabled INTEGER NOT NULL DEFAULT 0")
|
||
if user_tbl and "digest_unsub_token" not in user_tbl:
|
||
conn.execute("ALTER TABLE users ADD COLUMN digest_unsub_token TEXT")
|
||
|
||
article_cols = {row["name"] for row in conn.execute("PRAGMA table_info(articles)")}
|
||
if "duplicate_of" not in article_cols:
|
||
conn.execute(
|
||
"ALTER TABLE articles ADD COLUMN duplicate_of INTEGER REFERENCES articles(id)"
|
||
)
|
||
if "image_checked_at" not in article_cols:
|
||
conn.execute("ALTER TABLE articles ADD COLUMN image_checked_at TEXT")
|
||
if "source_words" not in article_cols: # full-article read-time (count only, no body)
|
||
conn.execute("ALTER TABLE articles ADD COLUMN source_words INTEGER")
|
||
if "read_checked_at" not in article_cols:
|
||
conn.execute("ALTER TABLE articles ADD COLUMN read_checked_at TEXT")
|
||
# Created here (not in SCHEMA) so it runs after the column exists on upgrades.
|
||
conn.execute("CREATE INDEX IF NOT EXISTS idx_articles_duplicate_of ON articles(duplicate_of)")
|
||
|
||
source_cols = {row["name"] for row in conn.execute("PRAGMA table_info(sources)")}
|
||
health_columns = {
|
||
"last_success_at": "TEXT",
|
||
"last_error_at": "TEXT",
|
||
"last_error": "TEXT",
|
||
"consecutive_failures": "INTEGER NOT NULL DEFAULT 0",
|
||
"review_flag": "INTEGER NOT NULL DEFAULT 0",
|
||
"review_reason": "TEXT",
|
||
"paywall_override": "TEXT", # NULL = use domain rule · 'free' · 'paywalled'
|
||
}
|
||
for column, decl in health_columns.items():
|
||
if column not in source_cols:
|
||
conn.execute(f"ALTER TABLE sources ADD COLUMN {column} {decl}")
|
||
# Publishing Desk: the source's own verified X handle (suggested when sharing).
|
||
if "x_handle" not in source_cols:
|
||
conn.execute("ALTER TABLE sources ADD COLUMN x_handle TEXT")
|
||
|
||
# Lifecycle: status (active/paused/retired) + content_visible. `active` is
|
||
# kept as a synced mirror so legacy code (scheduler/CLI) keeps working.
|
||
if "status" not in source_cols:
|
||
conn.execute("ALTER TABLE sources ADD COLUMN status TEXT NOT NULL DEFAULT 'active'")
|
||
conn.execute("UPDATE sources SET status = CASE WHEN active = 1 THEN 'active' ELSE 'paused' END")
|
||
if "content_visible" not in source_cols:
|
||
conn.execute("ALTER TABLE sources ADD COLUMN content_visible INTEGER NOT NULL DEFAULT 1")
|
||
if "retry_after_at" not in source_cols:
|
||
conn.execute("ALTER TABLE sources ADD COLUMN retry_after_at TEXT")
|
||
|
||
# Daily Art columns added after the tables first shipped.
|
||
pool_cols = {row["name"] for row in conn.execute("PRAGMA table_info(art_pool)")}
|
||
if pool_cols and "blocked" not in pool_cols:
|
||
conn.execute("ALTER TABLE art_pool ADD COLUMN blocked INTEGER NOT NULL DEFAULT 0")
|
||
art_cols = {row["name"] for row in conn.execute("PRAGMA table_info(daily_art)")}
|
||
if art_cols and "image_url_full" not in art_cols:
|
||
conn.execute("ALTER TABLE daily_art ADD COLUMN image_url_full TEXT")
|
||
if art_cols and "is_public_domain" not in art_cols:
|
||
conn.execute("ALTER TABLE daily_art ADD COLUMN is_public_domain INTEGER")
|
||
for column in ("blurb", "palette"): # richer /art page: guide note + extracted colors
|
||
if art_cols and column not in art_cols:
|
||
conn.execute(f"ALTER TABLE daily_art ADD COLUMN {column} TEXT")
|
||
|
||
# feedback.read_at (admin inbox read/unread) added later.
|
||
fb_cols = {row["name"] for row in conn.execute("PRAGMA table_info(feedback)")}
|
||
if fb_cols and "read_at" not in fb_cols:
|
||
conn.execute("ALTER TABLE feedback ADD COLUMN read_at TEXT")
|
||
|
||
# feedback_replies.message_html (rendered Markdown subset) added later.
|
||
rep_cols = {row["name"] for row in conn.execute("PRAGMA table_info(feedback_replies)")}
|
||
if rep_cols and "message_html" not in rep_cols:
|
||
conn.execute("ALTER TABLE feedback_replies ADD COLUMN message_html TEXT")
|
||
|
||
# article_summaries: structured "Why it belongs" fields added later.
|
||
sum_cols = {row["name"] for row in conn.execute("PRAGMA table_info(article_summaries)")}
|
||
for column in ("what_happened", "why_matters", "why_belongs"):
|
||
if sum_cols and column not in sum_cols:
|
||
conn.execute(f"ALTER TABLE article_summaries ADD COLUMN {column} TEXT")
|
||
|
||
# WOTD display polish: LLM plain-language gloss + everyday example sentences, kept
|
||
# alongside the raw dictionary def/examples (which stay the anchor / ground truth).
|
||
for tbl in ("wotd_pool", "daily_wotd"):
|
||
cols = {row["name"] for row in conn.execute(f"PRAGMA table_info({tbl})")}
|
||
for column in ("gloss", "usage"):
|
||
if cols and column not in cols:
|
||
conn.execute(f"ALTER TABLE {tbl} ADD COLUMN {column} TEXT")
|