Files
upbeatBytes/goodnews/db.py
T
thejayman77 068073423f Initial commit: goodNews constructive-news ingestion prototype
Local-first RSS/Atom ingestion pipeline with metadata-only storage,
heuristic + local-LLM scoring, and daily brief builder.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 00:48:26 +00:00

106 lines
3.1 KiB
Python

from __future__ import annotations
import sqlite3
from pathlib import Path
# Full DDL script for the database, executed as one script by init_db().
#
# Every CREATE statement uses IF NOT EXISTS, so the script can be re-run
# against an already-initialised database. Note that IF NOT EXISTS never
# alters a table that already exists.
#
# Tables:
#   sources           - one row per feed; name and feed_url are unique.
#   articles          - one row per article; url_hash is unique.
#   article_scores    - at most one score row per article (article_id is the PK).
#   ingest_runs       - one row per ingestion run; source_id becomes NULL if
#                       the source is deleted.
#   daily_briefs      - one row per brief_date (unique).
#   daily_brief_items - articles in a brief; rank is unique within a brief.
#
# articles.source_id declares its foreign key exactly once (inline, with
# ON DELETE CASCADE). A second table-level FOREIGN KEY clause on the same
# column would register a duplicate constraint with a different ON DELETE
# action (the default NO ACTION).
SCHEMA = """
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS sources (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL UNIQUE,
    homepage_url TEXT,
    feed_url TEXT NOT NULL UNIQUE,
    source_type TEXT NOT NULL DEFAULT 'rss',
    default_category TEXT,
    trust_score INTEGER NOT NULL DEFAULT 5,
    pr_risk_score INTEGER NOT NULL DEFAULT 3,
    active INTEGER NOT NULL DEFAULT 1,
    poll_interval_minutes INTEGER NOT NULL DEFAULT 60,
    notes TEXT,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS articles (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    source_id INTEGER NOT NULL REFERENCES sources(id) ON DELETE CASCADE,
    canonical_url TEXT NOT NULL,
    title TEXT NOT NULL,
    description TEXT,
    author TEXT,
    published_at TEXT,
    discovered_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    image_url TEXT,
    language TEXT,
    raw_guid TEXT,
    url_hash TEXT NOT NULL UNIQUE,
    title_hash TEXT
);

CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles(published_at);
CREATE INDEX IF NOT EXISTS idx_articles_source_id ON articles(source_id);
CREATE INDEX IF NOT EXISTS idx_articles_title_hash ON articles(title_hash);

CREATE TABLE IF NOT EXISTS article_scores (
    article_id INTEGER PRIMARY KEY REFERENCES articles(id) ON DELETE CASCADE,
    constructive_score INTEGER,
    cortisol_score INTEGER,
    ragebait_score INTEGER,
    agency_score INTEGER,
    human_benefit_score INTEGER,
    novelty_score INTEGER,
    pr_risk_score INTEGER,
    accepted INTEGER,
    reason_code TEXT,
    reason_text TEXT,
    model_name TEXT,
    scored_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS ingest_runs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    source_id INTEGER REFERENCES sources(id) ON DELETE SET NULL,
    started_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    finished_at TEXT,
    status TEXT NOT NULL DEFAULT 'running',
    items_seen INTEGER NOT NULL DEFAULT 0,
    items_inserted INTEGER NOT NULL DEFAULT 0,
    items_duplicate INTEGER NOT NULL DEFAULT 0,
    error TEXT
);

CREATE TABLE IF NOT EXISTS daily_briefs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    brief_date TEXT NOT NULL UNIQUE,
    title TEXT NOT NULL,
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
    notes TEXT
);

CREATE TABLE IF NOT EXISTS daily_brief_items (
    brief_id INTEGER NOT NULL REFERENCES daily_briefs(id) ON DELETE CASCADE,
    article_id INTEGER NOT NULL REFERENCES articles(id) ON DELETE CASCADE,
    rank INTEGER NOT NULL,
    selection_reason TEXT,
    PRIMARY KEY (brief_id, article_id),
    UNIQUE (brief_id, rank)
);
"""
def connect(db_path: Path | str) -> sqlite3.Connection:
path = Path(db_path)
path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(path)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA foreign_keys = ON")
return conn
def init_db(conn: sqlite3.Connection) -> None:
    """Run the module-level ``SCHEMA`` script on *conn*, then commit.

    The CREATE statements in ``SCHEMA`` all use ``IF NOT EXISTS``, so calling
    this on a database that already has the tables does not recreate them.
    """
    conn.executescript(SCHEMA)
    conn.commit()