068073423f
Local-first RSS/Atom ingestion pipeline with metadata-only storage, heuristic + local-LLM scoring, and daily brief builder. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
56 lines
1.9 KiB
Python
56 lines
1.9 KiB
Python
from __future__ import annotations
|
|
|
|
import sqlite3
|
|
import tomllib
|
|
from pathlib import Path
|
|
|
|
|
|
def load_sources(path: Path | str) -> list[dict]:
|
|
data = tomllib.loads(Path(path).read_text(encoding="utf-8"))
|
|
sources = data.get("sources", [])
|
|
if not isinstance(sources, list):
|
|
raise ValueError("sources.toml must contain [[sources]] entries")
|
|
return sources
|
|
|
|
|
|
def upsert_sources(conn: sqlite3.Connection, source_defs: list[dict]) -> int:
    """Insert or update one ``sources`` row per definition, keyed on ``feed_url``.

    A definition whose ``feed_url`` already exists overwrites that row's
    other columns; otherwise a new row is inserted. ``updated_at`` is set to
    ``CURRENT_TIMESTAMP`` in both cases.

    Each definition must provide ``name`` and ``feed_url``. Optional keys and
    their fallbacks: ``homepage_url`` (NULL), ``source_type`` (``"rss"``),
    ``default_category`` (NULL), ``trust_score`` (5), ``pr_risk_score`` (3),
    ``active`` (true, stored as 1/0), ``poll_interval_minutes`` (60) and
    ``notes`` (NULL). The three numeric fields are passed through ``int()``.

    The batch is applied atomically: every definition is validated before
    any statement runs, and the statements execute inside the connection's
    transaction context, which commits on success and rolls back on error.
    Note that the commit/rollback applies to the connection's whole open
    transaction, including changes the caller made earlier and left pending.

    Args:
        conn: Open SQLite connection to a database that has a ``sources``
            table with a uniqueness constraint on ``feed_url``.
        source_defs: Source definitions, e.g. as returned by ``load_sources``.

    Returns:
        The number of definitions processed.

    Raises:
        KeyError: If a definition lacks ``name`` or ``feed_url``.
        ValueError: If a numeric field cannot be converted with ``int()``.
        sqlite3.Error: If the database rejects a statement.
    """
    upsert_sql = """
        INSERT INTO sources (
            name, homepage_url, feed_url, source_type, default_category,
            trust_score, pr_risk_score, active, poll_interval_minutes, notes,
            updated_at
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
        ON CONFLICT(feed_url) DO UPDATE SET
            name = excluded.name,
            homepage_url = excluded.homepage_url,
            source_type = excluded.source_type,
            default_category = excluded.default_category,
            trust_score = excluded.trust_score,
            pr_risk_score = excluded.pr_risk_score,
            active = excluded.active,
            poll_interval_minutes = excluded.poll_interval_minutes,
            notes = excluded.notes,
            updated_at = CURRENT_TIMESTAMP
    """

    # Build every parameter tuple before touching the database, so a
    # malformed definition fails the whole batch up front instead of leaving
    # the rows that preceded it written but uncommitted.
    rows = [
        (
            source["name"],
            source.get("homepage_url"),
            source["feed_url"],
            source.get("source_type", "rss"),
            source.get("default_category"),
            int(source.get("trust_score", 5)),
            int(source.get("pr_risk_score", 3)),
            1 if source.get("active", True) else 0,
            int(source.get("poll_interval_minutes", 60)),
            source.get("notes"),
        )
        for source in source_defs
    ]

    # The connection context manager commits when the block succeeds and
    # rolls back if a statement raises (e.g. a constraint violation).
    with conn:
        conn.executemany(upsert_sql, rows)
    return len(rows)
|
|
|