"""FastAPI service for goodNews.

A read-only JSON API over the ingestion database, plus a small static site
that consumes it. The same endpoints back both the website and any future
companion app; the auto-generated OpenAPI docs at /docs are that shared
contract.

Run with the bundled CLI:

    goodnews serve

Or directly:

    uvicorn goodnews.api:app --host 0.0.0.0 --port 8000

The database path comes from GOODNEWS_DB (falling back to the repo's data
dir), so the API and CLI always read the same file.
"""

from __future__ import annotations

import json
import os
import re
import sqlite3
from collections import Counter
from contextlib import contextmanager
from datetime import datetime, timezone
from pathlib import Path

from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

from . import feeds, queries
from .db import connect, init_db
from .filters import filter_articles, prefs_from_json
from .llm import LocalModelClient
from .moods import MOODS
from .taxonomy import FLAVORS, TOPICS

# Two levels above this file: the directory that holds "data" and "frontend".
ROOT = Path(__file__).resolve().parent.parent
# Database used when GOODNEWS_DB is not set in the environment.
DEFAULT_DB = ROOT.joinpath("data", "goodnews.sqlite3")

# Prefer the built SvelteKit site; fall back to the legacy single-page harness.
FRONTEND_DIR = ROOT / "frontend" / "build"
LEGACY_STATIC = Path(__file__).resolve().parent / "static"
STATIC_DIR = FRONTEND_DIR if FRONTEND_DIR.is_dir() else LEGACY_STATIC

# Upper bound on rows pulled when category counts must be computed in Python
# over the preference-filtered feed.
_COUNT_SCAN_LIMIT = 100000

# Over-fetch sizing for the avoid-terms pass in /api/feed: fetch
# (offset + limit) * factor + pad rows, but never more than the cap.
_OVERFETCH_CAP = 2000
_OVERFETCH_FACTOR = 4
_OVERFETCH_PAD = 50

# Accepts only URLs that begin with http:// or https:// (case-insensitive).
_HTTP_URL_RE = re.compile(r"^https?://", re.IGNORECASE)


def db_path() -> Path:
    """Return the database path: GOODNEWS_DB if set, else DEFAULT_DB."""
    return Path(os.environ.get("GOODNEWS_DB", str(DEFAULT_DB)))


@contextmanager
def get_conn():
    """Yield a connection to the database at db_path().

    The connection is closed when the ``with`` block exits, including when
    the block raises.
    """
    conn = connect(db_path())
    try:
        yield conn
    finally:
        conn.close()


# --- Response models (the companion-app contract) ---------------------------
#
# These models (and the endpoints below) are documented with comments rather
# than docstrings on purpose: pydantic and FastAPI publish docstrings as
# descriptions in the generated OpenAPI schema, which is the shared contract.


# One taxonomy entry: its key and human-readable description.
class Category(BaseModel):
    key: str
    description: str


# Payload of /api/categories: every known topic and flavor.
class CategoriesResponse(BaseModel):
    topics: list[Category]
    flavors: list[Category]


# Number of articles for one (topic, flavor) pair.
class CategoryCount(BaseModel):
    topic: str | None
    flavor: str | None
    count: int


# A single article as exposed to clients.
class Article(BaseModel):
    id: int
    title: str
    description: str | None = None
    url: str
    image_url: str | None = None
    published_at: str | None = None
    source: str
    topic: str | None = None
    flavor: str | None = None
    accepted: bool
    rank_score: int | None = None
    reason_code: str | None = None
    reason_text: str | None = None
    model_name: str | None = None
    rank: int | None = None  # position within a brief, when applicable

    @classmethod
    def from_row(cls, row: dict) -> "Article":
        """Build an Article from a query-result mapping.

        ``id``, ``title``, ``canonical_url`` and ``source_name`` must be
        present (a missing one raises KeyError); every other field is read
        with ``.get`` and defaults to None. The row's ``canonical_url`` and
        ``source_name`` are exposed as ``url`` and ``source``, and
        ``accepted`` is coerced to bool.
        """
        return cls(
            id=row["id"],
            title=row["title"],
            description=row.get("description"),
            url=row["canonical_url"],
            image_url=row.get("image_url"),
            published_at=row.get("published_at"),
            source=row["source_name"],
            topic=row.get("topic"),
            flavor=row.get("flavor"),
            accepted=bool(row.get("accepted")),
            rank_score=row.get("rank_score"),
            reason_code=row.get("reason_code"),
            reason_text=row.get("reason_text"),
            model_name=row.get("model_name"),
            rank=row.get("rank"),
        )


# Payload of /api/feed; ``count`` is the number of items in this page.
class FeedResponse(BaseModel):
    topic: str | None
    flavor: str | None
    count: int
    items: list[Article]


# Payload of /api/brief.
class BriefResponse(BaseModel):
    brief_date: str | None
    title: str | None
    items: list[Article]


# A rejected sample article and the reason it was rejected.
class RejectedExample(BaseModel):
    title: str
    reason: str


# A candidate source as returned by /api/candidates; ``preview`` is the
# decoded ``preview_json`` column when one is stored.
class Candidate(BaseModel):
    id: int
    feed_url: str
    homepage_url: str | None = None
    name: str | None = None
    status: str
    preview: dict | None = None
    notes: str | None = None
    last_previewed_at: str | None = None
    created_at: str | None = None
    updated_at: str | None = None


# Payload of /api/source-preview: sample statistics for one feed URL.
class SourcePreview(BaseModel):
    url: str
    sampled: int
    classified: bool
    accepted: int
    acceptance_rate: float
    avg_cortisol: float
    avg_ragebait: float
    avg_pr_risk: float
    newest_published: str | None
    recent_7d: int
    topic_mix: dict[str, int]
    flavor_mix: dict[str, int]
    examples_accepted: list[str]
    examples_rejected: list[RejectedExample]


# --- App --------------------------------------------------------------------


def create_app() -> FastAPI:
    """Build the FastAPI application.

    Registers the CORS middleware (any origin, GET only), the /healthz and
    /api/* routes, and finally mounts the static site at "/" when STATIC_DIR
    exists. The static mount is added last so the API routes take precedence.
    """
    app = FastAPI(
        title="goodNews API",
        version="0.1.0",
        description="Constructive, uplifting news — metadata and links only.",
    )

    # The website and companion app may live on other origins; allow them.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["GET"],
        allow_headers=["*"],
    )

    # Liveness probe: runs init_db on the connection, then reports how many
    # rows article_scores holds.
    @app.get("/healthz")
    def healthz() -> dict:
        with get_conn() as conn:
            init_db(conn)
            scored = conn.execute("SELECT COUNT(*) FROM article_scores").fetchone()[0]
        return {"status": "ok", "scored_articles": scored}

    # Static taxonomy: every topic and flavor key with its description.
    @app.get("/api/categories", response_model=CategoriesResponse)
    def categories() -> CategoriesResponse:
        return CategoriesResponse(
            topics=[Category(key=k, description=v) for k, v in TOPICS.items()],
            flavors=[Category(key=k, description=v) for k, v in FLAVORS.items()],
        )

    @app.get("/api/moods")
    def moods() -> list[dict]:
        # The humane front door: each mood resolves to a filter preset the
        # client merges with the user's own Calm Filters.
        return MOODS

    # Article counts per (topic, flavor). ``prefs`` is the client's filter
    # preferences, decoded by prefs_from_json.
    @app.get("/api/category-counts", response_model=list[CategoryCount])
    def category_counts(
        accepted_only: bool = True,
        prefs: str | None = Query(None),
    ) -> list[CategoryCount]:
        fp = prefs_from_json(prefs)
        with get_conn() as conn:
            if fp.is_empty():
                rows = queries.category_counts(conn, accepted_only=accepted_only)
            else:
                # Count over the SAME filtered set the feed would return, so the
                # browse numbers always match what the user actually sees.
                allrows = queries.feed(
                    conn,
                    accepted_only=accepted_only,
                    limit=_COUNT_SCAN_LIMIT,
                    offset=0,
                )
                kept = filter_articles(allrows, fp, datetime.now(timezone.utc))
                counts = Counter((r["topic"], r["flavor"]) for r in kept)
                # str() in the sort key lets None topics/flavors sort alongside
                # real string keys without a TypeError.
                rows = [
                    {"topic": t, "flavor": f, "count": n}
                    for (t, f), n in sorted(
                        counts.items(),
                        key=lambda kv: (str(kv[0][0]), str(kv[0][1])),
                    )
                ]
        return [CategoryCount(**row) for row in rows]

    # Paginated article feed, optionally narrowed by topic/flavor and by the
    # client's filter preferences. Unknown topic/flavor -> HTTP 400.
    @app.get("/api/feed", response_model=FeedResponse)
    def feed(
        topic: str | None = Query(None),
        flavor: str | None = Query(None),
        accepted_only: bool = True,
        limit: int = Query(30, ge=1, le=100),
        offset: int = Query(0, ge=0),
        prefs: str | None = Query(None),
    ) -> FeedResponse:
        # NOTE(review): validation is case-insensitive, but the value handed to
        # queries.feed keeps the caller's casing — confirm queries.feed matches
        # case-insensitively, otherwise "Science" validates yet finds nothing.
        if topic and topic.lower() not in TOPICS:
            raise HTTPException(400, f"unknown topic: {topic}")
        if flavor and flavor.lower() not in FLAVORS:
            raise HTTPException(400, f"unknown flavor: {flavor}")

        fp = prefs_from_json(prefs)
        now = datetime.now(timezone.utc)
        with get_conn() as conn:
            if fp.is_empty():
                rows = queries.feed(
                    conn,
                    topic=topic,
                    flavor=flavor,
                    accepted_only=accepted_only,
                    limit=limit,
                    offset=offset,
                )
            else:
                # Categorical filters (include/mute topics+flavors incl. active
                # pauses, cortisol ceiling) go to SQL so nothing is truncated by
                # ranking. Only word-boundary avoid-terms need a Python pass, so
                # over-fetch just enough to cover what they might remove.
                kw = dict(
                    include_topics=fp.include_topics or None,
                    include_flavors=fp.include_flavors or None,
                    mute_topics=list(fp.muted_topics(now)) or None,
                    mute_flavors=list(fp.muted_flavors(now)) or None,
                    max_cortisol=fp.max_cortisol,
                    max_ragebait=fp.max_ragebait,
                )
                if fp.avoid_terms:
                    overfetch = min(
                        _OVERFETCH_CAP,
                        (offset + limit) * _OVERFETCH_FACTOR + _OVERFETCH_PAD,
                    )
                    raw = queries.feed(
                        conn,
                        topic=topic,
                        flavor=flavor,
                        accepted_only=accepted_only,
                        limit=overfetch,
                        offset=0,
                        **kw,
                    )
                    kept = filter_articles(raw, fp, now)  # drops avoid-term matches
                    # Pagination is applied after filtering, in Python.
                    rows = kept[offset : offset + limit]
                else:
                    rows = queries.feed(
                        conn,
                        topic=topic,
                        flavor=flavor,
                        accepted_only=accepted_only,
                        limit=limit,
                        offset=offset,
                        **kw,
                    )
        return FeedResponse(
            topic=topic,
            flavor=flavor,
            count=len(rows),
            items=[Article.from_row(r) for r in rows],
        )

    # The stored brief for ``date`` (passed through to queries.brief as
    # brief_date), optionally filtered by the client's preferences.
    @app.get("/api/brief", response_model=BriefResponse)
    def brief(
        date: str | None = Query(None),
        limit: int = Query(10, ge=1, le=50),
        prefs: str | None = Query(None),
    ) -> BriefResponse:
        fp = prefs_from_json(prefs)
        with get_conn() as conn:
            data = queries.brief(conn, brief_date=date, limit=limit)
        items = data["items"]
        if not fp.is_empty():
            # MVP: filter the stored brief DOWN; no refill from outside the brief.
            items = filter_articles(items, fp, datetime.now(timezone.utc))
        return BriefResponse(
            brief_date=data["brief_date"],
            title=data["title"],
            items=[Article.from_row(r) for r in items],
        )

    # Dates returned by queries.available_dates, at most ``limit`` of them.
    @app.get("/api/brief-dates", response_model=list[str])
    def brief_dates(limit: int = Query(30, ge=1, le=365)) -> list[str]:
        with get_conn() as conn:
            return queries.available_dates(conn, limit=limit)

    # Candidate sources, optionally restricted to one status.
    @app.get("/api/candidates", response_model=list[Candidate])
    def candidates(status: str | None = Query(None)) -> list[Candidate]:
        # Imported at call time rather than module import time — presumably to
        # avoid an import cycle with .sources; confirm before hoisting.
        from .sources import list_candidates

        with get_conn() as conn:
            rows = list_candidates(conn, status=status)
            out = []
            for r in rows:
                d = dict(r)
                # The stored JSON text is exposed to clients as a decoded
                # ``preview`` object; an empty/missing column becomes None.
                pj = d.pop("preview_json", None)
                d["preview"] = json.loads(pj) if pj else None
                out.append(Candidate(**d))
        return out

    # Sample-score a feed URL without persisting anything.
    @app.get("/api/source-preview", response_model=SourcePreview)
    def source_preview(
        url: str = Query(..., max_length=2048),
        sample: int = Query(25, ge=1, le=50),
        classify: bool = Query(
            False,
            description="Also classify with the local model (accurate but slower)",
        ),
    ) -> SourcePreview:
        # Read-only sample scoring; nothing is persisted. Only http(s) is allowed.
        # NOTE: fetching a user-supplied URL is an SSRF surface — before exposing
        # this publicly, also block private/loopback/link-local address ranges.
        if not _HTTP_URL_RE.match(url):
            raise HTTPException(400, "url must start with http:// or https://")
        client = LocalModelClient.from_env() if classify else None
        try:
            data = feeds.preview_feed(url, sample=sample, client=client)
        except Exception as exc:
            # Any failure while previewing is reported as a gateway error.
            # Chain the cause so the original traceback stays attached.
            raise HTTPException(502, f"could not preview feed: {exc}") from exc
        return SourcePreview(**data)

    # Static site last, mounted at root, so /api/* and /healthz win.
    if STATIC_DIR.is_dir():
        app.mount("/", StaticFiles(directory=str(STATIC_DIR), html=True), name="site")

    return app


app = create_app()