From 2f4bdf2d009ca2bfaffafaee647cc5c95b23e909 Mon Sep 17 00:00:00 2001 From: jay Date: Sat, 30 May 2026 13:51:07 +0000 Subject: [PATCH] Add FastAPI web/API layer and static site - queries.py: shared read-only query helpers (feed, brief, category counts) returning plain dicts, used by the API and available to the CLI. - api.py: FastAPI service with Pydantic response models (the companion-app contract), CORS, and endpoints for categories, feed, brief, and health; mounts a static site at /. - static/index.html: minimal dependency-free site rendering the daily five and topic/flavor category browsing. - 'goodnews serve' command launches uvicorn (lazy import; core CLI stays pure-stdlib). Web deps live behind the optional [web] extra. - Dockerfile + .dockerignore + build-system metadata so the service installs and deploys cleanly, with the DB mounted as a shared volume. - README: web/API and deployment docs. Co-Authored-By: Claude Opus 4.8 (1M context) --- .dockerignore | 7 ++ .gitignore | 1 + Dockerfile | 21 ++++ README.md | 40 +++++++ goodnews/api.py | 206 +++++++++++++++++++++++++++++++++++++ goodnews/cli.py | 25 +++++ goodnews/queries.py | 141 +++++++++++++++++++++++++ goodnews/static/index.html | 163 +++++++++++++++++++++++++++++ ideas.md | 2 + pyproject.toml | 18 ++++ 10 files changed, 624 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 goodnews/api.py create mode 100644 goodnews/queries.py create mode 100644 goodnews/static/index.html diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..5e91356 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +.git +.venv +data +__pycache__ +*.pyc +*.sqlite3 +*.sqlite3-* diff --git a/.gitignore b/.gitignore index a4e49ce..4052efb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ __pycache__/ *.py[cod] .venv/ +*.egg-info/ data/*.sqlite3 data/*.sqlite3-* diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..4b5c574 --- /dev/null +++ 
b/Dockerfile @@ -0,0 +1,21 @@ +# goodNews web/API image. +# +# The SQLite database is NOT baked into the image — mount it at /data so the API +# and the ingestion CLI (run separately, e.g. via cron on the host) share one +# file. Build: docker build -t goodnews . +# Run: docker run -p 8000:8000 -v /srv/goodnews/data:/data goodnews +FROM python:3.13-slim + +WORKDIR /app + +# Copy the package alongside its metadata, then install it with the web extra. +COPY pyproject.toml README.md ./ +COPY goodnews ./goodnews +RUN pip install --no-cache-dir ".[web]" + +# API reads the database from here; mount a host dir or named volume. +ENV GOODNEWS_DB=/data/goodnews.sqlite3 +VOLUME ["/data"] + +EXPOSE 8000 +CMD ["uvicorn", "goodnews.api:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index 90bc4d3..e15d62c 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,46 @@ For each article, the database stores: - hashes used for dedupe - heuristic scores and reason codes +## Web / API + +The optional `web` extra adds a FastAPI service and a small static site that +consumes it. The same JSON API backs both the website and any future companion +app; its auto-generated OpenAPI docs at `/docs` are the shared contract. 
+ +```bash +pip install -e '.[web]' # or: .venv/bin/pip install -e '.[web]' +python3 -m goodnews serve # http://127.0.0.1:8000 +python3 -m goodnews serve --host 0.0.0.0 # expose on the network +``` + +Endpoints: + +- `GET /` — the static site (daily five + topic/flavor browsing) +- `GET /healthz` — liveness + scored-article count +- `GET /api/categories` — the topic/flavor taxonomy +- `GET /api/category-counts` — article counts per topic/flavor +- `GET /api/feed?topic=&flavor=&limit=&offset=` — ranked, filtered articles +- `GET /api/brief?date=&limit=` — a daily brief (latest if no date) +- `GET /api/brief-dates` — available brief dates +- `GET /docs` — interactive OpenAPI documentation + +The ingestion CLI stays pure-stdlib; only the `web` extra pulls in FastAPI/uvicorn, +so the two halves can be deployed and upgraded independently. + +## Deployment + +The database is never baked into the image — the API and the ingestion CLI share +one SQLite file via a mounted volume. Run ingestion (`poll`, `classify`, +`build-brief`) on a schedule against the same file. + +```bash +docker build -t goodnews . +docker run -p 8000:8000 -v /srv/goodnews/data:/data goodnews +``` + +`GOODNEWS_DB` controls the database path (defaults to `data/goodnews.sqlite3`). +Put a reverse proxy (Caddy/nginx) in front for TLS once a domain is attached. + ## Next Steps 1. Run the poller for a few days and inspect which sources produce useful candidates. diff --git a/goodnews/api.py b/goodnews/api.py new file mode 100644 index 0000000..8327657 --- /dev/null +++ b/goodnews/api.py @@ -0,0 +1,206 @@ +"""FastAPI service for goodNews. + +A read-only JSON API over the ingestion database, plus a small static site that +consumes it. The same endpoints back both the website and any future companion +app; the auto-generated OpenAPI docs at /docs are that shared contract. 
# Repository root: the parent of the package directory. Only used to locate the
# default database when GOODNEWS_DB is not set.
ROOT = Path(__file__).resolve().parents[1]
DEFAULT_DB = ROOT / "data" / "goodnews.sqlite3"
# The static site ships inside the package, next to this module.
STATIC_DIR = Path(__file__).resolve().parent / "static"


def db_path() -> Path:
    """Return the database path: ``GOODNEWS_DB`` if set, else ``DEFAULT_DB``.

    The environment is read on every call, so a value exported after this
    module was imported is still honoured.
    """
    return Path(os.environ.get("GOODNEWS_DB", str(DEFAULT_DB)))


@contextmanager
def get_conn():
    """Yield a connection to the configured database and always close it."""
    conn = connect(db_path())
    try:
        yield conn
    finally:
        conn.close()


# --- Response models (the companion-app contract) ---------------------------
#
# Documented with comments rather than class docstrings so the generated
# OpenAPI schema stays exactly as it was.


# One taxonomy entry: its key and a human-readable description.
class Category(BaseModel):
    key: str
    description: str


# The full topic and flavor taxonomy.
class CategoriesResponse(BaseModel):
    topics: list[Category]
    flavors: list[Category]


# Number of scored articles for one (topic, flavor) pair.
class CategoryCount(BaseModel):
    topic: str | None
    flavor: str | None
    count: int


# Article metadata plus a link to the original source.
class Article(BaseModel):
    id: int
    title: str
    description: str | None = None
    url: str
    image_url: str | None = None
    published_at: str | None = None
    source: str
    topic: str | None = None
    flavor: str | None = None
    accepted: bool
    rank_score: int | None = None
    reason_code: str | None = None
    reason_text: str | None = None
    model_name: str | None = None
    rank: int | None = None  # position within a brief, when applicable

    @classmethod
    def from_row(cls, row: dict) -> "Article":
        """Build an ``Article`` from a query-helper row dict.

        ``id``, ``title``, ``canonical_url`` and ``source_name`` are required
        keys (``KeyError`` if absent); every other field is optional and falls
        back to ``None`` (``accepted`` to ``False``).
        """
        return cls(
            id=row["id"],
            title=row["title"],
            description=row.get("description"),
            url=row["canonical_url"],
            image_url=row.get("image_url"),
            published_at=row.get("published_at"),
            source=row["source_name"],
            topic=row.get("topic"),
            flavor=row.get("flavor"),
            accepted=bool(row.get("accepted")),
            rank_score=row.get("rank_score"),
            reason_code=row.get("reason_code"),
            reason_text=row.get("reason_text"),
            model_name=row.get("model_name"),
            rank=row.get("rank"),
        )


# A page of ranked articles together with the filters that produced it.
class FeedResponse(BaseModel):
    topic: str | None
    flavor: str | None
    count: int
    items: list[Article]


# A daily brief: its date, title and ranked items.
class BriefResponse(BaseModel):
    brief_date: str | None
    title: str | None
    items: list[Article]


# --- App --------------------------------------------------------------------


def create_app() -> FastAPI:
    """Build the FastAPI application.

    Registers CORS, the JSON endpoints, and finally the static site (only when
    the bundled ``static`` directory exists).

    Returns:
        The configured ``FastAPI`` instance.
    """
    # Local import: only the /api/brief handler needs it.
    from datetime import date as iso_date

    app = FastAPI(
        title="goodNews API",
        version="0.1.0",
        description="Constructive, uplifting news — metadata and links only.",
    )

    # The website and companion app may live on other origins; allow them.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["GET"],
        allow_headers=["*"],
    )

    @app.get("/healthz")
    def healthz() -> dict:
        with get_conn() as conn:
            # NOTE(review): init_db presumably creates missing tables so the
            # probe works on a fresh database — confirm against db.init_db.
            init_db(conn)
            scored = conn.execute("SELECT COUNT(*) FROM article_scores").fetchone()[0]
        return {"status": "ok", "scored_articles": scored}

    @app.get("/api/categories", response_model=CategoriesResponse)
    def categories() -> CategoriesResponse:
        return CategoriesResponse(
            topics=[Category(key=k, description=v) for k, v in TOPICS.items()],
            flavors=[Category(key=k, description=v) for k, v in FLAVORS.items()],
        )

    @app.get("/api/category-counts", response_model=list[CategoryCount])
    def category_counts(accepted_only: bool = True) -> list[CategoryCount]:
        with get_conn() as conn:
            rows = queries.category_counts(conn, accepted_only=accepted_only)
        return [CategoryCount(**row) for row in rows]

    @app.get("/api/feed", response_model=FeedResponse)
    def feed(
        topic: str | None = Query(None),
        flavor: str | None = Query(None),
        accepted_only: bool = True,
        limit: int = Query(30, ge=1, le=100),
        offset: int = Query(0, ge=0),
    ) -> FeedResponse:
        # Normalise once so validation, the query and the echoed filters all
        # agree; an empty value means "no filter".
        topic_key = topic.lower() if topic else None
        flavor_key = flavor.lower() if flavor else None
        if topic_key is not None and topic_key not in TOPICS:
            raise HTTPException(400, f"unknown topic: {topic}")
        if flavor_key is not None and flavor_key not in FLAVORS:
            raise HTTPException(400, f"unknown flavor: {flavor}")
        with get_conn() as conn:
            rows = queries.feed(
                conn,
                topic=topic_key,
                flavor=flavor_key,
                accepted_only=accepted_only,
                limit=limit,
                offset=offset,
            )
        return FeedResponse(
            topic=topic_key,
            flavor=flavor_key,
            count=len(rows),
            items=[Article.from_row(r) for r in rows],
        )

    @app.get("/api/brief", response_model=BriefResponse)
    def brief(date: str | None = Query(None), limit: int = Query(10, ge=1, le=50)) -> BriefResponse:
        # An empty or missing date selects the latest brief. Anything else must
        # parse as an ISO date, so malformed input is rejected instead of being
        # echoed back as a brief_date.
        brief_date = None
        if date:
            try:
                brief_date = iso_date.fromisoformat(date).isoformat()
            except ValueError:
                raise HTTPException(400, f"invalid date (expected YYYY-MM-DD): {date}") from None
        with get_conn() as conn:
            data = queries.brief(conn, brief_date=brief_date, limit=limit)
        return BriefResponse(
            brief_date=data["brief_date"],
            title=data["title"],
            items=[Article.from_row(r) for r in data["items"]],
        )

    @app.get("/api/brief-dates", response_model=list[str])
    def brief_dates(limit: int = Query(30, ge=1, le=365)) -> list[str]:
        with get_conn() as conn:
            return queries.available_dates(conn, limit=limit)

    # Static site last, mounted at root, so /api/* and /healthz win.
    if STATIC_DIR.is_dir():
        app.mount("/", StaticFiles(directory=str(STATIC_DIR), html=True), name="site")

    return app


app = create_app()
# Composite ranking used everywhere a "best first" order is needed. Kept as one
# expression so brief, category feeds, and the API all rank identically.
RANK_SCORE_SQL = (
    "(s.constructive_score + s.agency_score + s.human_benefit_score + src.trust_score "
    "- s.cortisol_score - s.ragebait_score - s.pr_risk_score)"
)

# Shared SELECT list. Any query using it must alias articles as ``a``, sources
# as ``src`` and article_scores as ``s``.
_ARTICLE_COLUMNS = f"""
    a.id,
    a.title,
    a.description,
    a.canonical_url,
    a.published_at,
    a.image_url,
    src.name AS source_name,
    s.topic,
    s.flavor,
    s.accepted,
    s.constructive_score,
    s.cortisol_score,
    s.ragebait_score,
    s.agency_score,
    s.human_benefit_score,
    s.pr_risk_score,
    s.reason_code,
    s.reason_text,
    s.model_name,
    {RANK_SCORE_SQL} AS rank_score
"""


def feed(
    conn: sqlite3.Connection,
    topic: str | None = None,
    flavor: str | None = None,
    accepted_only: bool = True,
    limit: int = 30,
    offset: int = 0,
) -> list[dict]:
    """Return ranked articles, optionally filtered by topic and/or flavor.

    Args:
        conn: Open connection whose rows convert with ``dict(row)`` (for
            example ``sqlite3.Row``).
        topic: Topic key to filter on; lower-cased before matching.
        flavor: Flavor key to filter on; lower-cased before matching.
        accepted_only: When true, only rows with ``accepted = 1``.
        limit: Maximum number of rows to return.
        offset: Number of leading rows to skip.

    Returns:
        One dict per article, best first. Ties on score and timestamp are
        broken by article id (highest first), so the order is total and
        LIMIT/OFFSET pages neither repeat nor skip rows.
    """
    clauses: list[str] = []
    params: list[object] = []
    if accepted_only:
        clauses.append("s.accepted = 1")
    if topic:
        clauses.append("s.topic = ?")
        params.append(topic.lower())
    if flavor:
        clauses.append("s.flavor = ?")
        params.append(flavor.lower())
    # Only fixed fragments are interpolated; every value goes through "?".
    where = ("WHERE " + " AND ".join(clauses)) if clauses else ""
    params.extend([limit, offset])

    rows = conn.execute(
        f"""
        SELECT {_ARTICLE_COLUMNS}
        FROM articles a
        JOIN sources src ON src.id = a.source_id
        JOIN article_scores s ON s.article_id = a.id
        {where}
        ORDER BY rank_score DESC,
                 COALESCE(a.published_at, a.discovered_at) DESC,
                 a.id DESC
        LIMIT ? OFFSET ?
        """,
        params,
    ).fetchall()
    return [dict(row) for row in rows]


def brief(conn: sqlite3.Connection, brief_date: str | None = None, limit: int = 10) -> dict:
    """Return a stored daily brief (latest if no date) with its ranked items.

    Args:
        conn: Open connection whose rows support access by column name.
        brief_date: Date of the brief to load; ``None`` or empty selects the
            most recent stored brief.
        limit: Maximum number of items to return.

    Returns:
        A dict with ``brief_date``, ``title`` and ``items``. If no brief is
        stored at all, ``brief_date`` and ``title`` are ``None``; if the
        requested date has no brief, the date is echoed with a ``None`` title.
        ``items`` is empty in both cases.
    """
    target_date = brief_date or _latest_brief_date(conn)
    if not target_date:
        return {"brief_date": None, "title": None, "items": []}

    header = conn.execute(
        "SELECT brief_date, title FROM daily_briefs WHERE brief_date = ?",
        (target_date,),
    ).fetchone()
    if not header:
        return {"brief_date": target_date, "title": None, "items": []}

    # LEFT JOIN on scores: a brief item is still listed when its article has
    # no score row (the score columns are then NULL).
    rows = conn.execute(
        f"""
        SELECT bi.rank, bi.selection_reason, {_ARTICLE_COLUMNS}
        FROM daily_briefs b
        JOIN daily_brief_items bi ON bi.brief_id = b.id
        JOIN articles a ON a.id = bi.article_id
        JOIN sources src ON src.id = a.source_id
        LEFT JOIN article_scores s ON s.article_id = a.id
        WHERE b.brief_date = ?
        ORDER BY bi.rank
        LIMIT ?
        """,
        (target_date, limit),
    ).fetchall()
    return {
        "brief_date": header["brief_date"],
        "title": header["title"],
        "items": [dict(row) for row in rows],
    }


def category_counts(conn: sqlite3.Connection, accepted_only: bool = True) -> list[dict]:
    """Return per topic/flavor article counts for building browse UIs.

    Args:
        conn: Open connection whose rows convert with ``dict(row)``.
        accepted_only: When true, count only accepted scores; otherwise count
            every score row that has a topic.

    Returns:
        Dicts with ``topic``, ``flavor`` and ``count``, ordered by topic then
        flavor.
    """
    # Both alternatives are fixed strings, so the f-string carries no input.
    where = "WHERE s.accepted = 1" if accepted_only else "WHERE s.topic IS NOT NULL"
    rows = conn.execute(
        f"""
        SELECT s.topic, s.flavor, COUNT(*) AS count
        FROM article_scores s
        {where}
        GROUP BY s.topic, s.flavor
        ORDER BY s.topic, s.flavor
        """
    ).fetchall()
    return [dict(row) for row in rows]


def available_dates(conn: sqlite3.Connection, limit: int = 30) -> list[str]:
    """Return up to ``limit`` stored brief dates, most recent first."""
    rows = conn.execute(
        "SELECT brief_date FROM daily_briefs ORDER BY brief_date DESC LIMIT ?",
        (limit,),
    ).fetchall()
    return [row["brief_date"] for row in rows]


def _latest_brief_date(conn: sqlite3.Connection) -> str | None:
    """Return the most recent brief date, or ``None`` when none are stored."""
    # Reuse the public helper so "latest" always means the same ordering.
    dates = available_dates(conn, limit=1)
    return dates[0] if dates else None
+

goodNews

+

Calm, constructive news worth your attention — and nothing that isn't.

+
+
+
+
Five Good Things
+
Loading…
+
+ +
Browse by category
+
+
+
+
+
+ goodNews · metadata & links only, no stored articles · + API +
+ + + + diff --git a/ideas.md b/ideas.md index af37098..8a52c78 100644 --- a/ideas.md +++ b/ideas.md @@ -1 +1,3 @@ - Ability to silence some categories temporarily (Maybe a user doesn't even want to see health-related articles, even good ones, so they're not reminded of an ongoing medical issue -- a way to avoid something purposely for a bit) +- Terms to avoid list (To filter even good news that you'd rather not hear about) +- \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 7e879e8..22eab94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,28 @@ +[build-system] +requires = ["setuptools>=68"] +build-backend = "setuptools.build_meta" + [project] name = "goodnews" version = "0.1.0" description = "Local-first constructive news ingestion and filtering prototype." requires-python = ">=3.11" +# The ingestion CLI is intentionally pure-stdlib. The optional `web` extra adds +# the API/site layer so the two halves can be deployed and upgraded independently. dependencies = [] +[project.optional-dependencies] +web = [ + "fastapi>=0.110", + "uvicorn[standard]>=0.29", +] + [project.scripts] goodnews = "goodnews.cli:main" +[tool.setuptools] +packages = ["goodnews"] + +[tool.setuptools.package-data] +goodnews = ["static/*"] +