95195daff8
- feeds.preview_feed(): fetch + score a sample WITHOUT persisting; returns freshness, acceptance rate, cortisol/ragebait/PR averages, and example accepted/rejected items. With an LLM client it also returns topic/flavor mix and the model's (accurate) acceptance view. - CLI 'preview-source URL [--sample] [--classify]'. - API 'GET /api/source-preview?url=&sample=&classify=' with an http(s)-only guard (SSRF note left for go-public hardening). - Site 'Suggest a source' panel with Quick check (heuristic, instant) and Deep check (model, accurate), rendered DOM-safely. - Tests: network-free preview_feed tests via monkeypatched fetch (45 total). - README documents the command, endpoint, and updated roadmap. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
280 lines
9.3 KiB
Python
280 lines
9.3 KiB
Python
"""FastAPI service for goodNews.

A read-only JSON API over the ingestion database, plus a small static site that
consumes it. The same endpoints back both the website and any future companion
app; the auto-generated OpenAPI docs at /docs are that shared contract.

Run with the bundled CLI:  goodnews serve
Or directly:               uvicorn goodnews.api:app --host 0.0.0.0 --port 8000

The database path comes from GOODNEWS_DB (falling back to the repo's data dir),
so the API and CLI always read the same file.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import re
|
|
import sqlite3
|
|
from collections import Counter
|
|
from contextlib import contextmanager
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
from fastapi import FastAPI, HTTPException, Query
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.staticfiles import StaticFiles
|
|
from pydantic import BaseModel
|
|
|
|
from . import feeds, queries
|
|
from .db import connect, init_db
|
|
from .filters import filter_articles, prefs_from_json
|
|
from .llm import LocalModelClient
|
|
from .taxonomy import FLAVORS, TOPICS
|
|
|
|
# Filesystem anchors, all derived from this module's resolved location.
# ROOT is two levels above this file; the default database lives under
# ROOT/data and the bundled site sits in "static" next to this module.
ROOT = Path(__file__).resolve().parents[1]
DEFAULT_DB = ROOT.joinpath("data", "goodnews.sqlite3")
STATIC_DIR = Path(__file__).resolve().with_name("static")
|
|
|
|
|
|
def db_path() -> Path:
    """Return the SQLite database path the API should open.

    The ``GOODNEWS_DB`` environment variable is read on every call. When it
    is unset or empty, ``DEFAULT_DB`` is used instead.

    Returns:
        The configured path, or the default database path.
    """
    configured = os.environ.get("GOODNEWS_DB")
    # An empty value (e.g. ``GOODNEWS_DB=`` in an env file) would otherwise
    # turn into Path(""), i.e. the current directory, which is not a database.
    if not configured:
        return Path(DEFAULT_DB)
    return Path(configured)
|
|
|
|
|
|
@contextmanager
def get_conn():
    """Yield a connection to the database at ``db_path()``.

    The connection is opened on entry and always closed on exit, including
    when the ``with`` body raises.
    """
    location = db_path()
    connection = connect(location)
    try:
        yield connection
    finally:
        connection.close()
|
|
|
|
|
|
# --- Response models (the companion-app contract) ---------------------------
|
|
|
|
|
|
# One taxonomy entry: its key and the description attached to that key.
# Used for both the topic list and the flavor list.
class Category(BaseModel):
    key: str
    description: str
|
|
|
|
|
|
# Response body of /api/categories: every topic and every flavor entry.
class CategoriesResponse(BaseModel):
    topics: list[Category]
    flavors: list[Category]
|
|
|
|
|
|
# Article count for one (topic, flavor) pair; either half of the pair may be
# null. /api/category-counts returns a list of these.
class CategoryCount(BaseModel):
    topic: str | None
    flavor: str | None
    count: int
|
|
|
|
|
|
# A single article as returned by the feed and brief endpoints.
class Article(BaseModel):
    id: int
    title: str
    description: str | None = None
    url: str
    image_url: str | None = None
    published_at: str | None = None
    source: str
    topic: str | None = None
    flavor: str | None = None
    accepted: bool
    rank_score: int | None = None
    reason_code: str | None = None
    reason_text: str | None = None
    model_name: str | None = None
    rank: int | None = None  # position within a brief, when applicable

    @classmethod
    def from_row(cls, row: dict) -> "Article":
        """Build an ``Article`` from a query-result mapping.

        ``id``, ``title``, ``canonical_url`` and ``source_name`` must be
        present (``KeyError`` otherwise). Every other column is read with
        ``.get`` and may be absent. ``canonical_url`` and ``source_name``
        are exposed as ``url`` and ``source``, and ``accepted`` is coerced
        to ``bool``.
        """
        # Columns whose name matches the model field and that may be missing.
        passthrough = (
            "description",
            "image_url",
            "published_at",
            "topic",
            "flavor",
            "rank_score",
            "reason_code",
            "reason_text",
            "model_name",
            "rank",
        )
        optional = {column: row.get(column) for column in passthrough}
        return cls(
            id=row["id"],
            title=row["title"],
            url=row["canonical_url"],
            source=row["source_name"],
            accepted=bool(row.get("accepted")),
            **optional,
        )
|
|
|
|
|
|
# Response body of /api/feed. topic and flavor echo the request filters,
# count is the number of entries in items.
class FeedResponse(BaseModel):
    topic: str | None
    flavor: str | None
    count: int
    items: list[Article]
|
|
|
|
|
|
# Response body of /api/brief: the brief's date and title (either may be
# null) and its articles.
class BriefResponse(BaseModel):
    brief_date: str | None
    title: str | None
    items: list[Article]
|
|
|
|
|
|
# Entry type of SourcePreview.examples_rejected: an item title and a reason
# string (presumably why the sampled item was rejected; confirm against
# feeds.preview_feed).
class RejectedExample(BaseModel):
    title: str
    reason: str
|
|
|
|
|
|
# Response body of /api/source-preview, built directly from the mapping that
# feeds.preview_feed returns, so the field names must match that mapping's keys.
class SourcePreview(BaseModel):
    url: str
    sampled: int
    classified: bool
    accepted: int
    acceptance_rate: float
    avg_cortisol: float
    avg_ragebait: float
    avg_pr_risk: float
    newest_published: str | None
    recent_7d: int
    topic_mix: dict[str, int]
    flavor_mix: dict[str, int]
    examples_accepted: list[str]
    examples_rejected: list[RejectedExample]
|
|
|
|
|
|
# --- App --------------------------------------------------------------------
|
|
|
|
|
|
def create_app() -> FastAPI:
    """Build and return the goodNews FastAPI application.

    Registers GET-only CORS for any origin, the ``/healthz`` and ``/api/*``
    JSON endpoints and, when ``STATIC_DIR`` exists, the static site mounted
    at ``/``.

    The route handlers are documented with comments rather than docstrings
    on purpose: FastAPI publishes handler docstrings as OpenAPI operation
    descriptions, and the generated docs are the shared client contract.

    Returns:
        The fully configured ``FastAPI`` instance.
    """
    app = FastAPI(
        title="goodNews API",
        version="0.1.0",
        description="Constructive, uplifting news — metadata and links only.",
    )

    # The website and companion app may live on other origins; allow them.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["GET"],
        allow_headers=["*"],
    )

    # Liveness probe: runs init_db on the connection, then reports how many
    # rows article_scores holds.
    @app.get("/healthz")
    def healthz() -> dict:
        with get_conn() as conn:
            init_db(conn)
            scored = conn.execute("SELECT COUNT(*) FROM article_scores").fetchone()[0]
            return {"status": "ok", "scored_articles": scored}

    # Static taxonomy: every topic and flavor key with its description.
    @app.get("/api/categories", response_model=CategoriesResponse)
    def categories() -> CategoriesResponse:
        return CategoriesResponse(
            topics=[Category(key=k, description=v) for k, v in TOPICS.items()],
            flavors=[Category(key=k, description=v) for k, v in FLAVORS.items()],
        )

    # Article counts per (topic, flavor). Without prefs the counts come
    # straight from the query layer; with prefs they are recomputed in Python.
    @app.get("/api/category-counts", response_model=list[CategoryCount])
    def category_counts(accepted_only: bool = True, prefs: str | None = Query(None)) -> list[CategoryCount]:
        fp = prefs_from_json(prefs)
        with get_conn() as conn:
            if fp.is_empty():
                rows = queries.category_counts(conn, accepted_only=accepted_only)
            else:
                # Count over the SAME filtered set the feed would return, so the
                # browse numbers always match what the user actually sees.
                allrows = queries.feed(conn, accepted_only=accepted_only, limit=100000, offset=0)
                kept = filter_articles(allrows, fp, datetime.now(timezone.utc))
                counts = Counter((r["topic"], r["flavor"]) for r in kept)
                # str() in the sort key keeps None topics/flavors comparable.
                rows = [
                    {"topic": t, "flavor": f, "count": n}
                    for (t, f), n in sorted(counts.items(), key=lambda kv: (str(kv[0][0]), str(kv[0][1])))
                ]
            return [CategoryCount(**row) for row in rows]

    # Paginated article feed, optionally narrowed by topic/flavor and by the
    # caller's prefs. Unknown topic or flavor yields HTTP 400.
    @app.get("/api/feed", response_model=FeedResponse)
    def feed(
        topic: str | None = Query(None),
        flavor: str | None = Query(None),
        accepted_only: bool = True,
        limit: int = Query(30, ge=1, le=100),
        offset: int = Query(0, ge=0),
        prefs: str | None = Query(None),
    ) -> FeedResponse:
        if topic and topic.lower() not in TOPICS:
            raise HTTPException(400, f"unknown topic: {topic}")
        if flavor and flavor.lower() not in FLAVORS:
            raise HTTPException(400, f"unknown flavor: {flavor}")
        # Validation above is case-insensitive, so hand the query layer the
        # lowercase taxonomy key instead of the caller's casing. The response
        # still echoes the values exactly as they were sent.
        topic_key = topic.lower() if topic else topic
        flavor_key = flavor.lower() if flavor else flavor
        fp = prefs_from_json(prefs)
        with get_conn() as conn:
            if fp.is_empty():
                rows = queries.feed(
                    conn,
                    topic=topic_key,
                    flavor=flavor_key,
                    accepted_only=accepted_only,
                    limit=limit,
                    offset=offset,
                )
            else:
                # Over-fetch, apply the calm filters in Python (word-boundary
                # avoid-terms can't be done in SQL), then slice to the page.
                fetch_n = min(2000, (offset + limit) * 4 + 50)
                raw = queries.feed(
                    conn,
                    topic=topic_key,
                    flavor=flavor_key,
                    accepted_only=accepted_only,
                    limit=fetch_n,
                    offset=0,
                )
                filtered = filter_articles(raw, fp, datetime.now(timezone.utc))
                rows = filtered[offset : offset + limit]
            return FeedResponse(
                topic=topic,
                flavor=flavor,
                count=len(rows),
                items=[Article.from_row(r) for r in rows],
            )

    # Stored brief for a date, as resolved by queries.brief, optionally
    # filtered down by the caller's prefs.
    @app.get("/api/brief", response_model=BriefResponse)
    def brief(
        date: str | None = Query(None),
        limit: int = Query(10, ge=1, le=50),
        prefs: str | None = Query(None),
    ) -> BriefResponse:
        fp = prefs_from_json(prefs)
        with get_conn() as conn:
            data = queries.brief(conn, brief_date=date, limit=limit)
            items = data["items"]
            if not fp.is_empty():
                # MVP: filter the stored brief DOWN; no refill from outside the brief.
                items = filter_articles(items, fp, datetime.now(timezone.utc))
            return BriefResponse(
                brief_date=data["brief_date"],
                title=data["title"],
                items=[Article.from_row(r) for r in items],
            )

    # Dates for which a brief is available, at most `limit` of them.
    @app.get("/api/brief-dates", response_model=list[str])
    def brief_dates(limit: int = Query(30, ge=1, le=365)) -> list[str]:
        with get_conn() as conn:
            return queries.available_dates(conn, limit=limit)

    # Fetch and score a sample of a candidate feed. A non-http(s) URL yields
    # HTTP 400; any failure inside preview_feed is reported as HTTP 502.
    @app.get("/api/source-preview", response_model=SourcePreview)
    def source_preview(
        url: str = Query(..., max_length=2048),
        sample: int = Query(25, ge=1, le=50),
        classify: bool = Query(False, description="Also classify with the local model (accurate but slower)"),
    ) -> SourcePreview:
        # Read-only sample scoring; nothing is persisted. Only http(s) is allowed.
        # NOTE: fetching a user-supplied URL is an SSRF surface — before exposing
        # this publicly, also block private/loopback/link-local address ranges.
        if not re.match(r"^https?://", url, re.IGNORECASE):
            raise HTTPException(400, "url must start with http:// or https://")
        client = LocalModelClient.from_env() if classify else None
        try:
            data = feeds.preview_feed(url, sample=sample, client=client)
        except Exception as exc:
            # Chain the cause so logs keep the original traceback.
            raise HTTPException(502, f"could not preview feed: {exc}") from exc
        return SourcePreview(**data)

    # Static site last, mounted at root, so /api/* and /healthz win.
    if STATIC_DIR.is_dir():
        app.mount("/", StaticFiles(directory=str(STATIC_DIR), html=True), name="site")

    return app
|
|
|
|
|
|
# Module-level ASGI application, built at import time; this is the object
# that `uvicorn goodnews.api:app` loads.
app = create_app()
|