Geo Stage 4 (server): home-aware feed sectioning (Near you / country / world)

Completes the server side of "Closer to Home". /api/feed gains a `home` param
('US' or 'US-NY'); when set the response is private (like prefs) and sectioned:

- Near you (+ Elsewhere in your country when a state is set) is a ONE-TIME lead
  block on page 0; the world is the paginated body. next_offset tells the client
  where to continue, so the lead block never skews world paging.
- Thin tiers (fewer than MIN_TIER=3 items) fold down into the next tier, so a header is never shown empty or near-empty (lead, don't trap).
- State match counts only on high/medium geo confidence; the "country" tier excludes
  exactly what went to "near", so a low-confidence home-state story still surfaces
  under "country" (it doesn't vanish between tiers — caught + tested).
- Items carry a `section` tag; paywalled sort is now within-section. No home => exact
  prior behavior (section null, default/edge-cached feed unchanged), Brief untouched.

364 tests green. Frontend next: Home picker + sectioned feed rendering.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-19 19:35:22 -04:00
parent ad4e88c8f2
commit e7e8f5515e
3 changed files with 165 additions and 19 deletions
+75 -16
View File
@@ -327,6 +327,7 @@ class Article(BaseModel):
geo_breadth: str | None = None
geo_confidence: str | None = None
geo_places: list[dict] = [] # e.g. [{"country": "US", "state": "NY"}, {"country": "GB", "state": None}]
section: str | None = None # 'near' | 'country' | 'world' when a home is set (Closer to Home)
@classmethod
def from_row(cls, row: dict) -> "Article":
@@ -339,6 +340,7 @@ class Article(BaseModel):
cc, _, sc = tok.partition("-")
places.append({"country": cc, "state": sc or None})
return cls(
section=row.get("__section"),
geo_breadth=row.get("geo_breadth"),
geo_confidence=row.get("geo_confidence"),
geo_places=places,
@@ -369,6 +371,7 @@ class FeedResponse(BaseModel):
flavor: str | None
count: int
items: list[Article]
next_offset: int | None = None # world-tier offset for the next page (Closer to Home paging)
class BriefResponse(BaseModel):
@@ -1792,6 +1795,7 @@ def create_app() -> FastAPI:
source_id: int | None = Query(None, ge=1, description="show only this source's articles"),
sort: str = Query("ranked", pattern="^(ranked|latest)$", description="ranked (best-first) or latest (newest-first)"),
following: bool = Query(False, description="restrict to the signed-in user's followed sources/tags"),
home: str | None = Query(None, max_length=8, description="Closer to Home: reader's home as 'US' or 'US-NY'"),
request: Request = None,
) -> FeedResponse:
# Edge-cacheable ONLY when the response depends purely on the URL: not the
@@ -1799,12 +1803,19 @@ def create_app() -> FastAPI:
# (prefs/dismissals are per-reader). The shareable cases — the default
# home feed, topic/flavor/tag/source browse — are identical for everyone,
# so the edge can serve one copy to all. Everything else stays private.
shareable = not following and not prefs and not exclude.strip()
shareable = not following and not prefs and not exclude.strip() and not home
response.headers["Cache-Control"] = _EDGE_FEED if shareable else _PRIVATE
if topic and topic.lower() not in TOPICS:
raise HTTPException(400, f"unknown topic: {topic}")
if flavor and flavor.lower() not in FLAVORS:
raise HTTPException(400, f"unknown flavor: {flavor}")
# Parse the reader's home: 'US' or 'US-NY'. State granularity is US-only for v1.
home_country = home_state = None
if home:
parts = home.upper().split("-", 1)
home_country = (parts[0][:2] or None)
if home_country == "US" and len(parts) > 1:
home_state = parts[1][:2] or None
fp = prefs_from_json(prefs)
now = datetime.now(timezone.utc)
excl = {int(x) for x in exclude.split(",") if x.strip().lstrip("-").isdigit()}
@@ -1822,28 +1833,76 @@ def create_app() -> FastAPI:
).fetchall()
kw["follow_sources"] = [int(r["value"]) for r in frows if r["kind"] == "source" and r["value"].isdigit()]
kw["follow_tags"] = [r["value"] for r in frows if r["kind"] == "tag"]
if fp.avoid_terms or excl:
# Over-fetch enough to cover what the Python pass might remove.
fetch_n = min(2000, (offset + limit) * 4 + 50 + len(excl))
raw = queries.feed(
conn, topic=topic, flavor=flavor, accepted_only=accepted_only,
limit=fetch_n, offset=0, tag=tag, source_id=source_id, sort=sort, **kw,
def _fetch(scope, lim, off):
    """Return up to ``lim`` feed rows for ``scope``, starting at position ``off``.

    ``scope`` is passed through to ``queries.feed`` as ``geo_scope``; the other
    filters come from the enclosing handler. When the reader has avoid-terms or
    dismissed ids, paging is done in Python after filtering instead of in SQL.
    """
    shared = dict(
        topic=topic, flavor=flavor, accepted_only=accepted_only,
        tag=tag, source_id=source_id, sort=sort,
        home_country=home_country, home_state=home_state, geo_scope=scope,
    )
    if not (fp.avoid_terms or excl):
        # Nothing to strip afterwards: let the query do the paging itself.
        return queries.feed(conn, limit=lim, offset=off, **shared, **kw)
    # Over-fetch from the top (capped at 2000 rows) to cover what the Python
    # pass might remove, then cut the requested window out of the survivors.
    overfetch = min(2000, (off + lim) * 4 + 50 + len(excl))
    candidates = queries.feed(conn, limit=overfetch, offset=0, **shared, **kw)
    survivors = [a for a in filter_articles(candidates, fp, now) if a["id"] not in excl]
    return survivors[off : off + lim]
kept = [a for a in filter_articles(raw, fp, now) if a["id"] not in excl]
rows = kept[offset : offset + limit]
next_offset = None
if home_country:
# Closer to Home. Near you (+ Elsewhere in your country when a state is set)
# is a ONE-TIME lead block on page 0; the world is the paginated body. Thin
# tiers fold down so a header is never shown empty (Codex: lead, don't trap).
NEAR_CAP, COUNTRY_CAP, MIN_TIER = 8, 8, 3
if offset == 0:
near = _fetch("near", NEAR_CAP, 0)
country = _fetch("country", COUNTRY_CAP, 0) if home_state else []
world = _fetch("world", limit, 0)
next_offset = limit if len(world) == limit else None
tiers = []
if home_state:
if len(near) >= MIN_TIER:
tiers.append(("near", near))
else:
country = near + country # fold sparse "near" into your country
if len(country) >= MIN_TIER:
tiers.append(("country", country))
else:
world = country + world # fold sparse country into the world
elif len(near) >= MIN_TIER:
tiers.append(("near", near)) # near == your whole country here
else:
world = near + world
tiers.append(("world", world))
rows = []
for key, group in tiers:
for r in group:
r["__section"] = key
rows.append(r)
else:
rows = _fetch("world", limit, offset)
for r in rows:
r["__section"] = "world"
next_offset = offset + limit if len(rows) == limit else None
else:
rows = queries.feed(
conn, topic=topic, flavor=flavor, accepted_only=accepted_only,
limit=limit, offset=offset, tag=tag, source_id=source_id, sort=sort, **kw,
)
# Keep the top of a browse view readable: stable-sort paywalled items
# below readable ones (composite order preserved within each group).
rows = sorted(rows, key=lambda r: is_paywalled_for_source(r["canonical_url"], r["paywall_override"]))
rows = _fetch(None, limit, offset)
next_offset = offset + len(rows) if len(rows) == limit else None
# Paywalled below readable WITHIN each section (so tiers stay grouped); non-home
# rows all share section rank 0, preserving the original global behavior.
_SEC = {"near": 0, "country": 1, "world": 2}
rows = sorted(rows, key=lambda r: (
_SEC.get(r.get("__section"), 0),
is_paywalled_for_source(r["canonical_url"], r["paywall_override"]),
))
return FeedResponse(
topic=topic,
flavor=flavor,
count=len(rows),
items=[Article.from_row(r) for r in rows],
next_offset=next_offset,
)
@app.get("/api/search", response_model=FeedResponse)
+8 -3
View File
@@ -182,9 +182,14 @@ def feed(
elif geo_scope == "country" and home_country:
clauses.append("EXISTS (SELECT 1 FROM article_places p WHERE p.article_id = a.id AND p.country_code = ?)")
params.append(home_country)
if home_state: # "elsewhere in your country" = your country, but not your state
clauses.append("NOT EXISTS (SELECT 1 FROM article_places p2 WHERE p2.article_id = a.id AND p2.state_code = ?)")
params.append(home_state)
if home_state:
# "elsewhere in your country" excludes ONLY what actually went to "near" (a
# high/medium-confidence home-state match). A low-confidence home-state story
# isn't near, so it must still surface here, not vanish between tiers.
clauses.append(
"NOT (g.confidence IN ('high','medium') AND EXISTS (SELECT 1 FROM article_places p2 "
"WHERE p2.article_id = a.id AND p2.country_code = ? AND p2.state_code = ?))")
params.extend([home_country, home_state])
elif geo_scope == "world" and home_country:
clauses.append("NOT EXISTS (SELECT 1 FROM article_places p WHERE p.article_id = a.id AND p.country_code = ?)")
params.append(home_country)
+82
View File
@@ -0,0 +1,82 @@
"""Closer to Home: /api/feed?home=... sections the feed into near / country / world,
state-match only on high/medium confidence, sparse tiers fold down, Brief untouched."""
import pytest
from fastapi.testclient import TestClient
from goodnews.db import connect, init_db
@pytest.fixture
def app_db(tmp_path, monkeypatch):
    """Create the app against a throwaway SQLite DB seeded with geo-tagged stories.

    Seed data: one source and twelve accepted articles --
      * ids 1-4:  US/NY, high confidence
      * id 5:     US/NY, low confidence
      * ids 6-8:  US/CA, high confidence
      * ids 9-12: no place rows at all
    """
    db_path = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db_path))
    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "http://testserver")

    # Reloaded after the env vars are patched -- presumably so module-level
    # configuration is re-read for this test (confirm against goodnews.api).
    import importlib
    import goodnews.api as api
    importlib.reload(api)

    conn = connect(str(db_path))
    init_db(conn)
    conn.execute("INSERT INTO sources (id,name,feed_url,trust_score,content_visible) VALUES (1,'S','http://s/f',5,1)")

    def add_story(aid, *, breadth, conf, country=None, state=None):
        # One accepted article with its geo row, plus a place row when a country is given.
        conn.execute(
            "INSERT INTO articles (id,source_id,canonical_url,title,url_hash,published_at) "
            "VALUES (?,1,?,?,?,?)",
            (aid, f"https://x/{aid}", f"Story {aid}", f"h{aid}", "2026-06-18T08:00:00"),
        )
        conn.execute(
            "INSERT INTO article_scores (article_id,accepted,novelty_score,constructive_score) "
            "VALUES (?,1,5,5)",
            (aid,),
        )
        conn.execute(
            "INSERT INTO article_geo (article_id,breadth,confidence,geo_version) VALUES (?,?,?, 'geo-v1')",
            (aid, breadth, conf),
        )
        if country:
            conn.execute(
                "INSERT INTO article_places (article_id,country_code,state_code,ord) VALUES (?,?,?,0)",
                (aid, country, state),
            )

    # 4 NY high-conf (near), 1 NY LOW-conf (must NOT be near), 3 US/CA (country), 4 world
    seed = (
        [(i, "locality", "high", "US", "NY") for i in range(1, 5)]
        + [(5, "locality", "low", "US", "NY")]
        + [(i, "regional", "high", "US", "CA") for i in range(6, 9)]
        + [(i, "global", "high", None, None) for i in range(9, 13)]  # placeless -> world
    )
    for aid, breadth, conf, country, state in seed:
        add_story(aid, breadth=breadth, conf=conf, country=country, state=state)

    conn.commit()
    conn.close()
    return api.create_app()
def _sections(items):
return [it["section"] for it in items]
def test_home_state_sections_near_country_world(app_db):
    """home=US-NY yields near, then country, then world, with confidence-gated 'near'."""
    payload = TestClient(app_db).get("/api/feed?home=US-NY&limit=50").json()
    items = payload["items"]
    tags = _sections(items)

    # order: all near, then all country, then all world (no interleaving)
    tier_order = ["near", "country", "world"]
    assert tags == sorted(tags, key=tier_order.index)

    near_items = [it for it in items if it["section"] == "near"]
    assert len(near_items) == 4
    assert all(it["geo_confidence"] == "high" for it in near_items)

    # the LOW-confidence NY story is NOT "near" -- state match needs high/medium
    low_conf_ny = next(it for it in items if it["id"] == 5)
    assert low_conf_ny["section"] == "country"

    # world holds the placeless/global ones
    world_ids = {it["id"] for it in items if it["section"] == "world"}
    assert world_ids == {9, 10, 11, 12}
def test_sparse_near_folds_into_country(app_db):
    """A home state with no local stories gets no 'near' tier but still a 'country' tier."""
    # No seeded story is tagged TX, so "near" is empty and must not get a header;
    # the US stories still have to show up as "country".
    payload = TestClient(app_db).get("/api/feed?home=US-TX&limit=50").json()
    tags = _sections(payload["items"])
    assert "near" not in tags  # nothing local to TX -> no near section
    assert "country" in tags  # US stories still surface as your country
def test_country_only_home_has_no_country_tier(app_db):
    """With a country-only home, every US story is 'near' and no 'country' tier exists."""
    items = TestClient(app_db).get("/api/feed?home=US&limit=50").json()["items"]
    seen = set(_sections(items))
    assert "country" not in seen  # no state -> near IS the whole country
    assert seen.issubset({"near", "world"})
    # all US (incl. the low-conf one, country match)
    us_ids = {it["id"] for it in items if it["section"] == "near"}
    assert us_ids == set(range(1, 9))
def test_no_home_is_unchanged_and_unsectioned(app_db):
    """Without ``home`` no item carries a section tag and a short page ends paging."""
    r = TestClient(app_db).get("/api/feed?limit=50").json()
    assert all(it["section"] is None for it in r["items"])
    # The app_db fixture seeds only 12 articles, so a limit=50 page can never be
    # full. Asserting None (rather than "None or any int") lets this test catch a
    # wrong next_offset on a short page.
    assert r["next_offset"] is None