Geo Stage 4 (server): home-aware feed sectioning (Near you / country / world)
Completes the server side of "Closer to Home". /api/feed gains a `home` param
('US' or 'US-NY'); when it is set, the response is private (like prefs) and sectioned:
- Near you (+ Elsewhere in your country when a state is set) is a ONE-TIME lead
block on page 0; the world is the paginated body. next_offset tells the client
where to continue, so the lead block never skews world paging.
- Thin tiers fold down (MIN_TIER=3) so a header is never shown empty (lead, don't trap).
- State match counts only on high/medium geo confidence; the "country" tier excludes
exactly what went to "near", so a low-confidence home-state story still surfaces
(it doesn't vanish between tiers — caught + tested).
- Items carry a `section` tag; paywalled sort is now within-section. No home => exact
prior behavior (section null, default/edge-cached feed unchanged), Brief untouched.
364 tests green. Frontend next: Home picker + sectioned feed rendering.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+75
-16
@@ -327,6 +327,7 @@ class Article(BaseModel):
|
||||
geo_breadth: str | None = None
|
||||
geo_confidence: str | None = None
|
||||
geo_places: list[dict] = [] # e.g. [{"country": "US", "state": "NY"}, {"country": "GB", "state": None}]
|
||||
section: str | None = None # 'near' | 'country' | 'world' when a home is set (Closer to Home)
|
||||
|
||||
@classmethod
|
||||
def from_row(cls, row: dict) -> "Article":
|
||||
@@ -339,6 +340,7 @@ class Article(BaseModel):
|
||||
cc, _, sc = tok.partition("-")
|
||||
places.append({"country": cc, "state": sc or None})
|
||||
return cls(
|
||||
section=row.get("__section"),
|
||||
geo_breadth=row.get("geo_breadth"),
|
||||
geo_confidence=row.get("geo_confidence"),
|
||||
geo_places=places,
|
||||
@@ -369,6 +371,7 @@ class FeedResponse(BaseModel):
|
||||
flavor: str | None
|
||||
count: int
|
||||
items: list[Article]
|
||||
next_offset: int | None = None # world-tier offset for the next page (Closer to Home paging)
|
||||
|
||||
|
||||
class BriefResponse(BaseModel):
|
||||
@@ -1792,6 +1795,7 @@ def create_app() -> FastAPI:
|
||||
source_id: int | None = Query(None, ge=1, description="show only this source's articles"),
|
||||
sort: str = Query("ranked", pattern="^(ranked|latest)$", description="ranked (best-first) or latest (newest-first)"),
|
||||
following: bool = Query(False, description="restrict to the signed-in user's followed sources/tags"),
|
||||
home: str | None = Query(None, max_length=8, description="Closer to Home: reader's home as 'US' or 'US-NY'"),
|
||||
request: Request = None,
|
||||
) -> FeedResponse:
|
||||
# Edge-cacheable ONLY when the response depends purely on the URL: not the
|
||||
@@ -1799,12 +1803,19 @@ def create_app() -> FastAPI:
|
||||
# (prefs/dismissals are per-reader). The shareable cases — the default
|
||||
# home feed, topic/flavor/tag/source browse — are identical for everyone,
|
||||
# so the edge can serve one copy to all. Everything else stays private.
|
||||
shareable = not following and not prefs and not exclude.strip()
|
||||
shareable = not following and not prefs and not exclude.strip() and not home
|
||||
response.headers["Cache-Control"] = _EDGE_FEED if shareable else _PRIVATE
|
||||
if topic and topic.lower() not in TOPICS:
|
||||
raise HTTPException(400, f"unknown topic: {topic}")
|
||||
if flavor and flavor.lower() not in FLAVORS:
|
||||
raise HTTPException(400, f"unknown flavor: {flavor}")
|
||||
# Parse the reader's home: 'US' or 'US-NY'. State granularity is US-only for v1.
|
||||
home_country = home_state = None
|
||||
if home:
|
||||
parts = home.upper().split("-", 1)
|
||||
home_country = (parts[0][:2] or None)
|
||||
if home_country == "US" and len(parts) > 1:
|
||||
home_state = parts[1][:2] or None
|
||||
fp = prefs_from_json(prefs)
|
||||
now = datetime.now(timezone.utc)
|
||||
excl = {int(x) for x in exclude.split(",") if x.strip().lstrip("-").isdigit()}
|
||||
@@ -1822,28 +1833,76 @@ def create_app() -> FastAPI:
|
||||
).fetchall()
|
||||
kw["follow_sources"] = [int(r["value"]) for r in frows if r["kind"] == "source" and r["value"].isdigit()]
|
||||
kw["follow_tags"] = [r["value"] for r in frows if r["kind"] == "tag"]
|
||||
if fp.avoid_terms or excl:
|
||||
# Over-fetch enough to cover what the Python pass might remove.
|
||||
fetch_n = min(2000, (offset + limit) * 4 + 50 + len(excl))
|
||||
raw = queries.feed(
|
||||
conn, topic=topic, flavor=flavor, accepted_only=accepted_only,
|
||||
limit=fetch_n, offset=0, tag=tag, source_id=source_id, sort=sort, **kw,
|
||||
|
||||
def _fetch(scope, lim, off):
|
||||
# One scoped page, applying the avoid-terms/dismissal Python pass when needed.
|
||||
if fp.avoid_terms or excl:
|
||||
fetch_n = min(2000, (off + lim) * 4 + 50 + len(excl))
|
||||
raw = queries.feed(
|
||||
conn, topic=topic, flavor=flavor, accepted_only=accepted_only, limit=fetch_n, offset=0,
|
||||
tag=tag, source_id=source_id, sort=sort,
|
||||
home_country=home_country, home_state=home_state, geo_scope=scope, **kw,
|
||||
)
|
||||
kept = [a for a in filter_articles(raw, fp, now) if a["id"] not in excl]
|
||||
return kept[off : off + lim]
|
||||
return queries.feed(
|
||||
conn, topic=topic, flavor=flavor, accepted_only=accepted_only, limit=lim, offset=off,
|
||||
tag=tag, source_id=source_id, sort=sort,
|
||||
home_country=home_country, home_state=home_state, geo_scope=scope, **kw,
|
||||
)
|
||||
kept = [a for a in filter_articles(raw, fp, now) if a["id"] not in excl]
|
||||
rows = kept[offset : offset + limit]
|
||||
|
||||
next_offset = None
|
||||
if home_country:
|
||||
# Closer to Home. Near you (+ Elsewhere in your country when a state is set)
|
||||
# is a ONE-TIME lead block on page 0; the world is the paginated body. Thin
|
||||
# tiers fold down so a header is never shown empty (Codex: lead, don't trap).
|
||||
NEAR_CAP, COUNTRY_CAP, MIN_TIER = 8, 8, 3
|
||||
if offset == 0:
|
||||
near = _fetch("near", NEAR_CAP, 0)
|
||||
country = _fetch("country", COUNTRY_CAP, 0) if home_state else []
|
||||
world = _fetch("world", limit, 0)
|
||||
next_offset = limit if len(world) == limit else None
|
||||
tiers = []
|
||||
if home_state:
|
||||
if len(near) >= MIN_TIER:
|
||||
tiers.append(("near", near))
|
||||
else:
|
||||
country = near + country # fold sparse "near" into your country
|
||||
if len(country) >= MIN_TIER:
|
||||
tiers.append(("country", country))
|
||||
else:
|
||||
world = country + world # fold sparse country into the world
|
||||
elif len(near) >= MIN_TIER:
|
||||
tiers.append(("near", near)) # near == your whole country here
|
||||
else:
|
||||
world = near + world
|
||||
tiers.append(("world", world))
|
||||
rows = []
|
||||
for key, group in tiers:
|
||||
for r in group:
|
||||
r["__section"] = key
|
||||
rows.append(r)
|
||||
else:
|
||||
rows = _fetch("world", limit, offset)
|
||||
for r in rows:
|
||||
r["__section"] = "world"
|
||||
next_offset = offset + limit if len(rows) == limit else None
|
||||
else:
|
||||
rows = queries.feed(
|
||||
conn, topic=topic, flavor=flavor, accepted_only=accepted_only,
|
||||
limit=limit, offset=offset, tag=tag, source_id=source_id, sort=sort, **kw,
|
||||
)
|
||||
# Keep the top of a browse view readable: stable-sort paywalled items
|
||||
# below readable ones (composite order preserved within each group).
|
||||
rows = sorted(rows, key=lambda r: is_paywalled_for_source(r["canonical_url"], r["paywall_override"]))
|
||||
rows = _fetch(None, limit, offset)
|
||||
next_offset = offset + len(rows) if len(rows) == limit else None
|
||||
# Paywalled below readable WITHIN each section (so tiers stay grouped); non-home
|
||||
# rows all share section rank 0, preserving the original global behavior.
|
||||
_SEC = {"near": 0, "country": 1, "world": 2}
|
||||
rows = sorted(rows, key=lambda r: (
|
||||
_SEC.get(r.get("__section"), 0),
|
||||
is_paywalled_for_source(r["canonical_url"], r["paywall_override"]),
|
||||
))
|
||||
return FeedResponse(
|
||||
topic=topic,
|
||||
flavor=flavor,
|
||||
count=len(rows),
|
||||
items=[Article.from_row(r) for r in rows],
|
||||
next_offset=next_offset,
|
||||
)
|
||||
|
||||
@app.get("/api/search", response_model=FeedResponse)
|
||||
|
||||
+8
-3
@@ -182,9 +182,14 @@ def feed(
|
||||
elif geo_scope == "country" and home_country:
|
||||
clauses.append("EXISTS (SELECT 1 FROM article_places p WHERE p.article_id = a.id AND p.country_code = ?)")
|
||||
params.append(home_country)
|
||||
if home_state: # "elsewhere in your country" = your country, but not your state
|
||||
clauses.append("NOT EXISTS (SELECT 1 FROM article_places p2 WHERE p2.article_id = a.id AND p2.state_code = ?)")
|
||||
params.append(home_state)
|
||||
if home_state:
|
||||
# "elsewhere in your country" excludes ONLY what actually went to "near" (a
|
||||
# high/medium-confidence home-state match). A low-confidence home-state story
|
||||
# isn't near, so it must still surface here, not vanish between tiers.
|
||||
clauses.append(
|
||||
"NOT (g.confidence IN ('high','medium') AND EXISTS (SELECT 1 FROM article_places p2 "
|
||||
"WHERE p2.article_id = a.id AND p2.country_code = ? AND p2.state_code = ?))")
|
||||
params.extend([home_country, home_state])
|
||||
elif geo_scope == "world" and home_country:
|
||||
clauses.append("NOT EXISTS (SELECT 1 FROM article_places p WHERE p.article_id = a.id AND p.country_code = ?)")
|
||||
params.append(home_country)
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
"""Closer to Home: /api/feed?home=... sections the feed into near / country / world,
|
||||
state-match only on high/medium confidence, sparse tiers fold down, Brief untouched."""
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from goodnews.db import connect, init_db
|
||||
|
||||
|
||||
@pytest.fixture
def app_db(tmp_path, monkeypatch):
    """Return a FastAPI app backed by a throwaway SQLite DB seeded with geo-tagged stories.

    Seed data (one source, every article accepted, same publish time):
      * ids 1-4  : place US/NY, confidence "high"
      * id 5     : place US/NY, confidence "low"
      * ids 6-8  : place US/CA, confidence "high"
      * ids 9-12 : breadth "global", no place rows
    """
    db = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))
    monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "http://testserver")

    # Imported and reloaded only after the env vars are patched.
    # NOTE(review): presumably goodnews.api reads them at import time -- confirm.
    import importlib

    import goodnews.api as api

    importlib.reload(api)

    c = connect(str(db))
    try:
        init_db(c)
        c.execute("INSERT INTO sources (id,name,feed_url,trust_score,content_visible) VALUES (1,'S','http://s/f',5,1)")

        def art(aid, *, breadth, conf, country=None, state=None):
            # Insert one accepted article with its geo row and, if given, a single place row.
            c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,published_at) "
                      "VALUES (?,1,?,?,?,?)", (aid, f"https://x/{aid}", f"Story {aid}", f"h{aid}", "2026-06-18T08:00:00"))
            c.execute("INSERT INTO article_scores (article_id,accepted,novelty_score,constructive_score) "
                      "VALUES (?,1,5,5)", (aid,))
            c.execute("INSERT INTO article_geo (article_id,breadth,confidence,geo_version) VALUES (?,?,?, 'geo-v1')",
                      (aid, breadth, conf))
            if country:
                c.execute("INSERT INTO article_places (article_id,country_code,state_code,ord) VALUES (?,?,?,0)",
                          (aid, country, state))

        # 4 NY high-conf (near), 1 NY LOW-conf (must NOT be near), 3 US/CA (country), 4 world
        for i in range(1, 5):
            art(i, breadth="locality", conf="high", country="US", state="NY")
        art(5, breadth="locality", conf="low", country="US", state="NY")
        for i in range(6, 9):
            art(i, breadth="regional", conf="high", country="US", state="CA")
        for i in range(9, 13):
            art(i, breadth="global", conf="high")  # placeless -> world
        c.commit()
    finally:
        # Always release the SQLite handle, even if seeding fails part-way.
        c.close()
    return api.create_app()
|
||||
|
||||
|
||||
def _sections(items):
    """Return the `section` value of each feed item, in response order."""
    return [it["section"] for it in items]
|
||||
|
||||
|
||||
def test_home_state_sections_near_country_world(app_db):
    """home=US-NY: sections arrive grouped as near, country, world with the expected members."""
    resp = TestClient(app_db).get("/api/feed?home=US-NY&limit=50")
    assert resp.status_code == 200
    items = resp.json()["items"]
    secs = _sections(items)
    # order: all near, then all country, then all world (no interleaving)
    assert secs == sorted(secs, key=["near", "country", "world"].index)
    near = [it for it in items if it["section"] == "near"]
    assert len(near) == 4
    assert all(it["geo_confidence"] == "high" for it in near)
    # the LOW-confidence NY story is NOT "near" — state match needs high/medium
    a5 = next(it for it in items if it["id"] == 5)
    assert a5["section"] == "country"
    # world holds the placeless/global ones
    assert {it["id"] for it in items if it["section"] == "world"} == {9, 10, 11, 12}
|
||||
|
||||
|
||||
def test_sparse_near_folds_into_country(app_db):
    """home=US-TX: with no TX stories seeded there is no near section, but country still shows."""
    # home in a state with no high-conf local stories -> "near" is empty, no near header;
    # its (none) items fold away and we still get country + world, never an empty tier.
    resp = TestClient(app_db).get("/api/feed?home=US-TX&limit=50")
    assert resp.status_code == 200
    secs = _sections(resp.json()["items"])
    assert "near" not in secs  # nothing local to TX -> no near section
    assert "country" in secs  # US stories still surface as your country
|
||||
|
||||
|
||||
def test_country_only_home_has_no_country_tier(app_db):
    """home=US (no state): only near/world sections appear, and near holds every US story."""
    resp = TestClient(app_db).get("/api/feed?home=US&limit=50")
    assert resp.status_code == 200
    items = resp.json()["items"]
    secs = set(_sections(items))
    assert "country" not in secs  # no state -> near IS the whole country
    assert secs <= {"near", "world"}
    near_ids = {it["id"] for it in items if it["section"] == "near"}
    assert near_ids == {1, 2, 3, 4, 5, 6, 7, 8}  # all US (incl. the low-conf one, country match)
|
||||
|
||||
|
||||
def test_no_home_is_unchanged_and_unsectioned(app_db):
    """Without `home`, no item carries a section tag and a short page reports no next offset."""
    resp = TestClient(app_db).get("/api/feed?limit=50")
    assert resp.status_code == 200
    r = resp.json()
    assert all(it["section"] is None for it in r["items"])
    # Only 12 articles are seeded, so a limit=50 page can never be full: there is no next page.
    assert r["next_offset"] is None
|
||||
Reference in New Issue
Block a user