Geo Stage 4 (data layer): geo on feed responses + home-scope query filters

Foundation for "Closer to Home" (server-side, Codex-approved). No behavior change
yet — geo_scope defaults None, so the default/edge-cached feed is identical.

- queries.feed now returns each article's geo (breadth, confidence, and ISO-coded
  places) via a LEFT JOIN + places subquery. Article.from_row parses geo_places
  into [{country, state}]. Brief query doesn't select geo, so the Brief stays bare.
- queries.feed gains home-scope filters (home_country/home_state/geo_scope =
  near|country|world): STATE match only counts on high/medium geo confidence;
  untagged articles fall to 'world' so nothing is lost during backfill.

Next: API composition (home param + near/country/world sectioning with soft/blended
headers + a next_offset pagination model) and the Home picker UI. 360 tests green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-19 19:30:43 -04:00
parent 1c05554a28
commit ad4e88c8f2
3 changed files with 151 additions and 1 deletions
+15
View File
@@ -322,11 +322,26 @@ class Article(BaseModel):
paywalled: bool = False
tags: list[str] = []
summary: str | None = None # our own cached summary (present on the brief)
# Subject geography (present on feed rows; absent/empty on the brief). breadth is
# locality|regional|national|multinational|global|unknown; places are ISO codes.
geo_breadth: str | None = None
geo_confidence: str | None = None
geo_places: list[dict] = [] # e.g. [{"country": "US", "state": "NY"}, {"country": "GB", "state": None}]
@classmethod
def from_row(cls, row: dict) -> "Article":
raw_tags = row.get("tags")
places = []
for tok in (row.get("geo_places") or "").split(","):
tok = tok.strip()
if not tok:
continue
cc, _, sc = tok.partition("-")
places.append({"country": cc, "state": sc or None})
return cls(
geo_breadth=row.get("geo_breadth"),
geo_confidence=row.get("geo_confidence"),
geo_places=places,
summary=row.get("summary"),
id=row["id"],
title=row["title"],
+33 -1
View File
@@ -80,6 +80,9 @@ def feed(
follow_tags: list[str] | None = None,
since: str | None = None,
match: str | None = None,
home_country: str | None = None,
home_state: str | None = None,
geo_scope: str | None = None, # 'near' | 'country' | 'world' relative to the reader's home
) -> list[dict]:
"""Return articles with categorical filters applied in SQL.
@@ -162,6 +165,30 @@ def feed(
params.extend(ftags)
clauses.append("(" + " OR ".join(ors) + ")" if ors else "0")
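# Home-aware scoping for "Closer to Home" (server-side), relative to the reader's
# chosen home. geo_scope=None adds no clause, so the feed stays exactly as it is today.
# 'near' with a home state requires a place in that state AND geo confidence of
# high/medium (don't surface "Near you" on a shaky location); with only a home
# country it falls back to a plain country match. Untagged articles have no places,
# so they land in 'world' — never lost while the backfill is still running.
# NOTE(review): an article placed in the home state with low/missing confidence
# appears to match none of near/country/world as written, and the 'country' state
# exclusion compares state_code without country_code — confirm both are intended.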
if geo_scope == "near":
if home_state and home_country:
clauses.append(
"g.confidence IN ('high','medium') AND EXISTS (SELECT 1 FROM article_places p "
"WHERE p.article_id = a.id AND p.country_code = ? AND p.state_code = ?)")
params.extend([home_country, home_state])
elif home_country:
clauses.append("EXISTS (SELECT 1 FROM article_places p WHERE p.article_id = a.id AND p.country_code = ?)")
params.append(home_country)
elif geo_scope == "country" and home_country:
clauses.append("EXISTS (SELECT 1 FROM article_places p WHERE p.article_id = a.id AND p.country_code = ?)")
params.append(home_country)
if home_state: # "elsewhere in your country" = your country, but not your state
clauses.append("NOT EXISTS (SELECT 1 FROM article_places p2 WHERE p2.article_id = a.id AND p2.state_code = ?)")
params.append(home_state)
elif geo_scope == "world" and home_country:
clauses.append("NOT EXISTS (SELECT 1 FROM article_places p WHERE p.article_id = a.id AND p.country_code = ?)")
params.append(home_country)
where = "WHERE " + " AND ".join(clauses)
params.extend([limit, offset])
@@ -173,10 +200,15 @@ def feed(
order_by = "rank_score DESC, COALESCE(a.published_at, a.discovered_at) DESC"
rows = conn.execute(
f"""
SELECT {_ARTICLE_COLUMNS}
SELECT {_ARTICLE_COLUMNS},
g.breadth AS geo_breadth, g.confidence AS geo_confidence,
(SELECT group_concat(
p.country_code || CASE WHEN p.state_code IS NOT NULL THEN '-' || p.state_code ELSE '' END, ',')
FROM article_places p WHERE p.article_id = a.id) AS geo_places
FROM articles a
JOIN sources src ON src.id = a.source_id
JOIN article_scores s ON s.article_id = a.id
LEFT JOIN article_geo g ON g.article_id = a.id
{fts_join}
{where}
ORDER BY {order_by}