Games sync hardening (Codex audit): server-side state normalization

Don't trust client JSON at the storage layer:
- sanitize_game_state() runs before merge AND on the merged result (heals legacy
  rows). Word Search: keep only finds whose cells actually spell a real word in
  that day's grid (validated when the puzzle exists, shape-only 4-12 alpha +
  cell-length otherwise), dedupe, renumber ci. Word: validate status enum, guess
  count/length/alpha, colour-row shape, terminal answer/why.
- Completion is now derived from the real puzzle word count (foundWords ==
  expected), not a client-sent `ms` — so stats can't be inflated by junk.
- Date validated as YYYY-MM-DD at the API (400 otherwise) — no malformed-date rows. The check covers the date's form; it does not by itself reject future dates.

Tests: sanitizer-rejects-junk + bad-date 400; existing tests updated to use
real-shaped data (the sanitizer is a good forcing function). 237 pytest + 11
vitest green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-12 13:51:24 -04:00
parent dd0df64d76
commit 065ab98598
3 changed files with 135 additions and 18 deletions
+9 -2
View File
@@ -1579,27 +1579,34 @@ def create_app() -> FastAPI:
return (game == "word" and variant in games.WORD_VARIANTS) or \
(game == "wordsearch" and variant in games.WS_TIERS)
def _valid_pdate(d: str) -> bool:
return bool(re.match(r"^\d{4}-\d{2}-\d{2}$", d or "")) # plain YYYY-MM-DD, no junk rows
@app.get("/api/games/state")
def game_state_get(game: str, variant: str, date: str, request: Request) -> dict:
    """Return the signed-in user's stored state for one game/variant/date.

    Raises:
        HTTPException 404: the game/variant pair is not recognised by `_game_ok`.
        HTTPException 400: `date` fails `_valid_pdate`.

    Returns:
        {"state": <stored state>} for a signed-in user, or {"state": None}
        when no user is resolved from the request.
    """
    if not _game_ok(game, variant):
        raise HTTPException(status_code=404, detail="no such game")
    if not _valid_pdate(date):
        raise HTTPException(status_code=400, detail="bad date")
    with get_conn() as conn:
        user = _current_user(conn, request)
        if not user:
            return {"state": None}
        # Single return: the date has already passed _valid_pdate, so it is
        # handed on as-is (the earlier duplicate `date[:10]` return is gone).
        return {"state": games.load_game_state(conn, user["id"], game, variant, date)}
@app.put("/api/games/state")
def game_state_put(body: GameStateBody, request: Request) -> dict:
    """Persist a client-sent game state and return the server's merged state.

    Raises:
        HTTPException 404: the game/variant pair is not recognised by `_game_ok`.
        HTTPException 400: `body.date` fails `_valid_pdate`.
        HTTPException 413: the JSON-serialised state is longer than 20000 characters.

    Returns:
        {"state": merged} for a signed-in user; for a signed-out request the
        incoming state is echoed back untouched and nothing is stored.
    """
    if not _game_ok(body.game, body.variant):
        raise HTTPException(status_code=404, detail="no such game")
    if not _valid_pdate(body.date):
        raise HTTPException(status_code=400, detail="bad date")
    if len(json.dumps(body.state)) > 20000:  # a real game state is tiny — reject junk
        raise HTTPException(status_code=413, detail="state too large")
    with get_conn() as conn:
        user = _current_user(conn, request)
        if not user:
            return {"state": body.state}  # signed out → no sync, just echo
        # Save exactly once; the date has already passed _valid_pdate, so it is
        # handed on as-is (the earlier duplicate `body.date[:10]` save is gone).
        merged = games.save_game_state(
            conn, user["id"], body.game, body.variant, body.date, body.state or {}
        )
    return {"state": merged}
@app.get("/api/games/stats")
+86 -2
View File
@@ -592,11 +592,95 @@ def load_game_state(conn: sqlite3.Connection, user_id: int, game: str, variant:
return None
def _int(x) -> int:
try:
return int(x)
except (TypeError, ValueError):
return 0
def _sanitize_wordsearch(conn: sqlite3.Connection, variant: str, date: str, state: dict) -> dict:
    """Return a normalized Word Search state that keeps only plausible finds.

    When a puzzle row exists for *date* (and *variant* is a known tier), a find
    is kept only if its word is in that puzzle's word list and its cells spell
    the word in the puzzle's grid. When no puzzle can be loaded, only the shape
    is checked: 4-12 alphabetic characters with one cell per letter. Duplicate
    words are dropped and colour indices (`ci`) are renumbered 0-9 in order.

    Completion is derived from the real word count: `ms` is kept only when
    every puzzle word has been found, otherwise it is reset to 0.

    Args:
        conn: open SQLite connection used to look up the day's puzzle.
        variant: Word Search tier name.
        date: puzzle date string used for the lookup.
        state: untrusted state; a non-dict value is treated as empty.

    Returns:
        A dict with keys "foundWords", "startTime" and "ms".
    """
    if not isinstance(state, dict):  # junk/legacy payloads must not crash the sanitizer
        state = {}

    words: list[str] = []
    grid: list[str] = []
    if variant in WS_TIERS and conn.execute(
        "SELECT 1 FROM daily_puzzles WHERE puzzle_date=? AND game='wordsearch' AND variant=''", (date,)
    ).fetchone():
        try:
            p = wordsearch_response(conn, date, variant)  # read-only; today's puzzle already exists
            words, grid = list(p.get("words") or []), list(p.get("grid") or [])
        except Exception:  # noqa: BLE001 — deliberate best effort: fall back to shape-only checks
            words, grid = [], []
    wset = set(words)

    found = state.get("foundWords")
    if not isinstance(found, list):  # e.g. a number or string sent instead of a list
        found = []

    clean: list[dict] = []
    seen: set[str] = set()
    for fw in found:
        if not isinstance(fw, dict):
            continue
        w, cells = fw.get("word"), fw.get("cells")
        if not isinstance(w, str) or w in seen or not isinstance(cells, list):
            continue
        try:
            cells = [[int(r), int(c)] for r, c in cells]
        except (TypeError, ValueError, OverflowError):
            # OverflowError: an infinite coordinate (JSON such as 1e999).
            continue
        if len(cells) != len(w):
            continue
        if grid:  # validate the find spells the word in the real grid
            if w not in wset:
                continue
            # Out-of-range cells are skipped, which leaves `spelled` too short
            # and therefore unequal to the word.
            # NOTE(review): only the letters are checked, not that the cells
            # form one straight, contiguous line — confirm whether that matters.
            spelled = "".join(
                grid[r][c] for r, c in cells if 0 <= r < len(grid) and 0 <= c < len(grid[r])
            )
            if spelled != w:
                continue
        elif not (4 <= len(w) <= 12 and w.isalpha()):  # no puzzle to check against → shape only
            continue
        seen.add(w)
        clean.append({"word": w, "cells": cells, "ci": len(clean) % 10})

    done = bool(words) and len(clean) == len(words)
    return {"foundWords": clean, "startTime": _int(state.get("startTime")),
            "ms": _int(state.get("ms")) if done else 0}
_WORD_COLOURS = {"absent", "present", "correct"}


def _sanitize_word(variant: str, state: dict) -> dict:
    """Return a normalized Word-game state built only from well-shaped fields.

    - `status` must be "playing", "won" or "lost"; anything else becomes "playing".
    - `guesses` keeps at most the variant's guess limit, and only alphabetic
      strings of the variant's word length (lower-cased).
    - `cols` keeps, for each surviving guess, the colour row found at that
      guess's original index, filtered to known colours and capped at the
      word length; a non-list row becomes [].
    - `answer` / `why` are copied only for a terminal status ("won"/"lost").

    Args:
        variant: key into WORD_VARIANTS (must exist; callers validate it).
        state: untrusted state; a non-dict value is treated as empty.

    Returns:
        A dict with "guesses", "cols", "status" and, when terminal and valid,
        "answer" and "why".
    """
    if not isinstance(state, dict):  # junk/legacy payloads must not crash the sanitizer
        state = {}

    n = WORD_VARIANTS[variant]["length"]
    maxg = WORD_VARIANTS[variant]["guesses"]

    status = state.get("status")
    if status not in ("playing", "won", "lost"):
        status = "playing"

    raw_guesses = state.get("guesses")
    if not isinstance(raw_guesses, list):  # a number/dict here used to raise on slicing
        raw_guesses = []
    raw_cols = state.get("cols")
    if not isinstance(raw_cols, list):
        raw_cols = []

    guesses: list[str] = []
    cols: list[list[str]] = []
    for i, g in enumerate(raw_guesses[:maxg]):
        if not (isinstance(g, str) and len(g) == n and g.isalpha()):
            continue
        guesses.append(g.lower())
        # Take the colour row from the guess's own index so that dropping an
        # invalid guess does not shift later rows onto the wrong guess.
        if i < len(raw_cols):
            row = raw_cols[i]
            if isinstance(row, list):
                # isinstance guard: an unhashable entry (e.g. a list) would
                # raise TypeError on the set membership test.
                cols.append([c for c in row if isinstance(c, str) and c in _WORD_COLOURS][:n])
            else:
                cols.append([])

    out = {"guesses": guesses, "cols": cols, "status": status}
    if status in ("won", "lost"):
        ans = state.get("answer")
        if isinstance(ans, str) and len(ans) == n and ans.isalpha():
            out["answer"] = ans.lower()
        if isinstance(state.get("why"), str):
            out["why"] = state["why"][:600]  # cap the free-text explanation
    return out
def sanitize_game_state(conn: sqlite3.Connection, game: str, variant: str, date: str, state: dict) -> dict:
    """Normalize an untrusted game state before it is merged or stored.

    Dispatches on *game*: "wordsearch" goes to the Word Search sanitizer (which
    needs the connection and date); every other game goes to the Word
    sanitizer. A falsy *state* is treated as an empty dict.
    """
    payload = state or {}
    if game != "wordsearch":
        return _sanitize_word(variant, payload)
    return _sanitize_wordsearch(conn, variant, date, payload)
def save_game_state(conn: sqlite3.Connection, user_id: int, game: str, variant: str,
date: str, incoming: dict) -> dict:
"""Merge incoming with the stored state (server-authoritative) and persist."""
"""Sanitize → merge with the stored state (server-authoritative) → sanitize → persist."""
clean_in = sanitize_game_state(conn, game, variant, date, incoming or {})
stored = load_game_state(conn, user_id, game, variant, date)
merged = merge_game_state(game, stored, incoming or {})
merged = sanitize_game_state(conn, game, variant, date, merge_game_state(game, stored, clean_in))
conn.execute(
"INSERT INTO game_state (user_id, game, variant, puzzle_date, state_json, updated_at) "
"VALUES (?,?,?,?,?,CURRENT_TIMESTAMP) "