Games sync hardening (Codex audit): server-side state normalization

Don't trust client JSON at the storage layer:
- sanitize_game_state() runs before merge AND on the merged result (heals legacy
  rows). Word Search: keep only finds whose cells actually spell a real word in
  that day's grid (validated when the puzzle exists, shape-only 4-12 alpha +
  cell-length otherwise), dedupe, renumber ci. Word: validate status enum, guess
  count/length/alpha, colour-row shape, terminal answer/why.
- Completion is now derived from the real puzzle word count (foundWords ==
  expected), not a client-sent `ms` — so stats can't be inflated by junk.
- Date validated as YYYY-MM-DD at the API (400 otherwise) — keeps junk-keyed rows out. This is a format check only; it does not reject future dates.

Tests: sanitizer-rejects-junk + bad-date 400; existing tests updated to use
real-shaped data (the sanitizer is a good forcing function). 237 pytest + 11
vitest green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-12 13:51:24 -04:00
parent dd0df64d76
commit 065ab98598
3 changed files with 135 additions and 18 deletions
+9 -2
View File
@@ -1579,27 +1579,34 @@ def create_app() -> FastAPI:
return (game == "word" and variant in games.WORD_VARIANTS) or \
(game == "wordsearch" and variant in games.WS_TIERS)
def _valid_pdate(d: str) -> bool:
return bool(re.match(r"^\d{4}-\d{2}-\d{2}$", d or "")) # plain YYYY-MM-DD, no junk rows
@app.get("/api/games/state")
def game_state_get(game: str, variant: str, date: str, request: Request) -> dict:
if not _game_ok(game, variant):
raise HTTPException(status_code=404, detail="no such game")
if not _valid_pdate(date):
raise HTTPException(status_code=400, detail="bad date")
with get_conn() as conn:
user = _current_user(conn, request)
if not user:
return {"state": None}
return {"state": games.load_game_state(conn, user["id"], game, variant, date[:10])}
return {"state": games.load_game_state(conn, user["id"], game, variant, date)}
@app.put("/api/games/state")
def game_state_put(body: GameStateBody, request: Request) -> dict:
if not _game_ok(body.game, body.variant):
raise HTTPException(status_code=404, detail="no such game")
if not _valid_pdate(body.date):
raise HTTPException(status_code=400, detail="bad date")
if len(json.dumps(body.state)) > 20000: # a real game state is tiny — reject junk
raise HTTPException(status_code=413, detail="state too large")
with get_conn() as conn:
user = _current_user(conn, request)
if not user:
return {"state": body.state} # signed out → no sync, just echo
merged = games.save_game_state(conn, user["id"], body.game, body.variant, body.date[:10], body.state or {})
merged = games.save_game_state(conn, user["id"], body.game, body.variant, body.date, body.state or {})
return {"state": merged}
@app.get("/api/games/stats")
+86 -2
View File
@@ -592,11 +592,95 @@ def load_game_state(conn: sqlite3.Connection, user_id: int, game: str, variant:
return None
def _int(x) -> int:
try:
return int(x)
except (TypeError, ValueError):
return 0
def _sanitize_wordsearch(conn: sqlite3.Connection, variant: str, date: str, state: dict) -> dict:
    """Normalize an untrusted Word Search state for (*variant*, *date*).

    Only finds that are plausible for this puzzle are kept:

    * When a ``daily_puzzles`` row exists for *date* and the puzzle loads, a
      find is kept only if its word is in the puzzle's word list and its cells,
      read in order from the grid, spell exactly that word.
    * Otherwise (no puzzle to check against) only the shape is checked: a
      4-12 character alphabetic word with one cell per letter.

    Finds are de-duplicated by word and their colour index ``ci`` is
    renumbered. ``ms`` is kept only when the puzzle's word list is known and
    every word has been found; otherwise it is reset to 0, so completion is
    never taken from the client alone.

    Malformed input (non-dict state, non-list ``foundWords``, cells that are
    not integer pairs, infinite numbers) is dropped rather than raised on.

    Returns a dict with exactly ``foundWords``, ``startTime`` and ``ms``.
    """
    if not isinstance(state, dict):
        state = {}

    words: list[str] = []
    grid: list[str] = []
    if variant in WS_TIERS and conn.execute(
        "SELECT 1 FROM daily_puzzles WHERE puzzle_date=? AND game='wordsearch' AND variant=''", (date,)
    ).fetchone():
        try:
            # Original author's note: read-only here, because the puzzle row
            # already exists (the SELECT above just confirmed it).
            p = wordsearch_response(conn, date, variant)
            words, grid = list(p.get("words") or []), list(p.get("grid") or [])
        except Exception:  # noqa: BLE001 - best effort: fall back to shape-only checks
            words, grid = [], []
    wset = set(words)

    found = state.get("foundWords")
    if not isinstance(found, list):  # e.g. a number or object sent instead of a list
        found = []

    clean: list[dict] = []
    seen: set[str] = set()
    for fw in found:
        if not isinstance(fw, dict):
            continue
        w, cells = fw.get("word"), fw.get("cells")
        if not isinstance(w, str) or w in seen or not isinstance(cells, list):
            continue
        try:
            cells = [[int(r), int(c)] for r, c in cells]
        except (TypeError, ValueError, OverflowError):
            # Not a list of 2-item integer pairs; OverflowError covers
            # infinities such as a JSON 1e999.
            continue
        if len(cells) != len(w):
            continue
        if grid:  # validate the find spells the word in the real grid
            if w not in wset:
                continue
            # Out-of-range cells are skipped, which leaves `spelled` too short
            # to equal `w`, so the find is rejected just below.
            spelled = "".join(
                grid[r][c] for r, c in cells
                if 0 <= r < len(grid) and 0 <= c < len(grid[r])
            )
            if spelled != w:
                continue
        elif not (4 <= len(w) <= 12 and w.isalpha()):  # no puzzle to check against -> shape only
            continue
        seen.add(w)
        clean.append({"word": w, "cells": cells, "ci": len(clean) % 10})

    # Complete only when the real word list is known and fully found.
    done = bool(words) and len(clean) == len(words)
    return {
        "foundWords": clean,
        "startTime": _int(state.get("startTime")),
        "ms": _int(state.get("ms")) if done else 0,
    }
# The only per-letter colour values accepted in a stored colour row.
_WORD_COLOURS = {"absent", "present", "correct"}


def _sanitize_word(variant: str, state: dict) -> dict:
    """Normalize an untrusted Word game state for *variant*.

    * ``status`` must be ``playing``/``won``/``lost``; anything else becomes
      ``playing``.
    * Only the first ``guesses`` (variant limit) submitted entries are
      considered, and a guess is kept only if it is an alphabetic string of
      the variant's ``length``; kept guesses are lower-cased.
    * Colour rows are paired with guesses by their submitted index, so when a
      junk guess is dropped its colour row is dropped too and the surviving
      guesses keep their own rows. Within a row only known colour strings are
      kept (at most ``length`` of them); a non-list row becomes ``[]``.
    * ``answer`` and ``why`` are carried over only for terminal states
      (``won``/``lost``), with ``why`` truncated to 600 characters.

    Malformed containers (non-dict state, non-list ``guesses``/``cols``,
    unhashable colour entries) are treated as empty instead of raising.

    Raises ``KeyError`` if *variant* is not in ``WORD_VARIANTS``.
    """
    if not isinstance(state, dict):
        state = {}
    n = WORD_VARIANTS[variant]["length"]
    maxg = WORD_VARIANTS[variant]["guesses"]

    status = state.get("status")
    if status not in ("playing", "won", "lost"):
        status = "playing"

    raw_guesses = state.get("guesses")
    if not isinstance(raw_guesses, list):
        raw_guesses = []
    raw_cols = state.get("cols")
    has_cols = isinstance(raw_cols, list)

    guesses: list[str] = []
    cols: list[list[str]] = []
    for i, g in enumerate(raw_guesses[:maxg]):
        if not (isinstance(g, str) and len(g) == n and g.isalpha()):
            continue
        low = g.lower()
        # Lower-casing can change the length of a few non-ASCII letters;
        # re-check so a stored guess always has exactly n letters.
        if len(low) != n or not low.isalpha():
            continue
        guesses.append(low)
        # Take the colour row submitted at the same index as this guess.
        if has_cols and i < len(raw_cols):
            row = raw_cols[i]
            if isinstance(row, list):
                # isinstance guard: an unhashable entry would raise in `in <set>`.
                cols.append([c for c in row if isinstance(c, str) and c in _WORD_COLOURS][:n])
            else:
                cols.append([])

    out = {"guesses": guesses, "cols": cols, "status": status}
    if status in ("won", "lost"):
        ans = state.get("answer")
        if isinstance(ans, str) and len(ans) == n and ans.isalpha():
            low_ans = ans.lower()
            if len(low_ans) == n and low_ans.isalpha():
                out["answer"] = low_ans
        if isinstance(state.get("why"), str):
            out["why"] = state["why"][:600]
    return out
def sanitize_game_state(conn: sqlite3.Connection, game: str, variant: str, date: str, state: dict) -> dict:
    """Never trust client JSON at the storage layer - normalize before merge/store.

    Dispatches to the Word Search sanitizer when *game* is ``"wordsearch"`` and
    to the Word sanitizer for every other value of *game*. Anything that is not
    a dict (``None``, a list, a number, ...) is treated as an empty state, so a
    malformed payload or stored row is normalized instead of raising.
    """
    safe_state = state if isinstance(state, dict) else {}
    if game == "wordsearch":
        return _sanitize_wordsearch(conn, variant, date, safe_state)
    return _sanitize_word(variant, safe_state)
def save_game_state(conn: sqlite3.Connection, user_id: int, game: str, variant: str,
date: str, incoming: dict) -> dict:
"""Merge incoming with the stored state (server-authoritative) and persist."""
"""Sanitize → merge with the stored state (server-authoritative) → sanitize → persist."""
clean_in = sanitize_game_state(conn, game, variant, date, incoming or {})
stored = load_game_state(conn, user_id, game, variant, date)
merged = merge_game_state(game, stored, incoming or {})
merged = sanitize_game_state(conn, game, variant, date, merge_game_state(game, stored, clean_in))
conn.execute(
"INSERT INTO game_state (user_id, game, variant, puzzle_date, state_json, updated_at) "
"VALUES (?,?,?,?,?,CURRENT_TIMESTAMP) "
+40 -14
View File
@@ -48,12 +48,17 @@ def test_merge_handles_missing_sides():
# --- persistence convergence ---
def _find(word, row): # a shape-valid find: cells spelling the word along a row
return {"word": word, "cells": [[row, i] for i in range(len(word))], "ci": 0}
def test_save_converges_across_devices(conn):
# No stored puzzle for this date → shape-only sanitize (words 4-12, cells match).
games.save_game_state(conn, 1, "wordsearch", "small", "2026-06-12",
{"foundWords": [{"word": "CAT", "cells": [[0, 0]], "ci": 0}], "startTime": 100})
{"foundWords": [_find("BEACH", 0)], "startTime": 100})
merged = games.save_game_state(conn, 1, "wordsearch", "small", "2026-06-12",
{"foundWords": [{"word": "DOG", "cells": [[1, 1]], "ci": 1}], "startTime": 50})
assert {f["word"] for f in merged["foundWords"]} == {"CAT", "DOG"}
{"foundWords": [_find("OCEAN", 1)], "startTime": 50})
assert {f["word"] for f in merged["foundWords"]} == {"BEACH", "OCEAN"}
# stored state reflects the merge (order-independent)
assert games.load_game_state(conn, 1, "wordsearch", "small", "2026-06-12")["startTime"] == 50
@@ -61,9 +66,9 @@ def test_save_converges_across_devices(conn):
# --- derived stats ---
def test_word_stats_streak_and_distribution(conn):
games.save_game_state(conn, 1, "word", "5", "2026-06-12", {"status": "won", "guesses": ["a", "b", "c"]})
games.save_game_state(conn, 1, "word", "5", "2026-06-11", {"status": "won", "guesses": ["a", "b", "c", "d"]})
games.save_game_state(conn, 1, "word", "5", "2026-06-10", {"status": "lost", "guesses": ["a"] * 6})
games.save_game_state(conn, 1, "word", "5", "2026-06-12", {"status": "won", "guesses": ["aaaaa", "bbbbb", "ccccc"]})
games.save_game_state(conn, 1, "word", "5", "2026-06-11", {"status": "won", "guesses": ["aaaaa", "bbbbb", "ccccc", "ddddd"]})
games.save_game_state(conn, 1, "word", "5", "2026-06-10", {"status": "lost", "guesses": ["aaaaa"] * 6})
st = games.game_stats(conn, 1, "word", "5")
assert st["played"] == 3 and st["won"] == 2
assert st["streak"] == 2 # two most-recent wins, then the loss stops it
@@ -71,8 +76,13 @@ def test_word_stats_streak_and_distribution(conn):
def test_wordsearch_stats_best_time(conn):
games.save_game_state(conn, 1, "wordsearch", "med", "2026-06-12", {"foundWords": [], "ms": 4000})
games.save_game_state(conn, 1, "wordsearch", "med", "2026-06-11", {"foundWords": [], "ms": 6000})
import json
# Store completed states directly — game_stats just reads what's persisted
# (the sanitizer that gates `ms` on real completion is covered separately).
for d, ms in (("2026-06-12", 4000), ("2026-06-11", 6000)):
conn.execute("INSERT INTO game_state (user_id, game, variant, puzzle_date, state_json) "
"VALUES (1,'wordsearch','med',?,?)", (d, json.dumps({"foundWords": [], "ms": ms})))
conn.commit()
st = games.game_stats(conn, 1, "wordsearch", "med")
assert st["completed"] == 2 and st["best"] == 4000
@@ -86,16 +96,32 @@ def test_game_state_api_roundtrip(tmp_path, monkeypatch):
# signed out → no sync, echoes the posted state and GET sees nothing stored
anon = TestClient(app)
body = {"game": "wordsearch", "variant": "small", "date": "2026-06-12",
"state": {"foundWords": [{"word": "CAT", "cells": [[0, 0]], "ci": 0}], "startTime": 9}}
assert anon.put("/api/games/state", json=body).json()["state"]["foundWords"][0]["word"] == "CAT"
"state": {"foundWords": [_find("BEACH", 0)], "startTime": 9}}
assert anon.put("/api/games/state", json=body).json()["state"]["foundWords"][0]["word"] == "BEACH"
assert anon.get("/api/games/state?game=wordsearch&variant=small&date=2026-06-12").json()["state"] is None
# signed in: push from "device A", then "device B" → server returns the union
tc = _signin(app, api, "p@x.com")
tc.put("/api/games/state", json=body)
bodyB = {**body, "state": {"foundWords": [{"word": "DOG", "cells": [[1, 1]], "ci": 1}], "startTime": 4}}
bodyB = {**body, "state": {"foundWords": [_find("OCEAN", 1)], "startTime": 4}}
merged = tc.put("/api/games/state", json=bodyB).json()["state"]
assert {f["word"] for f in merged["foundWords"]} == {"CAT", "DOG"} and merged["startTime"] == 4
# GET returns the stored merge; unknown game → 404
assert {f["word"] for f in merged["foundWords"]} == {"BEACH", "OCEAN"} and merged["startTime"] == 4
# GET returns the stored merge; unknown game → 404; bad date → 400
got = tc.get("/api/games/state?game=wordsearch&variant=small&date=2026-06-12").json()["state"]
assert {f["word"] for f in got["foundWords"]} == {"CAT", "DOG"}
assert {f["word"] for f in got["foundWords"]} == {"BEACH", "OCEAN"}
assert tc.get("/api/games/state?game=nope&variant=x&date=2026-06-12").status_code == 404
assert tc.get("/api/games/state?game=wordsearch&variant=small&date=notadate").status_code == 400
def test_sanitizers_reject_junk(conn):
    """Both sanitizers drop malformed client data instead of storing it."""
    # Word: bad status -> playing; wrong-length/non-alpha guesses dropped;
    # cols capped to kept guesses.
    junk_word_state = {
        "status": "hacked",
        "guesses": ["abcde", "no", "12345", "fghij"],
        "cols": [["correct", "absent", "bogus", "x", "y"], ["absent"] * 5, ["x"], ["y"]],
    }
    w = games._sanitize_word("5", junk_word_state)
    assert w["status"] == "playing"
    assert w["guesses"] == ["abcde", "fghij"]
    assert len(w["cols"]) == 2
    assert w["cols"][0] == ["correct", "absent"]

    # Word Search (no stored puzzle -> shape-only): bad shapes dropped, and no
    # completion is granted without a known word count.
    junk_finds = [
        _find("BEACH", 0),  # ok
        {"word": "CAT", "cells": [[0, 0], [0, 1], [0, 2]]},  # too short (<4)
        {"word": "OCEAN", "cells": [[1, 0], [1, 1]]},  # cells != len
    ]
    ws = games._sanitize_wordsearch(conn, "small", "2026-06-12",
                                    {"foundWords": junk_finds, "ms": 12345})
    kept_words = [f["word"] for f in ws["foundWords"]]
    assert kept_words == ["BEACH"]
    assert ws["ms"] == 0