Games sync hardening (Codex audit): server-side state normalization

Don't trust client JSON at the storage layer:
- sanitize_game_state() runs before merge AND on the merged result (heals legacy
  rows). Word Search: keep only finds whose cells actually spell a real word in
  that day's grid (validated against the grid when the stored puzzle exists;
  otherwise shape-only: 4-12 alphabetic characters with one cell per letter),
  dedupe, renumber ci. Word: validate status enum, guess count/length/alpha,
  colour-row shape, terminal answer/why.
- Completion is now derived from the real puzzle word count (foundWords ==
  expected), not a client-sent `ms` — so stats can't be inflated by junk.
- Date validated as YYYY-MM-DD at the API (400 otherwise) — no junk/future rows.

Tests: sanitizer-rejects-junk + bad-date 400; existing tests updated to use
real-shaped data (the sanitizer is a good forcing function). 237 pytest + 11
vitest green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-12 13:51:24 -04:00
parent dd0df64d76
commit 065ab98598
3 changed files with 135 additions and 18 deletions
+40 -14
View File
@@ -48,12 +48,17 @@ def test_merge_handles_missing_sides():
# --- persistence convergence ---
def _find(word, row): # a shape-valid find: cells spelling the word along a row
return {"word": word, "cells": [[row, i] for i in range(len(word))], "ci": 0}
def test_save_converges_across_devices(conn):
# No stored puzzle for this date → shape-only sanitize (words 4-12, cells match).
games.save_game_state(conn, 1, "wordsearch", "small", "2026-06-12",
{"foundWords": [{"word": "CAT", "cells": [[0, 0]], "ci": 0}], "startTime": 100})
{"foundWords": [_find("BEACH", 0)], "startTime": 100})
merged = games.save_game_state(conn, 1, "wordsearch", "small", "2026-06-12",
{"foundWords": [{"word": "DOG", "cells": [[1, 1]], "ci": 1}], "startTime": 50})
assert {f["word"] for f in merged["foundWords"]} == {"CAT", "DOG"}
{"foundWords": [_find("OCEAN", 1)], "startTime": 50})
assert {f["word"] for f in merged["foundWords"]} == {"BEACH", "OCEAN"}
# stored state reflects the merge (order-independent)
assert games.load_game_state(conn, 1, "wordsearch", "small", "2026-06-12")["startTime"] == 50
@@ -61,9 +66,9 @@ def test_save_converges_across_devices(conn):
# --- derived stats ---
def test_word_stats_streak_and_distribution(conn):
games.save_game_state(conn, 1, "word", "5", "2026-06-12", {"status": "won", "guesses": ["a", "b", "c"]})
games.save_game_state(conn, 1, "word", "5", "2026-06-11", {"status": "won", "guesses": ["a", "b", "c", "d"]})
games.save_game_state(conn, 1, "word", "5", "2026-06-10", {"status": "lost", "guesses": ["a"] * 6})
games.save_game_state(conn, 1, "word", "5", "2026-06-12", {"status": "won", "guesses": ["aaaaa", "bbbbb", "ccccc"]})
games.save_game_state(conn, 1, "word", "5", "2026-06-11", {"status": "won", "guesses": ["aaaaa", "bbbbb", "ccccc", "ddddd"]})
games.save_game_state(conn, 1, "word", "5", "2026-06-10", {"status": "lost", "guesses": ["aaaaa"] * 6})
st = games.game_stats(conn, 1, "word", "5")
assert st["played"] == 3 and st["won"] == 2
assert st["streak"] == 2 # two most-recent wins, then the loss stops it
@@ -71,8 +76,13 @@ def test_word_stats_streak_and_distribution(conn):
def test_wordsearch_stats_best_time(conn):
games.save_game_state(conn, 1, "wordsearch", "med", "2026-06-12", {"foundWords": [], "ms": 4000})
games.save_game_state(conn, 1, "wordsearch", "med", "2026-06-11", {"foundWords": [], "ms": 6000})
import json
# Store completed states directly — game_stats just reads what's persisted
# (the sanitizer that gates `ms` on real completion is covered separately).
for d, ms in (("2026-06-12", 4000), ("2026-06-11", 6000)):
conn.execute("INSERT INTO game_state (user_id, game, variant, puzzle_date, state_json) "
"VALUES (1,'wordsearch','med',?,?)", (d, json.dumps({"foundWords": [], "ms": ms})))
conn.commit()
st = games.game_stats(conn, 1, "wordsearch", "med")
assert st["completed"] == 2 and st["best"] == 4000
@@ -86,16 +96,32 @@ def test_game_state_api_roundtrip(tmp_path, monkeypatch):
# signed out → no sync, echoes the posted state and GET sees nothing stored
anon = TestClient(app)
body = {"game": "wordsearch", "variant": "small", "date": "2026-06-12",
"state": {"foundWords": [{"word": "CAT", "cells": [[0, 0]], "ci": 0}], "startTime": 9}}
assert anon.put("/api/games/state", json=body).json()["state"]["foundWords"][0]["word"] == "CAT"
"state": {"foundWords": [_find("BEACH", 0)], "startTime": 9}}
assert anon.put("/api/games/state", json=body).json()["state"]["foundWords"][0]["word"] == "BEACH"
assert anon.get("/api/games/state?game=wordsearch&variant=small&date=2026-06-12").json()["state"] is None
# signed in: push from "device A", then "device B" → server returns the union
tc = _signin(app, api, "p@x.com")
tc.put("/api/games/state", json=body)
bodyB = {**body, "state": {"foundWords": [{"word": "DOG", "cells": [[1, 1]], "ci": 1}], "startTime": 4}}
bodyB = {**body, "state": {"foundWords": [_find("OCEAN", 1)], "startTime": 4}}
merged = tc.put("/api/games/state", json=bodyB).json()["state"]
assert {f["word"] for f in merged["foundWords"]} == {"CAT", "DOG"} and merged["startTime"] == 4
# GET returns the stored merge; unknown game → 404
assert {f["word"] for f in merged["foundWords"]} == {"BEACH", "OCEAN"} and merged["startTime"] == 4
# GET returns the stored merge; unknown game → 404; bad date → 400
got = tc.get("/api/games/state?game=wordsearch&variant=small&date=2026-06-12").json()["state"]
assert {f["word"] for f in got["foundWords"]} == {"CAT", "DOG"}
assert {f["word"] for f in got["foundWords"]} == {"BEACH", "OCEAN"}
assert tc.get("/api/games/state?game=nope&variant=x&date=2026-06-12").status_code == 404
assert tc.get("/api/games/state?game=wordsearch&variant=small&date=notadate").status_code == 400
def test_sanitizers_reject_junk(conn):
    """Check that both sanitizers drop malformed client state."""
    # Word: bad status → playing; wrong-length/non-alpha guesses dropped; cols capped to kept guesses
    w = games._sanitize_word("5", {
        "status": "hacked",
        "guesses": ["abcde", "no", "12345", "fghij"],
        "cols": [["correct", "absent", "bogus", "x", "y"], ["absent"] * 5, ["x"], ["y"]],
    })
    # Separate asserts so a failure names the exact condition that broke.
    assert w["status"] == "playing"
    assert w["guesses"] == ["abcde", "fghij"]
    assert len(w["cols"]) == 2
    assert w["cols"][0] == ["correct", "absent"]

    # Word Search (no stored puzzle → shape-only): bad shapes dropped, no completion without word count
    ws = games._sanitize_wordsearch(conn, "small", "2026-06-12", {
        "foundWords": [
            _find("BEACH", 0),  # ok
            {"word": "CAT", "cells": [[0, 0], [0, 1], [0, 2]]},  # too short (<4)
            {"word": "OCEAN", "cells": [[1, 0], [1, 1]]},  # cells != len
        ],
        "ms": 12345,
    })
    assert [f["word"] for f in ws["foundWords"]] == ["BEACH"]
    assert ws["ms"] == 0