Observability + warming guardrails (Codex)

* client_error details, not just a count: new client_errors table + POST
  /api/client-error (reason/path/user-agent/time) + GET /api/admin/client-errors.
  The boot-seatbelt beacon now sends the reason + path (once per page); the admin
  Overview lists the recent errors so we can tell chunk vs SW vs API vs JS — the
  truth meter for the next day as the new SW propagates.
* Deploy warming now also hits the shell, routes (/play /account /admin), SW,
  version.json, word lists, and icons/logo/font — not just immutable chunks.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-11 12:31:32 -04:00
parent 370d62270b
commit 61f575ba6d
7 changed files with 105 additions and 13 deletions
+10 -2
View File
@@ -23,5 +23,13 @@ find "$site/_app/immutable" -type f -mtime +14 -delete 2>/dev/null || true
# domain so the FIRST real visitor after a deploy gets cache HITs instead of slow
# cold fetches from the (residential) origin — the post-deploy blank/slow-load cause.
echo " warming edge cache…"
find "$site/_app/immutable" -type f \( -name '*.js' -o -name '*.css' \) -printf '/_app/immutable/%P\n' \
| xargs -P 8 -I{} curl -fsS -o /dev/null --max-time 20 "https://upbeatbytes.com{}" 2>/dev/null || true
base="https://upbeatbytes.com"
# Emit one URL path per line, then fetch them 8 at a time with a 20 s cap each.
# curl/xargs errors are silenced and the exit status is forced to success.
{
  # all hashed JS/CSS under _app/immutable (a superset of what index.html boots from)
  find "$site/_app/immutable" -type f \( -name '*.js' -o -name '*.css' \) -printf '/_app/immutable/%P\n'
  # app shell, key routes, service worker, version file and static assets
  # (primes CF↔origin even for no-cache responses; caches the cacheable ones)
  for p in \
    / /play /account /admin /service-worker.js /_app/version.json \
    /manifest.webmanifest /words-5.json /words-6.json /logo.svg /favicon.svg \
    /icon-192.png /icon-512.png /fonts/inter-latin-wght-normal.woff2
  do
    printf '%s\n' "$p"
  done
} | xargs -P 8 -I{} curl -fsS -o /dev/null --max-time 20 "$base{}" 2>/dev/null || true
+19 -9
View File
@@ -38,17 +38,22 @@
// screen. Show a calm recovery card if the app hasn't mounted, and reload
// once on a chunk/preload failure (e.g. a just-deployed hashed chunk).
(function () {
function showBoot() {
var sent = false;
// Send a single best-effort error beacon to /api/client-error.
// `reason` is coerced to a string ('unknown' when falsy) and capped at 300
// characters; the current pathname is attached. Only the first call per page
// does anything — the `sent` flag is flipped before the attempt, so a failed
// send is not retried.
function report(reason) {
  if (sent) return;
  sent = true; // one beacon per page
  try {
    var payload = JSON.stringify({
      reason: String(reason || 'unknown').slice(0, 300),
      path: location.pathname
    });
    var body = new Blob([payload], { type: 'application/json' });
    if (navigator.sendBeacon) {
      navigator.sendBeacon('/api/client-error', body);
    }
  } catch (e) {
    /* best-effort telemetry */
  }
}
function showBoot(reason) {
if (window.__ubMounted) return; // app is running; in-app handles it
var el = document.getElementById('boot-fallback');
if (el) el.style.display = 'flex';
try {
var b = new Blob([JSON.stringify({ kind: 'client_error', visitor: 'e' + Math.random().toString(36).slice(2) })],
{ type: 'application/json' });
navigator.sendBeacon && navigator.sendBeacon('/api/events', b);
} catch (e) { /* best-effort telemetry */ }
report(reason);
}
var timer = setTimeout(showBoot, 10000);
var timer = setTimeout(function () { showBoot('boot-timeout'); }, 10000);
// Svelte calls this once it has mounted (see +layout.svelte).
window.__ubBooted = function () {
window.__ubMounted = true;
@@ -58,6 +63,7 @@
try { sessionStorage.removeItem('ub_reloaded'); } catch (e) {}
};
addEventListener('vite:preloadError', function (e) {
report('preloadError: ' + ((e && e.payload && e.payload.message) || ''));
try {
if (!sessionStorage.getItem('ub_reloaded')) {
sessionStorage.setItem('ub_reloaded', '1');
@@ -66,8 +72,12 @@
}
} catch (err) { /* ignore */ }
});
addEventListener('error', showBoot);
addEventListener('unhandledrejection', showBoot);
addEventListener('error', function (e) {
showBoot(e && (e.message || (e.error && e.error.message)) || 'error');
});
addEventListener('unhandledrejection', function (e) {
showBoot(e && e.reason && (e.reason.message || String(e.reason)) || 'rejection');
});
})();
</script>
</head>
+25
View File
@@ -32,11 +32,14 @@
feedback = await getJSON('/api/admin/feedback');
candidates = await getJSON('/api/admin/candidates');
wpPool = await getJSON('/api/admin/word/pool');
clientErrors = await getJSON('/api/admin/client-errors');
} catch {
error = "Couldn't load stats.";
}
});
let clientErrors = $state([]);
// --- Games: Daily Word pool ---
let wpWord = $state('');
let wpResult = $state(null); // lookup result for the current input
@@ -369,6 +372,20 @@
{/if}
</div>
{#if clientErrors.length}
  <h2>Recent load errors <span class="count">(last {clientErrors.length})</span></h2>
  <ul class="cerrs">
    <!-- Deliberately unkeyed: created_at has one-second resolution, so
         created_at + reason is not unique (e.g. two 'boot-timeout' rows in the
         same second) and duplicate keys break a keyed each block. The list is
         loaded once and never reordered, so positional updates are sufficient. -->
    {#each clientErrors as e}
      <li>
        <span class="ce-when">{fdate(e.created_at)}</span>
        <span class="ce-reason">{e.reason || '—'}</span>
        <span class="ce-path">{e.path || '/'}</span>
        <span class="ce-ua">{e.user_agent}</span>
      </li>
    {/each}
  </ul>
{/if}
{:else if section === 'content'}
<h2>Corpus</h2>
<div class="cards">
@@ -1009,6 +1026,14 @@
.stat.alert { background: #f3e0e0; }
.stat.alert .n { color: #9a3b3b; }
/* Overview — recent client load errors.
   Each row is a 3-column grid (time | reason | path); the user-agent spans the
   full width on a second line and is truncated with an ellipsis. */
.cerrs { list-style: none; padding: 0; margin: 10px 0 0; display: flex; flex-direction: column; gap: 6px; }
.cerrs li { display: grid; grid-template-columns: auto 1fr auto; gap: 6px 12px; align-items: baseline;
font-size: 0.82rem; padding: 8px 12px; background: var(--surface); border: 1px solid var(--line); border-radius: 8px; }
.ce-when { color: var(--muted); white-space: nowrap; }
.ce-reason { font-family: var(--label); color: #9a3b3b; }
.ce-path { color: var(--accent-deep); white-space: nowrap; }
.ce-ua { grid-column: 1 / -1; color: var(--muted); font-size: 0.72rem;
overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
/* Games — Daily Word pool */
.wp-lookup { display: flex; align-items: center; gap: 12px; flex-wrap: wrap; margin: 14px 0 6px; }
+28
View File
@@ -344,6 +344,12 @@ class WordPoolBody(BaseModel):
word: str
# Request body for POST /api/client-error (boot-failure telemetry).
# Every field defaults to "" so a partial payload is still accepted; the
# handler truncates each value before storing it.
class ClientErrorBody(BaseModel):
reason: str = ""  # what failed; stored truncated to 300 characters
path: str = ""  # page path the error happened on; stored truncated to 200 characters
version: str = ""  # stored in the app_version column, truncated to 60 characters
class EmailStartRequest(BaseModel):
email: str
@@ -919,6 +925,28 @@ def create_app() -> FastAPI:
conn.commit()
return {"ok": True} # always identical; dedup'd by the unique key
@app.post("/api/client-error")
def record_client_error(body: ClientErrorBody, request: Request) -> dict:
    """Persist one client boot-failure report and prune old ones.

    The reason, path and version come from the JSON body; the user agent is
    read from the request headers. Each value is truncated (300 / 200 / 60 /
    300 characters) before insertion. Rows older than 14 days are deleted in
    the same transaction. Always returns ``{"ok": True}``.
    """
    # Boot-failure seatbelt telemetry — what blank-risk looks like in the wild.
    user_agent = (request.headers.get("user-agent") or "")[:300]
    row = (
        (body.reason or "")[:300],
        (body.path or "")[:200],
        user_agent,
        (body.version or "")[:60],
    )
    insert_sql = "INSERT INTO client_errors (reason, path, user_agent, app_version) VALUES (?, ?, ?, ?)"
    prune_sql = "DELETE FROM client_errors WHERE created_at < datetime('now','-14 days')"
    with get_conn() as conn:
        conn.execute(insert_sql, row)
        conn.execute(prune_sql)
        conn.commit()
    return {"ok": True}
@app.get("/api/admin/client-errors")
def admin_client_errors(request: Request) -> list[dict]:
    """Return the 20 most recent client error reports, newest first.

    The caller is checked with ``_require_admin`` before any rows are read.
    Each item carries ``reason``, ``path``, ``user_agent`` and ``created_at``.
    """
    query = "SELECT reason, path, user_agent, created_at FROM client_errors ORDER BY id DESC LIMIT 20"
    with get_conn() as conn:
        _require_admin(conn, request)
        recent = conn.execute(query).fetchall()
    return list(map(dict, recent))
@app.post("/api/feedback")
def submit_feedback(body: FeedbackBody, request: Request, background_tasks: BackgroundTasks) -> dict:
if body.hp: # honeypot tripped → accept silently, store nothing
+9
View File
@@ -260,6 +260,15 @@ CREATE TABLE IF NOT EXISTS feedback_replies (
);
CREATE INDEX IF NOT EXISTS idx_feedback_replies_fid ON feedback_replies(feedback_id);
-- Client boot-failure reports (one row per beacon received on /api/client-error).
CREATE TABLE IF NOT EXISTS client_errors (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    reason TEXT NOT NULL DEFAULT '',       -- what failed, as reported by the client
    path TEXT NOT NULL DEFAULT '',         -- page path the client was on
    user_agent TEXT NOT NULL DEFAULT '',   -- taken from the request header
    app_version TEXT NOT NULL DEFAULT '',
    created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP  -- UTC, one-second resolution
);
-- Every insert prunes rows by created_at and the admin stats window filters on
-- it, so index the column instead of scanning the table each time.
CREATE INDEX IF NOT EXISTS idx_client_errors_created ON client_errors(created_at);
CREATE TABLE IF NOT EXISTS word_pool (
word TEXT NOT NULL,
variant TEXT NOT NULL, -- '5' | '6'
+2 -2
View File
@@ -567,8 +567,8 @@ def admin_stats(conn: sqlite3.Connection, days: int = 30) -> dict:
"daily": daily,
# Boot-failure seatbelt signal — blank-screen risk surfacing.
"client_errors": {
"today": scalar("SELECT COUNT(*) FROM events WHERE kind='client_error' AND day=date('now')"),
"window": kc.get("client_error", 0),
"today": scalar("SELECT COUNT(*) FROM client_errors WHERE date(created_at)=date('now')"),
"window": scalar("SELECT COUNT(*) FROM client_errors WHERE created_at>=date('now',?)", (since,)),
},
}
+12
View File
@@ -390,3 +390,15 @@ def test_word_pool_admin(tmp_path, monkeypatch):
# remove the admin-added word
tc.delete("/api/admin/word/pool/plumb")
assert "plumb" not in tc.get("/api/admin/word/pool").json()["5"]["added"]
def test_client_error_telemetry(tmp_path, monkeypatch):
    """Anonymous clients can report a boot error; only the admin can read it back."""
    app, api = _make(tmp_path, monkeypatch, admin_email="boss@x.com")

    # Reporting needs no session, but the listing endpoint is admin-only.
    visitor = TestClient(app)
    posted = visitor.post("/api/client-error", json={"reason": "boot-timeout", "path": "/play"})
    assert posted.json()["ok"] is True
    assert visitor.get("/api/admin/client-errors").status_code == 401  # gated

    admin = _signin(app, api, "boss@x.com")
    rows = admin.get("/api/admin/client-errors").json()
    assert len(rows) == 1
    first = rows[0]
    assert first["reason"] == "boot-timeout"
    assert first["path"] == "/play"
    assert first["user_agent"]  # captured from the request header

    # The stats endpoint counts the same row for today.
    assert admin.get("/api/admin/stats").json()["client_errors"]["today"] == 1