Reliability: slow ≠ failed — SW nav timeout, slow-boot telemetry, de-bot stats
Root cause of the intermittent white screen: the shell HTML is no-cache (cf-cache-status: DYNAMIC), so every page-open does a synchronous round-trip to the residential origin before any pixel renders — and the SW's network-first navigation only fell back to the cached shell on REJECTION, never on slowness. A stalled fetch meant staring at white with a perfectly good shell in cache. The boot seatbelt couldn't see it either: it lives inside the HTML that hadn't arrived yet, so slow boots left no telemetry.

- service-worker: race navigation fetch vs 2.5s grace timer. Network wins → fresh HTML as before; timer/5xx/failure → cached shell instantly, network response still refreshes the cache in the background. Safe due to the 14-day immutable-chunk grace window. Caps the white screen at ~2.5s for repeat visitors on any network.
- app.html: beacon `boot-slow: Nms (html Nms) on 4g` when mount takes >4s — the "white screen, then it loaded" glitches finally leave a trace, with HTML-arrival timing to separate slow-origin from slow-JS.
- admin: bot UAs (HeadlessChrome/bot/spider/crawl/…) excluded from the headline "Load errors today" count — throttled crawlers trip the 10s boot check routinely (the one recorded error was HeadlessChrome on X11, not a phone). Bots stay visible in the list, tagged + dimmed.

Tests: telemetry test extended for bot flag + filtered counts. 223 pytest + 11 vitest green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -61,6 +61,21 @@
|
||||
var el = document.getElementById('boot-fallback');
|
||||
if (el && el.parentNode) el.parentNode.removeChild(el);
|
||||
try { sessionStorage.removeItem('ub_reloaded'); } catch (e) {}
|
||||
// Slow-but-successful boots (the "white screen, then it loaded" glitch)
|
||||
// would otherwise leave no trace — beacon the timing so they're visible.
|
||||
// performance.now() counts from navigation start, so a slow-arriving
|
||||
// HTML document is included, not just slow JS.
|
||||
try {
|
||||
var ms = Math.round(performance.now());
|
||||
if (ms > 4000) {
|
||||
var nav = performance.getEntriesByType && performance.getEntriesByType('navigation')[0];
|
||||
var detail = 'boot-slow: ' + ms + 'ms';
|
||||
if (nav && nav.responseStart) detail += ' (html ' + Math.round(nav.responseStart) + 'ms)';
|
||||
if (navigator.connection && navigator.connection.effectiveType)
|
||||
detail += ' on ' + navigator.connection.effectiveType;
|
||||
report(detail);
|
||||
}
|
||||
} catch (e) { /* timing is best-effort */ }
|
||||
};
|
||||
addEventListener('vite:preloadError', function (e) {
|
||||
report('preloadError: ' + ((e && e.payload && e.payload.message) || ''));
|
||||
|
||||
@@ -458,9 +458,9 @@
|
||||
<h2>Recent load errors <span class="count">(last {clientErrors.length})</span></h2>
|
||||
<ul class="cerrs">
|
||||
{#each clientErrors as e (e.created_at + e.reason)}
|
||||
<li>
|
||||
<li class:bot={e.bot}>
|
||||
<span class="ce-when">{fdate(e.created_at)}</span>
|
||||
<span class="ce-reason">{e.reason || '—'}</span>
|
||||
<span class="ce-reason">{e.reason || '—'}{#if e.bot}<span class="ce-bot">bot</span>{/if}</span>
|
||||
<span class="ce-path">{e.path || '/'}</span>
|
||||
<span class="ce-ua">{e.user_agent}</span>
|
||||
</li>
|
||||
@@ -1198,6 +1198,11 @@
|
||||
font-size: 0.82rem; padding: 8px 12px; background: var(--surface); border: 1px solid var(--line); border-radius: 8px; }
|
||||
.ce-when { color: var(--muted); white-space: nowrap; }
|
||||
.ce-reason { font-family: var(--label); color: #9a3b3b; }
|
||||
.cerrs li.bot { opacity: 0.6; }
|
||||
.cerrs li.bot .ce-reason { color: var(--muted); }
|
||||
.ce-bot { display: inline-block; margin-left: 8px; padding: 1px 8px; border-radius: 999px;
|
||||
background: var(--accent-soft); color: var(--accent-deep);
|
||||
font-size: 0.68rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.ce-path { color: var(--accent-deep); white-space: nowrap; }
|
||||
.ce-ua { grid-column: 1 / -1; color: var(--muted); font-size: 0.72rem;
|
||||
overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
||||
|
||||
@@ -8,6 +8,10 @@
|
||||
import { version } from '$service-worker';
|
||||
|
||||
const CACHE = `upbeat-${version}`;
|
||||
// How long a navigation may wait on the network before the cached shell is
|
||||
// served instead. Long enough for a healthy fetch, short enough that a stalled
|
||||
// cellular/origin hop never reads as a broken site.
|
||||
const NAV_TIMEOUT_MS = 2500;
|
||||
|
||||
// Paths the FastAPI server owns — the SW must NOT intercept or cache these.
|
||||
function isServerPath(p) {
|
||||
@@ -38,19 +42,32 @@ self.addEventListener('fetch', (event) => {
|
||||
if (url.origin !== location.origin) return;
|
||||
if (isServerPath(url.pathname)) return; // let the network/server handle these
|
||||
|
||||
// Navigations: network-first; keep the freshest real HTML shell as the offline
|
||||
// fallback; on a failed fetch, serve that cached shell (never blank).
|
||||
// Navigations: network-first, but a SLOW network must not mean a white screen —
|
||||
// "slow" and "failed" both fall back to the cached shell. We race the fetch
|
||||
// against a short grace timer: network wins → freshest HTML as usual; timer
|
||||
// wins (or 5xx/failure) → serve the cached shell instantly while the network
|
||||
// response still lands in the cache for next time. A slightly stale shell is
|
||||
// safe: deploys keep old immutable chunks for a 14-day grace window.
|
||||
if (request.mode === 'navigate') {
|
||||
event.respondWith(
|
||||
fetch(request)
|
||||
.then((res) => {
|
||||
if (res && res.ok && (res.headers.get('content-type') || '').includes('text/html')) {
|
||||
const copy = res.clone();
|
||||
caches.open(CACHE).then((c) => c.put('/', copy)).catch(() => {});
|
||||
}
|
||||
return res;
|
||||
})
|
||||
.catch(() => caches.match('/'))
|
||||
(async () => {
  // Navigation response: the network HTML when it arrives within the grace
  // window, otherwise the shell cached under '/'.
  const cache = await caches.open(CACHE);
  const cached = await cache.match('/');

  // Completion of the shell write, when one is started. Kept separate from
  // `network` so the race below is never delayed by the cache write.
  let stored = Promise.resolve();

  // Resolves to the network Response, or to null when the fetch rejects.
  const network = fetch(request)
    .then((res) => {
      if (res && res.ok && (res.headers.get('content-type') || '').includes('text/html')) {
        stored = cache.put('/', res.clone()).catch(() => {});
      }
      return res;
    })
    .catch(() => null);

  // When the timer wins, respondWith() settles while the fetch is still in
  // flight. Without waitUntil() the browser may stop the worker before the
  // response arrives, so the background shell refresh would never land.
  event.waitUntil(network.then(() => stored));

  if (!cached) return (await network) || Response.error(); // first visit: network only

  let timer;
  const grace = new Promise((resolve) => {
    timer = setTimeout(() => resolve('slow'), NAV_TIMEOUT_MS);
  });
  const winner = await Promise.race([network, grace]);
  clearTimeout(timer);

  // Fall back to the cached shell only for slow, failed, or 5xx responses.
  // Testing `ok` would be too broad: navigation requests use redirect mode
  // 'manual', so a server redirect arrives as an opaqueredirect response
  // (status 0, ok === false) and must be handed back to the browser to follow.
  // 4xx responses are passed through as well.
  const usable = winner && winner !== 'slow' && winner.status < 500;
  return usable ? winner : cached;
})()
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
+3
-1
@@ -961,7 +961,9 @@ def create_app() -> FastAPI:
|
||||
rows = conn.execute(
|
||||
"SELECT reason, path, user_agent, created_at FROM client_errors ORDER BY id DESC LIMIT 20"
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
# Bots stay visible in the list (tagged) but are excluded from the
|
||||
# headline counts — see queries.admin_stats.
|
||||
return [{**dict(r), "bot": queries.is_bot_ua(r["user_agent"])} for r in rows]
|
||||
|
||||
@app.post("/api/feedback")
|
||||
def submit_feedback(body: FeedbackBody, request: Request, background_tasks: BackgroundTasks) -> dict:
|
||||
|
||||
+22
-3
@@ -13,6 +13,18 @@ from datetime import UTC, datetime, timedelta
|
||||
from .feeds import MAX_BACKOFF_MINUTES
|
||||
from .paywall import is_paywalled
|
||||
|
||||
# UA substrings that mark automated clients. Crawlers run JS on a throttled
|
||||
# budget and trip the boot-failure beacon routinely — without this filter they
|
||||
# read as real users seeing blank screens.
|
||||
BOT_UA_MARKS = ("headlesschrome", "bot", "spider", "crawl", "python", "curl", "wget", "phantomjs")
|
||||
_NOT_BOT_SQL = " AND ".join(f"instr(lower(user_agent), '{m}')=0" for m in BOT_UA_MARKS)
|
||||
|
||||
|
||||
def is_bot_ua(ua: str | None) -> bool:
    """Report whether ``ua`` contains any BOT_UA_MARKS substring.

    The comparison is case-insensitive. ``None`` and the empty string are
    treated as "not a bot".
    """
    lowered = ua.lower() if ua else ""
    for mark in BOT_UA_MARKS:
        if mark in lowered:
            return True
    return False
|
||||
|
||||
|
||||
# Composite ranking used everywhere a "best first" order is needed. Kept as one
|
||||
# expression so brief, category feeds, and the API all rank identically.
|
||||
RANK_SCORE_SQL = (
|
||||
@@ -565,10 +577,17 @@ def admin_stats(conn: sqlite3.Connection, days: int = 30) -> dict:
|
||||
"top_topics": top_topics,
|
||||
"shares": shares,
|
||||
"daily": daily,
|
||||
# Boot-failure seatbelt signal — blank-screen risk surfacing.
|
||||
# Boot-failure seatbelt signal — blank-screen risk surfacing. Bots are
|
||||
# excluded from the headline counts: throttled crawlers fail the boot
|
||||
# check routinely and would read as real users seeing blank screens.
|
||||
"client_errors": {
|
||||
"today": scalar("SELECT COUNT(*) FROM client_errors WHERE date(created_at)=date('now')"),
|
||||
"window": scalar("SELECT COUNT(*) FROM client_errors WHERE created_at>=date('now',?)", (since,)),
|
||||
"today": scalar(
|
||||
f"SELECT COUNT(*) FROM client_errors WHERE date(created_at)=date('now') AND {_NOT_BOT_SQL}"
|
||||
),
|
||||
"window": scalar(
|
||||
f"SELECT COUNT(*) FROM client_errors WHERE created_at>=date('now',?) AND {_NOT_BOT_SQL}",
|
||||
(since,),
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -418,7 +418,16 @@ def test_client_error_telemetry(tmp_path, monkeypatch):
|
||||
rows = tc.get("/api/admin/client-errors").json()
|
||||
assert len(rows) == 1 and rows[0]["reason"] == "boot-timeout" and rows[0]["path"] == "/play"
|
||||
assert rows[0]["user_agent"] # captured from the request header
|
||||
assert rows[0]["bot"] is False
|
||||
assert tc.get("/api/admin/stats").json()["client_errors"]["today"] == 1
|
||||
# A throttled crawler tripping the beacon must NOT inflate the headline count,
|
||||
# but stays visible (tagged) in the list.
|
||||
anon.post("/api/client-error", json={"reason": "boot-timeout", "path": "/"},
|
||||
headers={"user-agent": "Mozilla/5.0 (X11; Linux x86_64) HeadlessChrome/138.0 Safari/537.36"})
|
||||
rows = tc.get("/api/admin/client-errors").json()
|
||||
assert len(rows) == 2 and rows[0]["bot"] is True
|
||||
stats = tc.get("/api/admin/stats").json()["client_errors"]
|
||||
assert stats["today"] == 1 and stats["window"] == 1 # bot excluded from both
|
||||
|
||||
|
||||
def test_wordsearch_theme_admin(tmp_path, monkeypatch):
|
||||
|
||||
Reference in New Issue
Block a user