analytics: honest engagement metric — Engaged readers vs Recorded visits (Codex)

Admin now shows two numbers:
- Recorded visits: the existing raw count (one daily 'visit' beacon; still includes
  UA-spoofing bots that slip past the UA filter).
- Engaged readers: distinct visitor-days with DELIBERATE activity — either the new
  gesture-gated 'engaged' beacon (fires once/day only after ~8s visible AND a real
  scroll/pointer/key/touch) or a deliberate action (source_click, full_story, share,
  replace_used, paywall_replace, paywalled_source_open,
  not_today/less_like_this/hide_topic, game start/complete/share). Explicitly EXCLUDES
  auto-fired visit/summary_viewed/open, replace_none, and game *_arrival (a share-loop
  landing, not engagement).

armEngaged() in analytics.js (wired in the global layout) + a mirrored vanilla-JS beacon
on the server-rendered /a/<id> share pages. 'engaged' added to the event allowlist and
fired with article_id=0 so the uniqueness constraint dedupes it per day. queries.admin_stats
gains engaged_today/engaged_d7/engaged_d30. Bots are doubly excluded (UA filter at the beacon + the
gesture gate). Tests cover the metric (engaged + deliberate counted; visit/summary/arrival
not). 447 backend + 36 frontend tests pass.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-30 14:07:24 -04:00
parent 8a7606e20d
commit f416e13700
7 changed files with 103 additions and 5 deletions
+1
View File
@@ -644,6 +644,7 @@ _EVENT_KINDS = {
"share_ub", "copy_source", "native_share",
"not_today", "less_like_this", "hide_topic",
"replace_used", "replace_none", "paywall_replace", "paywalled_source_open",
"engaged", # genuine engagement: ~8s visible + a real gesture (vs. a raw visit)
"client_error", # boot-failure seatbelt beacon (blank-screen risk signal)
} | _GAME_EVENT_KINDS
+25
View File
@@ -20,6 +20,19 @@ from .paywall import is_paywalled, is_paywalled_for_source
# Lower-case substrings whose presence in a User-Agent marks the request as a bot.
BOT_UA_MARKS = (
    "headlesschrome",
    "bot",
    "spider",
    "crawl",
    "python",
    "curl",
    "wget",
    "phantomjs",
)

# SQL predicate fragment: holds only when user_agent contains none of BOT_UA_MARKS.
_NOT_BOT_SQL = " AND ".join(
    [f"instr(lower(user_agent), '{mark}')=0" for mark in BOT_UA_MARKS]
)

# An "engaged reader" is a distinct visitor-day showing DELIBERATE activity, in contrast
# to a raw visit (which a JS-capable bot can trip). The set below is the gesture-gated
# 'engaged' beacon plus every genuinely deliberate action. Auto-fired/passive kinds
# (visit, summary_viewed, open), replace_none, and game *_arrival (a share-loop landing,
# not engagement) are intentionally left out.
_ENGAGED_GAMES = ("word", "wordsearch", "bloom", "match")
ENGAGED_EVENT_KINDS = (
    "engaged",
    "full_story",
    "source_click",
    "share_ub",
    "copy_source",
    "native_share",
    "replace_used",
    "paywall_replace",
    "paywalled_source_open",
    "not_today",
    "less_like_this",
    "hide_topic",
) + tuple(
    # One "<game>_<action>" kind per game, grouped game by game.
    f"{game}_{action}"
    for game in _ENGAGED_GAMES
    for action in ("started", "completed", "shared")
)
def is_bot_ua(ua: str | None) -> bool:
low = (ua or "").lower()
@@ -746,13 +759,25 @@ def admin_stats(conn: sqlite3.Connection, days: int = 30) -> dict:
def scalar(sql, params=()):
return conn.execute(sql, params).fetchone()[0] or 0
eng_ph = ",".join("?" * len(ENGAGED_EVENT_KINDS))
visitors = {
# Recorded visits — the raw/noisy count (one daily 'visit' beacon per device).
"today": scalar("SELECT COUNT(DISTINCT visitor_hash) FROM events "
"WHERE kind='visit' AND visitor_hash!='' AND day=date('now')"),
"d7": scalar("SELECT COUNT(DISTINCT visitor_hash) FROM events "
"WHERE kind='visit' AND visitor_hash!='' AND day>=date('now','-7 days')"),
"d30": scalar("SELECT COUNT(DISTINCT visitor_hash) FROM events "
"WHERE kind='visit' AND visitor_hash!='' AND day>=date('now',?)", (since,)),
# Engaged readers — distinct visitor-day with deliberate activity (the honest number).
"engaged_today": scalar(f"SELECT COUNT(DISTINCT visitor_hash) FROM events "
f"WHERE kind IN ({eng_ph}) AND visitor_hash!='' AND day=date('now')",
ENGAGED_EVENT_KINDS),
"engaged_d7": scalar(f"SELECT COUNT(DISTINCT visitor_hash) FROM events "
f"WHERE kind IN ({eng_ph}) AND visitor_hash!='' AND day>=date('now','-7 days')",
ENGAGED_EVENT_KINDS),
"engaged_d30": scalar(f"SELECT COUNT(DISTINCT visitor_hash) FROM events "
f"WHERE kind IN ({eng_ph}) AND visitor_hash!='' AND day>=date('now',?)",
(*ENGAGED_EVENT_KINDS, since)),
}
# Returning (seen on ≥2 distinct days) vs one-and-done, over the window.
+8
View File
@@ -344,6 +344,14 @@ def render_share_page(article: dict, base_url: str, summary: str | None = None,
// visit isn't recorded for a /a/ landing — count it here, once per day per device.
var t=new Date().toISOString().slice(0,10);
if(localStorage.getItem('goodnews:visitday')!==t){{localStorage.setItem('goodnews:visitday',t);beacon({{kind:'visit',article_id:0,visitor:v}});}}
// Engaged-reader signal (mirrors the SPA's armEngaged): ~8s visible + a real gesture, once/day.
var eng=false,gest=false,secs=0;
function fireEng(){{
if(eng||!gest||secs<8) return; eng=true;
try{{ if(localStorage.getItem('goodnews:engagedday')!==t){{localStorage.setItem('goodnews:engagedday',t);beacon({{kind:'engaged',article_id:0,visitor:v}});}} }}catch(e){{}}
}}
var iv=setInterval(function(){{ if(document.visibilityState==='visible'){{secs++;fireEng();}} if(eng) clearInterval(iv); }},1000);
['scroll','pointerdown','keydown','touchstart'].forEach(function(e){{window.addEventListener(e,function(){{gest=true;fireEng();}},{{passive:true}});}});
}}catch(e){{}}
}})();
</script>