fix: QOTD/WOTD freshness — pick within the freshest cohort, not the rotated pool

Both selectors ordered candidates least-recently-shown first, but daily.seeded_order() then ROTATED the whole list and took [0] — an arbitrary date-hashed item, which undid the ordering. Result: repeats (quote id 2 on 6/28+6/29; word "harmony" on 6/25+6/28) and no guarantee that every pool item is shown before any item recurs. Fix: daily.freshest(rows) returns the freshest cohort only — every NEVER-shown item while any remain, else the oldest-shown group. The quote/wotd _candidates functions use it; seeded_order now picks deterministically WITHIN that cohort. So every pool item is featured once before any repeat, and the pool then cycles oldest-first. Dropped the now-unused _NO_REPEAT_POOL window. Tests: no-repeat-until-exhausted (quote + wotd) + a freshest() unit test. 428 backend tests green. (Separate follow-up: expand the QOTD pool from 16 → 90+ vetted public-domain quotes for a longer no-repeat window.) Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 05:39:06 -04:00
parent 414a4c4b8b
commit 0ae789752e
5 changed files with 64 additions and 13 deletions
@@ -33,6 +33,21 @@ def seeded_order(ids: list, date_str: str) -> list:
    return ids[seed:] + ids[:seed]
 def freshest(rows: list) -> list:
    """Return the sorted ids of the cohort that should be featured next.

    `rows` are pool rows indexable by `"id"` and `"shown_at"`. While any row has
    never been shown (`shown_at` is None), the cohort is exactly those rows; once
    every row has been shown, it is the rows sharing the smallest `shown_at`.
    An empty pool yields an empty list.

    Callers should choose (e.g. via seeded_order) only *within* this cohort, never
    across the whole pool — choosing across the pool is what let recently shown
    items come back early (the QOTD/WOTD repeat bug).
    """
    unseen = sorted(row["id"] for row in rows if row["shown_at"] is None)
    if unseen:
        # Never-shown items take priority over anything already featured.
        return unseen
    if not rows:
        return []
    earliest = min(row["shown_at"] for row in rows)
    cohort = [row["id"] for row in rows if row["shown_at"] == earliest]
    cohort.sort()
    return cohort
 def content_key(*parts) -> str:
    """A stable dedup key for a pool item (so re-harvesting never duplicates a row)."""
    raw = "|".join("" if p is None else str(p) for p in parts)
@@ -14,8 +14,6 @@ import sqlite3
 from . import daily
 from .localtime import local_today
 _NO_REPEAT_POOL = 60
 # Public-domain (ancient / author died well over a century ago), uplifting. Admin curates.
 SEED = [
    ("Very little is needed to make a happy life; it is all within yourself, in your way of thinking.", "Marcus Aurelius", "Meditations"),
@@ -65,11 +63,10 @@ def _candidates(conn: sqlite3.Connection, avoid: int | None = None) -> list[int]
    if featured:
        ids = [r[0] for r in featured]
    else:
-        rows = conn.execute(
+        # The freshest cohort only (never-shown, else the oldest-shown group) — picking
-            "SELECT id FROM quote_pool WHERE blocked=0 ORDER BY shown_at IS NOT NULL, shown_at, id LIMIT ?",
+        # across the whole pool is what re-fed recent quotes day to day.
-            (_NO_REPEAT_POOL,),
+        rows = conn.execute("SELECT id, shown_at FROM quote_pool WHERE blocked=0").fetchall()
-        ).fetchall()
+        ids = daily.freshest(rows)
        ids = [r[0] for r in rows]
    if avoid is not None:
        ids = [i for i in ids if i != avoid] or ids
    return ids
@@ -25,7 +25,6 @@ from .localtime import local_today
 DICT_BASE = "https://api.dictionaryapi.dev/api/v2/entries/en"
 _UA = {"User-Agent": "upbeatBytes/1.0 (+https://upbeatbytes.com)"}
 _NO_REPEAT_POOL = 60
 _TARGET_POOL = 30          # keep harvesting (a batch/day) until the pool reaches this
 _HARVEST_BATCH = 12
 _MIN_AUDIO_BYTES = 500
@@ -222,11 +221,10 @@ def _candidates(conn: sqlite3.Connection, avoid: int | None = None) -> list[int]
    if featured:
        ids = [r[0] for r in featured]
    else:
-        rows = conn.execute(
+        # The freshest cohort only (never-shown, else the oldest-shown group) — picking
-            "SELECT id FROM wotd_pool WHERE blocked=0 ORDER BY shown_at IS NOT NULL, shown_at, id LIMIT ?",
+        # across the whole pool is what re-fed recent words day to day.
-            (_NO_REPEAT_POOL,),
+        rows = conn.execute("SELECT id, shown_at FROM wotd_pool WHERE blocked=0").fetchall()
-        ).fetchall()
+        ids = daily.freshest(rows)
        ids = [r[0] for r in rows]
    if avoid is not None:
        ids = [i for i in ids if i != avoid] or ids
    return ids
@@ -56,3 +56,28 @@ def test_get_today_never_empty(conn):
 def test_run_daily_seeds_then_picks(conn):
    """run_daily reports a pool the size of SEED and a truthy `picked` entry."""
    outcome = quote.run_daily(conn)
    assert outcome["pool"] == len(quote.SEED)
    assert outcome["picked"]
 def test_no_repeat_until_pool_exhausted(conn):
    """Picking on consecutive dates covers every seeded quote before any repeat, and
    the first repeat is the quote that was picked first (i.e. shown longest ago).
    (Regression for the rotate-the-whole-pool selector bug.)"""
    import datetime

    quote.seed(conn)
    pool_size = len(quote.SEED)
    start = datetime.date(2026, 1, 1)

    def pick_on(offset):
        # Feature date is `offset` days after the start date, as an ISO string.
        day = start + datetime.timedelta(days=offset)
        return quote.pick_daily(conn, feature_date=day.isoformat())["pool_id"]

    seen = [pick_on(offset) for offset in range(pool_size)]
    assert len(set(seen)) == pool_size              # full coverage, no repeat within the pool
    assert pick_on(pool_size) == seen[0]            # oldest repeats first
 def test_freshest_cohort():
    """daily.freshest returns the never-shown ids when any exist, otherwise the ids
    tied for the oldest shown_at, and an empty list for an empty pool."""
    from goodnews import daily

    def row(pool_id, shown_at):
        return {"id": pool_id, "shown_at": shown_at}

    # never-shown win outright (the shown item is ignored while any never-shown remain)
    partly_shown = [row(1, "2026-01-02"), row(2, None), row(3, None)]
    assert daily.freshest(partly_shown) == [2, 3]

    # all shown → only the oldest-shown cohort
    all_shown = [row(1, "2026-01-03"), row(2, "2026-01-01"), row(3, "2026-01-01")]
    assert daily.freshest(all_shown) == [2, 3]

    assert daily.freshest([]) == []
@@ -142,3 +142,19 @@ def test_polish_returns_none_with_empty_examples():
        def chat_text(self, m):
            return '{"gloss": "A warm clear gloss.", "examples": []}'
    assert wotd._polish(C(), "serene", "adjective", "x") is None
 def test_no_repeat_until_pool_exhausted(conn):
    """Same freshness check as the QOTD test: every inserted word is picked once before
    any repeat, and the first repeat is the word that was picked first.
    (Regression for 'harmony' repeating after 3 days.)"""
    import datetime

    words = ("alpha", "bravo", "charlie", "delta")
    for word in words:
        conn.execute("INSERT INTO wotd_pool (word, definition) VALUES (?, 'a definition')", (word,))
    conn.commit()

    pool_size = len(words)
    start = datetime.date(2026, 2, 1)

    def pick_on(offset):
        # Feature date is `offset` days after the start date, as an ISO string.
        day = start + datetime.timedelta(days=offset)
        return wotd.pick_daily(conn, feature_date=day.isoformat())["pool_id"]

    seen = [pick_on(offset) for offset in range(pool_size)]
    assert len(set(seen)) == pool_size
    assert pick_on(pool_size) == seen[0]