Brief emotional-mix guardrails + source on its own line

Composition (Codex's priority — content mix was the louder problem):
- _select_diverse now guards the daily five's emotional tone: at most 1 health, at most 2 science+health combined, at most 2 of any topic, distinct sources — so at least three of the five are community/culture/animals/environment when available. Caps relax (mix, then source) only to fill on thin days.
- Verified live: today's five went to environment x2, health, animals, science.

UI:
- Source moved to its own line below the tags, left-justified, for uniform rhythm across hero and tiles (was sometimes trailing the tags, right-aligned).
- Watermark kept as-is (intentionally subtle; liked).

Tests updated for the emotional-mix contract (80 total).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 12:29:02 +00:00
parent 541f59ed6e
commit 3858380ffe
3 changed files with 64 additions and 38 deletions
@@ -36,8 +36,8 @@
    <div class="tags">
      {#if article.topic}<span class="tag">{article.topic}</span>{/if}
      {#if article.flavor}<span class="tag soft">{article.flavor}</span>{/if}
-      <span class="src">{article.source}</span>
    </div>
+    <div class="src">{article.source}</div>

    <h3><a href={safeHref} target="_blank" rel="noopener">{article.title}</a></h3>

@@ -85,7 +85,8 @@
    padding: 2px 9px; font-weight: 600; text-transform: capitalize;
  }
  .tag.soft { background: var(--sage-soft); color: var(--sage-deep); }
-  .src { color: var(--muted); margin-left: auto; }
+  /* Source on its own line below the tags, left-justified, for uniformity. */
+  .src { color: var(--muted); font-size: 0.78rem; margin: -2px 0 2px; }

  h3 { font-size: 1.18rem; }
  h3 a:hover { color: var(--sage-deep); }
@@ -134,7 +135,7 @@
    color: var(--sage); opacity: 0.05; text-transform: lowercase; letter-spacing: -0.02em;
    pointer-events: none; user-select: none;
  }
-  .tile .tags, .tile h3, .tile .why, .tile .actions { position: relative; }
+  .tile .tags, .tile .src, .tile h3, .tile .why, .tile .actions { position: relative; }

  /* ---- Hero WITH image: two columns, with an atmospheric overlay ---- */
  .hero { display: grid; grid-template-columns: 1.1fr 1fr; }
@@ -148,24 +148,44 @@ def _candidate_articles(
    ).fetchall()


-def _select_diverse(rows: list[sqlite3.Row], limit: int, max_per_topic: int = 2) -> list[sqlite3.Row]:
-    """Pick up to `limit` items from `rows` (already ranked best-first).
+def _select_diverse(rows: list[sqlite3.Row], limit: int) -> list[sqlite3.Row]:
+    """Pick up to `limit` items for the daily brief (rows ranked best-first).

-    Contract:
-    1. Prefer higher-ranked items.
-    2. Source diversity: at most one item per source while other sources remain.
-    3. Topic balance: no more than `max_per_topic` of the same topic while other
-       topics still have candidates — so the five don't cluster (e.g. several
-       medical/science items) when community/culture/animals/environment exist.
-    4. Always fill to `limit` when enough candidates exist: the source and topic
-       caps are relaxed (in that order) only as needed to reach the count.
+    The daily five should feel like *good news*, not a research digest, so the
+    emotional mix is guarded — not just topic count:
+    - at most 1 health item,
+    - at most 2 science+health items combined,
+    - at most 2 of any single topic,
+    - distinct sources.
+    Because science/health are capped at 2 combined, at least three of the five
+    are community/culture/animals/environment whenever those exist — so the page
+    leads with breadth, not clustered medical/science breakthroughs.
+
+    Caps are relaxed (topic first, then source) only as needed to still fill the
+    count on thin days; we never return fewer when candidates exist.
    """
    selected: list[sqlite3.Row] = []
    selected_ids: set = set()
    seen_sources: set = set()
    topic_count: dict = {}

-    def consider(enforce_source: bool, enforce_topic: bool) -> None:
+    def add(row: sqlite3.Row) -> None:
+        selected.append(row)
+        selected_ids.add(row["id"])
+        seen_sources.add(row["source_name"])
+        topic_count[row["topic"]] = topic_count.get(row["topic"], 0) + 1
+
+    def emotional_mix_ok(row: sqlite3.Row) -> bool:
+        topic = row["topic"]
+        health = topic_count.get("health", 0)
+        science = topic_count.get("science", 0)
+        if topic == "health" and health >= 1:
+            return False
+        if topic in ("science", "health") and (science + health) >= 2:
+            return False
+        return topic_count.get(topic, 0) < 2
+
+    def fill(enforce_mix: bool, enforce_source: bool) -> None:
        for row in rows:
            if len(selected) >= limit:
                return
@@ -173,17 +193,13 @@ def _select_diverse(rows: list[sqlite3.Row], limit: int, max_per_topic: int = 2)
                continue
            if enforce_source and row["source_name"] in seen_sources:
                continue
-            topic = row["topic"]
-            if enforce_topic and topic_count.get(topic, 0) >= max_per_topic:
+            if enforce_mix and not emotional_mix_ok(row):
                continue
-            selected.append(row)
-            selected_ids.add(row["id"])
-            seen_sources.add(row["source_name"])
-            topic_count[topic] = topic_count.get(topic, 0) + 1
+            add(row)

-    consider(enforce_source=True, enforce_topic=True)    # distinct source, topic-balanced
-    consider(enforce_source=True, enforce_topic=False)   # relax topic cap to fill
-    consider(enforce_source=False, enforce_topic=False)  # relax source too, last resort
+    fill(enforce_mix=True, enforce_source=True)    # balanced mix, distinct sources
+    fill(enforce_mix=False, enforce_source=True)    # relax the mix caps to fill
+    fill(enforce_mix=False, enforce_source=False)   # relax source too, last resort
    return selected


@@ -2,7 +2,6 @@ from goodnews.briefs import _select_diverse


 def row(id, source, topic):
-    # _select_diverse reads id, source_name, topic; plain dicts support [] access.
    return {"id": id, "source_name": source, "topic": topic}


@@ -10,29 +9,39 @@ def test_prefers_distinct_sources_best_first():
    rows = [
        row(1, "A", "science"),
        row(2, "A", "science"),   # same source as #1 — skipped while others remain
-        row(3, "B", "health"),
+        row(3, "B", "community"),
        row(4, "C", "environment"),
    ]
    assert [r["id"] for r in _select_diverse(rows, limit=3)] == [1, 3, 4]


-def test_caps_a_topic_when_alternatives_exist():
+def test_at_most_one_health_when_alternatives_exist():
    rows = [
-        row(1, "A", "science"), row(2, "B", "science"),
-        row(3, "C", "science"), row(4, "D", "science"),
-        row(5, "E", "community"), row(6, "F", "animals"), row(7, "G", "culture"),
+        row(1, "A", "health"), row(2, "B", "health"),
+        row(3, "C", "science"), row(4, "D", "community"),
+        row(5, "E", "animals"), row(6, "F", "environment"),
    ]
-    selected = _select_diverse(rows, limit=5, max_per_topic=2)
-    topics = [r["topic"] for r in selected]
-    assert len(selected) == 5
-    assert topics.count("science") == 2  # capped, even though 4 were available
-    assert {"community", "animals", "culture"} <= set(topics)
+    topics = [r["topic"] for r in _select_diverse(rows, limit=5)]
+    assert len(topics) == 5
+    assert topics.count("health") == 1


-def test_relaxes_cap_when_only_one_topic_available():
-    rows = [row(i, f"S{i}", "science") for i in range(1, 6)]
-    selected = _select_diverse(rows, limit=5)
-    assert len(selected) == 5  # all science: cap relaxed because nothing else exists
+def test_science_plus_health_capped_at_two():
+    rows = [
+        row(1, "A", "science"), row(2, "B", "science"), row(3, "C", "science"),
+        row(4, "D", "health"), row(5, "E", "community"),
+        row(6, "F", "animals"), row(7, "G", "culture"),
+    ]
+    topics = [r["topic"] for r in _select_diverse(rows, limit=5)]
+    assert len(topics) == 5
+    assert topics.count("science") + topics.count("health") <= 2
+    # …which means the rest are the gentler lanes
+    assert sum(t in {"community", "animals", "culture", "environment"} for t in topics) >= 3
+
+
+def test_relaxes_caps_to_fill_on_thin_days():
+    rows = [row(i, f"S{i}", "science") for i in range(1, 6)]  # only science available
+    assert len(_select_diverse(rows, limit=5)) == 5


 def test_backfills_repeating_source_when_needed():