Files
upbeatBytes/tests/test_briefs.py
T
thejayman77 9cdcda5e02 Durability pass: tests, clearer diversity/classify behavior, Calm Filters foundation
- Add pytest suite (34 tests) covering scoring thresholds, dedup clustering +
  representative selection + time window, brief source/category diversity,
  avoid-term phrase matching, and text canonicalization/truncation.
- Rewrite _select_diverse with an explicit, tested contract (best-first, one
  per source, backfill, then inject a second category by evicting the
  lowest-ranked pick).
- classify_articles now returns attempted/succeeded/skipped (ClassifyReport) so
  silent model failures are visible in both the cycle and classify output.
- Fix clean_text truncation to stay within max_len (ellipsis no longer
  overshoots).
- New filters.py: canonical FilterPrefs shape (include/mute topics+flavors,
  avoid_terms, pauses) and pure word/phrase-boundary matching engine seeding
  Calm Filters. Not yet wired into the API.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 19:07:31 +00:00

51 lines
1.8 KiB
Python

from goodnews.briefs import _select_diverse
def row(id: int, source: str, category: str) -> dict:
    """Build a minimal article row for the ``_select_diverse`` tests.

    Args:
        id: Value stored under the ``"id"`` key. The name shadows the
            builtin, but it is kept so keyword callers keep working.
        source: Value stored under the ``"source_name"`` key.
        category: Value stored under the ``"default_category"`` key.

    Returns:
        A plain dict holding exactly those three keys.
    """
    # _select_diverse only reads these three keys; plain dicts support []
    # access, so no richer row type is needed here.
    return {"id": id, "source_name": source, "default_category": category}
def test_prefers_distinct_sources_best_first():
    """With enough distinct sources, each source contributes one pick."""
    ranked = [
        row(1, "A", "science"),
        # Shares source "A" with row 1, so it is expected to be passed over
        # while rows from other sources are still available.
        row(2, "A", "science"),
        row(3, "B", "science"),
        row(4, "C", "environment"),
    ]
    picked_ids = [pick["id"] for pick in _select_diverse(ranked, limit=3)]
    # One row per source, in the same relative order as the input ranking.
    assert picked_ids == [1, 3, 4]
def test_backfills_when_sources_exhausted():
    """A single-source pool still fills the limit by reusing that source."""
    rows = [row(1, "A", "science"), row(2, "A", "science"), row(3, "A", "science")]
    selected = _select_diverse(rows, limit=2)
    # Source "A" repeats only because no other source exists to pick from.
    assert len(selected) == 2
    # The length check alone would also pass if one row were returned twice;
    # backfilling has to draw on different rows.
    assert len({r["id"] for r in selected}) == 2
def test_injects_second_category_without_shrinking():
    """A lone off-category row is pulled in without reducing the pick count."""
    science_rows = [
        row(rank, source, "science")
        for rank, source in enumerate("ABC", start=1)
    ]
    # The only row from another category, placed last (lowest ranked).
    ranked = science_rows + [row(4, "D", "environment")]

    picks = _select_diverse(ranked, limit=3)
    picked_categories = {pick["default_category"] for pick in picks}

    assert len(picks) == 3
    # "environment" is expected to be swapped in for category diversity.
    assert len(picked_categories) >= 2
    assert "environment" in picked_categories
def test_keeps_single_category_when_no_alternative_exists():
    """When every row shares one category, the selection stays full-size."""
    ranked = [
        row(rank, source, "science")
        for rank, source in zip((1, 2, 3), ("A", "B", "C"))
    ]
    picks = _select_diverse(ranked, limit=3)
    picked_categories = {pick["default_category"] for pick in picks}
    assert len(picks) == 3
    assert picked_categories == {"science"}
def test_never_returns_more_than_limit():
    """Ten rows from ten distinct sources are capped at the requested limit."""
    pool = [row(n, f"S{n}", "science") for n in range(10)]
    picks = _select_diverse(pool, limit=5)
    assert len(picks) == 5