"""Tests for ``_select_diverse`` from ``goodnews.briefs``.

Each test builds a small list of row dicts and checks how many rows (and
which sources/topics) ``_select_diverse`` returns for a given ``limit``.
"""

from goodnews.briefs import _select_diverse


def row(id, source, topic):
    """Build a minimal candidate row dict for ``_select_diverse``.

    Args:
        id: Identifier stored under the ``"id"`` key.
        source: Source name stored under the ``"source_name"`` key.
        topic: Topic label stored under the ``"topic"`` key.

    Returns:
        A dict with the keys ``"id"``, ``"source_name"`` and ``"topic"``.
    """
    # NOTE: the parameter name ``id`` shadows the builtin; it is kept so the
    # helper's signature stays unchanged for any keyword callers.
    return {"id": id, "source_name": source, "topic": topic}


def test_prefers_distinct_sources_best_first():
    """A repeated source is passed over while other sources remain."""
    rows = [
        row(1, "A", "science"),
        row(2, "A", "science"),  # same source as #1 — skipped while others remain
        row(3, "B", "community"),
        row(4, "C", "environment"),
    ]
    assert [r["id"] for r in _select_diverse(rows, limit=3)] == [1, 3, 4]


def test_at_most_one_health_when_alternatives_exist():
    """Only one "health" row is picked when enough other topics exist."""
    rows = [
        row(1, "A", "health"),
        row(2, "B", "health"),
        row(3, "C", "science"),
        row(4, "D", "community"),
        row(5, "E", "animals"),
        row(6, "F", "environment"),
    ]
    topics = [r["topic"] for r in _select_diverse(rows, limit=5)]
    assert len(topics) == 5
    assert topics.count("health") == 1


def test_science_plus_health_capped_at_two():
    """"science" and "health" together take at most two of five slots."""
    rows = [
        row(1, "A", "science"),
        row(2, "B", "science"),
        row(3, "C", "science"),
        row(4, "D", "health"),
        row(5, "E", "community"),
        row(6, "F", "animals"),
        row(7, "G", "culture"),
    ]
    topics = [r["topic"] for r in _select_diverse(rows, limit=5)]
    assert len(topics) == 5
    assert topics.count("science") + topics.count("health") <= 2
    # …which means the rest are the gentler lanes
    assert sum(t in {"community", "animals", "culture", "environment"} for t in topics) >= 3


def test_relaxes_caps_to_fill_on_thin_days():
    """When every row is "science", the limit is still filled."""
    rows = [row(i, f"S{i}", "science") for i in range(1, 6)]  # only science available
    assert len(_select_diverse(rows, limit=5)) == 5


def test_backfills_repeating_source_when_needed():
    """When every row shares one source, repeats are used to reach the limit."""
    rows = [row(1, "A", "science"), row(2, "A", "science"), row(3, "A", "science")]
    assert len(_select_diverse(rows, limit=2)) == 2


def test_never_exceeds_limit():
    """With 20 candidate rows and ``limit=5``, exactly five rows come back."""
    rows = [row(i, f"S{i}", "science") for i in range(20)]
    assert len(_select_diverse(rows, limit=5)) == 5