Files
upbeatBytes/tests/test_filters.py
T
thejayman77 9cdcda5e02 Durability pass: tests, clearer diversity/classify behavior, Calm Filters foundation
- Add pytest suite (34 tests) covering scoring thresholds, dedup clustering +
  representative selection + time window, brief source/category diversity,
  avoid-term phrase matching, and text canonicalization/truncation.
- Rewrite _select_diverse with an explicit, tested contract (best-first, one
  per source, backfill, then inject a second category by evicting the
  lowest-ranked pick).
- classify_articles now returns attempted/succeeded/skipped (ClassifyReport) so
  silent model failures are visible in both the cycle and classify output.
- Fix clean_text truncation to stay within max_len (ellipsis no longer
  overshoots).
- New filters.py: canonical FilterPrefs shape (include/mute topics+flavors,
  avoid_terms, pauses) and pure word/phrase-boundary matching engine seeding
  Calm Filters. Not yet wired into the API.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 19:07:31 +00:00

89 lines
3.1 KiB
Python

from datetime import datetime, timezone
from goodnews.filters import (
FilterPrefs,
Pause,
filter_articles,
text_matches_avoid_terms,
)
# Fixed reference instant passed as the "now" argument throughout these tests.
NOW = datetime(year=2026, month=6, day=1, tzinfo=timezone.utc)
def art(
    topic: str = "science",
    flavor: str = "discovery",
    title: str = "A calm discovery",
    description: str = "",
) -> dict[str, str]:
    """Build a minimal article dict for the filter tests.

    Every field has a default, so a test only names the field it cares about.

    Returns:
        A new dict with exactly the keys ``topic``, ``flavor``, ``title`` and
        ``description``, each holding the corresponding argument unchanged.
    """
    return {
        "topic": topic,
        "flavor": flavor,
        "title": title,
        "description": description,
    }
# --- avoid-term matching: the trust-critical pure function ---
def test_single_word_matches_whole_word_only():
    """The term "cancer" matches as a standalone word but not inside "Cancerous"."""
    term = "cancer"

    standalone = text_matches_avoid_terms("New cancer drug approved", [term])
    assert standalone

    embedded = text_matches_avoid_terms("Cancerous growth studied", [term])
    assert not embedded
def test_substring_does_not_match():
    """A short term must not match when it is only a prefix of a longer word."""
    # "pan" must not match "pandemic"
    headline = "Pandemic preparedness improves"
    matched = text_matches_avoid_terms(headline, ["pan"])
    assert not matched
def test_phrase_matches_as_phrase():
    """A two-word term matches the contiguous phrase, not its words scattered apart."""
    phrase = "stock market"

    contiguous = text_matches_avoid_terms("The stock market crashed today", [phrase])
    assert contiguous

    scattered = text_matches_avoid_terms("Stocks and other markets", [phrase])
    assert not scattered
def test_punctuation_and_case_normalized():
    """Hyphenation and letter case in the text do not prevent a match."""
    expected_hits = (
        ("An Anti-Aging breakthrough", "anti aging"),  # hyphen vs. space, mixed case
        ("ELECTION results", "election"),  # all-caps text, lowercase term
    )
    for text, term in expected_hits:
        assert text_matches_avoid_terms(text, [term])
def test_empty_inputs_are_safe():
    """Empty text, an empty term list, and ``None`` text each report no match."""
    degenerate_cases = (
        ("", ["cancer"]),
        ("anything", []),
        (None, ["cancer"]),
    )
    for text, terms in degenerate_cases:
        assert not text_matches_avoid_terms(text, terms)
# --- filter_articles over the canonical prefs ---
def test_empty_prefs_pass_everything_through():
    """Default-constructed prefs return a result equal to the input list."""
    articles = [art(), art(topic="health")]
    result = filter_articles(articles, FilterPrefs(), NOW)
    assert result == articles
def test_mute_topic_drops_matching_articles():
    """Muting "health" leaves only the "science" article."""
    articles = [art(topic=name) for name in ("science", "health")]
    muted = FilterPrefs.from_dict({"mute_topics": ["health"]})

    kept = filter_articles(articles, muted, NOW)

    surviving_topics = [article["topic"] for article in kept]
    assert surviving_topics == ["science"]
def test_include_topics_keeps_only_those():
    """With include_topics set, only articles on the listed topics remain."""
    articles = [art(topic=name) for name in ("science", "animals", "health")]
    allow = FilterPrefs.from_dict({"include_topics": ["science", "animals"]})

    kept = filter_articles(articles, allow, NOW)

    # Compared as a set: this test does not pin the output order.
    surviving_topics = {article["topic"] for article in kept}
    assert surviving_topics == {"science", "animals"}
def test_avoid_terms_match_title_and_description():
    """An avoid term hides an article whether it occurs in the title or the description.

    Three articles go in: one with "election" in the title, one with it in the
    description, and one default article that mentions it nowhere. Only the
    last one is expected to come out.
    """
    in_title = art(title="Update on the election")
    in_description = art(description="about an election too")
    clean = art()
    items = [in_title, in_description, clean]
    prefs = FilterPrefs.from_dict({"avoid_terms": ["election"]})

    out = filter_articles(items, prefs, NOW)

    assert len(out) == 1
    # Also check *which* article survived: the length check alone would still
    # pass if the filter dropped the clean article and kept a flagged one.
    assert out == [clean]
def test_active_pause_hides_topic_but_expired_does_not():
    """A topic pause ending after NOW hides the article; one ending before NOW does not."""

    def health_paused_until(until):
        # Prefs holding a single topic pause on "health" with the given end stamp.
        return FilterPrefs.from_dict(
            {"pauses": [{"kind": "topic", "value": "health", "until": until}]}
        )

    articles = [art(topic="health")]
    active = health_paused_until("2026-06-02T00:00:00Z")  # one day after NOW
    expired = health_paused_until("2026-05-01T00:00:00Z")  # a month before NOW

    assert filter_articles(articles, active, NOW) == []
    assert filter_articles(articles, expired, NOW) == articles
def test_pause_active_helper():
    """``Pause.active(NOW)`` is true only for a pause whose end stamp lies after NOW.

    The third positional argument is presumably the ``until`` stamp (it mirrors
    the ``until`` key used in the dict form) -- confirm against ``Pause``.
    """
    still_running = Pause("topic", "health", "2026-06-02T00:00:00Z")
    assert still_running.active(NOW)

    already_over = Pause("topic", "health", "2026-05-01T00:00:00Z")
    assert not already_over.active(NOW)

    # A value that is not a timestamp at all must read as inactive.
    malformed = Pause("topic", "health", "garbage")
    assert not malformed.active(NOW)