"""Tests for goodnews.filters.

Covers avoid-term matching, article filtering over FilterPrefs, pause
expiry, and forgiving parsing of stored preference blobs.
"""

from datetime import datetime, timezone

from goodnews.filters import (
    FilterPrefs,
    Pause,
    filter_articles,
    prefs_from_json,
    text_matches_avoid_terms,
)

# Fixed, timezone-aware reference instant passed as `now` in the tests below,
# so pause checks do not depend on the wall clock.
NOW = datetime(2026, 6, 1, tzinfo=timezone.utc)


def art(
    topic: str = "science",
    flavor: str = "discovery",
    title: str = "A calm discovery",
    description: str = "",
) -> dict:
    """Build a minimal article dict with topic, flavor, title and description keys."""
    return {
        "topic": topic,
        "flavor": flavor,
        "title": title,
        "description": description,
    }


# --- avoid-term matching: the trust-critical pure function ---


def test_single_word_matches_whole_word_only():
    """A single-word term matches as a whole word, not as a prefix of a longer word."""
    assert text_matches_avoid_terms("New cancer drug approved", ["cancer"])
    assert not text_matches_avoid_terms("Cancerous growth studied", ["cancer"])


def test_substring_does_not_match():
    """A term that is only a substring of a word in the text must not match."""
    # "pan" must not match "pandemic"
    assert not text_matches_avoid_terms("Pandemic preparedness improves", ["pan"])


def test_phrase_matches_as_phrase():
    """A multi-word term matches only when its words appear together as a phrase."""
    assert text_matches_avoid_terms("The stock market crashed today", ["stock market"])
    assert not text_matches_avoid_terms("Stocks and other markets", ["stock market"])


def test_punctuation_and_case_normalized():
    """Matching ignores letter case and treats a hyphen like a word separator."""
    assert text_matches_avoid_terms("An Anti-Aging breakthrough", ["anti aging"])
    assert text_matches_avoid_terms("ELECTION results", ["election"])


def test_empty_inputs_are_safe():
    """Empty text, an empty term list, and None text all yield no match."""
    assert not text_matches_avoid_terms("", ["cancer"])
    assert not text_matches_avoid_terms("anything", [])
    assert not text_matches_avoid_terms(None, ["cancer"])


# --- filter_articles over the canonical prefs ---


def test_empty_prefs_pass_everything_through():
    """Default-constructed prefs leave the article list unchanged."""
    items = [art(), art(topic="health")]
    assert filter_articles(items, FilterPrefs(), NOW) == items


def test_mute_topic_drops_matching_articles():
    """Articles whose topic is listed in mute_topics are removed."""
    items = [art(topic="science"), art(topic="health")]
    prefs = FilterPrefs.from_dict({"mute_topics": ["health"]})
    out = filter_articles(items, prefs, NOW)
    assert [a["topic"] for a in out] == ["science"]


def test_include_topics_keeps_only_those():
    """Only articles whose topic is listed in include_topics are kept."""
    items = [art(topic="science"), art(topic="animals"), art(topic="health")]
    prefs = FilterPrefs.from_dict({"include_topics": ["science", "animals"]})
    out = filter_articles(items, prefs, NOW)
    assert {a["topic"] for a in out} == {"science", "animals"}


def test_avoid_terms_match_title_and_description():
    """An avoid term hides an article whether it appears in the title or the description."""
    items = [
        art(title="Update on the election"),
        art(description="about an election too"),
        art(),
    ]
    prefs = FilterPrefs.from_dict({"avoid_terms": ["election"]})
    out = filter_articles(items, prefs, NOW)
    # Pin WHICH article survives, not just how many: a bare length check would
    # still pass if the clean article were dropped and a matching one kept.
    assert out == [items[2]]


def test_active_pause_hides_topic_but_expired_does_not():
    """A topic pause ending after NOW hides the topic; one ending before NOW does not."""
    items = [art(topic="health")]
    active = FilterPrefs.from_dict(
        {"pauses": [{"kind": "topic", "value": "health", "until": "2026-06-02T00:00:00Z"}]}
    )
    expired = FilterPrefs.from_dict(
        {"pauses": [{"kind": "topic", "value": "health", "until": "2026-05-01T00:00:00Z"}]}
    )
    assert filter_articles(items, active, NOW) == []
    assert filter_articles(items, expired, NOW) == items


def test_pause_active_helper():
    """Pause.active is true before `until`, false after it, and false for an unparseable `until`."""
    assert Pause("topic", "health", "2026-06-02T00:00:00Z").active(NOW)
    assert not Pause("topic", "health", "2026-05-01T00:00:00Z").active(NOW)
    assert not Pause("topic", "health", "garbage").active(NOW)


def test_pause_active_tolerates_naive_now():
    """Pause.active accepts a timezone-naive `now` without raising."""
    # A naive `now` must not raise an aware-vs-naive comparison error.
    naive = datetime(2026, 6, 1)
    assert Pause("topic", "health", "2026-06-02T00:00:00Z").active(naive)


# --- forgiving parsing (bad blobs must never break the feed) ---


def test_prefs_from_json_tolerates_garbage():
    """Invalid JSON, None, and JSON of the wrong shape all parse to empty prefs."""
    assert prefs_from_json("not json").is_empty()
    assert prefs_from_json(None).is_empty()
    assert prefs_from_json("[1,2,3]").is_empty()  # wrong shape


def test_from_dict_skips_malformed_pauses():
    """Malformed pause entries are dropped while valid fields and pauses are kept."""
    prefs = FilterPrefs.from_dict(
        {
            "mute_topics": ["health"],
            "pauses": [
                {"kind": "topic", "value": "science", "until": "2026-06-02T00:00:00Z"},
                {"kind": "topic"},  # malformed — missing value/until
                "garbage",  # not even a dict
            ],
        }
    )
    assert prefs.mute_topics == ["health"]
    assert len(prefs.pauses) == 1  # only the well-formed pause survives


def test_from_dict_ignores_non_string_list_entries():
    """Non-string entries in a string list are discarded; string entries keep their order."""
    prefs = FilterPrefs.from_dict({"avoid_terms": ["ok", 5, None, "fine"]})
    assert prefs.avoid_terms == ["ok", "fine"]