# upbeatBytes/tests/test_filters.py

from datetime import datetime, timezone

from goodnews.filters import (
    FilterPrefs,
    Pause,
    filter_articles,
    prefs_from_json,
    text_matches_avoid_terms,
)

# Fixed, timezone-aware reference instant passed as "now" to the time-dependent tests.
NOW = datetime(year=2026, month=6, day=1, tzinfo=timezone.utc)


def art(topic="science", flavor="discovery", title="A calm discovery", description=""):
    """Build a minimal article dict for the tests.

    Every field has a default, so a test only needs to override the field
    it is exercising.
    """
    return dict(topic=topic, flavor=flavor, title=title, description=description)


# --- avoid-term matching: the trust-critical pure function ---

def test_single_word_matches_whole_word_only():
    """A one-word term hits the standalone word but not a longer word starting with it."""
    standalone_hit = text_matches_avoid_terms("New cancer drug approved", ["cancer"])
    assert standalone_hit

    longer_word_hit = text_matches_avoid_terms("Cancerous growth studied", ["cancer"])
    assert not longer_word_hit


def test_substring_does_not_match():
    """A short term must not fire on a longer word that merely contains it."""
    # "pan" is only the leading part of "Pandemic", so it must not count as a hit.
    matched = text_matches_avoid_terms("Pandemic preparedness improves", ["pan"])
    assert not matched


def test_phrase_matches_as_phrase():
    """A multi-word term matches the exact phrase, not loosely related words."""
    exact_phrase_hit = text_matches_avoid_terms(
        "The stock market crashed today", ["stock market"]
    )
    assert exact_phrase_hit

    # "Stocks" and "markets" appear, but never as the phrase "stock market".
    scattered_words_hit = text_matches_avoid_terms(
        "Stocks and other markets", ["stock market"]
    )
    assert not scattered_words_hit


def test_punctuation_and_case_normalized():
    """Matching ignores hyphenation and letter case differences."""
    # Hyphenated, title-cased text still matches the space-separated lowercase term.
    hyphenated_hit = text_matches_avoid_terms("An Anti-Aging breakthrough", ["anti aging"])
    assert hyphenated_hit

    # Upper-case text matches a lowercase term.
    uppercase_hit = text_matches_avoid_terms("ELECTION results", ["election"])
    assert uppercase_hit


def test_empty_inputs_are_safe():
    """Empty text, an empty term list, and ``None`` text all report no match."""
    degenerate_cases = (
        ("", ["cancer"]),
        ("anything", []),
        (None, ["cancer"]),
    )
    for text, terms in degenerate_cases:
        assert not text_matches_avoid_terms(text, terms)


# --- filter_articles over the canonical prefs ---

def test_empty_prefs_pass_everything_through():
    """Default-constructed prefs leave the article list unchanged."""
    articles = [art(), art(topic="health")]
    kept = filter_articles(articles, FilterPrefs(), NOW)
    assert kept == articles


def test_mute_topic_drops_matching_articles():
    """Articles whose topic is listed in ``mute_topics`` are removed."""
    articles = [art(topic="science"), art(topic="health")]
    muted_health = FilterPrefs.from_dict({"mute_topics": ["health"]})

    kept = filter_articles(articles, muted_health, NOW)

    remaining_topics = [article["topic"] for article in kept]
    assert remaining_topics == ["science"]


def test_include_topics_keeps_only_those():
    """Only articles whose topic is listed in ``include_topics`` survive."""
    articles = [art(topic="science"), art(topic="animals"), art(topic="health")]
    only_two = FilterPrefs.from_dict({"include_topics": ["science", "animals"]})

    kept = filter_articles(articles, only_two, NOW)

    # Compared as a set, so result order is deliberately not asserted here.
    remaining_topics = set(article["topic"] for article in kept)
    assert remaining_topics == {"science", "animals"}


def test_avoid_terms_match_title_and_description():
    """Avoid terms are checked against both the title and the description.

    One article carries the term in its title, one in its description, and
    one in neither; only the last may survive.
    """
    title_hit = art(title="Update on the election")
    description_hit = art(description="about an election too")
    clean = art()
    items = [title_hit, description_hit, clean]
    prefs = FilterPrefs.from_dict({"avoid_terms": ["election"]})
    out = filter_articles(items, prefs, NOW)
    # Pin *which* article survives: a bare length check would also pass if the
    # filter dropped the clean article and kept one of the matching ones.
    assert out == [clean]


def test_active_pause_hides_topic_but_expired_does_not():
    """A topic pause hides articles only while its ``until`` is still after NOW."""
    articles = [art(topic="health")]

    def health_pause_until(until):
        # Raw prefs dict holding a single topic pause on "health".
        return {"pauses": [{"kind": "topic", "value": "health", "until": until}]}

    still_running = FilterPrefs.from_dict(health_pause_until("2026-06-02T00:00:00Z"))
    already_over = FilterPrefs.from_dict(health_pause_until("2026-05-01T00:00:00Z"))

    hidden = filter_articles(articles, still_running, NOW)
    assert hidden == []

    shown = filter_articles(articles, already_over, NOW)
    assert shown == articles


def test_pause_active_helper():
    """``Pause.active`` is true before ``until``, false after it, false for a junk timestamp."""
    ends_tomorrow = Pause("topic", "health", "2026-06-02T00:00:00Z")
    assert ends_tomorrow.active(NOW)

    ended_last_month = Pause("topic", "health", "2026-05-01T00:00:00Z")
    assert not ended_last_month.active(NOW)

    # An "until" that is not a timestamp is treated as not active.
    junk_until = Pause("topic", "health", "garbage")
    assert not junk_until.active(NOW)


def test_pause_active_tolerates_naive_now():
    """``Pause.active`` accepts a ``now`` that carries no tzinfo."""
    # Passing a naive datetime must not raise an aware-vs-naive comparison error.
    naive_now = datetime(2026, 6, 1)
    pause = Pause("topic", "health", "2026-06-02T00:00:00Z")
    assert pause.active(naive_now)


# --- forgiving parsing (bad blobs must never break the feed) ---

def test_prefs_from_json_tolerates_garbage():
    """Unusable blobs yield empty prefs."""
    bad_blobs = (
        "not json",  # not parseable as JSON
        None,        # no blob at all
        "[1,2,3]",   # valid JSON, wrong shape
    )
    for blob in bad_blobs:
        assert prefs_from_json(blob).is_empty()


def test_from_dict_skips_malformed_pauses():
    """Malformed pause entries are dropped; the rest of the prefs still load."""
    well_formed = {"kind": "topic", "value": "science", "until": "2026-06-02T00:00:00Z"}
    missing_fields = {"kind": "topic"}  # malformed: no value/until
    not_a_dict = "garbage"
    raw = {
        "mute_topics": ["health"],
        "pauses": [well_formed, missing_fields, not_a_dict],
    }

    prefs = FilterPrefs.from_dict(raw)

    assert prefs.mute_topics == ["health"]
    # Only the well-formed pause survives.
    assert len(prefs.pauses) == 1


def test_from_dict_ignores_non_string_list_entries():
    """Non-string entries in ``avoid_terms`` are discarded; string order is kept."""
    mixed_terms = ["ok", 5, None, "fine"]
    prefs = FilterPrefs.from_dict({"avoid_terms": mixed_terms})
    assert prefs.avoid_terms == ["ok", "fine"]