091dec64ae
- API endpoints (feed, brief, category-counts) accept a 'prefs' JSON query param, parsed tolerantly into FilterPrefs (bad blobs never break the feed). - Feed over-fetches then applies word-boundary filters in Python and slices to the page; brief is filtered down (no refill); counts are computed over the same filtered set so browse numbers match the feed exactly. - Pause.active() coerces naive datetimes to UTC; FilterPrefs.from_dict skips malformed pauses and non-string list entries. - Static site adds the humane ladder (Not today / Less like this / Always hide) plus a Calm filters panel managing pauses, mutes, and avoid-terms in localStorage. Nothing leaves the device. - Tests now 38 (added forgiving-parse and naive-now cases). README documents it. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
124 lines
4.3 KiB
Python
124 lines
4.3 KiB
Python
from datetime import datetime, timezone
|
|
|
|
from goodnews.filters import (
|
|
FilterPrefs,
|
|
Pause,
|
|
filter_articles,
|
|
prefs_from_json,
|
|
text_matches_avoid_terms,
|
|
)
|
|
|
|
NOW = datetime(2026, 6, 1, tzinfo=timezone.utc)
|
|
|
|
|
|
def art(topic="science", flavor="discovery", title="A calm discovery", description=""):
    """Build a minimal article dict carrying the four fields these tests set."""
    return dict(topic=topic, flavor=flavor, title=title, description=description)
|
|
|
|
|
|
# --- avoid-term matching: the trust-critical pure function ---
|
|
|
|
def test_single_word_matches_whole_word_only():
    """The term "cancer" hits the standalone word but not "Cancerous"."""
    exact_word = text_matches_avoid_terms("New cancer drug approved", ["cancer"])
    assert exact_word
    longer_word = text_matches_avoid_terms("Cancerous growth studied", ["cancer"])
    assert not longer_word
|
|
|
|
|
|
def test_substring_does_not_match():
    """A short term must not fire on a longer word that merely contains it."""
    headline = "Pandemic preparedness improves"
    # "pan" is a prefix of "Pandemic" but not a word in the headline.
    assert not text_matches_avoid_terms(headline, ["pan"])
|
|
|
|
|
|
def test_phrase_matches_as_phrase():
    """A two-word term matches only when both words appear together as written."""
    contiguous = text_matches_avoid_terms("The stock market crashed today", ["stock market"])
    assert contiguous
    # Related words that are split apart and pluralised are not the phrase.
    scattered = text_matches_avoid_terms("Stocks and other markets", ["stock market"])
    assert not scattered
|
|
|
|
|
|
def test_punctuation_and_case_normalized():
    """Hyphenation and letter case in the text do not prevent a match."""
    hyphenated = text_matches_avoid_terms("An Anti-Aging breakthrough", ["anti aging"])
    assert hyphenated
    upper_case = text_matches_avoid_terms("ELECTION results", ["election"])
    assert upper_case
|
|
|
|
|
|
def test_empty_inputs_are_safe():
    """Empty text, an empty term list, and ``None`` text all report no match."""
    empty_text = text_matches_avoid_terms("", ["cancer"])
    assert not empty_text
    no_terms = text_matches_avoid_terms("anything", [])
    assert not no_terms
    missing_text = text_matches_avoid_terms(None, ["cancer"])
    assert not missing_text
|
|
|
|
|
|
# --- filter_articles over the canonical prefs ---
|
|
|
|
def test_empty_prefs_pass_everything_through():
    """Default-constructed prefs leave the article list unchanged."""
    articles = [art(), art(topic="health")]
    kept = filter_articles(articles, FilterPrefs(), NOW)
    assert kept == articles
|
|
|
|
|
|
def test_mute_topic_drops_matching_articles():
    """Muting "health" removes the health article and keeps the science one."""
    articles = [art(topic="science"), art(topic="health")]
    muted_health = FilterPrefs.from_dict({"mute_topics": ["health"]})
    kept = filter_articles(articles, muted_health, NOW)
    remaining_topics = [article["topic"] for article in kept]
    assert remaining_topics == ["science"]
|
|
|
|
|
|
def test_include_topics_keeps_only_those():
    """Only articles whose topic is listed in ``include_topics`` survive."""
    articles = [art(topic="science"), art(topic="animals"), art(topic="health")]
    only_two = FilterPrefs.from_dict({"include_topics": ["science", "animals"]})
    kept = filter_articles(articles, only_two, NOW)
    # Compared as a set: this test does not pin the output order.
    surviving_topics = set(article["topic"] for article in kept)
    assert surviving_topics == {"science", "animals"}
|
|
|
|
|
|
def test_avoid_terms_match_title_and_description():
    """Of three articles, two mention the avoid-term, so one should remain."""
    in_title = art(title="Update on the election")
    in_description = art(description="about an election too")
    unrelated = art()
    avoid_election = FilterPrefs.from_dict({"avoid_terms": ["election"]})
    kept = filter_articles([in_title, in_description, unrelated], avoid_election, NOW)
    assert len(kept) == 1
|
|
|
|
|
|
def test_active_pause_hides_topic_but_expired_does_not():
    """A topic pause ending after NOW hides the article; one ending before NOW does not."""

    def health_paused_until(until):
        # Prefs holding a single topic pause on "health" with the given end timestamp.
        return FilterPrefs.from_dict(
            {"pauses": [{"kind": "topic", "value": "health", "until": until}]}
        )

    articles = [art(topic="health")]
    still_paused = health_paused_until("2026-06-02T00:00:00Z")
    lapsed = health_paused_until("2026-05-01T00:00:00Z")

    assert filter_articles(articles, still_paused, NOW) == []
    assert filter_articles(articles, lapsed, NOW) == articles
|
|
|
|
|
|
def test_pause_active_helper():
    """``Pause.active`` is true before ``until``, false after it, and false for a bad timestamp."""
    ends_tomorrow = Pause("topic", "health", "2026-06-02T00:00:00Z")
    assert ends_tomorrow.active(NOW)

    ended_last_month = Pause("topic", "health", "2026-05-01T00:00:00Z")
    assert not ended_last_month.active(NOW)

    # An unparseable ``until`` is treated as not active.
    unparseable = Pause("topic", "health", "garbage")
    assert not unparseable.active(NOW)
|
|
|
|
|
|
def test_pause_active_tolerates_naive_now():
    """Passing a timezone-naive ``now`` still yields an answer instead of raising."""
    # No tzinfo here: comparing this against an aware ``until`` must not raise
    # an aware-vs-naive comparison error.
    naive_now = datetime(2026, 6, 1)
    pause = Pause("topic", "health", "2026-06-02T00:00:00Z")
    assert pause.active(naive_now)
|
|
|
|
|
|
# --- forgiving parsing (bad blobs must never break the feed) ---
|
|
|
|
def test_prefs_from_json_tolerates_garbage():
    """Unparseable, missing, or wrongly-shaped blobs all yield empty prefs."""
    bad_blobs = (
        "not json",
        None,
        "[1,2,3]",  # wrong shape
    )
    for blob in bad_blobs:
        assert prefs_from_json(blob).is_empty()
|
|
|
|
|
|
def test_from_dict_skips_malformed_pauses():
    """Bad pause entries are dropped while valid data in the same blob is kept."""
    well_formed = {"kind": "topic", "value": "science", "until": "2026-06-02T00:00:00Z"}
    missing_fields = {"kind": "topic"}  # malformed — missing value/until
    not_a_dict = "garbage"  # not even a dict

    raw = {
        "mute_topics": ["health"],
        "pauses": [well_formed, missing_fields, not_a_dict],
    }
    prefs = FilterPrefs.from_dict(raw)

    assert prefs.mute_topics == ["health"]
    # Only the well-formed pause survives.
    assert len(prefs.pauses) == 1
|
|
|
|
|
|
def test_from_dict_ignores_non_string_list_entries():
    """Non-string items in ``avoid_terms`` are dropped; the strings keep their order."""
    mixed_terms = ["ok", 5, None, "fine"]
    prefs = FilterPrefs.from_dict({"avoid_terms": mixed_terms})
    assert prefs.avoid_terms == ["ok", "fine"]
|