Files
upbeatBytes/tests/test_onthisday.py
T
thejayman77 6c10ad99a9 On This Day: serve sharp images (originalimage, not the 330px thumbnail)
The Wikimedia feed's thumbnail is 330px, which looks blurry when upscaled in our hero. Use
originalimage.source instead — it's reliably sharp. (Can't just request a bigger
thumbnail width: for very large source images Wikimedia only serves pre-generated
bucket sizes and 400s on arbitrary widths — e.g. 500px ok, 800/1024px fail.)

- onthisday._best_image() prefers originalimage, falls back to the thumbnail.
- scripts/otd_image_upsize_backfill.py re-fetches each stored MM-DD and upgrades
  image_url in onthisday_pool + daily_onthisday in place (ran on host: pool + 6
  daily rows now sharp; today's hero verified 200). Only the /onthisday hero
  loads this image (home card is text-only), so larger files are a single-page,
  one-time load.
- test_best_image locks the prefer-original/fallback behavior.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 17:07:37 -04:00

91 lines
4.1 KiB
Python

"""On This Day: harvest tone-filters + dedupes, deterministic daily pick (idempotent,
respects blocked/featured), never-empty get_today, run_daily bootstraps."""
import pytest
from goodnews import onthisday
from goodnews.db import connect, init_db
# Canned feed for md '06-22': three good/neutral events plus one clearly negative
# event (the 1941 invasion) that the keyword floor is expected to drop.
FAKE = [
    {
        "md": "06-22",
        "year": 1611,
        "text": "The first public art museum opens its doors.",
        "summary": "A milestone for public culture.",
        "image_url": "i1",
        "page_url": "u1",
    },
    {
        "md": "06-22",
        "year": 1990,
        "text": "A new species of butterfly is discovered.",
        "summary": None,
        "image_url": None,
        "page_url": "u2",
    },
    {
        # 'invad' + 'kill' -> dropped
        "md": "06-22",
        "year": 1941,
        "text": "An army invades a neighbour, killing thousands.",
        "summary": None,
        "image_url": None,
        "page_url": "u3",
    },
    {
        "md": "06-22",
        "year": 1868,
        "text": "An inventor patents a joyful new musical instrument.",
        "summary": "s",
        "image_url": None,
        "page_url": "u4",
    },
]
@pytest.fixture
def conn(monkeypatch):
    """Yield an initialised in-memory database with the event fetch stubbed out.

    ``onthisday._fetch_events`` is replaced with a function that returns copies
    of the ``FAKE`` rows whose ``md`` equals the requested value, so the tests
    run against canned data instead of the real fetch. The connection is
    closed on teardown.
    """
    monkeypatch.setattr(
        onthisday,
        "_fetch_events",
        # hand out fresh dicts so mutations by the code under test never reach FAKE
        lambda md: [dict(e) for e in FAKE if e["md"] == md],
    )
    c = connect(":memory:")
    try:
        init_db(c)
        yield c
    finally:
        # runs even when init_db raises, so the connection is never leaked
        c.close()
def test_harvest_filters_negatives_and_dedupes(conn):
    """Harvest keeps the three benign events, drops the invasion, and is repeatable."""
    first = onthisday.harvest(conn, "06-22")
    # the invasion dropped: 4 fetched, 3 kept, 3 newly stored
    assert first["fetched"] == 4
    assert first["kept"] == 3
    assert first["added"] == 3

    # idempotent (content key): harvesting the same day again adds nothing
    second = onthisday.harvest(conn, "06-22")
    assert second["added"] == 0

    stored = conn.execute("SELECT text FROM onthisday_pool").fetchall()
    assert all("invades" not in row[0] for row in stored)
def test_pick_caches_marks_shown_and_idempotent(conn):
    """A daily pick stamps shown_at on its pool row and is stable when repeated."""
    onthisday.harvest(conn, "06-22")

    picked = onthisday.pick_daily(conn, feature_date="2026-06-22")
    assert picked
    assert picked["md"] == "06-22"
    assert picked["year"] in (1611, 1990, 1868)

    row = conn.execute(
        "SELECT shown_at FROM onthisday_pool WHERE id=?",
        (picked["pool_id"],),
    ).fetchone()
    assert row[0] == "2026-06-22"

    # picking again for the same date returns the same pool entry (unchanged)
    again = onthisday.pick_daily(conn, feature_date="2026-06-22")
    assert again["pool_id"] == picked["pool_id"]
def test_blocked_never_picked(conn):
    """With every row but one blocked, the pick must be the unblocked row."""
    onthisday.harvest(conn, "06-22")
    # only the butterfly (1990) is left unblocked
    conn.execute("UPDATE onthisday_pool SET blocked=1 WHERE year!=1990")
    conn.commit()

    assert onthisday.pick_daily(conn, feature_date="2026-06-22")["year"] == 1990
def test_featured_is_pinned(conn):
    """A row flagged featured is the one returned by a forced pick."""
    onthisday.harvest(conn, "06-22")
    conn.execute("UPDATE onthisday_pool SET featured=1 WHERE year=1868")
    conn.commit()

    # featured wins regardless of the date seed
    pinned = onthisday.pick_daily(conn, feature_date="2026-06-22", force=True)
    assert pinned["year"] == 1868
def test_get_today_never_empty(conn):
    """get_today for a date with no pick of its own returns the earlier pick."""
    onthisday.harvest(conn, "06-22")
    picked = onthisday.pick_daily(conn, feature_date="2026-06-22")

    # falls back to latest: nothing was picked for 2099-01-01
    fallback = onthisday.get_today(conn, "2099-01-01")
    assert fallback["pool_id"] == picked["pool_id"]
def test_pick_returns_none_when_pool_empty(conn):
    """pick_daily returns None when nothing has been harvested."""
    result = onthisday.pick_daily(conn, feature_date="2026-03-03")
    assert result is None
def test_tone_filter_llm_narrows(conn):
    """A client reply naming index 0 narrows the surviving events to the first one."""

    class FakeClient:
        def chat_text(self, messages):
            # keep only the first of whatever it's given
            return 'sure: {"keep": [0]}'

    candidates = [dict(e) for e in FAKE]
    kept = onthisday._tone_filter(candidates, client=FakeClient())

    # keyword floor drops the invasion (3 remain), then the LLM narrows to 1
    assert len(kept) == 1
    assert kept[0]["year"] == 1611
def test_best_image_prefers_original_over_thumbnail():
    """_best_image prefers originalimage, then the thumbnail, then None."""
    thumb_url = "https://x/thumb/Foo.jpg/330px-Foo.jpg"
    original_url = "https://x/thumb/Foo.jpg/3840px-Foo.jpg"

    # the 330px thumbnail upscales (blurry); originalimage is reliably sharp → prefer it
    both = {
        "thumbnail": {"source": thumb_url, "width": 330},
        "originalimage": {"source": original_url, "width": 7000},
    }
    assert onthisday._best_image(both) == original_url

    # no originalimage → the thumbnail is used
    thumb_only = {"thumbnail": {"source": "https://x/330px-Foo.jpg"}}
    assert onthisday._best_image(thumb_only) == "https://x/330px-Foo.jpg"

    # neither key present → None
    assert onthisday._best_image({}) is None