6c10ad99a9
The Wikimedia feed's thumbnail is 330px, which upscales blurry in our hero. Use originalimage.source instead — it's reliably sharp. (Can't just request a bigger thumbnail width: for very large source images Wikimedia only serves pre-generated bucket sizes and 400s on arbitrary widths — e.g. 500px ok, 800/1024px fail.) - onthisday._best_image() prefers originalimage, falls back to the thumbnail. - scripts/otd_image_upsize_backfill.py re-fetches each stored MM-DD and upgrades image_url in onthisday_pool + daily_onthisday in place (ran on host: pool + 6 daily rows now sharp; today's hero verified 200). Only the /onthisday hero loads this image (home card is text-only), so larger files are a single-page, one-time load. - test_best_image locks the prefer-original/fallback behavior. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
91 lines
4.1 KiB
Python
91 lines
4.1 KiB
Python
"""On This Day: harvest tone-filters + dedupes, deterministic daily pick (idempotent,
respects blocked/featured), never-empty get_today, run_daily bootstraps."""
|
|
import pytest
|
|
|
|
from goodnews import onthisday
|
|
from goodnews.db import connect, init_db
|
|
|
|
# Canned feed entries for md '06-22': three good/neutral events plus one clearly
# negative event (dropped by the keyword floor).
FAKE = [
    {
        "md": "06-22",
        "year": 1611,
        "text": "The first public art museum opens its doors.",
        "summary": "A milestone for public culture.",
        "image_url": "i1",
        "page_url": "u1",
    },
    {
        "md": "06-22",
        "year": 1990,
        "text": "A new species of butterfly is discovered.",
        "summary": None,
        "image_url": None,
        "page_url": "u2",
    },
    {
        # 'invad' + 'kill' -> dropped
        "md": "06-22",
        "year": 1941,
        "text": "An army invades a neighbour, killing thousands.",
        "summary": None,
        "image_url": None,
        "page_url": "u3",
    },
    {
        "md": "06-22",
        "year": 1868,
        "text": "An inventor patents a joyful new musical instrument.",
        "summary": "s",
        "image_url": None,
        "page_url": "u4",
    },
]
|
|
|
|
|
|
@pytest.fixture
def conn(monkeypatch):
    """Yield an initialised in-memory database with the event fetch stubbed out.

    ``onthisday._fetch_events`` is replaced by a stub that returns fresh
    ``dict`` copies of the ``FAKE`` entries whose ``md`` equals the requested
    one, so a test that mutates an event does not alter the shared ``FAKE``
    list. The connection is closed once the requesting test has finished.
    """

    def fake_fetch(md):
        # Copy each entry so callers never hold references into FAKE itself.
        return [dict(event) for event in FAKE if event["md"] == md]

    monkeypatch.setattr(onthisday, "_fetch_events", fake_fetch)
    c = connect(":memory:")
    init_db(c)
    yield c
    c.close()
|
|
|
|
|
|
def test_harvest_filters_negatives_and_dedupes(conn):
    """Harvest drops the negative event and adds nothing on a second run."""
    first = onthisday.harvest(conn, "06-22")
    # Four events fetched; the invasion is dropped, so three are kept and added.
    assert first["fetched"] == 4
    assert first["kept"] == 3
    assert first["added"] == 3

    # Idempotent (content key): harvesting the same day again adds no rows.
    assert onthisday.harvest(conn, "06-22")["added"] == 0

    rows = conn.execute("SELECT text FROM onthisday_pool").fetchall()
    texts = [row[0] for row in rows]
    assert not any("invades" in t for t in texts)
|
|
|
|
|
|
def test_pick_caches_marks_shown_and_idempotent(conn):
    """A daily pick stamps ``shown_at`` on its pool row and is stable on repeat."""
    onthisday.harvest(conn, "06-22")

    first = onthisday.pick_daily(conn, feature_date="2026-06-22")
    assert first
    assert first["md"] == "06-22"
    # Only the three events that survive the harvest filter are eligible.
    assert first["year"] in (1611, 1990, 1868)

    shown = conn.execute(
        "SELECT shown_at FROM onthisday_pool WHERE id=?", (first["pool_id"],)
    ).fetchone()[0]
    assert shown == "2026-06-22"

    # Picking again for the same feature date returns the same pool row.
    again = onthisday.pick_daily(conn, feature_date="2026-06-22")
    assert again["pool_id"] == first["pool_id"]
|
|
|
|
|
|
def test_blocked_never_picked(conn):
    """With every row except the 1990 one blocked, the pick must be 1990."""
    onthisday.harvest(conn, "06-22")
    # Block everything but the butterfly event so it is the only candidate left.
    conn.execute("UPDATE onthisday_pool SET blocked=1 WHERE year!=1990")
    conn.commit()

    picked = onthisday.pick_daily(conn, feature_date="2026-06-22")
    assert picked["year"] == 1990
|
|
|
|
|
|
def test_featured_is_pinned(conn):
    """A row flagged ``featured`` is chosen even when the pick is forced."""
    onthisday.harvest(conn, "06-22")
    conn.execute("UPDATE onthisday_pool SET featured=1 WHERE year=1868")
    conn.commit()

    # Featured wins regardless of the date seed.
    picked = onthisday.pick_daily(conn, feature_date="2026-06-22", force=True)
    assert picked["year"] == 1868
|
|
|
|
|
|
def test_get_today_never_empty(conn):
    """``get_today`` for a date with no pick returns the latest existing pick."""
    onthisday.harvest(conn, "06-22")
    picked = onthisday.pick_daily(conn, feature_date="2026-06-22")

    # No pick was made for 2099-01-01, so the result falls back to the latest one.
    today = onthisday.get_today(conn, "2099-01-01")
    assert today["pool_id"] == picked["pool_id"]
|
|
|
|
|
|
def test_pick_returns_none_when_pool_empty(conn):
    """``pick_daily`` returns ``None`` when nothing has been harvested."""
    picked = onthisday.pick_daily(conn, feature_date="2026-03-03")
    assert picked is None
|
|
|
|
|
|
def test_tone_filter_llm_narrows(conn):
    """The LLM step narrows the keyword-filtered events to the kept indices.

    The ``conn`` fixture is requested but not referenced in the body.
    """

    class FakeClient:
        """Stand-in client whose reply wraps the JSON verdict in extra text."""

        def chat_text(self, messages):
            # Keep only the first of whatever it's given.
            return 'sure: {"keep": [0]}'

    events = [dict(e) for e in FAKE]
    kept = onthisday._tone_filter(events, client=FakeClient())

    # Keyword floor drops the invasion (3 remain), then the LLM narrows to 1.
    assert len(kept) == 1
    assert kept[0]["year"] == 1611
|
|
|
|
|
|
def test_best_image_prefers_original_over_thumbnail():
    """``_best_image`` prefers ``originalimage``, then ``thumbnail``, then ``None``."""
    # The 330px thumbnail upscales (blurry); originalimage is reliably sharp,
    # so it is preferred when both are present.
    page = {
        "thumbnail": {
            "source": "https://x/thumb/Foo.jpg/330px-Foo.jpg",
            "width": 330,
        },
        "originalimage": {
            "source": "https://x/thumb/Foo.jpg/3840px-Foo.jpg",
            "width": 7000,
        },
    }
    assert onthisday._best_image(page) == "https://x/thumb/Foo.jpg/3840px-Foo.jpg"

    # Falls back to the thumbnail when there is no originalimage ...
    thumb_only = {"thumbnail": {"source": "https://x/330px-Foo.jpg"}}
    assert onthisday._best_image(thumb_only) == "https://x/330px-Foo.jpg"

    # ... and to None when neither key exists.
    assert onthisday._best_image({}) is None
|