Daily Art backend: curated Met pool, daily cached pick, /api/art (prototype)

The engine for the /art room (design-independent; deploy held for Codex review).

- goodnews/art.py: harvest a curated pool of public-domain HIGHLIGHT artworks from the
  Met (isHighlight+isPublicDomain+hasImages -> masterworks, never potsherds; CC0). Daily
  deterministic pick from the least-recently-shown (no soon-repeats, same for everyone),
  fetch metadata + download the image to OUR cache (data/art_cache) so the homepage never
  waits on or hotlinks the museum. Bulletproof: bad object/image falls through candidates;
  a failed day keeps the last piece (room never empty). Injectable HTTP for tests.
- Schema: art_pool + daily_art. /api/art/today (edge-cacheable) + /api/art/image/{id}
  (served from cache, immutable). CLI `art [--harvest] [--force]` + a non-fatal cycle step.
- Tests (5, mocked HTTP) + verified live against the Met: harvested 1641 works,
  picked/cached "Repose" by John White Alexander. 371 tests green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-21 14:50:20 -04:00
parent 0c68c22221
commit 308516a263
6 changed files with 341 additions and 2 deletions
+75
View File
@@ -0,0 +1,75 @@
"""Daily Art: curated harvest, bulletproof daily pick (skips non-public-domain / bad
images, falls through candidates), local image cache, and never-empty get_today."""
import pytest
from goodnews import art
from goodnews.db import connect, init_db
# Canned Met object payloads keyed by objectID; _fake_json serves these for
# any "/objects/<id>" request.
OBJECTS = {
    # Fully populated public-domain record.
    1: {
        "objectID": 1,
        "isPublicDomain": True,
        "title": "Sunflowers",
        "artistDisplayName": "Van Gogh",
        "objectDate": "1887",
        "medium": "Oil on canvas",
        "department": "European Paintings",
        "creditLine": "Gift",
        "objectURL": "https://met/1",
        "primaryImageSmall": "https://img/1-web.jpg",
        "primaryImage": "https://img/1.jpg",
    },
    # not CC0 -> skip
    2: {
        "objectID": 2,
        "isPublicDomain": False,
        "primaryImageSmall": "https://img/2.jpg",
    },
    # Public-domain record carrying only title, artist and the small image.
    3: {
        "objectID": 3,
        "isPublicDomain": True,
        "title": "Irises",
        "artistDisplayName": "Van Gogh",
        "primaryImageSmall": "https://img/3-web.jpg",
    },
}
def _fake_json(url, timeout=20):
if "/search" in url:
return {"total": 3, "objectIDs": [1, 2, 3]}
if "/objects/" in url:
return OBJECTS[int(url.rstrip("/").split("/")[-1])]
raise AssertionError(url)
def _fake_bytes(url, timeout=30):
return (b"\xff\xd8\xff" + b"x" * 5000, "image/jpeg") # a valid-looking jpeg
@pytest.fixture
def conn(tmp_path, monkeypatch):
    """Yield an initialised in-memory database with art's HTTP helpers faked.

    Setup points ``GOODNEWS_ART_CACHE`` at a per-test temporary directory and
    replaces ``art._http_json`` / ``art._http_bytes`` with the canned fakes,
    then opens ``:memory:`` and runs ``init_db`` on it. The connection is
    closed on teardown, and also if ``init_db`` itself raises.
    """
    monkeypatch.setenv("GOODNEWS_ART_CACHE", str(tmp_path / "art"))
    monkeypatch.setattr(art, "_http_json", _fake_json)
    monkeypatch.setattr(art, "_http_bytes", _fake_bytes)
    c = connect(":memory:")
    try:
        init_db(c)
        yield c
    finally:
        # Runs after the test finishes or when setup fails part-way.
        c.close()
def test_harvest_dedupes_into_pool(conn):
    """A first harvest adds all three searched ids; a repeat adds none."""
    first = art.harvest_pool(conn)
    # Separate asserts so a failure names the field that was wrong.
    assert first["pool"] == 3
    assert first["added"] == 3
    assert art.harvest_pool(conn)["added"] == 0  # idempotent
def test_pick_caches_image_metadata_and_marks_shown(conn):
    """A daily pick returns metadata, caches the image, and stamps shown_at."""
    art.harvest_pool(conn)
    a = art.pick_daily(conn, art_date="2026-06-21")

    # Only the two public-domain fixtures (1 and 3) are acceptable picks.
    assert a
    assert a["object_id"] in (1, 3)
    assert a["title"] in ("Sunflowers", "Irises")
    assert a["artist"] == "Van Gogh"
    assert a["image_file"]

    # image cached locally
    assert list(art.cache_dir().glob(f"{a['object_id']}.*"))

    row = conn.execute(
        "SELECT shown_at FROM art_pool WHERE object_id=?", (a["object_id"],)
    ).fetchone()
    # Guard first so a missing row fails as an assertion, not a TypeError.
    assert row is not None
    assert row[0] == "2026-06-21"
def test_pick_skips_non_public_domain(conn):
    """With only the non-public-domain object pooled, pick_daily returns None."""
    # only the non-CC0 one
    conn.execute("INSERT INTO art_pool (source, object_id) VALUES ('met', 2)")
    conn.commit()
    # nothing fetched, not an error
    assert art.pick_daily(conn, art_date="2026-06-21") is None
def test_pick_is_idempotent_and_get_today_never_empty(conn):
    """Re-picking a date is stable, and get_today falls back to the last piece."""
    art.harvest_pool(conn)
    a1 = art.pick_daily(conn, art_date="2026-06-21")
    a2 = art.pick_daily(conn, art_date="2026-06-21")  # same day -> unchanged

    # Fail with a clear assertion, not a TypeError, if either pick is empty.
    assert a1
    assert a2
    assert a1["object_id"] == a2["object_id"]
    assert art.get_today(conn, "2026-06-21")["object_id"] == a1["object_id"]
    # an unknown date falls back to the most recent cached piece (room never empty)
    assert art.get_today(conn, "2099-01-01")["object_id"] == a1["object_id"]
def test_run_daily_bootstraps_pool_then_picks(conn):
    """run_daily on an empty database reports a 3-item pool and a valid pick."""
    r = art.run_daily(conn)
    # Separate asserts so a failure names the field that was wrong.
    assert r["pool"] == 3
    assert r["picked_object"] in (1, 3)