ed814c97b9
- daily_art gains blurb + palette columns (idempotent migration). - art._palette: Pillow median-cut to ~5 hex colors from the cached image (best- effort → [] on any failure). art._blurb: a warm 2-3 sentence "what you're looking at" note grounded in the Met catalogue (title/artist/bio/date/medium/ classification/culture/tags). Prompt leans on context/significance and the title+tags for subject — explicitly NOT asserting literal composition (figure counts/poses) it can't see, since the model can't view the image. Markdown stripped from the output. - pick_daily generates both (client optional → blurb skipped when absent); cycle + art CLI pass an LLM client. /api/art/today exposes blurb + palette. - Backfilled the last 3 days on host (Veteran / Magnolia Vase / Bierstadt). - scripts/art_blurb_palette_backfill.py for in-place backfill (no re-pick). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
148 lines
6.1 KiB
Python
148 lines
6.1 KiB
Python
"""Daily Art: curated harvest, bulletproof daily pick (skips non-public-domain / bad
images, falls through candidates), local image cache, and never-empty get_today."""
|
|
import pytest
|
|
|
|
from goodnews import art
|
|
from goodnews.db import connect, init_db
|
|
|
|
# Canned Met-style object records, keyed by their own "objectID".
# Object 2 is deliberately not public domain; object 3 carries only a small image URL.
OBJECTS = {
    record["objectID"]: record
    for record in (
        {
            "objectID": 1,
            "isPublicDomain": True,
            "title": "Sunflowers",
            "artistDisplayName": "Van Gogh",
            "objectDate": "1887",
            "medium": "Oil on canvas",
            "department": "European Paintings",
            "creditLine": "Gift",
            "objectURL": "https://met/1",
            "primaryImageSmall": "https://img/1-web.jpg",
            "primaryImage": "https://img/1.jpg",
        },
        {  # not CC0 -> skip
            "objectID": 2,
            "isPublicDomain": False,
            "primaryImageSmall": "https://img/2.jpg",
        },
        {
            "objectID": 3,
            "isPublicDomain": True,
            "title": "Irises",
            "artistDisplayName": "Van Gogh",
            "primaryImageSmall": "https://img/3-web.jpg",
        },
    )
}
|
|
|
|
|
|
def _fake_json(url, timeout=20):
|
|
if "/search" in url:
|
|
return {"total": 3, "objectIDs": [1, 2, 3]}
|
|
if "/objects/" in url:
|
|
return OBJECTS[int(url.rstrip("/").split("/")[-1])]
|
|
raise AssertionError(url)
|
|
|
|
|
|
def _fake_bytes(url, timeout=30):
|
|
return (b"\xff\xd8\xff" + b"x" * 5000, "image/jpeg") # a valid-looking jpeg
|
|
|
|
|
|
@pytest.fixture
def conn(tmp_path, monkeypatch):
    """Yield an initialised in-memory database with ``art``'s HTTP helpers stubbed.

    Sets ``GOODNEWS_ART_CACHE`` to a per-test temp directory (presumably what
    ``art.cache_dir()`` reads; confirm in ``goodnews.art``), swaps
    ``art._http_json`` / ``art._http_bytes`` for the fakes in this module, and
    closes the connection once the test has finished.
    """
    monkeypatch.setenv("GOODNEWS_ART_CACHE", str(tmp_path / "art"))
    for attr_name, stub in (("_http_json", _fake_json), ("_http_bytes", _fake_bytes)):
        monkeypatch.setattr(art, attr_name, stub)
    db = connect(":memory:")
    init_db(db)
    yield db
    db.close()
|
|
|
|
|
|
def test_harvest_dedupes_into_pool(conn):
    """The first harvest adds all three fake objects; a second harvest adds none."""
    first_run = art.harvest_pool(conn)
    assert first_run["pool"] == 3
    assert first_run["added"] == 3
    second_run = art.harvest_pool(conn)
    assert second_run["added"] == 0  # idempotent
|
|
|
|
|
|
def test_palette_extracts_hex_colors(tmp_path):
    """``art._palette`` returns 1..n entries, each a 7-character ``#``-prefixed string."""
    from PIL import Image

    red, green = (200, 30, 30), (30, 150, 70)
    swatch = Image.new("RGB", (60, 60), red)  # mostly red...
    swatch.paste(green, (0, 0, 60, 30))  # ...top half green
    image_path = tmp_path / "img.png"
    swatch.save(image_path)

    cols = art._palette(image_path, n=3)
    assert 1 <= len(cols) <= 3
    for hex_color in cols:
        assert hex_color.startswith("#") and len(hex_color) == 7
|
|
|
|
|
|
def test_palette_bad_image_is_empty(tmp_path):
    """A file that only carries a JPEG header yields an empty palette."""
    bogus = tmp_path / "bad.jpg"
    bogus.write_bytes(b"\xff\xd8\xff" + b"x" * 500)  # not a decodable image
    palette = art._palette(bogus)
    assert palette == []
|
|
|
|
|
|
class _FakeClient:
|
|
def __init__(self, text="A quiet wheat field at dusk."):
|
|
self.text, self.seen = text, None
|
|
def chat_text(self, messages):
|
|
self.seen = messages
|
|
return self.text
|
|
|
|
|
|
def test_blurb_grounds_in_metadata_and_cleans():
    """``art._blurb`` trims the reply and puts catalogue facts into the last message."""
    client = _FakeClient(" A returning soldier in a golden field. \n")
    record = {
        "title": "The Veteran",
        "artistDisplayName": "Homer",
        "medium": "Oil on canvas",
        "tags": [{"term": "wheat"}, {"term": "scythe"}],
    }
    out = art._blurb(client, record)
    assert out == "A returning soldier in a golden field."

    prompt = client.seen[-1]["content"]
    for fact in ("Homer", "Oil on canvas", "wheat"):
        assert fact in prompt  # catalogue facts fed in
|
|
|
|
|
|
def test_blurb_none_on_error_or_empty():
    """``art._blurb`` gives ``None`` when the client raises or replies with whitespace only."""

    class Bad:
        def chat_text(self, m):
            raise RuntimeError("down")

    from_failing_client = art._blurb(Bad(), {"title": "X"})
    assert from_failing_client is None
    from_blank_reply = art._blurb(_FakeClient(" "), {"title": "X"})
    assert from_blank_reply is None
|
|
|
|
|
|
def test_pick_stores_blurb_and_palette(conn):
    """A pick made with a client carries its blurb; a pick without one has ``blurb`` None."""
    art.harvest_pool(conn)

    llm = _FakeClient("A quiet masterwork.")
    with_client = art.pick_daily(conn, art_date="2026-06-21", client=llm)
    assert with_client["blurb"] == "A quiet masterwork."
    # fixture image isn't decodable → empty palette, stored as JSON
    assert with_client["palette"] == "[]"

    # no client → no blurb, pick still succeeds
    without_client = art.pick_daily(conn, art_date="2026-06-22")
    assert without_client["blurb"] is None
|
|
|
|
|
|
def test_pick_caches_image_metadata_and_marks_shown(conn):
    """A pick returns one of the public-domain fakes with metadata and a cached image file."""
    art.harvest_pool(conn)
    picked = art.pick_daily(conn, art_date="2026-06-21")

    assert picked
    assert picked["object_id"] in (1, 3)
    assert picked["title"] in ("Sunflowers", "Irises")
    assert picked["artist"] == "Van Gogh"
    assert picked["image_file"]
    assert picked["is_public_domain"] == 1  # license marker stored

    cached = list(art.cache_dir().glob(f"{picked['object_id']}.*"))
    assert cached  # image cached locally
    leftovers = list(art.cache_dir().glob("*.tmp"))
    assert not leftovers  # atomic write left no temp file
|
|
|
|
|
|
def test_pick_caches_full_res_for_lightbox(conn):
    """Picking object 1 leaves both a ``1.*`` and a ``1-full.*`` file in the cache."""
    # has distinct primaryImage
    conn.execute("INSERT INTO art_pool (source, object_id) VALUES ('met', 1)")
    conn.commit()

    picked = art.pick_daily(conn, art_date="2026-06-21")
    assert picked
    assert picked["object_id"] == 1

    display_copies = list(art.cache_dir().glob("1.*"))
    assert display_copies  # web-large display copy
    full_res_copies = list(art.cache_dir().glob("1-full.*"))
    assert full_res_copies  # hi-res copy for the zoom
    leftovers = list(art.cache_dir().glob("*.tmp"))
    assert not leftovers
|
|
|
|
|
|
def test_blocked_pieces_are_never_picked(conn):
    """A blocked pool entry is never picked, and the piece that is picked is stamped as shown.

    Object 1 is blocked after harvesting, so the pick must be some other piece.
    The ``shown_at`` column of the picked row must then equal the pick date.

    The pick and the ``shown_at`` row are each asserted non-None before they
    are subscripted. The earlier form accepted ``None`` in its assertion and
    then indexed it, which surfaced as a ``TypeError`` rather than a readable
    assertion failure.
    """
    art.harvest_pool(conn)
    conn.execute("UPDATE art_pool SET blocked=1 WHERE object_id=1")  # block the good one
    conn.commit()

    a = art.pick_daily(conn, art_date="2026-06-21")
    assert a is not None, "expected a non-blocked piece to be picked"
    assert a["object_id"] != 1  # never the blocked piece

    row = conn.execute(
        "SELECT shown_at FROM art_pool WHERE object_id=?", (a["object_id"],)
    ).fetchone()
    assert row is not None, "picked object is missing from art_pool"
    assert row[0] == "2026-06-21"
|
|
|
|
|
|
def test_pick_skips_non_public_domain(conn):
    """With only the non-public-domain object in the pool, ``pick_daily`` returns None."""
    # only the non-CC0 one
    conn.execute("INSERT INTO art_pool (source, object_id) VALUES ('met', 2)")
    conn.commit()
    picked = art.pick_daily(conn, art_date="2026-06-21")
    assert picked is None  # nothing fetched, not an error
|
|
|
|
|
|
def test_pick_is_idempotent_and_get_today_never_empty(conn):
    """Re-picking a date returns the same piece, and ``get_today`` always returns one."""
    art.harvest_pool(conn)
    day = "2026-06-21"

    first = art.pick_daily(conn, art_date=day)
    second = art.pick_daily(conn, art_date=day)  # same day -> unchanged
    picked_id = first["object_id"]
    assert picked_id == second["object_id"]

    assert art.get_today(conn, day)["object_id"] == picked_id
    # an unknown date falls back to the most recent cached piece (room never empty)
    assert art.get_today(conn, "2099-01-01")["object_id"] == picked_id
|
|
|
|
|
|
def test_run_daily_bootstraps_pool_then_picks(conn):
    """On an empty database ``run_daily`` reports a pool of 3 and a public-domain pick."""
    report = art.run_daily(conn)
    assert report["pool"] == 3
    assert report["picked_object"] in (1, 3)
|