Files
thejayman77 ed814c97b9 Daily Art engine: museum-guide blurb (grounded LLM) + extracted palette
- daily_art gains blurb + palette columns (idempotent migration).
- art._palette: Pillow median-cut to ~5 hex colors from the cached image (best-
  effort → [] on any failure). art._blurb: a warm 2-3 sentence "what you're
  looking at" note grounded in the Met catalogue (title/artist/bio/date/medium/
  classification/culture/tags). Prompt leans on context/significance and the
  title+tags for subject — explicitly NOT asserting literal composition (figure
  counts/poses) it can't see, since the model can't view the image. Markdown
  stripped from the output.
- pick_daily generates both (client optional → blurb skipped when absent); cycle
  + art CLI pass an LLM client. /api/art/today exposes blurb + palette.
- Backfilled the last 3 days on host (Veteran / Magnolia Vase / Bierstadt).
- scripts/art_blurb_palette_backfill.py for in-place backfill (no re-pick).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-23 20:12:54 -04:00

148 lines
6.1 KiB
Python

"""Daily Art: curated harvest, bulletproof daily pick (skips non-public-domain / bad
images, falls through candidates), local image cache, and never-empty get_today."""
import pytest
from goodnews import art
from goodnews.db import connect, init_db
# Canned Met "object" payloads, keyed by objectID; _fake_json serves these for
# any ".../objects/<id>" URL.
#   1 -> public domain, full metadata, distinct web-large and full-size images
#   2 -> not public domain
#   3 -> public domain, minimal metadata, web-large image only
OBJECTS = {
    record["objectID"]: record
    for record in (
        {
            "objectID": 1,
            "isPublicDomain": True,
            "title": "Sunflowers",
            "artistDisplayName": "Van Gogh",
            "objectDate": "1887",
            "medium": "Oil on canvas",
            "department": "European Paintings",
            "creditLine": "Gift",
            "objectURL": "https://met/1",
            "primaryImageSmall": "https://img/1-web.jpg",
            "primaryImage": "https://img/1.jpg",
        },
        {
            "objectID": 2,
            "isPublicDomain": False,  # not CC0 -> skip
            "primaryImageSmall": "https://img/2.jpg",
        },
        {
            "objectID": 3,
            "isPublicDomain": True,
            "title": "Irises",
            "artistDisplayName": "Van Gogh",
            "primaryImageSmall": "https://img/3-web.jpg",
        },
    )
}
def _fake_json(url, timeout=20):
if "/search" in url:
return {"total": 3, "objectIDs": [1, 2, 3]}
if "/objects/" in url:
return OBJECTS[int(url.rstrip("/").split("/")[-1])]
raise AssertionError(url)
def _fake_bytes(url, timeout=30):
return (b"\xff\xd8\xff" + b"x" * 5000, "image/jpeg") # a valid-looking jpeg
@pytest.fixture
def conn(tmp_path, monkeypatch):
    """Yield a fresh ``:memory:`` database connection with HTTP access stubbed out.

    Setup:
      * points ``GOODNEWS_ART_CACHE`` at ``<tmp_path>/art`` so cached images
        land in the per-test temporary directory;
      * replaces ``art._http_json`` / ``art._http_bytes`` with the offline
        fakes defined in this module;
      * opens the connection and runs ``init_db`` on it.

    The connection is closed in a ``finally`` so it is released even when
    ``init_db`` raises before the test body ever runs.
    """
    monkeypatch.setenv("GOODNEWS_ART_CACHE", str(tmp_path / "art"))
    monkeypatch.setattr(art, "_http_json", _fake_json)
    monkeypatch.setattr(art, "_http_bytes", _fake_bytes)
    c = connect(":memory:")
    try:
        init_db(c)
        yield c
    finally:
        c.close()
def test_harvest_dedupes_into_pool(conn):
    """The first harvest adds all three search hits; a second harvest adds none."""
    first = art.harvest_pool(conn)
    assert first["pool"] == 3
    assert first["added"] == 3

    second = art.harvest_pool(conn)
    assert second["added"] == 0  # idempotent
def test_palette_extracts_hex_colors(tmp_path):
    """``art._palette`` returns 1 to ``n`` colours, each a 7-character ``#``-prefixed string."""
    from PIL import Image

    side = 60
    red, green = (200, 30, 30), (30, 150, 70)

    img_path = tmp_path / "img.png"
    canvas = Image.new("RGB", (side, side), red)  # mostly red...
    for row in range(side // 2):
        for col in range(side):
            canvas.putpixel((col, row), green)  # ...top half green
    canvas.save(img_path)

    colors = art._palette(img_path, n=3)
    assert 1 <= len(colors) <= 3
    for color in colors:
        assert color.startswith("#") and len(color) == 7
def test_palette_bad_image_is_empty(tmp_path):
    """A file that only looks like a JPEG yields an empty palette, not an exception."""
    bogus = tmp_path / "bad.jpg"
    bogus.write_bytes(b"\xff\xd8\xff" + b"x" * 500)  # not a decodable image

    colors = art._palette(bogus)
    assert colors == []
class _FakeClient:
def __init__(self, text="A quiet wheat field at dusk."):
self.text, self.seen = text, None
def chat_text(self, messages):
self.seen = messages
return self.text
def test_blurb_grounds_in_metadata_and_cleans():
    """``art._blurb`` trims the model reply and feeds catalogue facts into the last message."""
    client = _FakeClient(" A returning soldier in a golden field. \n")
    record = {
        "title": "The Veteran",
        "artistDisplayName": "Homer",
        "medium": "Oil on canvas",
        "tags": [{"term": "wheat"}, {"term": "scythe"}],
    }

    blurb = art._blurb(client, record)
    assert blurb == "A returning soldier in a golden field."

    prompt = client.seen[-1]["content"]
    for fact in ("Homer", "Oil on canvas", "wheat"):
        assert fact in prompt  # catalogue facts fed in
def test_blurb_none_on_error_or_empty():
    """``art._blurb`` returns None when the client raises or replies with only whitespace."""

    class Bad:
        def chat_text(self, m):
            raise RuntimeError("down")

    after_error = art._blurb(Bad(), {"title": "X"})
    assert after_error is None

    after_blank = art._blurb(_FakeClient(" "), {"title": "X"})
    assert after_blank is None
def test_pick_stores_blurb_and_palette(conn):
    """A pick made with a client carries its blurb; a pick made without one has no blurb."""
    art.harvest_pool(conn)

    llm = _FakeClient("A quiet masterwork.")
    with_client = art.pick_daily(conn, art_date="2026-06-21", client=llm)
    assert with_client["blurb"] == "A quiet masterwork."
    # fixture image isn't decodable → empty palette, stored as JSON
    assert with_client["palette"] == "[]"

    # no client → no blurb, pick still succeeds
    without_client = art.pick_daily(conn, art_date="2026-06-22")
    assert without_client["blurb"] is None
def test_pick_caches_image_metadata_and_marks_shown(conn):
    """A daily pick stores metadata, caches the image, and stamps the pool row as shown.

    Only objects 1 and 3 are public domain in the fixture data, so the pick
    must be one of those. The final assertions check the "marks shown" half of
    the test name: the picked row's ``shown_at`` must equal the pick date.
    """
    art.harvest_pool(conn)
    a = art.pick_daily(conn, art_date="2026-06-21")
    assert a and a["object_id"] in (1, 3) and a["title"] in ("Sunflowers", "Irises")
    assert a["artist"] == "Van Gogh" and a["image_file"]
    assert a["is_public_domain"] == 1  # license marker stored
    assert list(art.cache_dir().glob(f"{a['object_id']}.*"))  # image cached locally
    assert not list(art.cache_dir().glob("*.tmp"))  # atomic write left no temp file
    # the pool row for the picked object is stamped with the pick date
    shown = conn.execute(
        "SELECT shown_at FROM art_pool WHERE object_id=?", (a["object_id"],)
    ).fetchone()[0]
    assert shown == "2026-06-21"
def test_pick_caches_full_res_for_lightbox(conn):
    """When an object has a distinct ``primaryImage``, both display and full-size copies are cached."""

    def cached(pattern):
        return list(art.cache_dir().glob(pattern))

    # object 1 has a primaryImage distinct from its primaryImageSmall
    conn.execute("INSERT INTO art_pool (source, object_id) VALUES ('met', 1)")
    conn.commit()

    picked = art.pick_daily(conn, art_date="2026-06-21")
    assert picked
    assert picked["object_id"] == 1

    assert cached("1.*")  # web-large display copy
    assert cached("1-full.*")  # hi-res copy for the zoom
    assert not cached("*.tmp")
def test_blocked_pieces_are_never_picked(conn):
    """A pool row flagged ``blocked=1`` is never chosen; the piece that is chosen gets marked shown.

    The follow-up query needs a real pick, so a ``None`` result is rejected
    up front with a readable message instead of surfacing later as a
    ``TypeError`` from subscripting ``None``.
    """
    art.harvest_pool(conn)
    conn.execute("UPDATE art_pool SET blocked=1 WHERE object_id=1")  # block the good one
    conn.commit()
    a = art.pick_daily(conn, art_date="2026-06-21")
    assert a is not None, "expected a pick from the remaining unblocked pool"
    assert a["object_id"] != 1  # never the blocked piece
    shown = conn.execute(
        "SELECT shown_at FROM art_pool WHERE object_id=?", (a["object_id"],)
    ).fetchone()[0]
    assert shown == "2026-06-21"
def test_pick_skips_non_public_domain(conn):
    """With only the non-public-domain object in the pool, the pick returns None."""
    # only the non-CC0 one
    conn.execute("INSERT INTO art_pool (source, object_id) VALUES ('met', 2)")
    conn.commit()

    result = art.pick_daily(conn, art_date="2026-06-21")
    assert result is None  # nothing fetched, not an error
def test_pick_is_idempotent_and_get_today_never_empty(conn):
    """Re-picking a date returns the same piece; ``get_today`` falls back for unknown dates."""
    day = "2026-06-21"
    art.harvest_pool(conn)

    first = art.pick_daily(conn, art_date=day)
    second = art.pick_daily(conn, art_date=day)  # same day -> unchanged
    assert first["object_id"] == second["object_id"]

    expected_id = first["object_id"]
    assert art.get_today(conn, day)["object_id"] == expected_id
    # an unknown date falls back to the most recent cached piece (room never empty)
    assert art.get_today(conn, "2099-01-01")["object_id"] == expected_id
def test_run_daily_bootstraps_pool_then_picks(conn):
    """On an empty database, ``run_daily`` reports a pool of three and picks object 1 or 3."""
    outcome = art.run_daily(conn)
    assert outcome["pool"] == 3
    assert outcome["picked_object"] in (1, 3)