Daily Art engine: museum-guide blurb (grounded LLM) + extracted palette

- daily_art gains blurb + palette columns (idempotent migration).
- art._palette: Pillow median-cut to ~5 hex colors from the cached image (best-
  effort → [] on any failure). art._blurb: a warm 2-3 sentence "what you're
  looking at" note grounded in the Met catalogue (title/artist/bio/date/medium/
  classification/culture/tags). Prompt leans on context/significance and the
  title+tags for subject — explicitly NOT asserting literal composition (figure
  counts/poses) it can't see, since the model can't view the image. Markdown
  stripped from the output.
- pick_daily generates both (client optional → blurb skipped when absent); cycle
  + art CLI pass an LLM client. /api/art/today exposes blurb + palette.
- Backfilled the last 3 days on host (Veteran / Magnolia Vase / Bierstadt).
- scripts/art_blurb_palette_backfill.py for in-place backfill (no re-pick).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-23 20:12:54 -04:00
parent 79ecb800af
commit ed814c97b9
7 changed files with 172 additions and 10 deletions
+2
View File
@@ -2294,6 +2294,8 @@ def create_app() -> FastAPI:
"source": a["source"], "museum": museums.get(a["source"], a["source"]), "source": a["source"], "museum": museums.get(a["source"], a["source"]),
"is_public_domain": bool(a["is_public_domain"]), "is_public_domain": bool(a["is_public_domain"]),
"license": "Public Domain (CC0)" if a["is_public_domain"] else None, "license": "Public Domain (CC0)" if a["is_public_domain"] else None,
"blurb": a.get("blurb"),
"palette": json.loads(a["palette"]) if a.get("palette") else [],
"image_url": f"/api/art/image/{a['object_id']}", "image_url": f"/api/art/image/{a['object_id']}",
"image_url_large": f"/api/art/image/{a['object_id']}?size=full", "image_url_large": f"/api/art/image/{a['object_id']}?size=full",
} }
+73 -8
View File
@@ -132,6 +132,69 @@ def _download_image(obj: dict, object_id: int) -> str | None:
return display return display
def _palette(image_path: "Path", n: int = 5) -> list[str]:
"""Extract ~n representative hex colors from the cached image (for the 'colors in this
piece' strip). Best-effort: any failure → empty list (the strip just hides)."""
try:
from PIL import Image
with Image.open(image_path) as im:
im = im.convert("RGB")
im.thumbnail((120, 120)) # tiny — palette, not fidelity
# Adaptive median-cut to a small palette, then order by how much of the image each covers.
q = im.quantize(colors=max(n * 2, 8), method=Image.Quantize.MEDIANCUT)
pal = q.getpalette()
counts = sorted(q.getcolors(), reverse=True) # [(count, index), ...] most-used first
out, seen = [], set()
for _count, idx in counts:
r, g, b = pal[idx * 3], pal[idx * 3 + 1], pal[idx * 3 + 2]
hexc = f"#{r:02x}{g:02x}{b:02x}"
if hexc in seen:
continue
seen.add(hexc)
out.append(hexc)
if len(out) >= n:
break
return out
except Exception: # noqa: BLE001 — palette is decorative; never break the pick
return []
_BLURB_SYSTEM = (
"You are the calm, knowledgeable curator of a daily-art feature for a general audience — "
"people who enjoy a beautiful painting but aren't art historians. In 2 to 3 warm, plain "
"sentences, help them appreciate the piece and why it's worth a moment: its mood, the "
"artist, the era or movement, and a little real context or significance.\n"
"GROUNDING (important): the catalogue details below — especially the title and the "
"'Depicts' tags — are your only reliable guide to the SUBJECT. You cannot actually see the "
"image, so do NOT assert literal visual specifics you can't verify: do not state how many "
"figures are shown, their exact poses or actions, colors, or background details. Lean on "
"what's certain (title, tags, medium, date, artist, movement) and on feeling/significance. "
"If you don't recognize the exact work, stay general and contextual rather than inventing. "
"No preamble, no title repetition, no hype, no markdown — just the note."
)
def _blurb(client, obj: dict) -> str | None:
"""A short 'museum guide' note for the piece, grounded in the Met catalogue metadata.
Best-effort + cached by the caller; returns None on any trouble."""
tags = ", ".join(t.get("term", "") for t in (obj.get("tags") or []) if t.get("term"))[:200]
facts = "\n".join(f"{k}: {v}" for k, v in (
("Title", obj.get("title")), ("Artist", obj.get("artistDisplayName")),
("Artist bio", obj.get("artistDisplayBio")), ("Date", obj.get("objectDate")),
("Medium", obj.get("medium")), ("Type", obj.get("objectName")),
("Classification", obj.get("classification")), ("Culture", obj.get("culture")),
("Period", obj.get("period")), ("Depicts", tags),
) if v)
user = f"Catalogue details:\n{facts}\n\nWrite the note."
try:
out = client.chat_text([{"role": "system", "content": _BLURB_SYSTEM},
{"role": "user", "content": user}]) or ""
except Exception: # noqa: BLE001
return None
out = " ".join(out.replace("*", "").replace("_", " ").split()).strip()[:600] # no stray markdown
return out or None
def _candidates(conn: sqlite3.Connection, art_date: str, source: str) -> list[int]: def _candidates(conn: sqlite3.Connection, art_date: str, source: str) -> list[int]:
"""The N least-recently-shown pool IDs, rotated deterministically by the date so the """The N least-recently-shown pool IDs, rotated deterministically by the date so the
same piece shows for everyone that day and pieces don't repeat soon.""" same piece shows for everyone that day and pieces don't repeat soon."""
@@ -148,7 +211,7 @@ def _candidates(conn: sqlite3.Connection, art_date: str, source: str) -> list[in
def pick_daily(conn: sqlite3.Connection, art_date: str | None = None, source: str = "met", def pick_daily(conn: sqlite3.Connection, art_date: str | None = None, source: str = "met",
force: bool = False) -> dict | None: force: bool = False, client=None) -> dict | None:
"""Pick + cache the day's art. Idempotent (skips if today's already done unless force). """Pick + cache the day's art. Idempotent (skips if today's already done unless force).
Tries successive candidates so a bad object/image never breaks the day; returns the Tries successive candidates so a bad object/image never breaks the day; returns the
stored row, or None if nothing could be fetched (caller keeps the prior day's piece).""" stored row, or None if nothing could be fetched (caller keeps the prior day's piece)."""
@@ -166,21 +229,23 @@ def pick_daily(conn: sqlite3.Connection, art_date: str | None = None, source: st
fname = _download_image(obj, oid) fname = _download_image(obj, oid)
if not fname: if not fname:
continue continue
# All network work is done above; only now do we open a brief write txn + commit. # All network/LLM/compute is done up front; only then a brief write txn + commit.
palette = json.dumps(_palette(cache_dir() / fname)) or None
blurb = _blurb(client, obj) if client else None
conn.execute( conn.execute(
"INSERT INTO daily_art (art_date, source, object_id, title, artist, date_text, medium, " "INSERT INTO daily_art (art_date, source, object_id, title, artist, date_text, medium, "
"department, credit, source_url, image_file, image_url_full, is_public_domain) " "department, credit, source_url, image_file, image_url_full, is_public_domain, blurb, palette) "
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?) " "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
"ON CONFLICT(art_date) DO UPDATE SET object_id=excluded.object_id, title=excluded.title, " "ON CONFLICT(art_date) DO UPDATE SET object_id=excluded.object_id, title=excluded.title, "
"artist=excluded.artist, date_text=excluded.date_text, medium=excluded.medium, " "artist=excluded.artist, date_text=excluded.date_text, medium=excluded.medium, "
"department=excluded.department, credit=excluded.credit, source_url=excluded.source_url, " "department=excluded.department, credit=excluded.credit, source_url=excluded.source_url, "
"image_file=excluded.image_file, image_url_full=excluded.image_url_full, " "image_file=excluded.image_file, image_url_full=excluded.image_url_full, "
"is_public_domain=excluded.is_public_domain", "is_public_domain=excluded.is_public_domain, blurb=excluded.blurb, palette=excluded.palette",
(art_date, source, oid, obj.get("title") or "Untitled", (art_date, source, oid, obj.get("title") or "Untitled",
obj.get("artistDisplayName") or None, obj.get("objectDate") or None, obj.get("artistDisplayName") or None, obj.get("objectDate") or None,
obj.get("medium") or None, obj.get("department") or None, obj.get("medium") or None, obj.get("department") or None,
obj.get("creditLine") or None, obj.get("objectURL") or None, fname, obj.get("creditLine") or None, obj.get("objectURL") or None, fname,
obj.get("primaryImage") or None, 1 if obj.get("isPublicDomain") else 0), obj.get("primaryImage") or None, 1 if obj.get("isPublicDomain") else 0, blurb, palette),
) )
conn.execute("UPDATE art_pool SET shown_at=? WHERE source=? AND object_id=?", conn.execute("UPDATE art_pool SET shown_at=? WHERE source=? AND object_id=?",
(art_date, source, oid)) (art_date, source, oid))
@@ -199,13 +264,13 @@ def get_today(conn: sqlite3.Connection, art_date: str | None = None) -> dict | N
return dict(row) if row else None return dict(row) if row else None
def run_daily(conn: sqlite3.Connection, source: str = "met") -> dict: def run_daily(conn: sqlite3.Connection, source: str = "met", client=None) -> dict:
"""Cycle entry point: ensure the pool exists, then ensure today has a piece. Bounded """Cycle entry point: ensure the pool exists, then ensure today has a piece. Bounded
and non-fatal — safe to call every cycle (it no-ops once the day is picked).""" and non-fatal — safe to call every cycle (it no-ops once the day is picked)."""
pool = conn.execute("SELECT COUNT(*) FROM art_pool WHERE source=?", (source,)).fetchone()[0] pool = conn.execute("SELECT COUNT(*) FROM art_pool WHERE source=?", (source,)).fetchone()[0]
harvested = None harvested = None
if pool == 0: if pool == 0:
harvested = harvest_pool(conn, source=source) harvested = harvest_pool(conn, source=source)
picked = pick_daily(conn, source=source) picked = pick_daily(conn, source=source, client=client)
return {"pool": conn.execute("SELECT COUNT(*) FROM art_pool WHERE source=?", (source,)).fetchone()[0], return {"pool": conn.execute("SELECT COUNT(*) FROM art_pool WHERE source=?", (source,)).fetchone()[0],
"harvested": harvested, "picked_object": picked.get("object_id") if picked else None} "harvested": harvested, "picked_object": picked.get("object_id") if picked else None}
+2 -2
View File
@@ -319,7 +319,7 @@ def main() -> None:
if args.harvest: if args.harvest:
h = art.harvest_pool(conn) h = art.harvest_pool(conn)
print(f"art harvest: found={h['found']} added={h['added']} pool={h['pool']} errors={h['errors']}") print(f"art harvest: found={h['found']} added={h['added']} pool={h['pool']} errors={h['errors']}")
picked = art.pick_daily(conn, force=args.force) picked = art.pick_daily(conn, force=args.force, client=LocalModelClient.from_env())
if picked: if picked:
print(f"art pick: {picked['art_date']} -> #{picked['object_id']} " print(f"art pick: {picked['art_date']} -> #{picked['object_id']} "
f"\"{picked['title']}\"{picked['artist'] or 'Unknown'}") f"\"{picked['title']}\"{picked['artist'] or 'Unknown'}")
@@ -556,7 +556,7 @@ def _run_cycle_locked(conn: sqlite3.Connection, args: argparse.Namespace) -> Non
# once the day is picked; non-fatal like every other step. # once the day is picked; non-fatal like every other step.
if not args.no_art: if not args.no_art:
try: try:
a = art.run_daily(conn) a = art.run_daily(conn, client=LocalModelClient.from_env()) # client → the guide blurb
print(f"art: pool={a['pool']} picked={a['picked_object']}") print(f"art: pool={a['pool']} picked={a['picked_object']}")
except Exception as exc: except Exception as exc:
print(f"art: skipped ({exc})") print(f"art: skipped ({exc})")
+5
View File
@@ -273,6 +273,8 @@ CREATE TABLE IF NOT EXISTS daily_art (
image_file TEXT, -- our cached (web-large) image image_file TEXT, -- our cached (web-large) image
image_url_full TEXT, -- source full-res URL, for a later richer /art view image_url_full TEXT, -- source full-res URL, for a later richer /art view
is_public_domain INTEGER, -- license marker (CC0/public domain), stored for citizenship is_public_domain INTEGER, -- license marker (CC0/public domain), stored for citizenship
blurb TEXT, -- LLM "museum guide" note: what you're looking at (cached)
palette TEXT, -- JSON array of hex colors extracted from the image
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
); );
@@ -640,6 +642,9 @@ def _migrate(conn: sqlite3.Connection) -> None:
conn.execute("ALTER TABLE daily_art ADD COLUMN image_url_full TEXT") conn.execute("ALTER TABLE daily_art ADD COLUMN image_url_full TEXT")
if art_cols and "is_public_domain" not in art_cols: if art_cols and "is_public_domain" not in art_cols:
conn.execute("ALTER TABLE daily_art ADD COLUMN is_public_domain INTEGER") conn.execute("ALTER TABLE daily_art ADD COLUMN is_public_domain INTEGER")
for column in ("blurb", "palette"): # richer /art page: guide note + extracted colors
if art_cols and column not in art_cols:
conn.execute(f"ALTER TABLE daily_art ADD COLUMN {column} TEXT")
# feedback.read_at (admin inbox read/unread) added later. # feedback.read_at (admin inbox read/unread) added later.
fb_cols = {row["name"] for row in conn.execute("PRAGMA table_info(feedback)")} fb_cols = {row["name"] for row in conn.execute("PRAGMA table_info(feedback)")}
+34
View File
@@ -0,0 +1,34 @@
"""One-off: migrate daily_art (blurb/palette) and backfill the most recent picks in place
— re-fetch Met metadata for the guide blurb, extract the palette from the cached image —
WITHOUT re-picking (keeps each day's existing piece). Run on the host with LLM env sourced."""
import json
import os
from goodnews import art
from goodnews.db import connect, init_db
from goodnews.llm import LocalModelClient
conn = connect(os.environ.get("GOODNEWS_DB", "data/goodnews.sqlite3"))
try:
    init_db(conn)  # idempotent migration: adds blurb/palette
    client = LocalModelClient.from_env()

    # Rows still missing either field, newest first. The stored palette is fetched too
    # so a row selected only for its missing blurb keeps the palette it already has.
    rows = conn.execute(
        "SELECT art_date, object_id, image_file, title, palette FROM daily_art "
        "WHERE blurb IS NULL OR palette IS NULL ORDER BY art_date DESC LIMIT 8"
    ).fetchall()
    print(f"rows to backfill: {len(rows)}")

    for r in rows:
        img = art.cache_dir() / r["image_file"] if r["image_file"] else None
        colors = art._palette(img) if (img and img.exists()) else []
        if colors or r["palette"] is None:
            palette = json.dumps(colors)
        else:
            # Extraction produced nothing (image gone/undecodable): don't clobber
            # a palette that was stored earlier.
            palette = r["palette"]
        blurb = None
        try:
            blurb = art._blurb(client, art._object(r["object_id"]))
        except Exception as exc:  # noqa: BLE001
            print(f"  blurb fetch failed for {r['object_id']}: {exc}")
        # COALESCE keeps an existing blurb when this run could not produce one.
        conn.execute("UPDATE daily_art SET blurb=COALESCE(?, blurb), palette=? WHERE art_date=?",
                     (blurb, palette, r["art_date"]))
        conn.commit()  # commit per row so finished rows survive a later failure
        print(f"\n{r['art_date']} · #{r['object_id']} · {r['title']}")
        print(f"  palette: {palette}")
        print(f"  blurb:   {blurb}")
finally:
    conn.close()  # release the database even if a row blows up mid-run
+52
View File
@@ -44,6 +44,58 @@ def test_harvest_dedupes_into_pool(conn):
assert art.harvest_pool(conn)["added"] == 0 # idempotent assert art.harvest_pool(conn)["added"] == 0 # idempotent
def test_palette_extracts_hex_colors(tmp_path):
    """A two-tone image yields between 1 and n palette entries, each a '#rrggbb' string."""
    from PIL import Image

    target = tmp_path / "img.png"
    canvas = Image.new("RGB", (60, 60), (200, 30, 30))  # mostly red...
    canvas.paste((30, 150, 70), (0, 0, 60, 30))  # ...top half green
    canvas.save(target)

    colors = art._palette(target, n=3)

    assert 1 <= len(colors) <= 3
    for color in colors:
        assert color.startswith("#") and len(color) == 7
def test_palette_bad_image_is_empty(tmp_path):
    """Bytes that merely look like a JPEG header give an empty palette, not an error."""
    bogus = tmp_path / "bad.jpg"
    payload = b"\xff\xd8\xff" + b"x" * 500  # not a decodable image
    bogus.write_bytes(payload)

    colors = art._palette(bogus)

    assert colors == []
class _FakeClient:
def __init__(self, text="A quiet wheat field at dusk."):
self.text, self.seen = text, None
def chat_text(self, messages):
self.seen = messages
return self.text
def test_blurb_grounds_in_metadata_and_cleans():
    """The reply is whitespace-trimmed and the catalogue facts reach the user message."""
    fake = _FakeClient("  A returning soldier in a golden field. \n")
    record = {
        "title": "The Veteran",
        "artistDisplayName": "Homer",
        "medium": "Oil on canvas",
        "tags": [{"term": "wheat"}, {"term": "scythe"}],
    }

    note = art._blurb(fake, record)

    assert note == "A returning soldier in a golden field."
    prompt = fake.seen[-1]["content"]
    # catalogue facts fed in
    assert "Homer" in prompt
    assert "Oil on canvas" in prompt
    assert "wheat" in prompt
def test_blurb_none_on_error_or_empty():
    """A client that raises, or one that replies with only whitespace, gives no blurb."""

    class Bad:
        def chat_text(self, m):
            raise RuntimeError("down")

    record = {"title": "X"}
    failing = art._blurb(Bad(), record)
    blank = art._blurb(_FakeClient("   "), record)

    assert failing is None
    assert blank is None
def test_pick_stores_blurb_and_palette(conn):
    """pick_daily stores the client's note and a JSON palette; with no client the blurb is None."""
    art.harvest_pool(conn)

    guided = art.pick_daily(conn, art_date="2026-06-21", client=_FakeClient("A quiet masterwork."))
    assert guided["blurb"] == "A quiet masterwork."
    # fixture image isn't decodable → empty palette, stored as JSON
    assert guided["palette"] == "[]"

    # no client → no blurb, pick still succeeds
    unguided = art.pick_daily(conn, art_date="2026-06-22")
    assert unguided["blurb"] is None
def test_pick_caches_image_metadata_and_marks_shown(conn): def test_pick_caches_image_metadata_and_marks_shown(conn):
art.harvest_pool(conn) art.harvest_pool(conn)
a = art.pick_daily(conn, art_date="2026-06-21") a = art.pick_daily(conn, art_date="2026-06-21")
+4
View File
@@ -23,6 +23,8 @@ def client(tmp_path, monkeypatch):
"VALUES ('2026-06-21','met',10154,'Lander''s Peak','Bierstadt','1863','Oil','Paintings'," "VALUES ('2026-06-21','met',10154,'Lander''s Peak','Bierstadt','1863','Oil','Paintings',"
"'Gift','https://met/10154','10154.jpg','https://met/full.jpg',1)" "'Gift','https://met/10154','10154.jpg','https://met/full.jpg',1)"
) )
c.execute("UPDATE daily_art SET blurb=?, palette=? WHERE object_id=10154",
("A luminous western vista.", '["#7fb4cf", "#c79a3c"]'))
c.commit(); c.close() c.commit(); c.close()
cache.mkdir(parents=True, exist_ok=True) cache.mkdir(parents=True, exist_ok=True)
(cache / "10154.jpg").write_bytes(b"\xff\xd8\xff" + b"x" * 5000) # web-large display copy (cache / "10154.jpg").write_bytes(b"\xff\xd8\xff" + b"x" * 5000) # web-large display copy
@@ -43,3 +45,5 @@ def test_today_exposes_full_res_url(client):
assert a["image_url"] == "/api/art/image/10154" assert a["image_url"] == "/api/art/image/10154"
assert a["image_url_large"] == "/api/art/image/10154?size=full" assert a["image_url_large"] == "/api/art/image/10154?size=full"
assert a["license"] == "Public Domain (CC0)" and a["museum"] == "The Met" assert a["license"] == "Public Domain (CC0)" and a["museum"] == "The Met"
assert a["blurb"] == "A luminous western vista."
assert a["palette"] == ["#7fb4cf", "#c79a3c"] # parsed from stored JSON