diff --git a/goodnews/api.py b/goodnews/api.py
index b0fc1a9..16b3af8 100644
--- a/goodnews/api.py
+++ b/goodnews/api.py
@@ -2294,6 +2294,8 @@ def create_app() -> FastAPI:
             "source": a["source"], "museum": museums.get(a["source"], a["source"]),
             "is_public_domain": bool(a["is_public_domain"]),
             "license": "Public Domain (CC0)" if a["is_public_domain"] else None,
+            "blurb": a.get("blurb"),
+            "palette": json.loads(a["palette"]) if a.get("palette") else [],
             "image_url": f"/api/art/image/{a['object_id']}",
             "image_url_large": f"/api/art/image/{a['object_id']}?size=full",
         }
diff --git a/goodnews/art.py b/goodnews/art.py
index d1047b4..4286bcb 100644
--- a/goodnews/art.py
+++ b/goodnews/art.py
@@ -132,6 +132,69 @@ def _download_image(obj: dict, object_id: int) -> str | None:
     return display
 
 
+def _palette(image_path: "Path", n: int = 5) -> list[str]:
+    """Extract ~n representative hex colors from the cached image (for the 'colors in this
+    piece' strip). Best-effort: any failure → empty list (the strip just hides)."""
+    try:
+        from PIL import Image
+        with Image.open(image_path) as im:
+            im = im.convert("RGB")
+            im.thumbnail((120, 120))  # tiny — palette, not fidelity
+            # Adaptive median-cut to a small palette, then order by how much of the image each covers.
+            q = im.quantize(colors=max(n * 2, 8), method=Image.Quantize.MEDIANCUT)
+            pal = q.getpalette()
+            counts = sorted(q.getcolors(), reverse=True)  # [(count, index), ...] most-used first
+        out, seen = [], set()
+        for _count, idx in counts:
+            r, g, b = pal[idx * 3], pal[idx * 3 + 1], pal[idx * 3 + 2]
+            hexc = f"#{r:02x}{g:02x}{b:02x}"
+            if hexc in seen:
+                continue
+            seen.add(hexc)
+            out.append(hexc)
+            if len(out) >= n:
+                break
+        return out
+    except Exception:  # noqa: BLE001 — palette is decorative; never break the pick
+        return []
+
+
+_BLURB_SYSTEM = (
+    "You are the calm, knowledgeable curator of a daily-art feature for a general audience — "
+    "people who enjoy a beautiful painting but aren't art historians. In 2 to 3 warm, plain "
+    "sentences, help them appreciate the piece and why it's worth a moment: its mood, the "
+    "artist, the era or movement, and a little real context or significance.\n"
+    "GROUNDING (important): the catalogue details below — especially the title and the "
+    "'Depicts' tags — are your only reliable guide to the SUBJECT. You cannot actually see the "
+    "image, so do NOT assert literal visual specifics you can't verify: do not state how many "
+    "figures are shown, their exact poses or actions, colors, or background details. Lean on "
+    "what's certain (title, tags, medium, date, artist, movement) and on feeling/significance. "
+    "If you don't recognize the exact work, stay general and contextual rather than inventing. "
+    "No preamble, no title repetition, no hype, no markdown — just the note."
+)
+
+
+def _blurb(client, obj: dict) -> str | None:
+    """A short 'museum guide' note for the piece, grounded in the Met catalogue metadata.
+    Best-effort + cached by the caller; returns None on any trouble."""
+    tags = ", ".join(t.get("term", "") for t in (obj.get("tags") or []) if t.get("term"))[:200]
+    facts = "\n".join(f"{k}: {v}" for k, v in (
+        ("Title", obj.get("title")), ("Artist", obj.get("artistDisplayName")),
+        ("Artist bio", obj.get("artistDisplayBio")), ("Date", obj.get("objectDate")),
+        ("Medium", obj.get("medium")), ("Type", obj.get("objectName")),
+        ("Classification", obj.get("classification")), ("Culture", obj.get("culture")),
+        ("Period", obj.get("period")), ("Depicts", tags),
+    ) if v)
+    user = f"Catalogue details:\n{facts}\n\nWrite the note."
+    try:
+        out = client.chat_text([{"role": "system", "content": _BLURB_SYSTEM},
+                                {"role": "user", "content": user}]) or ""
+    except Exception:  # noqa: BLE001
+        return None
+    out = " ".join(out.replace("*", "").replace("_", " ").split()).strip()[:600]  # no stray markdown
+    return out or None
+
+
 def _candidates(conn: sqlite3.Connection, art_date: str, source: str) -> list[int]:
     """The N least-recently-shown pool IDs, rotated deterministically by the date so the same
     piece shows for everyone that day and pieces don't repeat soon."""
@@ -148,7 +211,7 @@ def _candidates(conn: sqlite3.Connection, art_date: str, source: str) -> list[in
 
 
 def pick_daily(conn: sqlite3.Connection, art_date: str | None = None, source: str = "met",
-               force: bool = False) -> dict | None:
+               force: bool = False, client=None) -> dict | None:
     """Pick + cache the day's art. Idempotent (skips if today's already done unless force).
     Tries successive candidates so a bad object/image never breaks the day; returns the stored
     row, or None if nothing could be fetched (caller keeps the prior day's piece)."""
@@ -166,21 +229,23 @@ def pick_daily(conn: sqlite3.Connection, art_date: str | None = None, source: st
         fname = _download_image(obj, oid)
         if not fname:
             continue
-        # All network work is done above; only now do we open a brief write txn + commit.
+        # All network/LLM/compute is done up front; only then a brief write txn + commit.
+        palette = json.dumps(_palette(cache_dir() / fname))  # "[]" when extraction fails
+        blurb = _blurb(client, obj) if client else None
         conn.execute(
             "INSERT INTO daily_art (art_date, source, object_id, title, artist, date_text, medium, "
-            "department, credit, source_url, image_file, image_url_full, is_public_domain) "
-            "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?) "
+            "department, credit, source_url, image_file, image_url_full, is_public_domain, blurb, palette) "
+            "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
             "ON CONFLICT(art_date) DO UPDATE SET object_id=excluded.object_id, title=excluded.title, "
             "artist=excluded.artist, date_text=excluded.date_text, medium=excluded.medium, "
             "department=excluded.department, credit=excluded.credit, source_url=excluded.source_url, "
             "image_file=excluded.image_file, image_url_full=excluded.image_url_full, "
-            "is_public_domain=excluded.is_public_domain",
+            "is_public_domain=excluded.is_public_domain, blurb=excluded.blurb, palette=excluded.palette",
             (art_date, source, oid, obj.get("title") or "Untitled",
              obj.get("artistDisplayName") or None,
              obj.get("objectDate") or None, obj.get("medium") or None, obj.get("department") or None,
              obj.get("creditLine") or None, obj.get("objectURL") or None, fname,
-             obj.get("primaryImage") or None, 1 if obj.get("isPublicDomain") else 0),
+             obj.get("primaryImage") or None, 1 if obj.get("isPublicDomain") else 0, blurb, palette),
         )
         conn.execute("UPDATE art_pool SET shown_at=? WHERE source=? AND object_id=?",
                      (art_date, source, oid))
@@ -199,13 +264,13 @@ def get_today(conn: sqlite3.Connection, art_date: str | None = None) -> dict | N
     return dict(row) if row else None
 
 
-def run_daily(conn: sqlite3.Connection, source: str = "met") -> dict:
+def run_daily(conn: sqlite3.Connection, source: str = "met", client=None) -> dict:
     """Cycle entry point: ensure the pool exists, then ensure today has a piece.
     Bounded and non-fatal — safe to call every cycle (it no-ops once the day is picked)."""
     pool = conn.execute("SELECT COUNT(*) FROM art_pool WHERE source=?", (source,)).fetchone()[0]
     harvested = None
     if pool == 0:
         harvested = harvest_pool(conn, source=source)
-    picked = pick_daily(conn, source=source)
+    picked = pick_daily(conn, source=source, client=client)
     return {"pool": conn.execute("SELECT COUNT(*) FROM art_pool WHERE source=?", (source,)).fetchone()[0],
             "harvested": harvested, "picked_object": picked.get("object_id") if picked else None}
diff --git a/goodnews/cli.py b/goodnews/cli.py
index 41c8972..dede165 100644
--- a/goodnews/cli.py
+++ b/goodnews/cli.py
@@ -319,7 +319,7 @@ def main() -> None:
         if args.harvest:
             h = art.harvest_pool(conn)
             print(f"art harvest: found={h['found']} added={h['added']} pool={h['pool']} errors={h['errors']}")
-        picked = art.pick_daily(conn, force=args.force)
+        picked = art.pick_daily(conn, force=args.force, client=LocalModelClient.from_env())
         if picked:
             print(f"art pick: {picked['art_date']} -> #{picked['object_id']} "
                   f"\"{picked['title']}\" — {picked['artist'] or 'Unknown'}")
@@ -556,7 +556,7 @@ def _run_cycle_locked(conn: sqlite3.Connection, args: argparse.Namespace) -> Non
     # once the day is picked; non-fatal like every other step.
     if not args.no_art:
         try:
-            a = art.run_daily(conn)
+            a = art.run_daily(conn, client=LocalModelClient.from_env())  # client → the guide blurb
             print(f"art: pool={a['pool']} picked={a['picked_object']}")
         except Exception as exc:
             print(f"art: skipped ({exc})")
diff --git a/goodnews/db.py b/goodnews/db.py
index 121cea2..7bae853 100644
--- a/goodnews/db.py
+++ b/goodnews/db.py
@@ -273,6 +273,8 @@ CREATE TABLE IF NOT EXISTS daily_art (
     image_file TEXT,  -- our cached (web-large) image
     image_url_full TEXT,  -- source full-res URL, for a later richer /art view
     is_public_domain INTEGER,  -- license marker (CC0/public domain), stored for citizenship
+    blurb TEXT,  -- LLM "museum guide" note: what you're looking at (cached)
+    palette TEXT,  -- JSON array of hex colors extracted from the image
     created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
 );
 
@@ -640,6 +642,9 @@ def _migrate(conn: sqlite3.Connection) -> None:
         conn.execute("ALTER TABLE daily_art ADD COLUMN image_url_full TEXT")
     if art_cols and "is_public_domain" not in art_cols:
         conn.execute("ALTER TABLE daily_art ADD COLUMN is_public_domain INTEGER")
+    for column in ("blurb", "palette"):  # richer /art page: guide note + extracted colors
+        if art_cols and column not in art_cols:
+            conn.execute(f"ALTER TABLE daily_art ADD COLUMN {column} TEXT")
 
     # feedback.read_at (admin inbox read/unread) added later.
     fb_cols = {row["name"] for row in conn.execute("PRAGMA table_info(feedback)")}
diff --git a/scripts/art_blurb_palette_backfill.py b/scripts/art_blurb_palette_backfill.py
new file mode 100644
index 0000000..15b5aaf
--- /dev/null
+++ b/scripts/art_blurb_palette_backfill.py
@@ -0,0 +1,39 @@
+"""One-off: migrate daily_art (blurb/palette) and backfill the most recent picks in place
+— re-fetch Met metadata for the guide blurb, extract the palette from the cached image —
+WITHOUT re-picking (keeps each day's existing piece). Run on the host with LLM env sourced."""
+import json
+import os
+
+from goodnews import art
+from goodnews.db import connect, init_db
+from goodnews.llm import LocalModelClient
+
+conn = connect(os.environ.get("GOODNEWS_DB", "data/goodnews.sqlite3"))
+init_db(conn)  # idempotent migration: adds blurb/palette
+client = LocalModelClient.from_env()
+
+rows = conn.execute(
+    "SELECT art_date, object_id, image_file, title, blurb, palette FROM daily_art "
+    "WHERE blurb IS NULL OR palette IS NULL ORDER BY art_date DESC LIMIT 8"
+).fetchall()
+print(f"rows to backfill: {len(rows)}")
+for r in rows:
+    # A row is selected when EITHER column is NULL, so the other may already hold a good
+    # value: fill only what is missing instead of recomputing/overwriting both.
+    palette = r["palette"]
+    if palette is None:
+        img = art.cache_dir() / r["image_file"] if r["image_file"] else None
+        palette = json.dumps(art._palette(img)) if (img and img.exists()) else "[]"
+    blurb = r["blurb"]
+    if blurb is None:
+        try:
+            blurb = art._blurb(client, art._object(r["object_id"]))
+        except Exception as exc:  # noqa: BLE001
+            print(f" blurb fetch failed for {r['object_id']}: {exc}")
+    conn.execute("UPDATE daily_art SET blurb=?, palette=? WHERE art_date=?",
+                 (blurb, palette, r["art_date"]))
+    conn.commit()
+    print(f"\n{r['art_date']} · #{r['object_id']} · {r['title']}")
+    print(f" palette: {palette}")
+    print(f" blurb: {blurb}")
+conn.close()
diff --git a/tests/test_art.py b/tests/test_art.py
index 8108aa4..9c197f4 100644
--- a/tests/test_art.py
+++ b/tests/test_art.py
@@ -44,6 +44,58 @@ def test_harvest_dedupes_into_pool(conn):
     assert art.harvest_pool(conn)["added"] == 0  # idempotent
 
 
+def test_palette_extracts_hex_colors(tmp_path):
+    from PIL import Image
+    p = tmp_path / "img.png"
+    im = Image.new("RGB", (60, 60), (200, 30, 30))  # mostly red...
+    for x in range(60):
+        for y in range(30):
+            im.putpixel((x, y), (30, 150, 70))  # ...top half green
+    im.save(p)
+    cols = art._palette(p, n=3)
+    assert 1 <= len(cols) <= 3
+    assert all(c.startswith("#") and len(c) == 7 for c in cols)
+
+
+def test_palette_bad_image_is_empty(tmp_path):
+    p = tmp_path / "bad.jpg"
+    p.write_bytes(b"\xff\xd8\xff" + b"x" * 500)  # not a decodable image
+    assert art._palette(p) == []
+
+
+class _FakeClient:
+    def __init__(self, text="A quiet wheat field at dusk."):
+        self.text, self.seen = text, None
+    def chat_text(self, messages):
+        self.seen = messages
+        return self.text
+
+
+def test_blurb_grounds_in_metadata_and_cleans():
+    c = _FakeClient(" A returning soldier in a golden field. \n")
+    out = art._blurb(c, {"title": "The Veteran", "artistDisplayName": "Homer",
+                         "medium": "Oil on canvas", "tags": [{"term": "wheat"}, {"term": "scythe"}]})
+    assert out == "A returning soldier in a golden field."
+    user = c.seen[-1]["content"]
+    assert "Homer" in user and "Oil on canvas" in user and "wheat" in user  # catalogue facts fed in
+
+
+def test_blurb_none_on_error_or_empty():
+    class Bad:
+        def chat_text(self, m): raise RuntimeError("down")
+    assert art._blurb(Bad(), {"title": "X"}) is None
+    assert art._blurb(_FakeClient(" "), {"title": "X"}) is None
+
+
+def test_pick_stores_blurb_and_palette(conn):
+    art.harvest_pool(conn)
+    a = art.pick_daily(conn, art_date="2026-06-21", client=_FakeClient("A quiet masterwork."))
+    assert a["blurb"] == "A quiet masterwork."
+    assert a["palette"] == "[]"  # fixture image isn't decodable → empty palette, stored as JSON
+    b = art.pick_daily(conn, art_date="2026-06-22")  # no client → no blurb, pick still succeeds
+    assert b["blurb"] is None
+
+
 def test_pick_caches_image_metadata_and_marks_shown(conn):
     art.harvest_pool(conn)
     a = art.pick_daily(conn, art_date="2026-06-21")
diff --git a/tests/test_art_api.py b/tests/test_art_api.py
index 986fb96..a96c6bf 100644
--- a/tests/test_art_api.py
+++ b/tests/test_art_api.py
@@ -23,6 +23,8 @@ def client(tmp_path, monkeypatch):
         "VALUES ('2026-06-21','met',10154,'Lander''s Peak','Bierstadt','1863','Oil','Paintings',"
         "'Gift','https://met/10154','10154.jpg','https://met/full.jpg',1)"
     )
+    c.execute("UPDATE daily_art SET blurb=?, palette=? WHERE object_id=10154",
+              ("A luminous western vista.", '["#7fb4cf", "#c79a3c"]'))
     c.commit(); c.close()
     cache.mkdir(parents=True, exist_ok=True)
     (cache / "10154.jpg").write_bytes(b"\xff\xd8\xff" + b"x" * 5000)  # web-large display copy
@@ -43,3 +45,5 @@ def test_today_exposes_full_res_url(client):
     assert a["image_url"] == "/api/art/image/10154"
     assert a["image_url_large"] == "/api/art/image/10154?size=full"
     assert a["license"] == "Public Domain (CC0)" and a["museum"] == "The Met"
+    assert a["blurb"] == "A luminous western vista."
+    assert a["palette"] == ["#7fb4cf", "#c79a3c"]  # parsed from stored JSON