"""Daily Art — the /art room. One gorgeous public-domain masterwork a day, picked from a curated pool of museum highlights and cached to OUR origin (image + metadata), so the homepage never waits on, nor hotlinks, the museum. Source: The Met Collection API (no key; public-domain works are CC0 — free, unrestricted, caching encouraged). Curation = isHighlight + isPublicDomain + hasImages, so the pool is masterworks, never potsherds. Built to be bulletproof: a failed pick falls through to the next candidate, and a failed day keeps yesterday's piece — the room is never empty. Network calls go through module-level _http_* helpers so tests can monkeypatch them. """ from __future__ import annotations import hashlib import json import os import sqlite3 import urllib.error import urllib.request from pathlib import Path from .localtime import local_today MET_BASE = "https://collectionapi.metmuseum.org/public/collection/v1" # Broad, visual, museum-grade terms. Each is filtered to public-domain highlights with # images, then deduped — a diverse pool of a few thousand masterworks. 
HARVEST_QUERIES = ("painting", "portrait", "landscape", "still life", "flowers",
                   "sculpture", "drawing", "garden", "river", "sunset")

_UA = {"User-Agent": "upbeatBytes/1.0 (+https://upbeatbytes.com)"}
_PICK_ATTEMPTS = 8       # candidates to try before giving up for the day
_NO_REPEAT_POOL = 40     # pick the daily piece from the N least-recently-shown
_MIN_IMAGE_BYTES = 3000  # smaller than this = not a real image


def _http_json(url: str, timeout: int = 20) -> dict:
    """GET *url* with the module User-Agent and decode the body as UTF-8 JSON.

    Errors from urllib / json propagate; each caller decides what is fatal.
    """
    req = urllib.request.Request(url, headers=_UA)
    with urllib.request.urlopen(req, timeout=timeout) as r:
        return json.loads(r.read().decode("utf-8"))


def _http_bytes(url: str, timeout: int = 30) -> tuple[bytes, str]:
    """GET *url* and return ``(body, content_type)``; content type is "" when absent."""
    req = urllib.request.Request(url, headers=_UA)
    with urllib.request.urlopen(req, timeout=timeout) as r:
        return r.read(), (r.headers.get("Content-Type") or "")


def cache_dir() -> Path:
    """Where cached images live — beside the DB, so the host cycle writes and the API
    container reads the same mounted volume.

    ``GOODNEWS_ART_CACHE`` overrides the location; otherwise it is ``art_cache`` next to
    the file named by ``GOODNEWS_DB`` (default ``data/goodnews.sqlite3``). The directory
    is created on every call if it does not exist yet.
    """
    override = os.environ.get("GOODNEWS_ART_CACHE")
    if override:
        d = Path(override)
    else:
        db = Path(os.environ.get("GOODNEWS_DB", "data/goodnews.sqlite3"))
        d = db.parent / "art_cache"
    d.mkdir(parents=True, exist_ok=True)
    return d


# --- harvest: build the curated pool of object IDs -------------------------------

def harvest_pool(conn: sqlite3.Connection, queries=HARVEST_QUERIES, source: str = "met") -> dict:
    """Query the Met for public-domain highlight images across broad art terms and store
    the deduped object IDs in ``art_pool``.

    Cheap: each search returns all matching IDs in one call. A failing query is counted
    in ``errors`` and skipped; it never aborts the harvest.

    Returns a summary dict with keys ``queried``, ``errors``, ``found``, ``added``, ``pool``.
    """
    # urllib.parse is the documented home of quote(); urllib.request merely re-exports it.
    from urllib.parse import quote

    found: set[int] = set()
    errors = 0
    for q in queries:
        url = (f"{MET_BASE}/search?isHighlight=true&hasImages=true&isPublicDomain=true"
               f"&q={quote(q)}")
        try:
            data = _http_json(url)
            for oid in (data.get("objectIDs") or []):
                if isinstance(oid, int):
                    found.add(oid)
        except Exception:  # noqa: BLE001 — non-fatal per query
            errors += 1

    count_sql = "SELECT COUNT(*) FROM art_pool WHERE source=?"
    before = conn.execute(count_sql, (source,)).fetchone()[0]
    conn.executemany(
        "INSERT OR IGNORE INTO art_pool (source, object_id) VALUES (?, ?)",
        [(source, oid) for oid in found],
    )
    conn.commit()
    after = conn.execute(count_sql, (source,)).fetchone()[0]
    return {"queried": len(queries), "errors": errors, "found": len(found),
            "added": after - before, "pool": after}


# --- daily pick: choose, fetch, cache --------------------------------------------

def _object(object_id: int) -> dict:
    """Fetch the Met catalogue record for one object ID."""
    return _http_json(f"{MET_BASE}/objects/{object_id}")


def _fetch_to_cache(url: str | None, stem: str) -> str | None:
    """Download one image URL to the cache as ``{stem}.{ext}``; return the filename or None.

    None is returned when *url* is empty, the download fails, the response is not an
    ``image/*`` of at least ``_MIN_IMAGE_BYTES`` bytes, or the file cannot be written.
    Writes to a temp file then atomically renames, so a reader never sees a half-file.
    """
    if not url:
        return None
    try:
        data, ctype = _http_bytes(url)
    except Exception:  # noqa: BLE001
        return None
    if not ctype.startswith("image/") or len(data) < _MIN_IMAGE_BYTES:
        return None

    ext = ".png" if "png" in ctype else ".jpg"
    fname = f"{stem}{ext}"
    cdir = cache_dir()
    tmp = cdir / f".{stem}.tmp"
    try:
        tmp.write_bytes(data)
        os.replace(tmp, cdir / fname)  # atomic
    except OSError:
        # Best-effort cleanup of the partial temp file; the caller only sees "no image".
        try:
            tmp.unlink()
        except OSError:
            pass
        return None
    return fname


def _download_image(obj: dict, object_id: int) -> str | None:
    """Cache the day's images to our origin.

    Stores the web-large display copy as ``{id}.{ext}`` (what the page shows) and, when
    available, the full-resolution copy as ``{id}-full.{ext}`` (what the lightbox opens,
    so zoom fills the screen). Returns the display filename, or None if even the display
    copy couldn't be fetched.
    """
    display = _fetch_to_cache(obj.get("primaryImageSmall") or obj.get("primaryImage"),
                              str(object_id))
    if not display:
        return None
    full_url = obj.get("primaryImage")
    if full_url and full_url != obj.get("primaryImageSmall"):
        _fetch_to_cache(full_url, f"{object_id}-full")  # best-effort hi-res for zoom
    return display


def _palette(image_path: "Path", n: int = 5) -> list[str]:
    """Extract ~n representative hex colors from the cached image (for the 'colors in
    this piece' strip).

    Best-effort: any failure (including Pillow not being importable) → empty list, and
    the strip just hides.
    """
    try:
        from PIL import Image

        with Image.open(image_path) as im:
            im = im.convert("RGB")
            im.thumbnail((120, 120))  # tiny — palette, not fidelity
            # Adaptive median-cut to a small palette, then order by how much of the
            # image each colour covers.
            q = im.quantize(colors=max(n * 2, 8), method=Image.Quantize.MEDIANCUT)
            pal = q.getpalette()
            counts = sorted(q.getcolors(), reverse=True)  # [(count, index), ...] most-used first
            out, seen = [], set()
            for _count, idx in counts:
                r, g, b = pal[idx * 3], pal[idx * 3 + 1], pal[idx * 3 + 2]
                hexc = f"#{r:02x}{g:02x}{b:02x}"
                if hexc in seen:
                    continue
                seen.add(hexc)
                out.append(hexc)
                if len(out) >= n:
                    break
            return out
    except Exception:  # noqa: BLE001 — palette is decorative; never break the pick
        return []


_BLURB_SYSTEM = (
    "You are the calm, knowledgeable curator of a daily-art feature for a general audience — "
    "people who enjoy a beautiful painting but aren't art historians. In 2 to 3 warm, plain "
    "sentences, help them appreciate the piece and why it's worth a moment: its mood, the "
    "artist, the era or movement, and a little real context or significance.\n"
    "GROUNDING (important): the catalogue details below — especially the title and the "
    "'Depicts' tags — are your only reliable guide to the SUBJECT. You cannot actually see the "
    "image, so do NOT assert literal visual specifics you can't verify: do not state how many "
    "figures are shown, their exact poses or actions, colors, or background details. Lean on "
    "what's certain (title, tags, medium, date, artist, movement) and on feeling/significance. "
    "If you don't recognize the exact work, stay general and contextual rather than inventing. "
    "No preamble, no title repetition, no hype, no markdown — just the note."
)


def _blurb(client, obj: dict) -> str | None:
    """A short 'museum guide' note for the piece, grounded in the Met catalogue metadata.

    *client* must expose ``chat_text(messages)``. Best-effort: returns None when the
    client raises, returns something that is not a string, or the cleaned note is empty.
    Malformed ``tags`` entries are skipped rather than raised on, so a quirky catalogue
    record cannot abort the day's pick.
    """
    terms = [t.get("term") for t in (obj.get("tags") or []) if isinstance(t, dict)]
    tags = ", ".join(str(term) for term in terms if term)[:200]
    facts = "\n".join(f"{k}: {v}" for k, v in (
        ("Title", obj.get("title")),
        ("Artist", obj.get("artistDisplayName")),
        ("Artist bio", obj.get("artistDisplayBio")),
        ("Date", obj.get("objectDate")),
        ("Medium", obj.get("medium")),
        ("Type", obj.get("objectName")),
        ("Classification", obj.get("classification")),
        ("Culture", obj.get("culture")),
        ("Period", obj.get("period")),
        ("Depicts", tags),
    ) if v)
    user = f"Catalogue details:\n{facts}\n\nWrite the note."
    try:
        out = client.chat_text([{"role": "system", "content": _BLURB_SYSTEM},
                                {"role": "user", "content": user}]) or ""
    except Exception:  # noqa: BLE001
        return None
    if not isinstance(out, str):
        return None
    # Strip stray markdown, collapse whitespace, cap the length.
    out = " ".join(out.replace("*", "").replace("_", " ").split()).strip()[:600]
    return out or None


def _candidates(conn: sqlite3.Connection, art_date: str, source: str) -> list[int]:
    """The N least-recently-shown pool IDs, rotated deterministically by the date so the
    same piece shows for everyone that day and pieces don't repeat soon.

    Never-shown rows (``shown_at`` NULL) sort first; blocked rows are excluded.
    """
    rows = conn.execute(
        "SELECT object_id FROM art_pool WHERE source=? AND blocked=0 "
        "ORDER BY shown_at IS NOT NULL, shown_at, object_id LIMIT ?",
        (source, _NO_REPEAT_POOL),
    ).fetchall()
    ids = [r[0] for r in rows]
    if not ids:
        return ids
    seed = int(hashlib.sha256(art_date.encode()).hexdigest(), 16) % len(ids)
    return ids[seed:] + ids[:seed]  # rotate so the daily choice is stable but varies


def pick_daily(conn: sqlite3.Connection, art_date: str | None = None, source: str = "met",
               force: bool = False, client=None) -> dict | None:
    """Pick + cache the day's art.

    Idempotent: if a row for *art_date* already exists it is returned as-is unless
    *force* is set. Otherwise up to ``_PICK_ATTEMPTS`` candidates are tried in order, so
    a bad object/image never breaks the day and a network outage costs a bounded number
    of timeouts. Returns the stored row as a dict, or None if nothing could be fetched
    (the caller keeps the prior day's piece).

    Rows are converted with ``dict(row)``, so *conn* is assumed to use a mapping-style
    row factory such as ``sqlite3.Row``. *client* is optional; without it no blurb is
    written.
    """
    art_date = art_date or local_today()
    existing = conn.execute("SELECT * FROM daily_art WHERE art_date=?", (art_date,)).fetchone()
    if existing and not force:
        return dict(existing)

    for oid in _candidates(conn, art_date, source)[:_PICK_ATTEMPTS]:
        try:
            obj = _object(oid)
        except Exception:  # noqa: BLE001
            continue
        if not obj.get("isPublicDomain"):
            continue
        fname = _download_image(obj, oid)
        if not fname:
            continue

        # All network/LLM/compute is done up front; only then a brief write txn + commit.
        colors = _palette(cache_dir() / fname)
        # json.dumps([]) is the truthy string "[]", so test the list itself to store NULL
        # when no palette could be extracted.
        palette = json.dumps(colors) if colors else None
        blurb = _blurb(client, obj) if client else None
        conn.execute(
            "INSERT INTO daily_art (art_date, source, object_id, title, artist, date_text, medium, "
            "department, credit, source_url, image_file, image_url_full, is_public_domain, blurb, palette) "
            "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
            "ON CONFLICT(art_date) DO UPDATE SET object_id=excluded.object_id, title=excluded.title, "
            "artist=excluded.artist, date_text=excluded.date_text, medium=excluded.medium, "
            "department=excluded.department, credit=excluded.credit, source_url=excluded.source_url, "
            "image_file=excluded.image_file, image_url_full=excluded.image_url_full, "
            "is_public_domain=excluded.is_public_domain, blurb=excluded.blurb, palette=excluded.palette",
            (art_date, source, oid, obj.get("title") or "Untitled",
             obj.get("artistDisplayName") or None, obj.get("objectDate") or None,
             obj.get("medium") or None, obj.get("department") or None,
             obj.get("creditLine") or None, obj.get("objectURL") or None,
             fname, obj.get("primaryImage") or None,
             1 if obj.get("isPublicDomain") else 0, blurb, palette),
        )
        # Stamp the pool row so this piece moves to the back of the rotation.
        conn.execute("UPDATE art_pool SET shown_at=? WHERE source=? AND object_id=?",
                     (art_date, source, oid))
        conn.commit()
        return dict(conn.execute("SELECT * FROM daily_art WHERE art_date=?",
                                 (art_date,)).fetchone())
    return None  # nothing fetched today — get_today falls back to the latest piece


def get_today(conn: sqlite3.Connection, art_date: str | None = None) -> dict | None:
    """Today's piece if present, else the most recent one cached (room is never empty).

    Returns None only when ``daily_art`` has no rows at all.
    """
    if art_date:
        row = conn.execute("SELECT * FROM daily_art WHERE art_date=?", (art_date,)).fetchone()
        if row:
            return dict(row)
    row = conn.execute("SELECT * FROM daily_art ORDER BY art_date DESC LIMIT 1").fetchone()
    return dict(row) if row else None


def run_daily(conn: sqlite3.Connection, source: str = "met", client=None) -> dict:
    """Cycle entry point: ensure the pool exists, then ensure today has a piece.

    Harvests only when the pool for *source* is empty, then delegates to ``pick_daily``
    (which no-ops once the day is picked), so it is safe to call every cycle.

    Returns ``{"pool": <pool size>, "harvested": <harvest summary or None>,
    "picked_object": <object id or None>}``.
    """
    count_sql = "SELECT COUNT(*) FROM art_pool WHERE source=?"
    pool = conn.execute(count_sql, (source,)).fetchone()[0]
    harvested = None
    if pool == 0:
        harvested = harvest_pool(conn, source=source)
    picked = pick_daily(conn, source=source, client=client)
    return {"pool": conn.execute(count_sql, (source,)).fetchone()[0],
            "harvested": harvested,
            "picked_object": picked.get("object_id") if picked else None}