Daily Art: Codex guardrails (atomic image, attribution/license, blocked lever)

Hardening before it runs further on the cycle:
- DB-lock/network: all HTTP (metadata + image) happens before any write; the write
  transaction opens only for the short final INSERT and commits immediately. Images
  download to a temp file and are then moved into the cache with an atomic os.replace
  (a reader never sees a half-written file).
- Site-timezone "daily" already used local_today() (same rhythm as the Brief) — confirmed.
- Attribution from day one: store + return title/artist/date/medium/department/credit/
  source_url/object_id/source + the is_public_domain license marker; the API response
  also adds the museum display name and a license label derived from that marker. The
  full-res source URL is stored as well (not returned yet; kept for a richer /art view
  later). UI can show: Title · Artist · The Met.
- "highlight != always beautiful": added a manual `blocked` flag on art_pool (excluded
  from picks) as the cheap curation lever; a featured override can follow.

Schema migrated (existing art tables get the new columns). 373 tests green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jay
2026-06-21 15:28:38 -04:00
parent 308516a263
commit db967bb7fa
4 changed files with 50 additions and 10 deletions
+5 -1
View File
@@ -2267,11 +2267,15 @@ def create_app() -> FastAPI:
response.headers["Cache-Control"] = _PRIVATE
raise HTTPException(status_code=404, detail="No art yet.")
response.headers["Cache-Control"] = _EDGE_FEED # one piece a day, same for everyone
museums = {"met": "The Met", "aic": "Art Institute of Chicago", "si": "Smithsonian"}
return {
"date": a["art_date"], "object_id": a["object_id"], "title": a["title"],
"artist": a["artist"], "date_text": a["date_text"], "medium": a["medium"],
"department": a["department"], "credit": a["credit"], "source_url": a["source_url"],
"source": a["source"], "image_url": f"/api/art/image/{a['object_id']}",
"source": a["source"], "museum": museums.get(a["source"], a["source"]),
"is_public_domain": bool(a["is_public_domain"]),
"license": "Public Domain (CC0)" if a["is_public_domain"] else None,
"image_url": f"/api/art/image/{a['object_id']}",
}
@app.get("/api/art/image/{object_id}")
+19 -6
View File
@@ -92,7 +92,8 @@ def _object(object_id: int) -> dict:
def _download_image(obj: dict, object_id: int) -> str | None:
"""Download the web-large (then full) image to our cache; return the filename or None."""
"""Download the web-large (then full) image to our cache; return the filename or None.
Writes to a temp file then atomically renames, so a reader never sees a half-file."""
for key in ("primaryImageSmall", "primaryImage"):
url = obj.get(key)
if not url:
@@ -105,9 +106,16 @@ def _download_image(obj: dict, object_id: int) -> str | None:
continue
ext = ".png" if "png" in ctype else ".jpg"
fname = f"{object_id}{ext}"
cdir = cache_dir()
tmp = cdir / f".{object_id}.tmp"
try:
(cache_dir() / fname).write_bytes(data)
tmp.write_bytes(data)
os.replace(tmp, cdir / fname) # atomic
except OSError:
try:
tmp.unlink()
except OSError:
pass
return None
return fname
return None
@@ -117,7 +125,8 @@ def _candidates(conn: sqlite3.Connection, art_date: str, source: str) -> list[in
"""The N least-recently-shown pool IDs, rotated deterministically by the date so the
same piece shows for everyone that day and pieces don't repeat soon."""
rows = conn.execute(
"SELECT object_id FROM art_pool WHERE source=? ORDER BY shown_at IS NOT NULL, shown_at, object_id LIMIT ?",
"SELECT object_id FROM art_pool WHERE source=? AND blocked=0 "
"ORDER BY shown_at IS NOT NULL, shown_at, object_id LIMIT ?",
(source, _NO_REPEAT_POOL),
).fetchall()
ids = [r[0] for r in rows]
@@ -146,17 +155,21 @@ def pick_daily(conn: sqlite3.Connection, art_date: str | None = None, source: st
fname = _download_image(obj, oid)
if not fname:
continue
# All network work is done above; only now do we open a brief write txn + commit.
conn.execute(
"INSERT INTO daily_art (art_date, source, object_id, title, artist, date_text, medium, "
"department, credit, source_url, image_file) VALUES (?,?,?,?,?,?,?,?,?,?,?) "
"department, credit, source_url, image_file, image_url_full, is_public_domain) "
"VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?) "
"ON CONFLICT(art_date) DO UPDATE SET object_id=excluded.object_id, title=excluded.title, "
"artist=excluded.artist, date_text=excluded.date_text, medium=excluded.medium, "
"department=excluded.department, credit=excluded.credit, source_url=excluded.source_url, "
"image_file=excluded.image_file",
"image_file=excluded.image_file, image_url_full=excluded.image_url_full, "
"is_public_domain=excluded.is_public_domain",
(art_date, source, oid, obj.get("title") or "Untitled",
obj.get("artistDisplayName") or None, obj.get("objectDate") or None,
obj.get("medium") or None, obj.get("department") or None,
obj.get("creditLine") or None, obj.get("objectURL") or None, fname),
obj.get("creditLine") or None, obj.get("objectURL") or None, fname,
obj.get("primaryImage") or None, 1 if obj.get("isPublicDomain") else 0),
)
conn.execute("UPDATE art_pool SET shown_at=? WHERE source=? AND object_id=?",
(art_date, source, oid))
+16 -3
View File
@@ -253,6 +253,7 @@ CREATE TABLE IF NOT EXISTS art_pool (
object_id INTEGER NOT NULL,
source TEXT NOT NULL DEFAULT 'met',
shown_at TEXT,
blocked INTEGER NOT NULL DEFAULT 0, -- manual lever: skip an odd/unsuitable piece
added_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (source, object_id)
);
@@ -265,9 +266,11 @@ CREATE TABLE IF NOT EXISTS daily_art (
date_text TEXT,
medium TEXT,
department TEXT,
credit TEXT,
source_url TEXT,
image_file TEXT,
credit TEXT, -- museum credit line
source_url TEXT, -- canonical museum object page
image_file TEXT, -- our cached (web-large) image
image_url_full TEXT, -- source full-res URL, for a later richer /art view
is_public_domain INTEGER, -- license marker (CC0/public domain), stored for citizenship
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
);
@@ -541,6 +544,16 @@ def _migrate(conn: sqlite3.Connection) -> None:
if "retry_after_at" not in source_cols:
conn.execute("ALTER TABLE sources ADD COLUMN retry_after_at TEXT")
# Daily Art columns added after the tables first shipped.
pool_cols = {row["name"] for row in conn.execute("PRAGMA table_info(art_pool)")}
if pool_cols and "blocked" not in pool_cols:
conn.execute("ALTER TABLE art_pool ADD COLUMN blocked INTEGER NOT NULL DEFAULT 0")
art_cols = {row["name"] for row in conn.execute("PRAGMA table_info(daily_art)")}
if art_cols and "image_url_full" not in art_cols:
conn.execute("ALTER TABLE daily_art ADD COLUMN image_url_full TEXT")
if art_cols and "is_public_domain" not in art_cols:
conn.execute("ALTER TABLE daily_art ADD COLUMN is_public_domain INTEGER")
# feedback.read_at (admin inbox read/unread) added later.
fb_cols = {row["name"] for row in conn.execute("PRAGMA table_info(feedback)")}
if fb_cols and "read_at" not in fb_cols:
+10
View File
@@ -49,7 +49,17 @@ def test_pick_caches_image_metadata_and_marks_shown(conn):
a = art.pick_daily(conn, art_date="2026-06-21")
assert a and a["object_id"] in (1, 3) and a["title"] in ("Sunflowers", "Irises")
assert a["artist"] == "Van Gogh" and a["image_file"]
assert a["is_public_domain"] == 1 # license marker stored
assert list(art.cache_dir().glob(f"{a['object_id']}.*")) # image cached locally
assert not list(art.cache_dir().glob("*.tmp")) # atomic write left no temp file
def test_blocked_pieces_are_never_picked(conn):
    """A pool entry flagged ``blocked=1`` must never become the daily pick.

    Harvests the pool, blocks object 1, then picks for a fixed date. The pick
    may legitimately be ``None`` (nothing else was usable); when a piece is
    returned it must not be the blocked one, and its ``shown_at`` in
    ``art_pool`` must be stamped with the pick date.
    """
    art.harvest_pool(conn)
    conn.execute("UPDATE art_pool SET blocked=1 WHERE object_id=1")  # block the good one
    conn.commit()
    a = art.pick_daily(conn, art_date="2026-06-21")
    assert a is None or a["object_id"] != 1  # never the blocked piece
    if a is None:
        # The assertion above allows "no pick"; there is then no pool row to
        # inspect, and subscripting None would raise TypeError instead of
        # letting the test pass.
        return
    shown = conn.execute(
        "SELECT shown_at FROM art_pool WHERE object_id=?", (a["object_id"],)
    ).fetchone()[0]
    assert shown == "2026-06-21"