"""One-off: upgrade stored On This Day images from the blurry 330px thumbnail to a sharp, size-capped URL. Re-fetches the Wikimedia events for each stored MM-DD, matches by page_url, and rewrites image_url in onthisday_pool + daily_onthisday in place (no re-picking). Idempotent. Run on the host: python -m scripts.otd_image_upsize_backfill """ import os from goodnews import daily, onthisday from goodnews.db import connect conn = connect(os.environ.get("GOODNEWS_DB", "data/goodnews.sqlite3")) # distinct dates we've harvested mds = [r[0] for r in conn.execute( "SELECT DISTINCT md FROM onthisday_pool WHERE image_url LIKE '%/thumb/%px-%'").fetchall()] print(f"dates to refresh: {mds}") # page_url -> sharp image_url, from a fresh feed fetch per date best: dict[str, str] = {} for md in mds: mm, dd = md.split("-") data = daily.http_json(f"{onthisday.WIKI_BASE}/{mm}/{dd}") for e in (data.get("events") or []): page = (e.get("pages") or [{}])[0] or {} page_url = (((page.get("content_urls") or {}).get("desktop") or {}).get("page")) or None img = onthisday._best_image(page) if page_url and img: best[page_url] = img print(f" {md}: {len(data.get('events') or [])} events fetched") updated_pool = updated_daily = 0 for table in ("onthisday_pool", "daily_onthisday"): for page_url, new in best.items(): cur = conn.execute( f"UPDATE {table} SET image_url=? " f"WHERE page_url=? AND image_url LIKE '%/thumb/%px-%' AND image_url<>?", (new, page_url, new)) if table == "onthisday_pool": updated_pool += cur.rowcount else: updated_daily += cur.rowcount conn.commit() print(f"updated: onthisday_pool={updated_pool}, daily_onthisday={updated_daily}") # show a few results for r in conn.execute("SELECT md, year, image_url FROM onthisday_pool " "WHERE image_url IS NOT NULL ORDER BY md LIMIT 6").fetchall(): print(f" {r['md']} {r['year']}: {r['image_url']}") conn.close()