a55ba185a8
Blocker fixes for the image cache:
- /api/img/{id} now serves cache HITS ONLY and is restricted to ACCEPTED, CANONICAL
articles. It never fetches — the cycle (newsimg.warm) owns all fetching — so the
public endpoint has no SSRF/worker-exhaustion surface. Dropped 1-year immutable
caching (image_url can change) → public, max-age=86400.
- newsimg._safe_fetch: SSRF-safe (reuses enrich._host_is_public + _NoRedirect, http(s)
only, every redirect hop re-validated, body capped). _FetchError distinguishes
permanent refusals (negative-cached via a .fail marker) from transient errors (retry).
- _encode re-encodes only decoded RASTER images to WebP and REJECTS everything else
(SVG, undecodable, decompression bombs via MAX_IMAGE_PIXELS, pathological dimensions);
originals are never retained. prune() also sweeps stale .fail markers.
- Concurrency: fetching only runs inside the cycle lock; writes stay atomic.
Smaller fixes:
- share.py visible image has onerror→this.remove() (degrade to the text unfurl, no
broken icon when an image isn't cached yet).
- share-page Back follows history only on a SAME-ORIGIN referrer (never bounce to an
external site); menu now honors Escape + resets crossing back to desktop (HubBar parity).
Tests: private host, redirect-to-private, hostile SVG/non-image, transient-vs-permanent
failure, LRU prune, warm (accepted+canonical only, idempotent), cache-only endpoint
(404 on not-cached/unaccepted/duplicate, never fetches), share chrome parity. 441 pass.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
120 lines
5.7 KiB
Python
120 lines
5.7 KiB
Python
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
|
|
@pytest.fixture
|
|
def client(tmp_path, monkeypatch):
|
|
db = tmp_path / "t.sqlite3"
|
|
monkeypatch.setenv("GOODNEWS_DB", str(db))
|
|
monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")
|
|
import importlib
|
|
import goodnews.api as api
|
|
importlib.reload(api)
|
|
from goodnews.db import connect, init_db
|
|
c = connect(str(db)); init_db(c)
|
|
c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'BBC','http://s/f',5)")
|
|
# 1: accepted (renders), 2: rejected, 3: duplicate
|
|
c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url) "
|
|
"VALUES (1,1,'https://bbc.com/x','Water voles return','h1','https://img/v.jpg')")
|
|
c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash) VALUES (2,1,'https://bbc.com/y','Meh','h2')")
|
|
c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,duplicate_of) VALUES (3,1,'https://bbc.com/z','Dup','h3',1)")
|
|
c.execute("INSERT INTO article_scores (article_id,accepted,reason_text) VALUES (1,1,'A hopeful restoration story.')")
|
|
c.execute("INSERT INTO article_scores (article_id,accepted) VALUES (2,0)")
|
|
c.execute("INSERT INTO article_scores (article_id,accepted) VALUES (3,1)")
|
|
c.commit(); c.close()
|
|
return api.create_app()
|
|
|
|
|
|
def test_share_page_renders(client):
|
|
r = TestClient(client).get("/a/1")
|
|
assert r.status_code == 200
|
|
html = r.text
|
|
assert "Water voles return" in html
|
|
assert 'property="og:title"' in html and 'name="twitter:card"' in html
|
|
assert 'rel="canonical" href="https://upbeatbytes.com/a/1"' in html
|
|
assert 'https://bbc.com/x' in html # source link present
|
|
assert "A hopeful restoration story." in html
|
|
assert 'content="https://img/v.jpg"' in html # og image
|
|
|
|
|
|
def test_share_page_missing_and_malformed(client):
|
|
tc = TestClient(client)
|
|
assert tc.get("/a/999").status_code == 404 # unknown
|
|
assert tc.get("/a/not-a-number").status_code == 404 # malformed → calm 404
|
|
assert tc.get("/a/2").status_code == 404 # rejected article
|
|
|
|
|
|
def test_share_page_duplicate_redirects_to_canonical(client):
|
|
# article 3 is a duplicate of the live article 1 — its URL may be indexed, so it
|
|
# 301s to the canonical (consolidates) rather than 404ing and dropping from Google.
|
|
r = TestClient(client).get("/a/3", follow_redirects=False)
|
|
assert r.status_code == 301 and r.headers["location"] == "/a/1"
|
|
|
|
|
|
def test_share_page_creates_visitor_token_for_beacons(client):
|
|
# /a/ is server-rendered outside the SPA, so its inline analytics must CREATE the
|
|
# visitor token when absent (mirroring visitorId) — else a first-time /a/ landing
|
|
# beacons an empty token and is dropped from visitor stats.
|
|
html = TestClient(client).get("/a/1").text
|
|
assert "localStorage.setItem('goodnews:visitor'" in html
|
|
assert "crypto.randomUUID" in html
|
|
assert "kind:'visit'" in html # daily visit beacon present
|
|
|
|
|
|
def test_share_page_no_image_uses_summary_card(client, tmp_path, monkeypatch):
|
|
# article 1 has an image → large card
|
|
assert 'summary_large_image' in TestClient(client).get("/a/1").text
|
|
|
|
|
|
def test_incomplete_page_is_not_cached(client):
|
|
# article 1 has no summary/explanation → "generating" page must not be cached,
|
|
# and carries no-cache so it re-fetches once the summary lands.
|
|
import goodnews.api as api
|
|
r = TestClient(client).get("/a/1")
|
|
assert r.status_code == 200
|
|
assert r.headers.get("cache-control") == "no-cache"
|
|
assert 1 not in api._SHARE_CACHE
|
|
|
|
|
|
@pytest.fixture
|
|
def app_complete(tmp_path, monkeypatch):
|
|
db = tmp_path / "t.sqlite3"
|
|
monkeypatch.setenv("GOODNEWS_DB", str(db))
|
|
monkeypatch.setenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")
|
|
import importlib
|
|
import goodnews.api as api
|
|
importlib.reload(api)
|
|
from goodnews.db import connect, init_db
|
|
c = connect(str(db)); init_db(c)
|
|
c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'BBC','http://s/f',5)")
|
|
c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash,image_url) "
|
|
"VALUES (1,1,'https://bbc.com/x','Water voles return','h1','https://img/v.jpg')")
|
|
c.execute("INSERT INTO article_scores (article_id,accepted,reason_text) VALUES (1,1,'Hopeful.')")
|
|
c.execute("INSERT INTO article_summaries (article_id,summary,what_happened,why_matters,why_belongs) "
|
|
"VALUES (1,'Voles are back.','They returned to the river.','Biodiversity rebound.','Quietly hopeful.')")
|
|
c.commit(); c.close()
|
|
return api
|
|
|
|
|
|
def test_complete_page_is_cached_and_served_from_cache(app_complete):
|
|
api = app_complete
|
|
tc = TestClient(api.create_app())
|
|
r1 = tc.get("/a/1")
|
|
assert r1.status_code == 200
|
|
assert r1.headers.get("cache-control") == "public, max-age=300"
|
|
assert 1 in api._SHARE_CACHE # finished page cached
|
|
r2 = tc.get("/a/1") # second hit served from cache
|
|
assert r2.status_code == 200 and r2.text == r1.text
|
|
|
|
|
|
def test_share_chrome_parity_and_fallbacks():
|
|
"""HubBar-replica parity Codex asked for: Back only follows a SAME-ORIGIN referrer,
|
|
the menu honors Escape, and a failed image removes itself (clean text unfurl)."""
|
|
from goodnews import share
|
|
html = share.render_share_page(
|
|
{"id": 7, "title": "T", "image_url": "https://x/p.jpg", "canonical_url": "https://s/x",
|
|
"source_name": "S"}, "https://upbeatbytes.com")
|
|
assert "origin===location.origin" in html # Back requires same-origin referrer
|
|
assert "'Escape'" in html or "Escape" in html # menu closes on Escape (parity)
|
|
assert 'onerror="this.remove()"' in html # broken image degrades to text unfurl
|