Typographic-first imagery + opportunistic feed-HTML image extraction

Per the calm north star (images support reading, never become a stimulation layer; metadata-only stays the posture):

- Image-less cards are now designed, not missing: secondary cards are text-first (no empty media band), and an image-less hero becomes a fully typographic lead with a faint topic wordmark behind it (CSS attr(data-topic)). No big empty image space is ever reserved.
- Opportunistic extraction: parse the first <img src> from a feed's content/description HTML when present, canonicalized — never fetching the article page. Applies to new ingests (existing rows keep their current image).
- Held by deliberate choice: og:image page enrichment, stock/AI imagery, and any image-coverage requirement for sources.

Tests: feed HTML image extraction (72 total).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 23:59:36 +00:00
parent b9ecebffde
commit f46fee1197
3 changed files with 89 additions and 31 deletions
@@ -1,12 +1,9 @@
 <script>
  let { article, onaction, hero = false } = $props();

-  // Drive the image/fallback from state, not imperative class mutation — so the
-  // typographic fallback shows for BOTH missing and broken images, and Svelte
-  // sees the styles as used (no unused-selector warning).
  let imgOk = $state(!!article.image_url);
+  let hasImg = $derived(!!(article.image_url && imgOk));

-  // Only ever link out to real http(s) URLs (the API is clean, but cheap defense).
  const safeHref =
    typeof article.url === 'string' && /^https?:\/\//.test(article.url) ? article.url : '#';

@@ -15,18 +12,19 @@
  }
 </script>

-<article class:hero class:textfirst={!hero && !(article.image_url && imgOk)}>
-  <!-- Hero always shows media (image or fallback). Secondary cards only show a
-       media band when a real image exists, so image-less cards are text-first
-       rather than a row of empty blocks. -->
-  {#if hero || (article.image_url && imgOk)}
+<!-- Image-less cards are designed, not missing: secondary cards go text-first
+     (no empty media band), and the hero becomes a fully typographic lead with a
+     faint topic wordmark behind it. We never reserve big empty image space. -->
+<article
+  class:hero
+  class:textfirst={!hero && !hasImg}
+  class:herotype={hero && !hasImg}
+  data-topic={article.topic ?? ''}
+>
+  {#if hasImg}
    <a class="media" href={safeHref} target="_blank" rel="noopener">
-      {#if article.image_url && imgOk}
      <img src={article.image_url} alt="" loading="lazy" referrerpolicy="no-referrer"
           onerror={() => (imgOk = false)} />
-      {:else}
-        <span class="fallback">{article.topic ?? 'good news'}</span>
-      {/if}
    </a>
  {/if}

@@ -39,7 +37,7 @@

    <h3><a href={safeHref} target="_blank" rel="noopener">{article.title}</a></h3>

-    {#if hero && article.description}
+    {#if (hero || !hasImg) && article.description}
      <p class="desc">{article.description}</p>
    {/if}

@@ -73,12 +71,6 @@
    background: linear-gradient(135deg, var(--sage-soft), #f1ece0);
  }
  .media img { width: 100%; height: 100%; object-fit: cover; }
-  .fallback {
-    position: absolute; inset: 0; display: flex;
-    align-items: center; justify-content: center;
-    font-family: var(--serif); font-style: italic; color: var(--sage-deep); opacity: 0.5;
-    text-transform: lowercase; letter-spacing: 0.02em; font-size: 1.1rem;
-  }

  .body { padding: 16px 18px 14px; display: flex; flex-direction: column; gap: 8px; flex: 1; }
  .tags { display: flex; align-items: center; gap: 8px; flex-wrap: wrap; font-size: 0.74rem; }
@@ -90,7 +82,6 @@
  .src { color: var(--muted); margin-left: auto; }

  h3 { font-size: 1.18rem; }
-  .hero h3 { font-size: 1.95rem; }
  h3 a:hover { color: var(--sage-deep); }
  .desc { margin: 2px 0 0; color: #3c463a; }
  .why {
@@ -103,28 +94,56 @@
    font-size: 0.76rem; border-bottom: 1px dotted var(--line);
  }
  .actions button:hover { color: var(--sage-deep); border-bottom-color: var(--sage); }
-  /* On pointer devices, keep the tuning controls quiet until the card is hovered
-     or focused — they shouldn't read as interface machinery. Touch devices (no
-     hover) keep them visible. */
  @media (hover: hover) {
    .actions { opacity: 0; transition: opacity 0.16s ease; }
    article:hover .actions,
    article:focus-within .actions { opacity: 1; }
  }

-  /* Text-first secondary cards (no real image): a little breathing room up top. */
+  /* text-first secondary card: a small accent instead of an empty image band */
  .textfirst .body { padding-top: 18px; }
  .textfirst .body::before {
-    content: ""; display: block; width: 28px; height: 3px;
+    content: ''; display: block; width: 28px; height: 3px;
    background: var(--sage-soft); border-radius: 2px; margin-bottom: 4px;
  }

+  /* hero WITH image: two columns */
  .hero { display: grid; grid-template-columns: 1.1fr 1fr; }
  .hero .media { aspect-ratio: auto; height: 100%; min-height: 280px; }
  .hero .body { padding: 28px 30px; justify-content: center; gap: 12px; }
+  .hero h3 { font-size: 1.95rem; }
+
+  /* hero WITHOUT image: a fully typographic lead with a faint topic wordmark */
+  .herotype {
+    position: relative;
+    overflow: hidden;
+    background:
+      radial-gradient(120% 140% at 100% 0%, var(--sage-soft) 0%, transparent 55%),
+      linear-gradient(180deg, var(--surface), var(--surface));
+  }
+  .herotype::after {
+    content: attr(data-topic);
+    position: absolute;
+    right: 8px;
+    bottom: -18px;
+    font-family: var(--serif);
+    font-size: clamp(4rem, 12vw, 8rem);
+    line-height: 1;
+    color: var(--sage);
+    opacity: 0.07;
+    text-transform: lowercase;
+    letter-spacing: -0.02em;
+    pointer-events: none;
+    user-select: none;
+  }
+  .herotype .body { position: relative; padding: 40px 36px; gap: 12px; }
+  .herotype h3 { font-size: 2.15rem; }
+  .herotype .desc { font-size: 1.05rem; }
+
  @media (max-width: 640px) {
    .hero { grid-template-columns: 1fr; }
    .hero .media { min-height: 200px; }
-    .hero h3 { font-size: 1.6rem; }
+    .hero h3, .herotype h3 { font-size: 1.6rem; }
+    .herotype .body { padding: 30px 24px; }
  }
 </style>
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import email.utils
+import re
 import sqlite3
 import urllib.error
 import urllib.request
@@ -361,7 +362,7 @@ def _parse_rss(root: ET.Element) -> list[FeedItem]:
                description=_first_text(item, "description", "summary", "encoded"),
                author=_first_text(item, "author", "creator"),
                published_at=_parse_date(_first_text(item, "pubDate", "published", "updated", "date")),
-                image_url=_find_image_url(item),
+                image_url=_find_image_url(item) or _html_image(item),
                language=language,
                raw_guid=guid,
            )
@@ -389,7 +390,7 @@ def _parse_atom(root: ET.Element) -> list[FeedItem]:
                description=_first_text(entry, "summary", "content"),
                author=author,
                published_at=_parse_date(_first_text(entry, "published", "updated")),
-                image_url=_find_image_url(entry),
+                image_url=_find_image_url(entry) or _html_image(entry),
                language=language,
                raw_guid=_first_text(entry, "id"),
            )
@@ -411,6 +412,27 @@ def _atom_link(entry: ET.Element) -> str | None:
    return fallback


+_IMG_SRC_RE = re.compile(r"""<img\b[^>]*?\bsrc=["']([^"']+)["']""", re.IGNORECASE)  # group 1 = quoted src value inside one <img ...> tag. NOTE(review): \b also holds after "-", so an earlier data-src= in the same tag matches before src= — confirm that is acceptable


+def _img_from_html(html: str | None) -> str | None:
+    """First <img src> in a content/description HTML blob, if any."""
+    if not html:  # None and "" both mean there is nothing to scan
+        return None
+    match = _IMG_SRC_RE.search(html)  # leftmost match only; any later <img> tags are ignored
+    return match.group(1) if match else None  # raw attribute text as written in the HTML; entities such as &amp; are not decoded here
+
+
+def _html_image(element: ET.Element) -> str | None:
+    """Opportunistic image from the feed's content/description HTML.
+
+    Only ever reads what the feed already provides — never fetches the article
+    page. A non-http(s)/relative URL is dropped by canonicalize_url.
+    """
+    html = _first_text(element, "encoded", "content", "description", "summary")  # presumably the first of these child tags that has text, tried in this order — confirm against _first_text
+    return canonicalize_url(_img_from_html(html))  # _img_from_html can return None, so canonicalize_url is assumed to accept None — TODO confirm
+
+
 def _find_image_url(element: ET.Element) -> str | None:
    for child in element.iter():
        name = _local_name(child.tag)
@@ -0,0 +1,17 @@
+from goodnews.feeds import _img_from_html, parse_feed
+
+def test_img_from_html_finds_first_src():  # NOTE(review): the fixture holds a single <img>, so "first of several" is not actually exercised
+    assert _img_from_html('<p>hi</p><img src="https://x.com/a.jpg" alt="">') == "https://x.com/a.jpg"  # src is captured even when the tag follows other markup
+    assert _img_from_html("no images here") is None  # no <img> tag at all
+    assert _img_from_html(None) is None  # missing HTML short-circuits to None
+
+RSS = b"""<?xml version="1.0"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/"><channel>
+<item><title>Story</title><link>https://e.com/1</link>
+<content:encoded><![CDATA[<p>lead</p><img src="https://e.com/photo.jpg"/> more]]></content:encoded></item>
+<item><title>NoImg</title><link>https://e.com/2</link><description>just text</description></item>
+</channel></rss>"""  # two-item fixture: "Story" carries an <img> inside content:encoded CDATA, "NoImg" has a plain-text description only
+
+def test_parse_feed_pulls_image_from_content_html():
+    items = parse_feed(RSS)  # the index checks below assume items come back in document order (Story, then NoImg)
+    assert items[0].image_url == "https://e.com/photo.jpg"  # image taken from the content:encoded HTML of the first item
+    assert items[1].image_url is None  # opportunistic: none when absent