Typographic-first imagery + opportunistic feed-HTML image extraction

Per the calm north star (images support reading, never become a stimulation
layer; metadata-only stays the posture):
- Image-less cards are now designed, not missing: secondary cards are text-first
  (no empty media band), and an image-less hero becomes a fully typographic lead
  with a faint topic wordmark behind it (CSS attr(data-topic)). No big empty
  image space is ever reserved.
- Opportunistic extraction: parse the first <img src> from a feed's
  content/description HTML when present, canonicalized — never fetching the
  article page. Applies to new ingests (existing rows keep their current image).
- Held by deliberate choice: og:image page enrichment, stock/AI imagery, and any
  image-coverage requirement for sources.

Tests: feed HTML image extraction (72 total).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
jay
2026-05-30 23:59:36 +00:00
parent b9ecebffde
commit f46fee1197
3 changed files with 89 additions and 31 deletions
+46 -27
View File
@@ -1,12 +1,9 @@
<script>
let { article, onaction, hero = false } = $props();
// Drive the image/fallback from state, not imperative class mutation — so the
// typographic fallback shows for BOTH missing and broken images, and Svelte
// sees the styles as used (no unused-selector warning).
let imgOk = $state(!!article.image_url);
let hasImg = $derived(!!(article.image_url && imgOk));
// Only ever link out to real http(s) URLs (the API is clean, but cheap defense).
const safeHref =
typeof article.url === 'string' && /^https?:\/\//.test(article.url) ? article.url : '#';
@@ -15,18 +12,19 @@
}
</script>
<article class:hero class:textfirst={!hero && !(article.image_url && imgOk)}>
<!-- Hero always shows media (image or fallback). Secondary cards only show a
media band when a real image exists, so image-less cards are text-first
rather than a row of empty blocks. -->
{#if hero || (article.image_url && imgOk)}
<!-- Image-less cards are designed, not missing: secondary cards go text-first
(no empty media band), and the hero becomes a fully typographic lead with a
faint topic wordmark behind it. We never reserve big empty image space. -->
<article
class:hero
class:textfirst={!hero && !hasImg}
class:herotype={hero && !hasImg}
data-topic={article.topic ?? ''}
>
{#if hasImg}
<a class="media" href={safeHref} target="_blank" rel="noopener">
{#if article.image_url && imgOk}
<img src={article.image_url} alt="" loading="lazy" referrerpolicy="no-referrer"
onerror={() => (imgOk = false)} />
{:else}
<span class="fallback">{article.topic ?? 'good news'}</span>
{/if}
</a>
{/if}
@@ -39,7 +37,7 @@
<h3><a href={safeHref} target="_blank" rel="noopener">{article.title}</a></h3>
{#if hero && article.description}
{#if (hero || !hasImg) && article.description}
<p class="desc">{article.description}</p>
{/if}
@@ -73,12 +71,6 @@
background: linear-gradient(135deg, var(--sage-soft), #f1ece0);
}
.media img { width: 100%; height: 100%; object-fit: cover; }
.fallback {
position: absolute; inset: 0; display: flex;
align-items: center; justify-content: center;
font-family: var(--serif); font-style: italic; color: var(--sage-deep); opacity: 0.5;
text-transform: lowercase; letter-spacing: 0.02em; font-size: 1.1rem;
}
.body { padding: 16px 18px 14px; display: flex; flex-direction: column; gap: 8px; flex: 1; }
.tags { display: flex; align-items: center; gap: 8px; flex-wrap: wrap; font-size: 0.74rem; }
@@ -90,7 +82,6 @@
.src { color: var(--muted); margin-left: auto; }
h3 { font-size: 1.18rem; }
.hero h3 { font-size: 1.95rem; }
h3 a:hover { color: var(--sage-deep); }
.desc { margin: 2px 0 0; color: #3c463a; }
.why {
@@ -103,28 +94,56 @@
font-size: 0.76rem; border-bottom: 1px dotted var(--line);
}
.actions button:hover { color: var(--sage-deep); border-bottom-color: var(--sage); }
/* On pointer devices, keep the tuning controls quiet until the card is hovered
or focused — they shouldn't read as interface machinery. Touch devices (no
hover) keep them visible. */
@media (hover: hover) {
.actions { opacity: 0; transition: opacity 0.16s ease; }
article:hover .actions,
article:focus-within .actions { opacity: 1; }
}
/* Text-first secondary cards (no real image): a little breathing room up top. */
/* text-first secondary card: a small accent instead of an empty image band */
.textfirst .body { padding-top: 18px; }
.textfirst .body::before {
content: ""; display: block; width: 28px; height: 3px;
content: ''; display: block; width: 28px; height: 3px;
background: var(--sage-soft); border-radius: 2px; margin-bottom: 4px;
}
/* hero WITH image: two columns */
.hero { display: grid; grid-template-columns: 1.1fr 1fr; }
.hero .media { aspect-ratio: auto; height: 100%; min-height: 280px; }
.hero .body { padding: 28px 30px; justify-content: center; gap: 12px; }
.hero h3 { font-size: 1.95rem; }
/* hero WITHOUT image: a fully typographic lead with a faint topic wordmark */
.herotype {
position: relative;
overflow: hidden;
background:
radial-gradient(120% 140% at 100% 0%, var(--sage-soft) 0%, transparent 55%),
linear-gradient(180deg, var(--surface), var(--surface));
}
.herotype::after {
content: attr(data-topic);
position: absolute;
right: 8px;
bottom: -18px;
font-family: var(--serif);
font-size: clamp(4rem, 12vw, 8rem);
line-height: 1;
color: var(--sage);
opacity: 0.07;
text-transform: lowercase;
letter-spacing: -0.02em;
pointer-events: none;
user-select: none;
}
.herotype .body { position: relative; padding: 40px 36px; gap: 12px; }
.herotype h3 { font-size: 2.15rem; }
.herotype .desc { font-size: 1.05rem; }
@media (max-width: 640px) {
.hero { grid-template-columns: 1fr; }
.hero .media { min-height: 200px; }
.hero h3 { font-size: 1.6rem; }
.hero h3, .herotype h3 { font-size: 1.6rem; }
.herotype .body { padding: 30px 24px; }
}
</style>
+24 -2
View File
@@ -1,6 +1,7 @@
from __future__ import annotations
import email.utils
import re
import sqlite3
import urllib.error
import urllib.request
@@ -361,7 +362,7 @@ def _parse_rss(root: ET.Element) -> list[FeedItem]:
description=_first_text(item, "description", "summary", "encoded"),
author=_first_text(item, "author", "creator"),
published_at=_parse_date(_first_text(item, "pubDate", "published", "updated", "date")),
image_url=_find_image_url(item),
image_url=_find_image_url(item) or _html_image(item),
language=language,
raw_guid=guid,
)
@@ -389,7 +390,7 @@ def _parse_atom(root: ET.Element) -> list[FeedItem]:
description=_first_text(entry, "summary", "content"),
author=author,
published_at=_parse_date(_first_text(entry, "published", "updated")),
image_url=_find_image_url(entry),
image_url=_find_image_url(entry) or _html_image(entry),
language=language,
raw_guid=_first_text(entry, "id"),
)
@@ -411,6 +412,27 @@ def _atom_link(entry: ET.Element) -> str | None:
return fallback
_IMG_SRC_RE = re.compile(r"""<img\b[^>]*?\bsrc=["']([^"']+)["']""", re.IGNORECASE)
def _img_from_html(html: str | None) -> str | None:
"""First <img src> in a content/description HTML blob, if any."""
if not html:
return None
match = _IMG_SRC_RE.search(html)
return match.group(1) if match else None
def _html_image(element: ET.Element) -> str | None:
    """Pick an image URL out of the HTML a feed item/entry already carries.

    The first available text among the element's "encoded", "content",
    "description" and "summary" children is scanned for an <img src>, and the
    result is passed through canonicalize_url. Nothing is fetched: only markup
    already present in the feed is read. canonicalize_url is relied on to
    discard non-http(s) or relative URLs.
    """
    markup = _first_text(element, "encoded", "content", "description", "summary")
    candidate = _img_from_html(markup)
    return canonicalize_url(candidate)
def _find_image_url(element: ET.Element) -> str | None:
for child in element.iter():
name = _local_name(child.tag)
+17
View File
@@ -0,0 +1,17 @@
from goodnews.feeds import _img_from_html, parse_feed
def test_img_from_html_finds_first_src():
    """The first <img src> is returned; image-less or missing input gives None."""
    markup = '<p>hi</p><img src="https://x.com/a.jpg" alt="">'
    assert _img_from_html(markup) == "https://x.com/a.jpg"
    for imageless in ("no images here", None):
        assert _img_from_html(imageless) is None
# Two-item RSS fixture: the first item embeds an <img> inside content:encoded,
# the second carries a plain-text description with no image at all.
RSS = b"""<?xml version="1.0"?><rss xmlns:content="http://purl.org/rss/1.0/modules/content/"><channel>
<item><title>Story</title><link>https://e.com/1</link>
<content:encoded><![CDATA[<p>lead</p><img src="https://e.com/photo.jpg"/> more]]></content:encoded></item>
<item><title>NoImg</title><link>https://e.com/2</link><description>just text</description></item>
</channel></rss>"""


def test_parse_feed_pulls_image_from_content_html():
    """parse_feed takes the image from content HTML and leaves it None otherwise."""
    parsed = parse_feed(RSS)
    story = parsed[0]
    assert story.image_url == "https://e.com/photo.jpg"
    # Extraction is opportunistic: an item without any <img> gets no image.
    no_img = parsed[1]
    assert no_img.image_url is None