Paywall awareness (#6) + replace-an-article (#7)

- paywall.py: conservative domain-level paywall detection (New Scientist,
  Nature, and common hard/soft paywalls). Never fetches pages — an honest hint.
- API: Article gains a 'paywalled' flag; the brief now leads with a gentle AND
  readable story (paywalled/charged stories stay in the five, just not first).
- New GET /api/replacement returns the next-best readable, unshown article
  (honors mood+prefs via the merged prefs param; gentle=true for hero swaps).
- UI: paywalled cards show 'May need a subscription'; a Replace / 'Find one I
  can read' action (always visible, while tuning actions stay tucked) swaps the
  card for a readable alternative, with a gentle notice when none remain.
- Tests: paywall detection + replacement behavior (77 total).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
jay
2026-05-31 00:39:13 +00:00
parent 06c2704ae0
commit bfd612eb9b
7 changed files with 226 additions and 16 deletions
+21 -4
View File
@@ -1,5 +1,5 @@
<script>
let { article, onaction, hero = false } = $props();
let { article, onaction, onreplace, hero = false } = $props();
let imgOk = $state(!!article.image_url);
let hasImg = $derived(!!(article.image_url && imgOk));
@@ -37,6 +37,10 @@
<h3><a href={safeHref} target="_blank" rel="noopener">{article.title}</a></h3>
{#if article.paywalled}
<p class="paywall">May need a subscription to read</p>
{/if}
{#if hero && article.description}
<p class="desc">{article.description}</p>
{/if}
@@ -46,6 +50,11 @@
{/if}
<div class="actions">
{#if onreplace}
<button class="replace" onclick={() => onreplace(article)}>
{article.paywalled ? 'Find one I can read' : 'Replace'}
</button>
{/if}
{#if article.topic}<button onclick={() => act('notToday', article.topic)}>Not today</button>{/if}
{#if article.flavor}<button onclick={() => act('lessLikeThis', article.flavor)}>Less like this</button>{/if}
{#if article.topic}<button onclick={() => act('alwaysHide', article.topic)}>Hide {article.topic}</button>{/if}
@@ -84,6 +93,11 @@
h3 { font-size: 1.18rem; }
h3 a:hover { color: var(--sage-deep); }
.desc { margin: 2px 0 0; color: #3c463a; }
.paywall {
margin: 0; font-size: 0.78rem; color: var(--gold);
display: inline-flex; align-items: center; gap: 5px;
}
.paywall::before { content: '🔒'; font-size: 0.72rem; filter: grayscale(0.3); }
.why {
margin: 2px 0 0; font-style: italic; color: var(--muted);
font-size: 0.9rem; padding-left: 12px; border-left: 2px solid var(--sage-soft);
@@ -104,10 +118,13 @@
font-size: 0.76rem; border-bottom: 1px dotted var(--line);
}
.actions button:hover { color: var(--sage-deep); border-bottom-color: var(--sage); }
.actions .replace { color: var(--sage-deep); border-bottom-color: var(--sage-soft); }
/* Tuning actions stay quiet until hover/focus on pointer devices; the Replace
escape hatch stays visible so a paywalled card always shows a way through. */
@media (hover: hover) {
.actions { opacity: 0; transition: opacity 0.16s ease; }
article:hover .actions,
article:focus-within .actions { opacity: 1; }
.actions button:not(.replace) { opacity: 0; transition: opacity 0.16s ease; }
article:hover .actions button:not(.replace),
article:focus-within .actions button:not(.replace) { opacity: 1; }
}
/* text-first secondary card: a small accent instead of an empty image band */
+52 -3
View File
@@ -61,6 +61,46 @@
refreshPrefs();
}
let notice = $state('');
// Handle of the pending auto-dismiss timer, kept so a newer notice can cancel it.
let noticeTimer;

/**
 * Show a short-lived notice.
 *
 * Sets `notice` to `msg` and, when a `window` global exists, schedules it to
 * be cleared after four seconds. Any dismissal still pending from an earlier
 * call is cancelled first; otherwise the older timer would blank the newer
 * message before its own four seconds were up.
 *
 * @param {string} msg - Text to display.
 */
function flash(msg) {
  notice = msg;
  // No `window` global: set the text but schedule no timer.
  if (typeof window === 'undefined') return;
  clearTimeout(noticeTimer);
  noticeTimer = setTimeout(() => {
    notice = '';
  }, 4000);
}
/**
 * Swap one shown article for the next one returned by `/api/replacement`.
 *
 * The request excludes every id currently in the list, always asks to avoid
 * paywalled results, adds `gentle=true` when the article being replaced is the
 * first item of the Today brief, and appends the merged preference query.
 *
 * Guards against things changing while the request is in flight:
 *  - the view is snapshotted up front, so a tab switch cannot redirect the
 *    result into a different list (a category feed is simply left alone once
 *    its category is no longer selected);
 *  - if the returned article is already in the list (another swap got the
 *    same answer first), the request is retried once with the updated
 *    exclude list instead of inserting a duplicate id into a keyed list.
 *
 * @param {{ id: number }} article - The card being replaced.
 * @returns {Promise<void>}
 */
async function replaceArticle(article) {
  // Snapshot: `selected` may change before the response arrives.
  const view = selected;
  const onToday = view === 'today';
  const currentList = () => (onToday ? brief?.items : feed);
  // `feed` only belongs to this request while its category is still selected.
  const viewGone = () => !onToday && selected !== view;

  if (!currentList()) return;
  const q = P.param(P.merge(userPrefs, current?.filter ?? {}));

  for (let attempt = 0; attempt < 2; attempt += 1) {
    const list = currentList();
    if (!list || viewGone()) return;

    const shown = list.map((a) => a.id).join(',');
    const isHero = onToday && list[0]?.id === article.id;
    const url = `/api/replacement?exclude=${shown}&avoid_paywall=true${isHero ? '&gentle=true' : ''}${q ? `&${q}` : ''}`;

    let repl;
    try {
      repl = await getJSON(url);
    } catch {
      flash('Could not reach the feed just now.');
      return;
    }
    if (!repl) {
      flash('Nothing else to swap in right now — try easing a boundary.');
      return;
    }

    // Re-read the list: it may have been reloaded or edited during the await.
    const latest = currentList();
    if (!latest || viewGone()) return;
    const i = latest.findIndex((a) => a.id === article.id);
    if (i < 0) return;
    // A concurrent swap already inserted this article; ask again so the list
    // never holds the same id twice.
    if (latest.some((a) => a.id === repl.id)) continue;

    // Build a new array rather than mutating the one currently rendered.
    const next = latest.map((a, idx) => (idx === i ? repl : a));
    if (onToday) {
      brief = { ...brief, items: next };
    } else {
      feed = next;
    }
    return;
  }
}
onMount(async () => {
userPrefs = P.load();
try {
@@ -87,6 +127,10 @@
<BoundariesPanel prefs={userPrefs} onchange={refreshPrefs} onclose={() => (showBoundaries = false)} />
{/if}
{#if notice}
<p class="notice rise">{notice}</p>
{/if}
{#if loading}
<p class="muted center pad">Gathering the good news…</p>
{:else if error}
@@ -101,11 +145,11 @@
{#if selected === 'today'}
{#if brief?.items?.length}
<section class="rise">
<ArticleCard article={brief.items[0]} hero onaction={applyAction} />
<ArticleCard article={brief.items[0]} hero onaction={applyAction} onreplace={replaceArticle} />
{#if brief.items.length > 1}
<div class="grid rest">
{#each brief.items.slice(1) as a (a.id)}
<ArticleCard article={a} onaction={applyAction} />
<ArticleCard article={a} onaction={applyAction} onreplace={replaceArticle} />
{/each}
</div>
{/if}
@@ -117,7 +161,7 @@
{:else if feed.length}
<div class="grid rise">
{#each feed as a (a.id)}
<ArticleCard article={a} onaction={applyAction} />
<ArticleCard article={a} onaction={applyAction} onreplace={replaceArticle} />
{/each}
</div>
{:else}
@@ -147,6 +191,11 @@
background: var(--sage); border-radius: 2px; margin-top: 14px; opacity: 0.8;
}
.notice {
text-align: center; color: var(--sage-deep); background: var(--sage-soft);
border-radius: 999px; padding: 8px 16px; margin: 10px auto 0; width: fit-content;
font-size: 0.86rem;
}
.rest { margin-top: 18px; }
.center { text-align: center; }
.pad { padding: 48px 0; }
+57 -5
View File
@@ -30,9 +30,10 @@ from pydantic import BaseModel
from . import feeds, queries
from .db import connect, init_db
from .filters import filter_articles, prefs_from_json
from .hero import lead_with_gentle
from .hero import safe_to_lead
from .llm import LocalModelClient
from .moods import MOODS
from .moods import MOODS, mood_filter
from .paywall import is_paywalled
from .taxonomy import FLAVORS, TOPICS
ROOT = Path(__file__).resolve().parents[1]
@@ -56,6 +57,22 @@ def get_conn():
conn.close()
def _pick_lead(items: list[dict]) -> list[dict]:
    """Move the best available lead story to the front of ``items``.

    Two passes, strictest first: a story that is both lead-safe and not on a
    paywalled host, then any lead-safe story. The first match of the first
    pass that finds one is moved to position 0 and the rest keep their
    relative order. If neither pass matches, or the match is already first,
    the input list is returned unchanged (same object).
    """

    def _gentle_and_readable(article: dict) -> bool:
        return safe_to_lead(article) and not is_paywalled(article.get("canonical_url"))

    for qualifies in (_gentle_and_readable, safe_to_lead):
        idx = next((n for n, art in enumerate(items) if qualifies(art)), None)
        if idx is None:
            continue
        if idx == 0:
            return items
        return [items[idx], *items[:idx], *items[idx + 1:]]
    return items
# --- Response models (the companion-app contract) ---------------------------
@@ -91,6 +108,7 @@ class Article(BaseModel):
reason_text: str | None = None
model_name: str | None = None
rank: int | None = None # position within a brief, when applicable
paywalled: bool = False
@classmethod
def from_row(cls, row: dict) -> "Article":
@@ -110,6 +128,7 @@ class Article(BaseModel):
reason_text=row.get("reason_text"),
model_name=row.get("model_name"),
rank=row.get("rank"),
paywalled=is_paywalled(row.get("canonical_url")),
)
@@ -283,9 +302,9 @@ def create_app() -> FastAPI:
# MVP: filter the stored brief DOWN; no refill from outside the brief.
# Runs before hero selection, so personal avoid-terms take precedence.
items = filter_articles(items, fp, datetime.now(timezone.utc))
# Lead with an emotionally-safe story (constructive-but-charged stories
# stay in the five, just not as the first thing seen).
items = lead_with_gentle(items)
# Lead with a gentle, readable story (charged or paywalled stories stay
# in the five, just not as the first thing seen).
items = _pick_lead(items)
return BriefResponse(
brief_date=data["brief_date"],
title=data["title"],
@@ -297,6 +316,39 @@ def create_app() -> FastAPI:
with get_conn() as conn:
return queries.available_dates(conn, limit=limit)
@app.get("/api/replacement", response_model=Article | None)
def replacement(
    exclude: str = Query("", description="comma-separated article ids already shown"),
    prefs: str | None = Query(None),
    avoid_paywall: bool = True,
    gentle: bool = Query(False, description="also require lead-safe (for replacing the hero)"),
) -> Article | None:
    """Return the first feed article that can replace one already shown.

    Swaps a read or paywalled item for the next one the reader can actually
    open. The client merges any active mood into ``prefs`` (same as the feed),
    so this needs no mood param.

    Args:
        exclude: Comma-separated ids to skip. Tokens that are not integers
            are ignored rather than rejected.
        prefs: JSON preferences, parsed by ``prefs_from_json``.
        avoid_paywall: Skip rows whose ``canonical_url`` is on a paywalled host.
        gentle: Also require ``safe_to_lead`` (used when replacing the hero).

    Returns:
        The first surviving row as an ``Article``, or ``None`` when nothing in
        the rows fetched (at most 120) qualifies.
    """
    fp = prefs_from_json(prefs)

    # `exclude` is client-controlled. Let int() decide what is a number:
    # the old isdigit() pre-check let through tokens like "--5" or "²" that
    # int() then rejected with an unhandled ValueError.
    excl: set[int] = set()
    for token in exclude.split(","):
        try:
            excl.add(int(token))
        except ValueError:
            continue

    now = datetime.now(timezone.utc)
    kw = dict(
        include_topics=fp.include_topics or None,
        include_flavors=fp.include_flavors or None,
        mute_topics=list(fp.muted_topics(now)) or None,
        mute_flavors=list(fp.muted_flavors(now)) or None,
        max_cortisol=fp.max_cortisol,
        max_ragebait=fp.max_ragebait,
    )
    with get_conn() as conn:
        rows = queries.feed(conn, accepted_only=True, limit=120, offset=0, **kw)

    for r in filter_articles(rows, fp, now):
        if r["id"] in excl:
            continue
        if avoid_paywall and is_paywalled(r["canonical_url"]):
            continue
        if gentle and not safe_to_lead(r):
            continue
        return Article.from_row(r)
    return None
@app.get("/api/candidates", response_model=list[Candidate])
def candidates(status: str | None = Query(None)) -> list[Candidate]:
from .sources import list_candidates
+41
View File
@@ -0,0 +1,41 @@
"""Domain-level paywall hints.
We never fetch article pages, so a paywall can only be inferred from the host.
This is a curated, conservative list of hard/soft paywalls — enough to label a
card "subscription may be required" and to prefer readable stories for the lead
and for replacements. It will never be perfect; it's an honest hint, not a gate.
"""
from __future__ import annotations
from urllib.parse import urlsplit
# Host suffixes considered paywalled. Subdomains match (news.nature.com → nature.com).
PAYWALL_DOMAINS = {
    "newscientist.com",
    "nature.com",
    "nytimes.com",
    "wsj.com",
    "ft.com",
    "economist.com",
    "wired.com",
    "theatlantic.com",
    "washingtonpost.com",
    "bloomberg.com",
    "technologyreview.com",
    "newyorker.com",
    "scientificamerican.com",
    "nationalgeographic.com",
    "thetimes.co.uk",
    "telegraph.co.uk",
    "foreignpolicy.com",
    "hbr.org",
    "harpers.org",
}


def is_paywalled(url: str | None) -> bool:
    """Return True when ``url``'s host is a listed paywall domain or a subdomain of one.

    Only the hostname is compared: port, userinfo, letter case, a leading
    ``www.`` and a trailing root dot are all ignored. ``None``, an empty
    string, a URL without a host, or a URL that ``urlsplit`` rejects all
    yield False; this is a hint, so it never raises on bad input.
    """
    if not url:
        return False
    try:
        # `.hostname` (unlike `.netloc`) drops "user:pw@" and ":port" and
        # lower-cases the result; it is None when the URL has no host.
        host = urlsplit(url).hostname or ""
    except ValueError:
        # e.g. an unbalanced "[" in the authority part
        return False
    host = host.rstrip(".")
    if host.startswith("www."):
        host = host[4:]
    return any(host == d or host.endswith("." + d) for d in PAYWALL_DOMAINS)
+4 -4
View File
@@ -5,10 +5,10 @@
* Ability to silence some categories temporarily (Maybe a user doesn't even want to see health-related articles, even good ones, so they're not reminded of an ongoing medical issue -- a way to avoid something purposely for a bit) [done: pause topics/flavors in Boundaries]
* Terms to avoid list (To filter even good news that you'd rather not hear about) [done: avoid words/phrases in Boundaries]
- Favorite/save articles
- Soothing background colors/gradients per each category as you scroll. Maybe a user preference.
| Favorite/save articles [tabled: needs accounts/logins for a larger footprint]
| Soothing background colors/gradients per each category as you scroll. Maybe a user preference. [tabled: revisit deliberately; if done, whisper-quiet translucent tints, not neon]
- I really like the coloring for the metadata highlighting in each card (The grading bubbles)
- Some articles are behind paywalls.. what can we do?
- After an article is read, can we add a refresh button to fetch a replacement for it in the list?
* Some articles are behind paywalls.. what can we do? [done: domain-level paywall flag + readable hero; future: down-weight paywalled sources like New Scientist]
* After an article is read, can we add a refresh button to fetch a replacement for it in the list? [done: "Find one I can read" / Replace swaps in the next readable article]
* I want the top 5 to be there, but I want the remaining categories to be hidden behind their selections. So the main screen should show just the current highlights, and then the other articles should only be visible when in that category. [done]
* Title headings should be a little larger -- if you select Today, Today should look like a proper heading, bold and beautiful. Switching to Wonder should show "Wonder" all nice and whatnot. [done]
+14
View File
@@ -0,0 +1,14 @@
from goodnews.paywall import is_paywalled
def test_known_paywalls_flagged():
    """Listed paywall hosts, including a subdomain of one, are flagged."""
    expected_paywalled = (
        "https://www.newscientist.com/article/x",
        "https://www.nature.com/articles/d41586",
        "https://news.nature.com/foo",  # subdomain
        "https://www.nytimes.com/2026/05/31/x",
    )
    for url in expected_paywalled:
        assert is_paywalled(url), url
def test_free_domains_not_flagged():
    """Hosts outside the list, and missing or empty URLs, are not flagged."""
    expected_free = (
        "https://www.theguardian.com/x",
        "https://phys.org/news/x",
        "https://www.goodnewsnetwork.org/x",
        None,
        "",
    )
    for url in expected_free:
        assert not is_paywalled(url), url
+37
View File
@@ -0,0 +1,37 @@
import pytest
from fastapi.testclient import TestClient
from goodnews.db import connect, init_db
@pytest.fixture
def client(tmp_path, monkeypatch):
    """TestClient over a temp database holding one source and three accepted articles.

    Articles 1 and 3 are on free hosts and article 2 is on a paywalled host.
    GOODNEWS_DB is set before ``goodnews.api`` is imported and the app is
    created.
    """
    db_path = tmp_path / "t.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db_path))
    conn = connect(db_path)
    init_db(conn)
    conn.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'S','http://s/f',6)")

    def add(aid, url, cort=1):
        # One article row plus its score row; every article is accepted.
        article_params = (aid, url, f"t{aid}", f"h{aid}")
        conn.execute(
            "INSERT INTO articles (id,source_id,canonical_url,title,url_hash) VALUES (?,1,?,?,?)",
            article_params,
        )
        score_params = (aid, cort)
        conn.execute(
            "INSERT INTO article_scores (article_id,constructive_score,agency_score,human_benefit_score,"
            "cortisol_score,ragebait_score,pr_risk_score,accepted,topic,flavor) "
            "VALUES (?,7,2,2,?,0,2,1,'science','discovery')",
            score_params,
        )

    seed = (
        (1, "https://phys.org/free-a"),                 # free, best
        (2, "https://www.newscientist.com/pay"),        # paywalled
        (3, "https://www.goodnewsnetwork.org/free-b"),  # free
    )
    for aid, url in seed:
        add(aid, url)
    conn.commit()
    conn.close()

    from goodnews.api import create_app

    app = create_app()
    return TestClient(app)
def test_replacement_skips_paywalled_and_excluded(client):
    """Excluding article 1 returns article 3, the other free one, never the paywalled one."""
    resp = client.get("/api/replacement", params={"exclude": "1"})
    assert resp.status_code == 200
    payload = resp.json()
    assert payload is not None
    assert payload["id"] == 3
    assert payload["paywalled"] is False
def test_replacement_none_when_only_paywalled_left(client):
    """With both free articles excluded only the paywalled one remains, so the reply is null."""
    resp = client.get("/api/replacement", params={"exclude": "1,3"})
    payload = resp.json()
    # avoid_paywall defaults to true, so the remaining paywalled article is skipped
    assert payload is None
def test_replacement_can_include_paywalled_when_allowed(client):
    """With avoid_paywall=false the paywalled article becomes an acceptable replacement."""
    query = {"exclude": "1,3", "avoid_paywall": "false"}
    resp = client.get("/api/replacement", params=query)
    payload = resp.json()
    assert payload["id"] == 2