Harden Calm Filters surface before Track 3

- Add API test layer (TestClient): bad prefs -> 200, mute affects feed,
  avoid-term filters, brief filters down, counts match filtered feed.
- Render article cards via the DOM API (textContent) instead of HTML string
  interpolation, and only allow http(s) hrefs — defense-in-depth XSS guard for
  when the feed faces untrusted sources publicly.
- Refresh the stale README Next Steps to reflect what's done vs ahead.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
jay
2026-05-30 19:31:45 +00:00
parent 091dec64ae
commit cabe0b6049
4 changed files with 137 additions and 23 deletions
+15 -5
View File
@@ -178,11 +178,21 @@ run missed while the machine was off is caught up on the next boot.
## Next Steps
1. Run the poller for a few days and inspect which sources produce useful candidates.
2. Add source-level quality notes and deactivate noisy feeds.
3. Replace or supplement `heuristic-v0` with a local model classifier.
4. Add a daily brief builder that selects 5 items using scores and source diversity.
5. Add a small web/API layer once the ingest data looks trustworthy.
Done so far: RSS/Atom ingestion with exact + semantic dedup, heuristic + local-LLM
classification with topic/flavor tagging, the daily brief, the FastAPI web/API layer
and site, scheduled `cycle` via systemd, a pytest suite, and device-local Calm Filters.
Still ahead:
1. **Supervised source pipeline** — paste a feed URL, preview a scored sample
(freshness, acceptance rate, topic/flavor mix, cortisol/ragebait/PR averages,
example items), then add to quarantine before it can reach the main feed.
2. **Learned "Less like this" weighting** — replace the interim flavor-pause with
real preference down-ranking.
3. **Corpus rebalancing** — add calm/feelgood sources (currently science-heavy).
4. **Retention/pruning** — soft-delete + time-window indexes as the corpus grows
toward ~10k articles (don't rush; not yet needed).
5. **Go-public hardening** — TLS via a reverse proxy, then a domain.
## Local Model Configuration
+45 -18
View File
@@ -192,28 +192,55 @@
return r.json();
}
// Build cards with the DOM API (textContent) rather than HTML strings, so
// feed-supplied text can never inject markup even if upstream cleaning misses.
/**
 * Create a DOM element, optionally giving it a class name and text content.
 *
 * Text is assigned through `textContent`, so it is never parsed as markup.
 *
 * @param {string} tag - Element name passed to `document.createElement`.
 * @param {string} [cls] - Class name; skipped when falsy (e.g. `null` or `""`).
 * @param {*} [text] - Text content; skipped only when `null` or `undefined`,
 *   so `0` and `""` are still assigned.
 * @returns {Element} The newly created element.
 */
const node = (tag, cls, text) => {
  const el = document.createElement(tag);
  if (cls) {
    el.className = cls;
  }
  const hasText = text !== null && text !== undefined;
  if (hasText) {
    el.textContent = text;
  }
  return el;
};
// NOTE(review): this span is a rendered diff hunk whose +/- markers are not
// shown. The statements from `const rank` through the template-literal `return`
// appear to be the lines this commit removes (card markup built by string
// interpolation); everything from `const article` onward is the replacement that
// builds the same card from DOM nodes. Read as a single function the two halves
// conflict (`acts` is declared twice), so they are two versions of one body.
function articleCard(a, showRank) {
// --- previous version: returns an HTML string; feed fields are interpolated
// --- into markup and attribute values without escaping.
const rank = showRank && a.rank ? `<span class="rank-badge">${a.rank}</span>` : "";
const tags = [a.topic, a.flavor].filter(Boolean).map(t => `<span class="tag">${t}</span>`).join(" ");
const desc = a.description ? `<p class="desc">${a.description}</p>` : "";
const why = a.reason_text ? `<div class="why">${a.reason_text}</div>` : "";
const acts = [];
if (a.topic) acts.push(`<button data-act="notToday" data-topic="${a.topic}">Not today</button>`);
if (a.flavor) acts.push(`<button data-act="lessLikeThis" data-flavor="${a.flavor}">Less like this</button>`);
if (a.topic) acts.push(`<button data-act="alwaysHide" data-topic="${a.topic}">Always hide ${a.topic}</button>`);
return `<article>
<div class="meta">${rank}${tags}<span>${a.source}</span>
${a.published_at ? `<span>· ${a.published_at.slice(0,10)}</span>` : ""}</div>
<h3><a href="${a.url}" target="_blank" rel="noopener">${a.title}</a></h3>
${desc}${why}
<div class="actions">${acts.join("")}</div>
</article>`;
// --- new version: returns an <article> element; every feed-supplied string is
// --- placed via the `node` helper (textContent) or a dataset property.
const article = node("article");
// Meta row: optional rank badge, topic/flavor tags, source name, and the first
// 10 characters of `published_at` when present.
const meta = node("div", "meta");
if (showRank && a.rank) meta.append(node("span", "rank-badge", a.rank));
[a.topic, a.flavor].filter(Boolean).forEach(t => meta.append(node("span", "tag", t)));
meta.append(node("span", null, a.source));
if (a.published_at) meta.append(node("span", null, "· " + a.published_at.slice(0, 10)));
article.append(meta);
const h3 = node("h3");
const link = node("a", null, a.title);
// Only string URLs starting with lowercase "http://" or "https://" are used as
// the href; anything else (other schemes, non-strings) falls back to "#".
link.href = (typeof a.url === "string" && /^https?:\/\//.test(a.url)) ? a.url : "#";
link.target = "_blank"; link.rel = "noopener";
h3.append(link);
article.append(h3);
// Description and reason are optional and skipped when falsy.
if (a.description) article.append(node("p", "desc", a.description));
if (a.reason_text) article.append(node("div", "why", a.reason_text));
const acts = node("div", "actions");
// Button factory: sets data-act, plus data-<key> = val when a key is given.
const btn = (label, act, key, val) => {
const b = node("button", null, label);
b.dataset.act = act;
if (key) b.dataset[key] = val;
return b;
};
// "Not today" and "Always hide" need a topic; "Less like this" needs a flavor.
if (a.topic) acts.append(btn("Not today", "notToday", "topic", a.topic));
if (a.flavor) acts.append(btn("Less like this", "lessLikeThis", "flavor", a.flavor));
if (a.topic) acts.append(btn("Always hide " + a.topic, "alwaysHide", "topic", a.topic));
article.append(acts);
return article;
}
// NOTE(review): rendered diff hunk without +/- markers. The `target.innerHTML`
// assignment appears to be the version this commit removes; the statements from
// `target.replaceChildren()` onward are its replacement.
function renderList(target, items, showRank) {
// --- previous version: joins card HTML strings and assigns them as innerHTML.
target.innerHTML = items.length
? items.map(a => articleCard(a, showRank)).join("")
: `<div class="empty">Nothing here right now — try easing a filter.</div>`;
// --- new version: clears `target`, then appends either a single "empty"
// --- placeholder div or one articleCard element per item.
target.replaceChildren();
if (!items.length) {
target.append(node("div", "empty", "Nothing here right now — try easing a filter."));
return;
}
items.forEach(a => target.append(articleCard(a, showRank)));
}
// delegated clicks for the per-article gentle actions
+1
View File
@@ -18,6 +18,7 @@ web = [
]
test = [
"pytest>=8",
"httpx>=0.27",
]
[project.scripts]
+76
View File
@@ -0,0 +1,76 @@
import json
import pytest
from fastapi.testclient import TestClient
from goodnews.db import connect, init_db
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Return a ``TestClient`` for the app, backed by a seeded temporary database.

    The database file lives under ``tmp_path`` and ``GOODNEWS_DB`` is pointed at
    it before the app module is imported. Seed data:

    * one source (id 1);
    * article 1: topic ``science``, flavor ``discovery``;
    * article 2: topic ``health``, flavor ``breakthrough``, with "Election" in
      the title so avoid-term filtering has something to match;
    * both articles stored with ``accepted = 1``;
    * one daily brief (2026-05-30) ranking article 1 first and article 2 second.
    """
    db = tmp_path / "test.sqlite3"
    monkeypatch.setenv("GOODNEWS_DB", str(db))
    conn = connect(db)
    try:
        init_db(conn)
        conn.execute("INSERT INTO sources (id, name, feed_url, trust_score) VALUES (1,'S','http://s/f',7)")

        def add(aid, topic, flavor, title):
            # One article row plus its score row; all scores are fixed so only
            # topic, flavor and title vary between the seeded articles.
            conn.execute(
                "INSERT INTO articles (id, source_id, canonical_url, title, published_at, url_hash) "
                "VALUES (?,1,?,?, '2026-05-30T10:00:00+00:00', ?)",
                (aid, f"http://s/{aid}", title, f"h{aid}"),
            )
            conn.execute(
                "INSERT INTO article_scores (article_id, constructive_score, agency_score, "
                "human_benefit_score, cortisol_score, ragebait_score, pr_risk_score, accepted, topic, flavor) "
                "VALUES (?, 7, 3, 4, 1, 0, 2, 1, ?, ?)",
                (aid, topic, flavor),
            )

        add(1, "science", "discovery", "A quiet science discovery")
        add(2, "health", "breakthrough", "Election season health update")  # has avoid-able term
        conn.execute("INSERT INTO daily_briefs (id, brief_date, title) VALUES (1,'2026-05-30','Brief')")
        conn.execute("INSERT INTO daily_brief_items (brief_id, article_id, rank) VALUES (1,1,1),(1,2,2)")
        conn.commit()
    finally:
        # Close the seeding connection even if an insert fails, so a broken
        # schema does not leave an open handle on the temp file.
        conn.close()

    # Import after env is set so the app reads the temp DB.
    from goodnews.api import create_app

    return TestClient(create_app())
def _prefs(client, **kw):
return client.get("/api/feed", params={"prefs": json.dumps(kw)})
def test_bad_prefs_returns_200_and_full_feed(client):
r = client.get("/api/feed", params={"prefs": "not json at all"})
assert r.status_code == 200
assert r.json()["count"] == 2 # forgiving: bad blob ignored
def test_mute_topic_affects_feed(client):
    """Muting ``science`` leaves only the ``health`` article in the feed."""
    r = _prefs(client, mute_topics=["science"])
    # Check the status first so a server error fails here rather than as a
    # confusing KeyError on the body.
    assert r.status_code == 200
    topics = [item["topic"] for item in r.json()["items"]]
    assert topics == ["health"]
def test_avoid_term_filters_feed(client):
    """An avoid term removes articles whose title contains it, case-insensitively.

    The seeded title "Election season health update" must be filtered out by
    the lowercase term ``election``, leaving exactly one item.
    """
    r = _prefs(client, avoid_terms=["election"])
    # Check the status first so a server error fails here rather than as a
    # confusing KeyError on the body.
    assert r.status_code == 200
    titles = [item["title"] for item in r.json()["items"]]
    assert all("election" not in t.lower() for t in titles)
    assert len(titles) == 1
def test_brief_filters_down_without_refill(client):
full = client.get("/api/brief").json()
assert len(full["items"]) == 2
muted = client.get("/api/brief", params={"prefs": json.dumps({"mute_topics": ["health"]})}).json()
assert [i["topic"] for i in muted["items"]] == ["science"]
def test_category_counts_match_filtered_feed(client):
counts = client.get("/api/category-counts", params={"prefs": json.dumps({"mute_topics": ["health"]})}).json()
assert all(c["topic"] != "health" for c in counts)