89c0fbe1f6
The deploy pipeline runs from the working tree, so a wave of shipped features
had never been committed. This snapshots git to what's actually running.
SEO impression recovery (live + verified):
- Duplicate /a/{id} pages now 301-redirect to their canonical twin instead of
returning 404 (a hard 404 silently dropped already-indexed URLs and tanked impressions).
- Dedup representative selection reworked: accepted/serveable -> established
rep (URL stability) -> quality score, so an accepted page never retires to a
rejected rep and an indexed canonical doesn't churn when a newer twin arrives.
- HEAD /a/{id} returns the same status as GET (api_route GET+HEAD) instead of
falling through to the static mount and 404ing.
- `dedup --force-recluster`: cycle-locked, model-free re-cluster to re-apply the
policy to the existing corpus (shared cycle_lock context manager).
- CLI honors GOODNEWS_DB for its default --db (was silently ignored).
Publishing Desk (admin tool to post highlights to X via Web Intents):
- publishing.py queue/rank/handle-resolution; admin UI; full searchable emoji
picker (bundled data, no CDN) for the blurb editor.
Play games + site:
- Bloom (word-wheel), Memory Match, daily ritual set, Zen Den (dev-gated).
- English-only language gate; source prospecting; paywall + dedup hardening.
Tests: full suite green (349). Ignores tightened (node_modules, data/*.db).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
64 lines
3.0 KiB
Python
64 lines
3.0 KiB
Python
"""English-only gate: non-English articles are HELD (reason_code='non_english'),
preserved (not deleted) and distinct from calm-filter rejections, so they don't
penalize a multilingual source and can be revisited when translation lands."""
|
|
from goodnews import queries
|
|
from goodnews.db import connect, init_db
|
|
from goodnews.llm import normalize_scores, upsert_article_score
|
|
|
|
|
|
def _data(**kw):
|
|
base = {
|
|
"constructive_score": 7, "cortisol_score": 1, "ragebait_score": 1, "agency_score": 5,
|
|
"human_benefit_score": 6, "novelty_score": 4, "pr_risk_score": 2, "accepted": True,
|
|
"topic": "science", "flavor": "discovery", "tags": [],
|
|
"reason_code": "ok", "reason_text": "good",
|
|
}
|
|
base.update(kw)
|
|
return base
|
|
|
|
|
|
def test_english_passes_through():
    """An item tagged ``language="en"`` keeps its accepted/ok verdict."""
    s = normalize_scores(_data(language="en"), "m")
    # Separate asserts so a failure points at the exact field that regressed.
    assert s["accepted"] == 1
    assert s["reason_code"] == "ok"
    assert s["language"] == "en"
|
|
|
|
|
|
def test_en_variants_count_as_english():
    """Regional, upper-case and underscore spellings of ``en`` are accepted."""
    for lang in ("en-US", "EN", "en_us", "en-GB"):
        # The message names the failing variant; without it the loop hides it.
        assert normalize_scores(_data(language=lang), "m")["accepted"] == 1, lang
|
|
|
|
|
|
def test_non_english_is_held_not_a_rejection():
    """A ``de`` item is not accepted and is bucketed as ``non_english``."""
    s = normalize_scores(_data(language="de"), "m")
    assert s["accepted"] == 0
    assert s["reason_code"] == "non_english"  # distinct bucket, not a calm-filter reject
    assert s["language"] == "de"
    assert "non-English" in s["reason_text"]
|
|
|
|
|
|
def test_missing_or_unknown_language_defaults_to_english():
    """Empty, ``und`` and absent language values are all still accepted."""
    # a model hiccup must never silently drop genuine English content
    assert normalize_scores(_data(language=""), "m")["accepted"] == 1, "empty language"
    assert normalize_scores(_data(language="und"), "m")["accepted"] == 1, "und language"
    # no language key at all
    assert normalize_scores(_data(), "m")["accepted"] == 1, "missing language key"
|
|
|
|
|
|
def test_non_english_buckets_even_a_content_reject():
    """A non-English item that was also content-rejected reports ``non_english``."""
    # a non-English item that was also content-rejected is still 'held', so source
    # metrics can separate language-holds from calm rejections cleanly
    s = normalize_scores(
        _data(language="es", accepted=False, reason_code="ragebait"), "m"
    )
    assert s["accepted"] == 0
    assert s["reason_code"] == "non_english"
|
|
|
|
|
|
def test_language_persisted_structurally_and_inspector_marks_held():
    """The language is stored as its own column and the item lists as 'held'.

    Seeds one source and one article in an in-memory database, upserts a
    ``de`` score for it, then checks the stored ``article_scores`` row and the
    ``queries.source_articles`` filters.
    """
    c = connect(":memory:")
    init_db(c)
    c.execute("INSERT INTO sources (id,name,feed_url,trust_score) VALUES (1,'S','http://s/f',5)")
    c.execute("INSERT INTO articles (id,source_id,canonical_url,title,url_hash) VALUES (1,1,'http://x','T','h1')")
    c.commit()

    upsert_article_score(c, 1, normalize_scores(_data(language="de"), "m"))

    row = c.execute(
        "SELECT accepted, reason_code, language FROM article_scores WHERE article_id=1"
    ).fetchone()
    # structured, not parsed: the values are read back from dedicated columns
    assert row["language"] == "de"
    assert row["reason_code"] == "non_english"
    assert row["accepted"] == 0

    # inspector: shows under 'held', flagged held=True, and NOT under 'rejected'
    held = queries.source_articles(c, 1, filter="held")
    assert len(held) == 1
    assert held[0]["held"] is True
    assert queries.source_articles(c, 1, filter="rejected") == []
|