Files
thejayman77 068073423f Initial commit: goodNews constructive-news ingestion prototype
Local-first RSS/Atom ingestion pipeline with metadata-only storage,
heuristic + local-LLM scoring, and daily brief builder.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 00:48:26 +00:00

170 lines
3.8 KiB
Python

from __future__ import annotations
import re
# Vocabulary suggesting progress, repair, generosity, or benefit.
# score_article counts matches from this set toward its constructive and
# human-benefit scores. Entries containing a space are matched as phrases.
POSITIVE_TERMS = {
    # Progress and problem solving
    "breakthrough", "progress", "improve", "improves", "improved",
    "solution", "solutions", "first", "record",
    # Recovery and rescue
    "recovery", "restore", "restores", "restoration", "rescue", "rescued",
    # Community and giving
    "volunteer", "community", "donate", "donation", "kindness", "hope",
    # Health
    "cure", "treatment", "therapy",
    # Environment
    "clean energy", "renewable", "conservation", "protect", "protects",
}
# Vocabulary suggesting someone is acting, building, or teaching.
# score_article counts matches from this set toward its agency score and,
# with a smaller weight, toward its constructive and human-benefit scores.
AGENCY_TERMS = {
    # Action verbs
    "helps", "helping", "protect", "protects", "builds", "creates",
    "launches", "teaches", "rebuild",
    # How-to and learning
    "how", "learn", "guide", "tool",
    # Organised efforts
    "program", "initiative", "effort", "plan",
}
# Vocabulary associated with tragedy, threat, conflict, or crisis.
# score_article counts matches from this set toward its cortisol score.
CORTISOL_TERMS = {
    # Violence and death
    "war", "killed", "dead", "death", "murder", "shooting", "attack",
    # Emergencies
    "crisis", "catastrophe", "disaster", "collapse", "panic",
    # Alarm
    "warning", "threat", "fear", "fears",
    # Legal trouble and scandal
    "lawsuit", "scandal",
}
# Outrage-oriented vocabulary typical of rage-bait headlines.
# score_article counts matches from this set toward its ragebait score.
# Entries containing a space are matched as phrases.
RAGEBAIT_TERMS = {
    # Attack verbs
    "slams", "blasts", "destroyed", "humiliates",
    # Outrage words
    "furious", "outrage", "rage", "shocking",
    # Stock phrases
    "you won't believe", "sparks backlash",
}
# Vocabulary typical of corporate announcements and promotional copy.
# score_article counts matches from this set toward its PR-risk score.
# Entries containing a space are matched as phrases.
PR_TERMS = {
    # Announcement verbs
    "announces", "unveils", "raises",
    # Business framing
    "funding round", "partnership", "brand",
    # Paid or promotional material
    "sponsored", "press release",
}
# Tokenizer for lowercased text: runs of ASCII letters, digits and
# apostrophes. Apostrophes are kept so contractions such as "won't" stay
# a single token.
WORD_RE = re.compile(r"[a-z0-9']+")

# Typographic apostrophes are folded to ASCII "'" before matching, so that
# terms written with a plain apostrophe also match text that uses curly ones.
_APOSTROPHE_FOLD = str.maketrans({"\u2018": "'", "\u2019": "'", "\u02bc": "'"})


def _count_terms(text: str, terms: set[str]) -> int:
    """Count how many distinct entries of *terms* occur in *text*.

    Each term contributes at most 1, however often it appears. Matching is
    case-insensitive on the text side; terms are expected to be lowercase,
    use single spaces, and use the ASCII apostrophe.

    Args:
        text: Free text to search (for example a headline plus snippet).
        terms: Single words, matched against whole tokens, and phrases
            (any entry containing a space), matched as substrings.

    Returns:
        The number of terms found in the text.
    """
    # Fold curly apostrophes and collapse whitespace runs (including
    # newlines) so phrases match regardless of how the text was typeset.
    normalized = " ".join(text.lower().translate(_APOSTROPHE_FOLD).split())

    words: set[str] = set()
    for token in WORD_RE.findall(normalized):
        # WORD_RE keeps apostrophes, so a quoted word arrives as
        # "'breakthrough'" and a possessive as "community's". Index the bare
        # forms as well, otherwise those words would never match.
        bare = token.strip("'")
        words.update((token, bare))
        if bare.endswith("'s"):
            words.add(bare[:-2])
    # A token made only of apostrophes strips down to ""; never match on it.
    words.discard("")

    return sum(
        1
        for term in terms
        if (term in normalized if " " in term else term in words)
    )
# Acceptance thresholds. Both the accept decision and the rejection reason
# are derived from these, so the two cannot drift apart.
_MIN_CONSTRUCTIVE_SCORE = 5
_MAX_CORTISOL_SCORE = 5
_MAX_RAGEBAIT_SCORE = 3
_MAX_PR_RISK_SCORE = 7


def _clamp_score(value: int) -> int:
    """Clamp *value* into the 0-10 range shared by every score."""
    return max(0, min(10, value))


def _acceptance_decision(
    constructive_score: int,
    cortisol_score: int,
    ragebait_score: int,
    pr_risk_score: int,
) -> tuple[bool, str, str]:
    """Return ``(accepted, reason_code, reason_text)`` for a set of scores."""
    # Rejection reasons are checked in priority order: ragebait, then
    # cortisol, then PR risk. A weak constructive signal alone is not a
    # rejection; it sends the article to review.
    if ragebait_score > _MAX_RAGEBAIT_SCORE:
        return (
            False,
            "heuristic_reject_ragebait_language",
            "Headline or snippet contains outrage-oriented language.",
        )
    if cortisol_score > _MAX_CORTISOL_SCORE:
        return (
            False,
            "heuristic_reject_cortisol_heavy",
            "Headline or snippet appears tragedy, threat, conflict, or crisis centered.",
        )
    if pr_risk_score > _MAX_PR_RISK_SCORE:
        return (
            False,
            "heuristic_reject_pr_risk",
            "Headline or source has signs of corporate PR framing.",
        )
    if constructive_score < _MIN_CONSTRUCTIVE_SCORE:
        return (
            False,
            "heuristic_needs_review",
            "Not enough constructive signal for automatic acceptance.",
        )
    return (
        True,
        "heuristic_constructive_candidate",
        "Constructive or agency-oriented language with low obvious cortisol/ragebait signals.",
    )


def score_article(title: str, description: str | None, source_pr_risk: int) -> dict:
    """Score an article's headline and snippet with keyword heuristics.

    Args:
        title: Article headline.
        description: Optional summary or snippet; ``None`` is treated as
            empty text.
        source_pr_risk: Baseline PR-risk points for the article's source.
            Points for PR vocabulary found in the text are added to it.

    Returns:
        A dict holding the integer scores ``constructive_score``,
        ``cortisol_score``, ``ragebait_score``, ``agency_score``,
        ``human_benefit_score``, ``novelty_score`` and ``pr_risk_score``
        (each within 0-10), ``accepted`` (1 or 0), ``reason_code``,
        ``reason_text`` and ``model_name``.
    """
    text = f"{title or ''}. {description or ''}"

    positive = _count_terms(text, POSITIVE_TERMS)
    agency = _count_terms(text, AGENCY_TERMS)
    cortisol = _count_terms(text, CORTISOL_TERMS)
    ragebait = _count_terms(text, RAGEBAIT_TERMS)
    pr_terms = _count_terms(text, PR_TERMS)

    # Every score is clamped to 0-10. The lower bound only matters for
    # pr_risk_score, where a negative source_pr_risk would otherwise yield
    # a negative score.
    constructive_score = _clamp_score(2 + positive * 2 + agency)
    agency_score = _clamp_score(1 + agency * 2)
    cortisol_score = _clamp_score(cortisol * 3)
    ragebait_score = _clamp_score(ragebait * 4)
    pr_risk_score = _clamp_score(source_pr_risk + pr_terms * 2)
    human_benefit_score = _clamp_score(positive * 2 + agency)
    # Novelty is a fixed constant; this heuristic does not estimate it.
    novelty_score = 5

    accepted, reason_code, reason_text = _acceptance_decision(
        constructive_score, cortisol_score, ragebait_score, pr_risk_score
    )

    return {
        "constructive_score": constructive_score,
        "cortisol_score": cortisol_score,
        "ragebait_score": ragebait_score,
        "agency_score": agency_score,
        "human_benefit_score": human_benefit_score,
        "novelty_score": novelty_score,
        "pr_risk_score": pr_risk_score,
        "accepted": 1 if accepted else 0,
        "reason_code": reason_code,
        "reason_text": reason_text,
        "model_name": "heuristic-v0",
    }