c600145ba5
queries.feed was the main chokepoint, but several discovery paths have their own SQL. Apply the shared source exclusion to all of them so "no paywalls" is truly site-wide: - briefs.build_daily_brief: EXCLUDE paywalled candidates (was: demote) — never stored in a new brief. - queries.brief: stored-brief retrieval (covers /today + /api/brief) filters the paywalled source. - digest.digest_items + followed_digest_items: the morning email + "from what you follow" omit paywalled sources. - sitemap(): paywalled article pages excluded from the sitemap. All reuse queries.paywalled_source_ids (admin override still wins). Regression tests (test_paywall_exclusion.py): never stored in a new brief; /today + digest omit it; followed-source email omits it; Saved retains it; 'free' override restores eligibility. 423 backend tests green. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
263 lines
12 KiB
Python
263 lines
12 KiB
Python
"""Opt-in daily digest — a finite, calm morning email of today's brief.
|
||
|
||
Ritual, not capture: no streaks, no "you missed", no urgency, no unread counts.
|
||
One send per opted-in user per day, gated to a morning window in the site
|
||
timezone and deduped via digest_sends. On a thin day it skips quietly rather
|
||
than padding. Reuses the existing SMTP/email pipeline.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
import secrets
|
||
import sqlite3
|
||
from datetime import datetime
|
||
from html import escape
|
||
|
||
from . import email_send
|
||
from .localtime import local_now, local_today
|
||
from .paywall import is_paywalled, is_paywalled_for_source
|
||
from .queries import paywalled_source_ids
|
||
|
||
# First site-local hour at which the digest may be sent. Overridable through
# the GOODNEWS_DIGEST_HOUR environment variable; defaults to 7.
DIGEST_HOUR = int(os.getenv("GOODNEWS_DIGEST_HOUR", "7"))

# Width of the send window in hours: sends are allowed from DIGEST_HOUR up to,
# but not including, DIGEST_HOUR + DIGEST_WINDOW_HOURS (site-local).
DIGEST_WINDOW_HOURS = 4

# Fewest brief items worth emailing; below this the day is skipped, not padded.
MIN_ITEMS = 4
|
||
|
||
|
||
def _base_url() -> str:
    """Return the public site root with any trailing slashes removed.

    Read from the GOODNEWS_PUBLIC_BASE_URL environment variable on every
    call; falls back to "https://upbeatbytes.com" when it is unset.
    """
    configured = os.getenv("GOODNEWS_PUBLIC_BASE_URL", "https://upbeatbytes.com")
    return configured.rstrip("/")
|
||
|
||
|
||
def digest_items(conn: sqlite3.Connection, brief_date: str, limit: int = 7) -> list[dict]:
    """Load the stored brief for `brief_date` as dicts, in rank order.

    Only articles whose source has content_visible = 1 are returned, and any
    source id reported by paywalled_source_ids(conn) is filtered out in SQL.

    Args:
        conn: open database connection; rows must be convertible with dict().
        brief_date: the daily_briefs.brief_date value to look up.
        limit: maximum number of items to return.

    Returns:
        One dict per item with id, title, canonical_url, source,
        paywall_override, reason_text and summary, plus a "paywalled" key
        holding the result of is_paywalled_for_source() for that item.
    """
    blocked = paywalled_source_ids(conn)
    if blocked:
        marks = ",".join(["?"] * len(blocked))
        exclusion = f" AND a.source_id NOT IN ({marks})"
    else:
        exclusion = ""

    query = f"""
        SELECT a.id, a.title, a.canonical_url, s.name AS source, s.paywall_override, sc.reason_text,
               (SELECT summary FROM article_summaries WHERE article_id = a.id) AS summary
        FROM daily_briefs b
        JOIN daily_brief_items bi ON bi.brief_id = b.id
        JOIN articles a ON a.id = bi.article_id
        JOIN sources s ON s.id = a.source_id
        LEFT JOIN article_scores sc ON sc.article_id = a.id
        WHERE b.brief_date = ? AND s.content_visible = 1{exclusion}
        ORDER BY bi.rank
        LIMIT ?
        """
    # Bind order follows the placeholders: date, excluded source ids, limit.
    fetched = conn.execute(query, (brief_date, *blocked, limit)).fetchall()

    def _with_flag(row) -> dict:
        item = dict(row)
        item["paywalled"] = is_paywalled_for_source(item["canonical_url"], item.get("paywall_override"))
        return item

    return [_with_flag(row) for row in fetched]
|
||
|
||
|
||
def followed_digest_items(conn: sqlite3.Connection, user_id: int, exclude_ids, limit: int = 3) -> list[dict]:
    """Pick up to `limit` recent items from the user's followed sources/tags.

    Feeds the digest's "From what you follow" section. Candidates must have an
    accepted score, not be a duplicate, come from a content-visible source
    that is not in paywalled_source_ids(conn), and match a followed source id
    or tag. They are taken newest first (published_at, else discovered_at).
    Anything already in the brief is dropped, and each source contributes at
    most one item so a single follow can't dominate.

    Args:
        conn: open database connection; rows must support lookup by column name.
        user_id: the user whose user_follows rows are consulted.
        exclude_ids: article ids to leave out (the ids already in the brief).
        limit: maximum number of items returned; 0 or less yields [].

    Returns:
        Row dicts with an added "paywalled" key. [] when the user follows
        nothing usable (the caller then omits the section). Only the 30
        newest matching rows are examined, so fewer than `limit` items can
        come back even if older matches exist.
    """
    frows = conn.execute("SELECT kind, value FROM user_follows WHERE user_id = ?", (user_id,)).fetchall()
    # isdecimal(), not isdigit(): isdigit() also accepts characters such as
    # "²" that int() cannot parse, so one odd follow row would raise
    # ValueError here instead of simply being ignored.
    fsources = [int(r["value"]) for r in frows if r["kind"] == "source" and str(r["value"]).isdecimal()]
    ftags = [str(r["value"]).lower() for r in frows if r["kind"] == "tag"]
    if not fsources and not ftags:
        return []

    # Build the "matches something I follow" predicate and its bind values.
    ors, params = [], []
    if fsources:
        ors.append(f"a.source_id IN ({','.join('?' * len(fsources))})")
        params += fsources
    if ftags:
        ors.append(
            f"EXISTS (SELECT 1 FROM article_tags at WHERE at.article_id = a.id "
            f"AND at.tag IN ({','.join('?' * len(ftags))}))"
        )
        params += ftags

    pwx = paywalled_source_ids(conn)
    pw_clause = f" AND a.source_id NOT IN ({','.join('?' * len(pwx))})" if pwx else ""
    rows = conn.execute(
        f"""
        SELECT a.id, a.title, a.canonical_url, s.name AS source, s.paywall_override, a.source_id, sc.reason_text,
               (SELECT summary FROM article_summaries WHERE article_id = a.id) AS summary
        FROM articles a
        JOIN sources s ON s.id = a.source_id
        JOIN article_scores sc ON sc.article_id = a.id
        WHERE sc.accepted = 1 AND a.duplicate_of IS NULL AND s.content_visible = 1{pw_clause}
          AND ({' OR '.join(ors)})
        ORDER BY COALESCE(a.published_at, a.discovered_at) DESC
        LIMIT 30
        """,
        # The paywall placeholders precede the follow placeholders in the SQL.
        [*pwx, *params],
    ).fetchall()

    exclude, seen_sources, out = set(exclude_ids), set(), []
    for r in rows:
        # Checked before appending so a limit of 0 really returns nothing.
        if len(out) >= limit:
            break
        d = dict(r)
        if d["id"] in exclude or d["source_id"] in seen_sources:
            continue
        seen_sources.add(d["source_id"])
        d["paywalled"] = is_paywalled_for_source(d["canonical_url"], d.get("paywall_override"))
        out.append(d)
    return out
|
||
|
||
|
||
def _weekday(brief_date: str) -> str:
    """Return the weekday name for a YYYY-MM-DD date string.

    Falls back to the literal word "today" when the value is not a string or
    does not parse as a date in that format.
    """
    try:
        parsed = datetime.strptime(brief_date, "%Y-%m-%d")
        name = parsed.strftime("%A")
    except (TypeError, ValueError):
        name = "today"
    return name
|
||
|
||
|
||
def _item_text_lines(it: dict, base: str) -> list[str]:
    """Render one digest item as lines for the plain-text email body.

    Always emits the bullet headline, the on-site link and the source link;
    the summary and "Why it's here" lines appear only when the item has a
    truthy value for them. The last line ends with a newline so that items
    are separated by a blank line once the caller joins everything.
    """
    extras = []
    summary = it.get("summary")
    if summary:
        extras.append(f" {summary}")
    reason = it.get("reason_text")
    if reason:
        extras.append(f" Why it's here: {reason}")
    return [
        f"• {it['title']} ({it['source']})",
        *extras,
        f" Read: {base}/a/{it['id']}",
        f" Source: {it['canonical_url']}\n",
    ]
|
||
|
||
|
||
def _item_html(it: dict, base: str) -> str:
    """Render one digest item as an inline-styled HTML block.

    Title, source name, summary, reason text and the canonical URL are passed
    through html.escape(). The summary and "Why it's here" blocks are emitted
    only when the item has a truthy value for them, and a padlock is appended
    to the source link text when the item's "paywalled" flag is truthy.
    """
    summary_text = it.get("summary")
    if summary_text:
        summary_html = f'<div style="font-size:15px;line-height:1.5;color:#16263a">{escape(summary_text)}</div>'
    else:
        summary_html = ""

    reason = it.get("reason_text")
    if reason:
        why_html = f'<div style="font-size:13px;color:#5d6b78;margin-top:6px"><em>Why it’s here:</em> {escape(reason)}</div>'
    else:
        why_html = ""

    padlock = " \U0001f512" if it.get("paywalled") else ""
    article_href = f'{base}/a/{it["id"]}'

    pieces = [
        '<div style="margin:0 0 22px;padding:0 0 18px;border-bottom:1px solid #e8e3d8">',
        f'<a href="{article_href}" style="font-size:18px;font-weight:600;color:#16263a;text-decoration:none">{escape(it["title"])}</a>',
        f'<div style="color:#5d6b78;font-size:13px;margin:3px 0 8px">{escape(it["source"])}</div>',
        summary_html,
        why_html,
        '<div style="margin-top:10px;font-size:14px">',
        f'<a href="{article_href}" style="color:#0083ad;text-decoration:none">Read on upbeatBytes</a>',
        f' · <a href="{escape(it["canonical_url"])}" style="color:#5d6b78;text-decoration:none">Full story at source{padlock}</a>',
        '</div></div>',
    ]
    return "".join(pieces)
|
||
|
||
|
||
def build_digest(items: list[dict], brief_date: str, unsub_url: str, base: str | None = None,
                 followed: list[dict] | None = None) -> tuple[str, str, str]:
    """Return (subject, text, html) for the digest — calm and dated, no urgency.

    `followed` (optional) adds a small "From what you follow" section AFTER the
    editorial brief — only when there are qualifying items; omitted otherwise.
    The brief stays the star: the subject and the read count reflect `items`
    only, never `followed`.

    Args:
        items: the brief's items, rendered in the order given.
        brief_date: YYYY-MM-DD date of the brief; used for the weekday label.
        unsub_url: unsubscribe link placed in the text and HTML footers.
        base: site root for links; defaults to _base_url() when falsy.
        followed: optional extra items for the personal section.

    Returns:
        A (subject, plain-text body, HTML body) tuple.
    """
    base = base or _base_url()
    followed = followed or []
    n = len(items)
    weekday = _weekday(brief_date)
    # One pluralised phrase shared by the subject and the text intro, so the
    # fallback subject can no longer read "1 calm reads".
    reads = f"{n} calm read{'' if n == 1 else 's'}"
    # _weekday() yields the lowercase word "today" when the date won't parse;
    # the subject line wants it capitalised.
    subject_day = "Today" if weekday == "today" else weekday
    subject = f"{subject_day}'s upbeatBytes · {reads}"

    text_lines = [
        "upbeatBytes — Daily Highlights",
        f"{reads} for {weekday}.\n",
        "Good morning. A small, hopeful handful of what's going right — and there's",
        "always more waiting on the site whenever you want it.\n",
    ]
    for it in items:
        text_lines += _item_text_lines(it, base)
    if followed:
        text_lines.append("\n— From what you follow —\n")
        for it in followed:
            text_lines += _item_text_lines(it, base)
    text_lines.append(f"That's today's highlights — more good news is always waiting at {base}. See you tomorrow.")
    text_lines.append(f"\nTo stop these emails: {unsub_url}")
    text = "\n".join(text_lines)

    main_blocks = "".join(_item_html(it, base) for it in items)
    followed_html = ""
    if followed:
        followed_html = (
            # A darker, heavier rule marks the section change (the item separators
            # above are a light #e8e3d8) — a clear shift from brief to personal picks.
            '<div style="border-top:2px solid #9aa6b2;margin:2px 0 22px"></div>'
            '<div style="font-size:20px;font-weight:700;letter-spacing:-0.01em;color:#0083ad;'
            'margin:0 0 28px">From what you follow</div>'
            + "".join(_item_html(it, base) for it in followed)
        )
    html = (
        '<div style="max-width:600px;margin:0 auto;padding:8px 4px;'
        'font-family:-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif;color:#16263a">'
        # The real logo as a small hosted PNG (SVG isn't email-safe); alt text
        # keeps the brand when a client blocks remote images.
        f'<img src="{base}/logo-email.png" alt="upbeatBytes" width="180" '
        'style="display:block;border:0;outline:none;text-decoration:none;height:auto;margin:0 0 2px">'
        '<div style="font-size:11px;letter-spacing:0.14em;text-transform:uppercase;color:#0083ad;'
        f'margin:5px 0 0">Daily Highlights · {escape(weekday)}</div>'
        '<p style="font-size:15px;line-height:1.5;color:#3b4754;margin:28px 0 18px">'
        'Good morning. A small, hopeful handful of what’s going right — and there’s always more '
        f'<a href="{base}" style="color:#0083ad;text-decoration:none">waiting on the site</a> when you want it.</p>'
        '<div style="border-top:1px solid #e8e3d8;margin:0 0 24px"></div>'
        + main_blocks
        + followed_html
        + '<p style="font-size:15px;color:#3f7048;margin:8px 0 0">That’s today’s highlights — more good news is '
        f'always <a href="{base}" style="color:#3f7048">waiting on upbeatBytes</a>. See you tomorrow.</p>'
        '<p style="font-size:12px;color:#9aa6b2;margin-top:24px">You’re getting this because you turned on '
        f'the daily digest. <a href="{unsub_url}" style="color:#9aa6b2">Unsubscribe</a>.</p>'
        '</div>'
    )
    return subject, text, html
|
||
|
||
|
||
def unsub_url(user: dict, base: str | None = None) -> str:
    """Build the unsubscribe link for `user`.

    The link carries the user's id as `u` and their digest_unsub_token as
    `t`. `base` is the site root; when falsy, _base_url() is used instead.
    """
    root = base if base else _base_url()
    query = f"u={user['id']}&t={user['digest_unsub_token']}"
    return f"{root}/api/digest/unsubscribe?{query}"
|
||
|
||
|
||
def send_due_digests(conn: sqlite3.Connection, force: bool = False, base: str | None = None) -> int:
    """Email today's brief to each opted-in user who has not received it yet.

    Unless `force` is true (manual CLI), nothing is sent outside the window
    from DIGEST_HOUR up to, but not including, DIGEST_HOUR +
    DIGEST_WINDOW_HOURS, measured on local_now(). A day with fewer than
    MIN_ITEMS brief items is skipped quietly. Users with a digest_sends row
    for today's date are not selected, and a row is written after each
    successful send.

    Args:
        conn: open database connection; commits are issued on it.
        force: bypass the time-window check.
        base: site root for links; defaults to _base_url() when falsy.

    Returns:
        The number of digests sent in this call.
    """
    if not force:
        current_hour = local_now().hour
        window_end = DIGEST_HOUR + DIGEST_WINDOW_HOURS
        if not (DIGEST_HOUR <= current_hour < window_end):
            return 0

    brief_date = local_today()
    items = digest_items(conn, brief_date)
    item_count = len(items)
    if item_count < MIN_ITEMS:
        return 0
    site = base or _base_url()
    # Ids already in the brief, so the personal section never repeats them.
    brief_ids = [entry["id"] for entry in items]

    pending = conn.execute(
        """
        SELECT u.id, u.email, u.digest_unsub_token
        FROM users u
        WHERE u.digest_enabled = 1
          AND NOT EXISTS (SELECT 1 FROM digest_sends d WHERE d.user_id = u.id AND d.brief_date = ?)
        """,
        (brief_date,),
    ).fetchall()

    delivered = 0
    for record in pending:
        user = dict(record)
        if not user.get("digest_unsub_token"):
            # Mint and persist a token before building the unsubscribe link.
            fresh_token = secrets.token_urlsafe(18)
            conn.execute("UPDATE users SET digest_unsub_token = ? WHERE id = ?", (fresh_token, user["id"]))
            conn.commit()
            user["digest_unsub_token"] = fresh_token

        link = unsub_url(user, site)
        followed = followed_digest_items(conn, user["id"], brief_ids)
        subject, text, html = build_digest(items, brief_date, link, site, followed=followed)
        # RFC 2369 / 8058: let inboxes offer native one-click unsubscribe.
        headers = {"List-Unsubscribe": f"<{link}>", "List-Unsubscribe-Post": "List-Unsubscribe=One-Click"}
        try:
            email_send.send_email(user["email"], subject, text, html=html, headers=headers)
        except Exception:  # noqa: BLE001 — one bad send shouldn't stop the rest; retry next window
            continue
        else:
            # Recorded only after a successful send, so a failed user stays
            # eligible the next time this runs.
            conn.execute(
                "INSERT OR IGNORE INTO digest_sends (user_id, brief_date, item_count) VALUES (?, ?, ?)",
                (user["id"], brief_date, item_count),
            )
            conn.commit()
            delivered += 1
    return delivered
|