From a5cea7cd7431a03ac751f826f91d7cd589cf1e79 Mon Sep 17 00:00:00 2001
From: jay <thejayman77@gmail.com>
Date: Tue, 9 Jun 2026 09:10:57 -0400
Subject: [PATCH] Feedback reply: admin-only WYSIWYG editor (server stays the
 adult)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the Markdown composer with a small contenteditable WYSIWYG (Codex
greenlit for this narrow, admin-only surface).

* markup.py: render_reply_html → sanitize_reply_html + reply_html_to_text.
  Allowlist rebuild via stdlib HTMLParser — keeps strong/em/p/br/ul/ol/li and
  span ONLY with a whitelisted font-size (13/15/18/22px); normalizes b→strong,
  i→em, div→p, <font size> → safe span; drops links/images/arbitrary styles
  (content kept as escaped text) and discards script/style content entirely.
* API: FeedbackReplyBody.html (raw editor HTML); endpoint sanitizes → message_html,
  derives plain text → stored message + the email text/plain part. Unchanged:
  multipart send, store-on-success, conn released during SMTP, mark-read, 404/400/422.
* Frontend: contenteditable editor + toolbar (Bold/Italic/Size/• List/1. List),
  execCommand with styleWithCSS=false for semantic tags, font size wraps the
  selection in a fixed-px span, paste intercepted as plain text. No links yet.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 frontend/src/routes/admin/+page.svelte | 128 +++++++++++--------
 goodnews/api.py                        |  15 +--
 goodnews/markup.py                     | 164 +++++++++++++++++++------
 tests/test_feedback.py                 |  13 +-
 tests/test_markup.py                   |  48 +++++---
 5 files changed, 254 insertions(+), 114 deletions(-)
diff --git a/frontend/src/routes/admin/+page.svelte b/frontend/src/routes/admin/+page.svelte
index 87f7b79..9e363a4 100644
--- a/frontend/src/routes/admin/+page.svelte
+++ b/frontend/src/routes/admin/+page.svelte
@@ -136,52 +136,58 @@
     }
   }
 
-  // In-site reply (Markdown-ish: textarea + toolbar; server renders sanitized HTML).
+  // In-site reply: small admin-only WYSIWYG (contenteditable + execCommand).
+  // The editor's HTML is sanitized SERVER-SIDE; the client just collects it.
   let replyingId = $state(null);
-  let replyText = $state('');
   let replyBusy = $state(false);
   let replyErr = $state('');
-  let replyTextarea = $state(null);
-  function openReply(f) { replyingId = f.id; replyText = ''; replyErr = ''; }
-  function cancelReply() { replyingId = null; replyText = ''; replyErr = ''; }
+  let replyEmpty = $state(true);
+  let replyEditor = $state(null);
+  const SIZE_PX = { small: 13, normal: 15, large: 18, xlarge: 22 };
 
-  // Wrap the current selection (e.g. **bold**), restoring focus + selection.
-  function mdWrap(token) {
-    const el = replyTextarea;
-    if (!el) return;
-    const s = el.selectionStart, e = el.selectionEnd;
-    const sel = replyText.slice(s, e) || 'text';
-    replyText = replyText.slice(0, s) + token + sel + token + replyText.slice(e);
-    queueMicrotask(() => {
-      el.focus();
-      el.selectionStart = s + token.length;
-      el.selectionEnd = s + token.length + sel.length;
-    });
+  function openReply(f) { replyingId = f.id; replyErr = ''; replyEmpty = true; }
+  function cancelReply() { replyingId = null; replyErr = ''; }
+  function focusEditor(node) { queueMicrotask(() => node.focus()); }
+  function onEditorInput() { replyEmpty = !(replyEditor?.textContent || '').trim(); }
+  // Paste as plain text — never let rich/pasted HTML into the editor.
+  function onPaste(e) {
+    e.preventDefault();
+    const text = (e.clipboardData || window.clipboardData).getData('text/plain');
+    document.execCommand('insertText', false, text);
   }
-  // Prefix each selected line (e.g. "- " or "## ").
-  function mdPrefix(prefix) {
-    const el = replyTextarea;
-    if (!el) return;
-    const s = el.selectionStart, e = el.selectionEnd;
-    const start = replyText.lastIndexOf('\n', s - 1) + 1;
-    let end = replyText.indexOf('\n', e);
-    if (end === -1) end = replyText.length;
-    const block = replyText.slice(start, end) || 'item';
-    const prefixed = block.split('\n').map((ln) => prefix + ln).join('\n');
-    replyText = replyText.slice(0, start) + prefixed + replyText.slice(end);
-    queueMicrotask(() => el.focus());
+  // Bold/italic/lists via execCommand; styleWithCSS=false → semantic <b>/<i>/<ul>.
+  function cmd(c) {
+    replyEditor?.focus();
+    try { document.execCommand('styleWithCSS', false, false); } catch { /* not all browsers */ }
+    document.execCommand(c, false, null);
+    onEditorInput();
+  }
+  // Font size: wrap the selection in a span with a fixed, whitelisted px.
+  function setSize(key) {
+    const px = SIZE_PX[key];
+    if (!px) return;
+    replyEditor?.focus();
+    const sel = window.getSelection();
+    if (!sel || !sel.rangeCount) return;
+    const range = sel.getRangeAt(0);
+    if (range.collapsed) return; // need a selection to size
+    const span = document.createElement('span');
+    span.style.fontSize = px + 'px';
+    try { span.appendChild(range.extractContents()); range.insertNode(span); } catch { /* ignore */ }
+    sel.removeAllRanges();
+    onEditorInput();
   }
   async function sendReply(f) {
-    const msg = replyText.trim();
-    if (!msg || replyBusy) return;
+    if (replyEmpty || replyBusy) return;
+    const html = replyEditor?.innerHTML || '';
     replyBusy = true; replyErr = '';
     try {
-      const res = await postJSON(`/api/admin/feedback/${f.id}/reply`, { message: msg });
+      const res = await postJSON(`/api/admin/feedback/${f.id}/reply`, { html });
       f.replies = [...(f.replies || []), res.reply];
       f.read_at = f.read_at || res.reply.sent_at; // server marked it read
-      replyingId = null; replyText = '';
+      replyingId = null;
     } catch (e) {
-      replyErr = e?.message || 'Could not send — the draft is kept.';
+      replyErr = e?.message || 'Could not send — your draft is kept.';
     } finally {
       replyBusy = false;
     }
@@ -475,15 +481,33 @@
 
                 {#if replyingId === f.id}
                   <div class="composer">
-                    <div class="mdbar">
-                      <button type="button" class="mdbtn" title="Bold (**…**)" onclick={() => mdWrap('**')}><b>B</b></button>
-                      <button type="button" class="mdbtn" title="Bullet list" onclick={() => mdPrefix('- ')}>• List</button>
-                      <button type="button" class="mdbtn" title="Heading" onclick={() => mdPrefix('## ')}>H</button>
+                    <div class="rtbar">
+                      <button type="button" class="rtbtn" title="Bold" onmousedown={(e) => e.preventDefault()} onclick={() => cmd('bold')}><b>B</b></button>
+                      <button type="button" class="rtbtn" title="Italic" onmousedown={(e) => e.preventDefault()} onclick={() => cmd('italic')}><i>I</i></button>
+                      <select class="szsel" title="Text size" onmousedown={(e) => e.stopPropagation()} onchange={(e) => { setSize(e.currentTarget.value); e.currentTarget.value = ''; }}>
+                        <option value="">Size</option>
+                        <option value="small">Small</option>
+                        <option value="normal">Normal</option>
+                        <option value="large">Large</option>
+                        <option value="xlarge">X-Large</option>
+                      </select>
+                      <button type="button" class="rtbtn" title="Bullet list" onmousedown={(e) => e.preventDefault()} onclick={() => cmd('insertUnorderedList')}>• List</button>
+                      <button type="button" class="rtbtn" title="Numbered list" onmousedown={(e) => e.preventDefault()} onclick={() => cmd('insertOrderedList')}>1. List</button>
                     </div>
-                    <textarea bind:this={replyTextarea} bind:value={replyText} rows="4" placeholder="Write a reply… **bold**, - bullets, ## heading"></textarea>
+                    <div
+                      class="rtedit"
+                      contenteditable="true"
+                      bind:this={replyEditor}
+                      use:focusEditor
+                      oninput={onEditorInput}
+                      onpaste={onPaste}
+                      role="textbox"
+                      aria-multiline="true"
+                      aria-label="Write a reply"
+                    ></div>
                     {#if replyErr}<p class="cerr">{replyErr}</p>{/if}
                     <div class="cbtns">
-                      <button class="csend" onclick={() => sendReply(f)} disabled={replyBusy || !replyText.trim()}>
+                      <button class="csend" onclick={() => sendReply(f)} disabled={replyBusy || replyEmpty}>
                         {replyBusy ? 'Sending…' : 'Send reply'}
                       </button>
                       <button class="act" onclick={cancelReply}>Cancel</button>
@@ -669,18 +693,24 @@
   .rep .repmsg :global(h4),
   .rep .repmsg :global(h5) { margin: 6px 0 4px; font-size: 0.95rem; }
   .composer { margin: 10px 0 0; }
-  .mdbar { display: flex; gap: 6px; margin-bottom: 6px; }
-  .mdbtn {
-    font: inherit; font-size: 0.78rem; background: var(--surface); border: 1px solid var(--line);
-    color: var(--ink); border-radius: 7px; padding: 3px 9px; cursor: pointer; line-height: 1.4;
+  .rtbar { display: flex; gap: 6px; margin-bottom: 6px; flex-wrap: wrap; align-items: center; }
+  .rtbtn {
+    font: inherit; font-size: 0.82rem; background: var(--surface); border: 1px solid var(--line);
+    color: var(--ink); border-radius: 7px; padding: 3px 10px; cursor: pointer; line-height: 1.4; min-width: 30px;
   }
-  .mdbtn:hover { border-color: var(--accent); color: var(--accent-deep); }
-  .composer textarea {
-    width: 100%; box-sizing: border-box; font: inherit; font-size: 0.9rem;
+  .rtbtn:hover { border-color: var(--accent); color: var(--accent-deep); }
+  .szsel {
+    font: inherit; font-size: 0.8rem; background: var(--surface); border: 1px solid var(--line);
+    color: var(--ink); border-radius: 7px; padding: 3px 6px; cursor: pointer;
+  }
+  .rtedit {
+    min-height: 90px; box-sizing: border-box; font-size: 0.92rem; line-height: 1.5;
     padding: 10px 12px; border: 1px solid var(--line); border-radius: 10px;
-    background: var(--bg); color: var(--ink); resize: vertical;
+    background: var(--bg); color: var(--ink);
   }
-  .composer textarea:focus { outline: none; border-color: var(--accent); }
+  .rtedit:focus { outline: none; border-color: var(--accent); }
+  .rtedit :global(ul), .rtedit :global(ol) { margin: 4px 0; padding-left: 22px; }
+  .rtedit :global(p) { margin: 0 0 6px; }
   .cerr { margin: 6px 0 0; color: #9a3b3b; font-size: 0.82rem; }
   .cbtns { display: flex; align-items: center; gap: 12px; margin-top: 8px; }
   .csend {
diff --git a/goodnews/api.py b/goodnews/api.py
index 616db78..31a6286 100644
--- a/goodnews/api.py
+++ b/goodnews/api.py
@@ -32,7 +32,7 @@ from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 
 from . import auth, email_send, feeds, oauth_google, queries, share, summarize
-from .markup import render_reply_html
+from .markup import reply_html_to_text, sanitize_reply_html
 from .db import connect
 from .filters import filter_articles, prefs_from_json
 from .hero import safe_to_lead
@@ -383,7 +383,7 @@ class FeedbackReadBody(BaseModel):
 
 
 class FeedbackReplyBody(BaseModel):
-    message: str = ""
+    html: str = ""  # raw editor HTML — sanitized server-side before use
 
 
 class SourceActiveBody(BaseModel):
@@ -843,8 +843,10 @@ def create_app() -> FastAPI:
 
     @app.post("/api/admin/feedback/{fid}/reply")
     def admin_feedback_reply(fid: int, body: FeedbackReplyBody, request: Request) -> dict:
-        message = (body.message or "").strip()[:4000]
-        if not message:
+        # Sanitize the editor HTML to our allowlist; derive the plain-text fallback.
+        reply_html = sanitize_reply_html(body.html)[:8000]
+        reply_text = reply_html_to_text(reply_html)
+        if not reply_text:
             raise HTTPException(status_code=422, detail="Reply message is required.")
         # 1. Validate + gather, then release the DB connection — SMTP can take
         #    ~20s and shouldn't keep a connection open.
@@ -856,11 +858,10 @@ def create_app() -> FastAPI:
             if not fb["contact_email"]:
                 raise HTTPException(status_code=400, detail="No reply address for this feedback.")
             admin_id, to, original = admin["id"], fb["contact_email"], fb["message"]
-        reply_html = render_reply_html(message)  # tiny safe Markdown subset
         # 2. Send with no DB connection held; only record a reply that actually
         #    went out, so the UI can keep the draft on failure.
         try:
-            email_send.send_feedback_reply(to, message, reply_html, original)
+            email_send.send_feedback_reply(to, reply_text, reply_html, original)
         except Exception as exc:  # noqa: BLE001 — surface any SMTP failure to the admin
             raise HTTPException(status_code=502, detail=f"Could not send the reply: {exc}")
         # 3. Record the sent reply + mark the item read.
@@ -868,7 +869,7 @@ def create_app() -> FastAPI:
             cur = conn.execute(
                 "INSERT INTO feedback_replies (feedback_id, user_id, message, message_html, sent_to) "
                 "VALUES (?, ?, ?, ?, ?)",
-                (fid, admin_id, message, reply_html, to),
+                (fid, admin_id, reply_text, reply_html, to),
             )
             conn.execute(
                 "UPDATE feedback SET read_at = COALESCE(read_at, CURRENT_TIMESTAMP) WHERE id = ?", (fid,)
diff --git a/goodnews/markup.py b/goodnews/markup.py
index db83382..cbd8159 100644
--- a/goodnews/markup.py
+++ b/goodnews/markup.py
@@ -1,48 +1,138 @@
-"""A tiny, safe Markdown subset → HTML, for admin feedback replies.
+"""Server-side sanitizing for admin feedback replies.
 
-Everything is HTML-escaped FIRST, then a small whitelist of structures is
-introduced: bold (**…**), bullet lists (- / *), headings (#/##/###),
-paragraphs, and line breaks. No links, no attributes, no inline styles, no raw
-HTML passthrough — so the output is safe to render and to email. Authored only
-by admins, but sanitized on principle regardless.
+The reply composer is a small admin-only WYSIWYG (contenteditable + execCommand).
+Browsers emit inconsistent HTML, so the server is the adult in the room: it
+takes whatever the editor produced and rebuilds it from a strict allowlist —
+
+  strong, em, p, br, ul, ol, li, and span ONLY with a whitelisted font-size.
+
+Everything else (links, images, scripts, arbitrary styles, colors, fonts) is
+dropped; disallowed-tag *content* is kept as escaped text, except script/style
+whose content is discarded entirely. No raw HTML is ever trusted downstream —
+we store and send only this sanitized output.
 """
 
 from __future__ import annotations
 
-import html
+import html as _html
 import re
+from html.parser import HTMLParser
 
-_BULLET_RE = re.compile(r"^\s*[-*]\s+")
-_HEADING_RE = re.compile(r"^\s*(#{1,3})\s+")
-_BOLD_RE = re.compile(r"\*\*(.+?)\*\*")
-_HEADING_TAG = {1: "h3", 2: "h4", 3: "h5"}
+# Canonicalise the tags we keep (b→strong, i→em, div→p); anything not here is dropped.
+_TAG_MAP = {
+    "b": "strong", "strong": "strong",
+    "i": "em", "em": "em",
+    "p": "p", "div": "p",
+    "br": "br",
+    "ul": "ul", "ol": "ol", "li": "li",
+    "span": "span",
+}
+_ALLOWED_SIZES = {"13px", "15px", "18px", "22px"}
+_DROP_CONTENT = {"script", "style"}
+_FONT_SIZE_RE = re.compile(r"font-size:\s*(\d+px)", re.I)
+# execCommand fontSize emits <font size="1..7">; map the common ones to our scale.
+_FONT_TAG_SIZE = {"1": "13px", "2": "13px", "3": "15px", "4": "18px", "5": "22px", "6": "22px", "7": "22px"}
 
 
-def _inline(s: str) -> str:
-    """Escape, then apply the only inline transform we allow: **bold**."""
-    s = html.escape(s, quote=False)
-    return _BOLD_RE.sub(r"<strong>\1</strong>", s)
+class _Sanitizer(HTMLParser):
+    def __init__(self) -> None:
+        super().__init__(convert_charrefs=True)
+        self.out: list[str] = []
+        self.open: list[str | None] = []  # emitted canonical tag, or None for a dropped wrapper
+        self.skip = 0  # depth inside script/style (content discarded)
 
-
-def render_reply_html(text: str) -> str:
-    """Render the supported Markdown subset to sanitized HTML (or '' if empty)."""
-    text = (text or "").replace("\r\n", "\n").replace("\r", "\n").strip()
-    if not text:
-        return ""
-    out: list[str] = []
-    for block in re.split(r"\n\s*\n", text):
-        lines = [ln for ln in block.split("\n")]
-        nonempty = [ln for ln in lines if ln.strip()]
-        if not nonempty:
-            continue
-        if all(_BULLET_RE.match(ln) for ln in nonempty):
-            items = "".join("<li>" + _inline(_BULLET_RE.sub("", ln)) + "</li>" for ln in nonempty)
-            out.append("<ul>" + items + "</ul>")
-        elif len(nonempty) == 1 and _HEADING_RE.match(nonempty[0]):
-            level = len(_HEADING_RE.match(nonempty[0]).group(1))
-            content = _HEADING_RE.sub("", nonempty[0])
-            tag = _HEADING_TAG[level]
-            out.append(f"<{tag}>" + _inline(content) + f"</{tag}>")
+    def handle_starttag(self, tag, attrs):
+        t = tag.lower()
+        if t in _DROP_CONTENT:
+            self.skip += 1
+            self.open.append(None)
+            return
+        if self.skip:
+            return
+        if t == "br":
+            self.out.append("<br>")
+            return  # void — no stack entry
+        size = self._size(t, attrs)
+        if t == "span":
+            if size:
+                self.out.append(f'<span style="font-size:{size}">')
+                self.open.append("span")
+            else:
+                self.open.append(None)  # unstyled/!whitelisted span → drop wrapper, keep text
+            return
+        if t == "font":
+            if size:
+                self.out.append(f'<span style="font-size:{size}">')
+                self.open.append("span")  # normalise <font size> → safe span
+            else:
+                self.open.append(None)
+            return
+        canon = _TAG_MAP.get(t)
+        if canon:
+            self.out.append(f"<{canon}>")
+            self.open.append(canon)
         else:
-            out.append("<p>" + "<br>".join(_inline(ln) for ln in nonempty) + "</p>")
-    return "".join(out)
+            self.open.append(None)  # disallowed tag — drop tag, keep children
+
+    def handle_startendtag(self, tag, attrs):
+        if tag.lower() == "br" and not self.skip:
+            self.out.append("<br>")
+
+    def handle_endtag(self, tag):
+        t = tag.lower()
+        if t in _DROP_CONTENT:
+            if self.skip:
+                self.skip -= 1
+            if self.open:
+                self.open.pop()
+            return
+        if self.skip or t == "br":
+            return
+        if not self.open:
+            return
+        canon = self.open.pop()
+        if canon:
+            self.out.append(f"</{canon}>")
+
+    def handle_data(self, data):
+        if not self.skip:
+            self.out.append(_html.escape(data, quote=False))
+
+    def _size(self, tag, attrs):
+        for k, v in attrs:
+            if not v:
+                continue
+            if k.lower() == "style":
+                m = _FONT_SIZE_RE.search(v)
+                if m and m.group(1) in _ALLOWED_SIZES:
+                    return m.group(1)
+            elif tag == "font" and k.lower() == "size":
+                return _FONT_TAG_SIZE.get(v.strip())
+        return None
+
+
+def sanitize_reply_html(raw: str) -> str:
+    """Rebuild editor HTML from the strict allowlist (or '' if it has no content)."""
+    if not raw:
+        return ""
+    p = _Sanitizer()
+    p.feed(raw)
+    p.close()
+    html = "".join(p.out)
+    # If nothing but markup/whitespace survived, treat as empty.
+    if not re.sub(r"<[^>]+>", "", html).strip():
+        return ""
+    return html.strip()
+
+
+def reply_html_to_text(clean_html: str) -> str:
+    """Plain-text fallback derived from the already-sanitized HTML."""
+    if not clean_html:
+        return ""
+    s = re.sub(r"</li>", "\n", clean_html)
+    s = re.sub(r"<li>", "- ", s)
+    s = re.sub(r"</p>", "\n\n", s)
+    s = re.sub(r"<br\s*/?>", "\n", s)
+    s = re.sub(r"<[^>]+>", "", s)
+    s = _html.unescape(s)
+    return re.sub(r"\n{3,}", "\n\n", s).strip()
diff --git a/tests/test_feedback.py b/tests/test_feedback.py
index 5b4621c..2c158a7 100644
--- a/tests/test_feedback.py
+++ b/tests/test_feedback.py
@@ -88,7 +88,7 @@ def test_feedback_admin_actions_gated(tmp_path, monkeypatch):
     anon = TestClient(app)
     assert anon.post("/api/admin/feedback/1/read", json={"read": True}).status_code == 401
     assert anon.delete("/api/admin/feedback/1").status_code == 401
-    assert anon.post("/api/admin/feedback/1/reply", json={"message": "hi"}).status_code == 401
+    assert anon.post("/api/admin/feedback/1/reply", json={"html": "hi"}).status_code == 401
 
 
 def test_feedback_actions_404_on_missing_id(tmp_path, monkeypatch):
@@ -109,10 +109,10 @@ def test_feedback_reply_sends_stores_marks_read(tmp_path, monkeypatch):
     tc.post("/api/auth/email/start", json={"email": "boss@x.com"})
     tc.post("/api/auth/email/verify", json={"token": link["l"].split("token=")[1]})
     fid = tc.get("/api/admin/feedback").json()[0]["id"]
-    r = tc.post(f"/api/admin/feedback/{fid}/reply", json={"message": "Yes — a **companion app** is coming."})
+    r = tc.post(f"/api/admin/feedback/{fid}/reply", json={"html": "Yes — a <b>companion app</b> is coming."})
     assert r.status_code == 200
     assert sent["to"] == "reader@x.com" and "companion app" in sent["msg"] and "is there an app?" in sent["orig"]
-    assert "<strong>companion app</strong>" in sent["html"]   # rendered markdown sent as HTML
+    assert "<strong>companion app</strong>" in sent["html"]   # sanitized editor HTML (b→strong)
     fb = tc.get("/api/admin/feedback").json()[0]
     assert fb["read_at"] is not None                       # marked read on reply
     rep = fb["replies"][0]
@@ -125,7 +125,6 @@ def test_feedback_reply_requires_address_and_message(tmp_path, monkeypatch):
     monkeypatch.setattr("goodnews.email_send.send_feedback_reply",
                         lambda *a, **k: (_ for _ in ()).throw(AssertionError("should not send")))
     fid = tc.get("/api/admin/feedback").json()[0]["id"]
-    assert tc.post(f"/api/admin/feedback/{fid}/reply", json={"message": ""}).status_code == 422
-    assert tc.post(f"/api/admin/feedback/{fid}/reply", json={"message": "hi"}).status_code == 400  # no address
-    assert tc.post("/api/admin/feedback/9999/reply", json={"message": "hi"}).status_code == 404
-    import goodnews.api as api  # already reloaded by _admin_client's app
+    assert tc.post(f"/api/admin/feedback/{fid}/reply", json={"html": "  "}).status_code == 422
+    assert tc.post(f"/api/admin/feedback/{fid}/reply", json={"html": "hi"}).status_code == 400  # no address
+    assert tc.post("/api/admin/feedback/9999/reply", json={"html": "hi"}).status_code == 404
diff --git a/tests/test_markup.py b/tests/test_markup.py
index 4f74a8f..b7fe947 100644
--- a/tests/test_markup.py
+++ b/tests/test_markup.py
@@ -1,23 +1,43 @@
-from goodnews.markup import render_reply_html as r
+from goodnews.markup import sanitize_reply_html as s, reply_html_to_text as t2
 
 
-def test_escapes_before_formatting():
-    out = r("<script>alert(1)</script> and **bold**")
-    assert "<script>" not in out and "&lt;script&gt;" in out
-    assert "<strong>bold</strong>" in out
+def test_keeps_allowed_formatting_and_normalizes_tags():
+    out = s("<b>bold</b> and <i>it</i> and <strong>x</strong> and <em>y</em>")
+    assert out == "<strong>bold</strong> and <em>it</em> and <strong>x</strong> and <em>y</em>"
 
 
-def test_bullets_and_heading_and_paragraphs():
-    assert r("- one\n- two") == "<ul><li>one</li><li>two</li></ul>"
-    assert r("## Update") == "<h4>Update</h4>"
-    assert r("a\nb\n\nc") == "<p>a<br>b</p><p>c</p>"
+def test_lists_and_paragraphs():
+    assert s("<ul><li>a</li><li>b</li></ul>") == "<ul><li>a</li><li>b</li></ul>"
+    assert s("<ol><li>one</li></ol>") == "<ol><li>one</li></ol>"
+    assert s("<div>line</div>") == "<p>line</p>"  # div canonicalised to p
+    assert s("a<br>b") == "a<br>b"
 
 
-def test_no_links_or_attrs_pass_through():
-    out = r('[click](http://x) <a href="x" onclick="y">hi</a>')
-    assert "<a" not in out      # no anchor tag produced or passed through (escaped to &lt;a)
-    assert "[click]" in out     # markdown links unsupported → left as literal text
+def test_font_size_whitelist():
+    assert s('<span style="font-size:18px">big</span>') == '<span style="font-size:18px">big</span>'
+    # off-whitelist size → span dropped, text kept
+    assert s('<span style="font-size:40px">huge</span>') == "huge"
+    # <font size> normalised to a safe span
+    assert s('<font size="5">x</font>') == '<span style="font-size:22px">x</span>'
+
+
+def test_strips_dangerous_and_arbitrary():
+    # script content discarded entirely
+    assert "alert" not in s("<script>alert(1)</script>hi") and s("<script>alert(1)</script>hi") == "hi"
+    # links dropped, text kept; no href/onclick survive
+    out = s('<a href="http://x" onclick="y()">click</a>')
+    assert out == "click"
+    # arbitrary styles/colors stripped, content kept
+    assert s('<span style="color:red;font-weight:bold">z</span>') == "z"
+    # escapes stray angle brackets in text
+    assert s("2 < 3 & 4 > 1") == "2 &lt; 3 &amp; 4 &gt; 1"
 
 
 def test_empty():
-    assert r("   ") == "" and r(None) == ""
+    assert s("") == "" and s("<p></p>") == "" and s("<br>") == ""
+
+
+def test_html_to_text():
+    assert t2("<p>hi <strong>there</strong></p>") == "hi there"
+    assert t2("<ul><li>a</li><li>b</li></ul>") == "- a\n- b"
+    assert t2('<span style="font-size:18px">big</span>') == "big"