From ba92c0a04b9c5ce17b8990ce46b42eb79309df6d Mon Sep 17 00:00:00 2001 From: jay Date: Tue, 9 Jun 2026 09:22:41 -0400 Subject: [PATCH] Reply sanitizer: cap raw input, auto-close open tags (no severed HTML) Per Codex: slicing the SANITIZED html with [:8000] could cut through a tag or entity. Cap the RAW editor HTML (20k) before sanitizing instead, and have sanitize_reply_html auto-close any still-open allowed tags so malformed input can never leave a dangling/severed tag in message_html or the email body. Co-Authored-By: Claude Opus 4.8 (1M context) --- goodnews/api.py | 5 +++-- goodnews/markup.py | 5 +++++ tests/test_markup.py | 10 ++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/goodnews/api.py b/goodnews/api.py index 31a6286..fdde4b1 100644 --- a/goodnews/api.py +++ b/goodnews/api.py @@ -843,8 +843,9 @@ def create_app() -> FastAPI: @app.post("/api/admin/feedback/{fid}/reply") def admin_feedback_reply(fid: int, body: FeedbackReplyBody, request: Request) -> dict: - # Sanitize the editor HTML to our allowlist; derive the plain-text fallback. - reply_html = sanitize_reply_html(body.html)[:8000] + # Cap the RAW editor HTML first (slicing sanitized output could sever a + # tag), then sanitize the whole thing. + reply_html = sanitize_reply_html((body.html or "")[:20000]) reply_text = reply_html_to_text(reply_html) if not reply_text: raise HTTPException(status_code=422, detail="Reply message is required.") diff --git a/goodnews/markup.py b/goodnews/markup.py index cbd8159..0392fcb 100644 --- a/goodnews/markup.py +++ b/goodnews/markup.py @@ -118,6 +118,11 @@ def sanitize_reply_html(raw: str) -> str: p = _Sanitizer() p.feed(raw) p.close() + # Close any still-open allowed tags (malformed input → never emit a dangling + # or severed tag into stored HTML / the email body). + for canon in reversed(p.open): + if canon: + p.out.append(f"") html = "".join(p.out) # If nothing but markup/whitespace survived, treat as empty. if not re.sub(r"<[^>]+>", "", html).strip(): diff --git a/tests/test_markup.py b/tests/test_markup.py index b7fe947..fd056c9 100644 --- a/tests/test_markup.py +++ b/tests/test_markup.py @@ -41,3 +41,13 @@ def test_html_to_text(): assert t2("

hi there

") == "hi there" assert t2("") == "- a\n- b" assert t2('big') == "big" + + +def test_autocloses_unclosed_tags(): + assert s("bold") == "bold" + assert s("

hi") == "

hi

" + assert s("
  • a
  • ") == "
    • a
    " # unclosed
      gets closed + assert s('big') == 'big' + # pathological input still never leaves a dangling open tag (balanced output) + out = s("x") + assert out == "x"