Files
upbeatBytes/tests/test_markup.py
T
thejayman77 ba92c0a04b Reply sanitizer: cap raw input, auto-close open tags (no severed HTML)
Per Codex: slicing the SANITIZED html with [:8000] could cut through a tag or
entity. Cap the RAW editor HTML (20k) before sanitizing instead, and have
sanitize_reply_html auto-close any still-open allowed tags so malformed input
can never leave a dangling/severed tag in message_html or the email body.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-09 09:22:41 -04:00

54 lines
2.3 KiB
Python

from goodnews.markup import sanitize_reply_html as s, reply_html_to_text as t2
def test_keeps_allowed_formatting_and_normalizes_tags():
    """Check that <b>/<i> come back as <strong>/<em> and canonical tags survive."""
    raw = "<b>bold</b> and <i>it</i> and <strong>x</strong> and <em>y</em>"
    expected = (
        "<strong>bold</strong> and <em>it</em>"
        " and <strong>x</strong> and <em>y</em>"
    )
    assert s(raw) == expected
def test_lists_and_paragraphs():
    """Check list markup, <div> -> <p> canonicalisation, and <br> pass-through."""
    cases = [
        ("<ul><li>a</li><li>b</li></ul>", "<ul><li>a</li><li>b</li></ul>"),
        ("<ol><li>one</li></ol>", "<ol><li>one</li></ol>"),
        ("<div>line</div>", "<p>line</p>"),  # div canonicalised to p
        ("a<br>b", "a<br>b"),
    ]
    for raw, expected in cases:
        assert s(raw) == expected
def test_font_size_whitelist():
    """Check which font sizes survive sanitizing and how <font size> is rewritten."""
    allowed_span = '<span style="font-size:18px">big</span>'
    assert s(allowed_span) == allowed_span

    # A size that is off the whitelist: the span is dropped, the text is kept.
    oversized_span = '<span style="font-size:40px">huge</span>'
    assert s(oversized_span) == "huge"

    # Legacy <font size> is normalised to a safe span.
    legacy_font = '<font size="5">x</font>'
    assert s(legacy_font) == '<span style="font-size:22px">x</span>'
def test_strips_dangerous_and_arbitrary():
    """Check that scripts, links, arbitrary styles and stray brackets are neutralised."""
    # Script content is discarded entirely; only the trailing text remains.
    script_payload = "<script>alert(1)</script>hi"
    assert "alert" not in s(script_payload)
    assert s(script_payload) == "hi"

    # Links are dropped but their text is kept; no href/onclick survives.
    link_result = s('<a href="http://x" onclick="y()">click</a>')
    assert link_result == "click"

    # Arbitrary styles/colors are stripped while the content is kept.
    styled_span = '<span style="color:red;font-weight:bold">z</span>'
    assert s(styled_span) == "z"

    # Stray angle brackets and ampersands in text are escaped.
    plain_text = "2 < 3 & 4 > 1"
    assert s(plain_text) == "2 &lt; 3 &amp; 4 &gt; 1"
def test_empty():
    """Check that empty input and content-free markup sanitize to an empty string."""
    for raw in ("", "<p></p>", "<br>"):
        assert s(raw) == ""
def test_html_to_text():
    """Check the plain-text rendering of paragraphs, list items and sized spans."""
    cases = {
        "<p>hi <strong>there</strong></p>": "hi there",
        "<ul><li>a</li><li>b</li></ul>": "- a\n- b",
        '<span style="font-size:18px">big</span>': "big",
    }
    for markup, expected_text in cases.items():
        assert t2(markup) == expected_text
def test_autocloses_unclosed_tags():
    """Check that tags left open in the input are closed in the sanitized output."""
    cases = [
        ("<strong>bold", "<strong>bold</strong>"),
        ("<p>hi", "<p>hi</p>"),
        ("<ul><li>a</li>", "<ul><li>a</li></ul>"),  # unclosed <ul> gets closed
        (
            '<span style="font-size:18px">big',
            '<span style="font-size:18px">big</span>',
        ),
    ]
    for raw, expected in cases:
        assert s(raw) == expected

    # Pathological input still never leaves a dangling open tag: nested
    # unclosed tags come back balanced, closed innermost-first.
    nested = s("<strong><em>x")
    assert nested == "<strong><em>x</em></strong>"