Reply sanitizer: cap raw input, auto-close open tags (no severed HTML)
Per Codex: slicing the SANITIZED HTML with [:8000] could cut through a tag or entity. Instead, cap the RAW editor HTML (20,000 characters) before sanitizing, and have sanitize_reply_html auto-close any still-open allowed tags so malformed input can never leave a dangling/severed tag in message_html or the email body.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+3
-2
@@ -843,8 +843,9 @@ def create_app() -> FastAPI:
|
||||
|
||||
@app.post("/api/admin/feedback/{fid}/reply")
|
||||
def admin_feedback_reply(fid: int, body: FeedbackReplyBody, request: Request) -> dict:
|
||||
# Sanitize the editor HTML to our allowlist; derive the plain-text fallback.
|
||||
reply_html = sanitize_reply_html(body.html)[:8000]
|
||||
# Cap the RAW editor HTML first (slicing sanitized output could sever a
|
||||
# tag), then sanitize the whole thing.
|
||||
reply_html = sanitize_reply_html((body.html or "")[:20000])
|
||||
reply_text = reply_html_to_text(reply_html)
|
||||
if not reply_text:
|
||||
raise HTTPException(status_code=422, detail="Reply message is required.")
|
||||
|
||||
@@ -118,6 +118,11 @@ def sanitize_reply_html(raw: str) -> str:
|
||||
p = _Sanitizer()
|
||||
p.feed(raw)
|
||||
p.close()
|
||||
# Close any still-open allowed tags (malformed input → never emit a dangling
|
||||
# or severed tag into stored HTML / the email body).
|
||||
for canon in reversed(p.open):
|
||||
if canon:
|
||||
p.out.append(f"</{canon}>")
|
||||
html = "".join(p.out)
|
||||
# If nothing but markup/whitespace survived, treat as empty.
|
||||
if not re.sub(r"<[^>]+>", "", html).strip():
|
||||
|
||||
@@ -41,3 +41,13 @@ def test_html_to_text():
|
||||
assert t2("<p>hi <strong>there</strong></p>") == "hi there"
|
||||
assert t2("<ul><li>a</li><li>b</li></ul>") == "- a\n- b"
|
||||
assert t2('<span style="font-size:18px">big</span>') == "big"
|
||||
|
||||
|
||||
def test_autocloses_unclosed_tags():
    """Verify that tags left open in the input come back closed.

    Each case feeds the `s` helper a fragment whose allowed tags are not all
    closed and expects the matching closers to be appended, innermost first.
    (`s` is defined outside this view; presumably it wraps the reply-HTML
    sanitizer. Confirm against the top of the test module.)
    """
    # (fragment with open tags, expected balanced output), checked in order.
    cases = (
        ("<strong>bold", "<strong>bold</strong>"),
        ("<p>hi", "<p>hi</p>"),
        # <li> is already closed here; only the enclosing <ul> needs a closer.
        ("<ul><li>a</li>", "<ul><li>a</li></ul>"),
        (
            '<span style="font-size:18px">big',
            '<span style="font-size:18px">big</span>',
        ),
        # Nested open tags: output must be balanced, closing <em> before <strong>.
        ("<strong><em>x", "<strong><em>x</em></strong>"),
    )
    for unclosed, balanced in cases:
        assert s(unclosed) == balanced
|
||||
|
||||
Reference in New Issue
Block a user