Reply sanitizer: cap raw input, auto-close open tags (no severed HTML)
Per Codex: slicing the SANITIZED HTML with [:8000] could cut through a tag or entity. Instead, cap the RAW editor HTML at 20,000 characters before sanitizing, and have sanitize_reply_html auto-close any still-open allowed tags, so that malformed input can never leave a dangling or severed tag in message_html or the email body. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+3
-2
@@ -843,8 +843,9 @@ def create_app() -> FastAPI:
|
|||||||
|
|
||||||
@app.post("/api/admin/feedback/{fid}/reply")
|
@app.post("/api/admin/feedback/{fid}/reply")
|
||||||
def admin_feedback_reply(fid: int, body: FeedbackReplyBody, request: Request) -> dict:
|
def admin_feedback_reply(fid: int, body: FeedbackReplyBody, request: Request) -> dict:
|
||||||
# Sanitize the editor HTML to our allowlist; derive the plain-text fallback.
|
# Cap the RAW editor HTML first (slicing sanitized output could sever a
|
||||||
reply_html = sanitize_reply_html(body.html)[:8000]
|
# tag), then sanitize the whole thing.
|
||||||
|
reply_html = sanitize_reply_html((body.html or "")[:20000])
|
||||||
reply_text = reply_html_to_text(reply_html)
|
reply_text = reply_html_to_text(reply_html)
|
||||||
if not reply_text:
|
if not reply_text:
|
||||||
raise HTTPException(status_code=422, detail="Reply message is required.")
|
raise HTTPException(status_code=422, detail="Reply message is required.")
|
||||||
|
|||||||
@@ -118,6 +118,11 @@ def sanitize_reply_html(raw: str) -> str:
|
|||||||
p = _Sanitizer()
|
p = _Sanitizer()
|
||||||
p.feed(raw)
|
p.feed(raw)
|
||||||
p.close()
|
p.close()
|
||||||
|
# Close any still-open allowed tags (malformed input → never emit a dangling
|
||||||
|
# or severed tag into stored HTML / the email body).
|
||||||
|
for canon in reversed(p.open):
|
||||||
|
if canon:
|
||||||
|
p.out.append(f"</{canon}>")
|
||||||
html = "".join(p.out)
|
html = "".join(p.out)
|
||||||
# If nothing but markup/whitespace survived, treat as empty.
|
# If nothing but markup/whitespace survived, treat as empty.
|
||||||
if not re.sub(r"<[^>]+>", "", html).strip():
|
if not re.sub(r"<[^>]+>", "", html).strip():
|
||||||
|
|||||||
@@ -41,3 +41,13 @@ def test_html_to_text():
|
|||||||
assert t2("<p>hi <strong>there</strong></p>") == "hi there"
|
assert t2("<p>hi <strong>there</strong></p>") == "hi there"
|
||||||
assert t2("<ul><li>a</li><li>b</li></ul>") == "- a\n- b"
|
assert t2("<ul><li>a</li><li>b</li></ul>") == "- a\n- b"
|
||||||
assert t2('<span style="font-size:18px">big</span>') == "big"
|
assert t2('<span style="font-size:18px">big</span>') == "big"
|
||||||
|
|
||||||
|
|
||||||
|
def test_autocloses_unclosed_tags():
|
||||||
|
assert s("<strong>bold") == "<strong>bold</strong>"
|
||||||
|
assert s("<p>hi") == "<p>hi</p>"
|
||||||
|
assert s("<ul><li>a</li>") == "<ul><li>a</li></ul>" # unclosed <ul> gets closed
|
||||||
|
assert s('<span style="font-size:18px">big') == '<span style="font-size:18px">big</span>'
|
||||||
|
# pathological input still never leaves a dangling open tag (balanced output)
|
||||||
|
out = s("<strong><em>x")
|
||||||
|
assert out == "<strong><em>x</em></strong>"
|
||||||
|
|||||||
Reference in New Issue
Block a user