Coverage for blogModel/sanitizer.py: 0%
13 statements
« prev ^ index » next coverage.py v7.5.0, created at 2025-09-13 15:29 -0300
« prev ^ index » next coverage.py v7.5.0, created at 2025-09-13 15:29 -0300
"""
Small sanitizer utility using bleach to clean user-provided HTML.
This centralizes allowed tags/attributes and can be tuned later.
"""
6import bleach
# Keep this conservative: allow a small set of formatting tags plus images
# and links. Anything not listed here is removed by bleach.clean().
#
# "iframe" is deliberately NOT allowed. ALLOWED_ATTRIBUTES has no entry for
# it, so every iframe attribute (including ``src``) would be stripped and the
# only thing a user could produce is an empty embedded frame.
ALLOWED_TAGS = [
    "a",
    "b",
    "blockquote",
    "br",
    "code",
    "em",
    "i",
    "li",
    "ol",
    "p",
    "strong",
    "ul",
    "img",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
]

# Per-tag attribute allowlist. Tags without an entry keep no attributes.
ALLOWED_ATTRIBUTES = {
    "a": ["href", "title", "rel", "target"],
    "img": ["src", "alt", "title", "width", "height"],
}

# URL schemes accepted in URL-valued attributes such as ``href`` and ``src``.
# NOTE(review): "data" is presumably here for inline (base64) images, but the
# protocol list is not per-attribute, so ``<a href="data:...">`` is accepted
# too. Confirm whether data: links are really wanted; if not, drop "data" or
# restrict it to ``img`` with an attribute callable.
ALLOWED_PROTOCOLS = ["http", "https", "mailto", "data"]

# When true, sanitized output is additionally passed through bleach.linkify(),
# which turns bare URLs into <a> elements.
# NOTE(review): linkify's default callback is what adds rel="nofollow" --
# verify against the installed bleach version.
LINKIFY = True
def sanitize_html(value: str) -> str:
    """Clean user-submitted HTML so it can be stored and rendered.

    The input is first stripped of ASCII control characters (code points
    below 32), keeping only newline and tab; note that carriage returns are
    removed as well. The result is run through ``bleach.clean`` with the
    module-level ``ALLOWED_TAGS``, ``ALLOWED_ATTRIBUTES`` and
    ``ALLOWED_PROTOCOLS``; disallowed tags are stripped rather than escaped
    (``strip=True``). When ``LINKIFY`` is truthy the cleaned markup is
    finally passed through ``bleach.linkify``.

    Falsy input (``""`` or ``None``) is returned unchanged.
    """
    # Nothing to sanitize: hand the falsy value straight back.
    if not value:
        return value

    # Control characters are dropped before parsing; only these two survive.
    preserved_whitespace = ("\n", "\t")
    printable_text = "".join(
        filter(
            lambda ch: ch in preserved_whitespace or ord(ch) >= 32,
            value,
        )
    )

    sanitized = bleach.clean(
        printable_text,
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        protocols=ALLOWED_PROTOCOLS,
        strip=True,
    )

    # Linkification runs on already-cleaned markup, never on raw input.
    return bleach.linkify(sanitized) if LINKIFY else sanitized