Coverage for blogModel/sanitizer.py: 0%

13 statements  

« prev     ^ index     » next       coverage.py v7.5.0, created at 2025-09-13 15:29 -0300

1""" 

2Small sanitizer utility using bleach to clean user-provided HTML. 

3This centralizes allowed tags/attributes and can be tuned later. 

4""" 

5 

6import bleach 

7 

# Whitelist of HTML elements that survive sanitization. Keep this
# conservative: basic text formatting, lists, headings, links and images.
# "iframe" is deliberately absent: ALLOWED_ATTRIBUTES grants it no attributes
# (not even "src"), so the only thing it could ever produce is an empty
# embedded frame, and embedding third-party documents is not "basic
# formatting".
ALLOWED_TAGS = [
    "a", "b", "blockquote", "br", "code", "em", "i",
    "li", "ol", "p", "strong", "ul",
    "img",
    "h1", "h2", "h3", "h4", "h5", "h6",
]

# Per-tag attribute whitelist. Tags without an entry here keep no attributes.
ALLOWED_ATTRIBUTES = {
    "a": ["href", "title", "rel", "target"],
    "img": ["src", "alt", "title", "width", "height"],
}

# URL schemes accepted in URI-valued attributes (href, src, ...).
# NOTE(review): "data" is presumably here to support inline (base64) images,
# but the protocol list is global, so it is accepted for <a href> as well.
# Confirm that this is intended before relying on it.
ALLOWED_PROTOCOLS = ["http", "https", "mailto", "data"]

# When true, sanitize_html() additionally runs bleach.linkify() over the
# cleaned markup, turning bare URLs into <a> elements. With bleach's default
# callbacks this also sets rel="nofollow" on links.
LINKIFY = True

41 

42 

def sanitize_html(value: str) -> str:
    """Clean user-supplied HTML so it can be stored and rendered.

    The input is first stripped of ASCII control characters, then passed
    through ``bleach.clean`` with the module's tag, attribute and protocol
    whitelists (``strip=True``), and finally, when ``LINKIFY`` is set, through
    ``bleach.linkify``.

    Falsy input (``""``, ``None``) is returned unchanged without being
    processed.
    """
    if not value:
        return value

    # Discard every character below code point 32 except newline and tab.
    # Carriage returns fall in that range and are therefore removed as well.
    printable = "".join(
        [ch for ch in value if ch in ("\n", "\t") or ord(ch) >= 32]
    )

    sanitized = bleach.clean(
        printable,
        tags=ALLOWED_TAGS,
        attributes=ALLOWED_ATTRIBUTES,
        protocols=ALLOWED_PROTOCOLS,
        strip=True,
    )

    # Auto-linking is a module-level switch rather than a parameter.
    return bleach.linkify(sanitized) if LINKIFY else sanitized