Coverage for website/utils/youtube_validator.py: 15%
40 statements
« prev ^ index » next coverage.py v7.5.0, created at 2025-09-13 15:29 -0300
« prev ^ index » next coverage.py v7.5.0, created at 2025-09-13 15:29 -0300
1"""
2YouTube URL validation and sanitization utilities
3Provides secure handling of YouTube video URLs for embedding
4"""
6import re
def extract_youtube_video_id(url):
    """
    Extract the YouTube video ID from a supported YouTube URL.

    Supported forms (scheme optional; "www.", "m." and "music." prefixes
    accepted for youtube.com, "www." for the other hosts):
        youtube.com/watch?v=<id>
        youtu.be/<id>
        youtube.com/embed/<id>
        youtube-nocookie.com/embed/<id>

    Matching is anchored at the start of the stripped URL, so look-alike
    hosts (e.g. "not-youtube.com") and URLs that merely contain a YouTube
    link somewhere in their path or query string are rejected. The ID must
    be exactly 11 characters; a longer run of ID characters is rejected
    instead of being truncated to its first 11 characters.

    Args:
        url (str): YouTube URL

    Returns:
        str: Video ID if valid, None otherwise
    """
    if not url or not isinstance(url, str):
        return None

    url = url.strip()

    # Exactly 11 ID characters, not followed by a 12th one.
    video_id = r"([a-zA-Z0-9_-]{11})(?![a-zA-Z0-9_-])"
    scheme = r"(?:https?://)?"

    # Common YouTube URL patterns
    patterns = (
        scheme + r"(?:www\.|m\.|music\.)?youtube\.com/watch\?v=" + video_id,
        scheme + r"(?:www\.)?youtu\.be/" + video_id,
        scheme + r"(?:www\.|m\.|music\.)?youtube\.com/embed/" + video_id,
        scheme + r"(?:www\.)?youtube-nocookie\.com/embed/" + video_id,
    )

    for pattern in patterns:
        # re.match (not re.search): the host must be at the start of the URL.
        match = re.match(pattern, url)
        if match:
            return match.group(1)

    return None
def validate_youtube_url(url):
    """
    Report whether a URL is a recognised YouTube video URL.

    A URL counts as valid exactly when extract_youtube_video_id() can pull
    a video ID out of it.

    Args:
        url (str): URL to validate

    Returns:
        bool: True if valid YouTube URL, False otherwise
    """
    return extract_youtube_video_id(url) is not None
def create_secure_embed_url(video_id):
    """
    Create a secure YouTube embed URL using youtube-nocookie.com

    Args:
        video_id (str): YouTube video ID (exactly 11 characters from
            a-z, A-Z, 0-9, "_" and "-")

    Returns:
        str: Secure embed URL

    Raises:
        ValueError: If video_id is not a string or is not a well-formed
            11-character video ID.
    """
    # fullmatch instead of match with "^...$": "$" also matches just before a
    # trailing newline, which would let "<id>\n" through and into the URL.
    if not isinstance(video_id, str) or not re.fullmatch(
        r"[a-zA-Z0-9_-]{11}", video_id
    ):
        raise ValueError("Invalid YouTube video ID")

    return f"https://www.youtube-nocookie.com/embed/{video_id}"
def sanitize_youtube_content(content):
    """
    Rewrite YouTube iframe embeds in HTML content to a secure, canonical form.

    Every iframe whose src points at youtube.com/embed/<id> or
    youtube-nocookie.com/embed/<id> (https, http or protocol-relative;
    tag and host matched case-insensitively) is replaced by a freshly
    built iframe that uses the https youtube-nocookie.com embed URL and a
    fixed attribute set. Attributes and query parameters of the original
    iframe are discarded.

    All other markup, including iframes from other sites, is returned
    unchanged: this function is not a general HTML sanitizer.

    Args:
        content (str): HTML content

    Returns:
        str: Content with YouTube embeds rewritten to secure embeds
    """
    # This is a basic implementation - in production, consider using
    # a more robust HTML sanitization library like bleach

    # Pattern to match YouTube iframe elements. The scheme is optional so
    # http:// and protocol-relative embeds are upgraded too, and whitespace
    # is tolerated between the opening and closing tags.
    youtube_pattern = (
        r'<iframe[^>]*src=["\'](?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/'
        r'embed/([a-zA-Z0-9_-]{11})[^"\']*["\'][^>]*>\s*</iframe\s*>'
    )

    def replace_youtube_iframe(match):
        # The capture group only ever holds 11 valid ID characters, so no
        # further validation of the ID is needed before building the URL.
        secure_url = create_secure_embed_url(match.group(1))
        return (
            f'<iframe src="{secure_url}" frameborder="0" '
            'allow="accelerometer; autoplay; clipboard-write; '
            'encrypted-media; gyroscope; picture-in-picture" '
            'allowfullscreen loading="lazy"></iframe>'
        )

    # IGNORECASE: HTML tag/attribute names and host names are case-insensitive.
    return re.sub(
        youtube_pattern, replace_youtube_iframe, content, flags=re.IGNORECASE
    )
# Example usage: run this module directly to print a short demo.
if __name__ == "__main__":
    # Sample inputs: three supported URL shapes, then two that are not
    # YouTube video URLs.
    sample_urls = (
        "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        "https://youtu.be/dQw4w9WgXcQ",
        "https://www.youtube.com/embed/dQw4w9WgXcQ",
        "invalid-url",
        "https://not-youtube.com/watch?v=dQw4w9WgXcQ",
    )
    separator = "-" * 50

    for sample in sample_urls:
        found_id = extract_youtube_video_id(sample)
        valid = validate_youtube_url(sample)
        print(f"URL: {sample}")
        print(f"Video ID: {found_id}")
        print(f"Valid: {valid}")
        # Only build an embed URL when an ID was actually extracted.
        if found_id:
            print(f"Secure Embed: {create_secure_embed_url(found_id)}")
        print(separator)