[ie/cloudflarestream] Fix `_VALID_URL` and embed extraction (#10215)
[yt-dlp3.git] / yt_dlp / extractor / cloudflarestream.py
blob8a409461a8bc74419e20e18607612ec6aa058a36
1 import base64
3 from .common import InfoExtractor
class CloudflareStreamIE(InfoExtractor):
    # Optional host prefix: "watch.", "iframe." or a per-customer "customer-<code>." label.
    _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
    # Hostnames under which videos are served.
    _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
    # Player-script URL up to and including the "video=" query parameter.
    _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
    # Either a 32-digit hex ID or a three-segment dotted token starting with "eyJ".
    _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
    _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
    _EMBED_REGEX = [
        # <script src=...> embeds; the backreference closes on the same quote character that opened.
        rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
        # <iframe src=...> embeds; only plain 32-digit hex IDs are matched here.
        rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
    ]
    _TESTS = [{
        'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
        'info_dict': {
            'id': '31c9291ab41fac05471db4e73aa11717',
            'ext': 'mp4',
            'title': '31c9291ab41fac05471db4e73aa11717',
            'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e',
        'info_dict': {
            'id': '0e8e040aec776862e1d632a699edf59e',
            'ext': 'mp4',
            'title': '0e8e040aec776862e1d632a699edf59e',
            'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
        },
    }, {
        'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
        'only_matching': True,
    }, {
        'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
        'only_matching': True,
    }, {
        'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
        'only_matching': True,
    }, {
        'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
        'only_matching': True,
    }, {
        'url': 'https://watch.cloudflarestream.com/eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJraWQiOiJmYTA0YjViMzQ2NDkwYTM5NWJiNzQ1NWFhZTA2YzYwZSIsInN1YiI6Ijg4ZDQxMDhhMzY0MjA3M2VhYmFhZjg3ZGExODJkMjYzIiwiZXhwIjoxNjAwNjA5MzE5fQ.xkRJwLGkt0nZ%5F0BlPiwU7iW4pqb4lKkznbKfAhGg0tGcxSS6ZBA3lcTUwu7W%2DyCFbnAl%2Dhqk3Fn%5FqeQS%5FQydP27qTHpB9iIFFsMtk1tqzGZV5v4yrYDnwLSKzEKvVd6QwJnfABtxH2JdpSNuWlMUiVXFxGWgjOw6QeTNDDklTQYXV%5FNLV7sErSn5CeOPeRRkdXb%2D8ip%5FVOcfk1nDsFoOo4fctFtGP0wYMyY5ae8nhhatydHwevuvJCcEvEfh%2D4qjq9mCZOodevmtSQ4YWmggf4BxtWnDWYrGW8Otp6oqezrR8oY4%2DbKdV6PaqBj49aJdcls6xK7PmM8%5Fvjy3xfm0Mg',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://upride.cc/incident/shoulder-pass-at-light/',
        'info_dict': {
            'id': 'eaef9dea5159cf968be84241b5cedfe7',
            'ext': 'mp4',
            'title': 'eaef9dea5159cf968be84241b5cedfe7',
            'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The ID matched from the URL is used verbatim to build the manifest and
        thumbnail URLs. When that ID is a dotted token, the reported video ID
        is the 'sub' field of the token's base64url-encoded second segment.
        """
        url_id = self._match_id(url)

        host = 'videodelivery.net'
        if 'bytehighway.net/' in url:
            host = 'bytehighway.net'
        base_url = f'https://{host}/{url_id}/'
        manifest_base_url = f'{base_url}manifest/video.'

        video_id = url_id
        if '.' in url_id:
            # Dotted token: the middle segment is base64url-encoded JSON.
            # Over-pad with '=' so decoding succeeds whatever padding was stripped.
            encoded_payload = url_id.split('.')[1]
            payload = self._parse_json(
                base64.urlsafe_b64decode(encoded_payload + '==='), url_id)
            video_id = payload['sub']

        # Both manifests are optional (fatal=False); whatever loads is merged.
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'{manifest_base_url}m3u8', video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)
        dash_formats, dash_subtitles = self._extract_mpd_formats_and_subtitles(
            f'{manifest_base_url}mpd', video_id, mpd_id='dash', fatal=False)
        formats.extend(dash_formats)
        self._merge_subtitles(dash_subtitles, target=subtitles)

        info = {
            'id': video_id,
            'title': video_id,
            'thumbnail': f'{base_url}thumbnails/thumbnail.jpg',
            'formats': formats,
            'subtitles': subtitles,
        }
        return info