import base64
import urllib.parse

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    int_or_none,
    qualities,
    remove_start,
    smuggle_url,
    unsmuggle_url,
    update_url_query,
    url_or_none,
    urlencode_postdata,
)
from ..utils.traversal import traverse_obj


class SproutVideoIE(InfoExtractor):
    """Extractor for SproutVideo embed pages (videos.sproutvideo.com/embed/<id>/<hash>)."""

    # Scheme-less embed URL; shared by _VALID_URL and _EMBED_REGEX so that
    # protocol-relative iframe sources (`//videos.sproutvideo.com/...`) also match
    _NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P<id>[\da-f]+)/[\da-f]+'
    _VALID_URL = rf'https?:{_NO_SCHEME_RE}'
    _EMBED_REGEX = [rf'<iframe [^>]*\bsrc=["\'](?P<url>(?:https?:)?{_NO_SCHEME_RE}[^"\']*)["\']']
    # NOTE(review): only the keys present in the reviewed text are listed below.
    # Gaps in its line numbering suggest further expected fields were lost in
    # each entry - restore them from version control if available.
    _TESTS = [{
        'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
        'md5': '1343ce1a6cb39d67889bfa07c7b02b0e',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }, {
        'url': 'https://videos.sproutvideo.com/embed/a79fdcb21f1be2c62e/93bf31e41e39ca27',
        'md5': 'cebae5cf558cca83271917cf4ec03f26',
        'info_dict': {
            'id': 'a79fdcb21f1be2c62e',
            'title': 'HS_01_Live Stream 2023-01-14 10:00',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }, {
        # http formats 'sd' and 'hd' are available
        'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90',
        'md5': 'f368c78df07e78a749508b221528672c',
        'info_dict': {
            'id': '119cd6bc1a18e6cd98',
            'title': '3. Updating your Partner details',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
        'params': {'format': 'hd'},
    }, {
        'url': 'https://videos.sproutvideo.com/embed/119dd8ba121ee0cc98/4ee50c88a343215d?type=hd',
        'md5': '7f6798f037d7a3e3e07e67959de68fc6',
        'info_dict': {
            'id': '119dd8ba121ee0cc98',
            'title': 'Recipients Setup - Domestic Wire Only',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
            'subtitles': {'en': 'count:1'},
        },
    }]
    # This URL does not match _VALID_URL; it is a third-party page that embeds the player
    _WEBPAGE_TESTS = [{
        'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]
    _M3U8_URL_TMPL = 'https://{base}.videos.sproutvideo.com/{s3_user_hash}/{s3_video_hash}/video/index.m3u8'
    _QUALITIES = ('hd', 'uhd', 'source')  # Exclude 'sd' to prioritize hls formats above it

    @staticmethod
    def _policy_to_qs(policy, signature_key, as_string=False):
        """Build the signed query parameters for one signature set of the player data.

        @param policy           Decoded player data; must provide
                                policy['signatures'][signature_key] (a mapping)
                                and policy['sessionID']
        @param signature_key    Which signature set to use; this class passes
                                'm', 't' and 'k' for manifest, fragment and key URLs
        @param as_string        Return a urlencoded string instead of a dict
        @returns                dict of query parameters, or its urlencoded form
        """
        query = {}
        for key, value in policy['signatures'][signature_key].items():
            # The query parameter names are the signature names without this prefix
            query[remove_start(key, 'CloudFront-')] = value
        query['sessionID'] = policy['sessionID']
        return urllib.parse.urlencode(query, doseq=True) if as_string else query

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield embed URLs found in webpage, each smuggled with the embedding page as referer."""
        for embed_url in super()._extract_embed_urls(url, webpage):
            # _EMBED_REGEX also accepts protocol-relative sources; give them a scheme
            if embed_url.startswith('//'):
                embed_url = f'https:{embed_url}'
            yield smuggle_url(embed_url, {'referer': url})

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata from a SproutVideo embed page."""
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        # Forward the referer smuggled by _extract_embed_urls, if there is one
        webpage = self._download_webpage(
            url, video_id, headers=traverse_obj(smuggled_data, {'Referer': 'referer'}))
        # The player data is a base64-encoded JSON string assigned to `var dat`
        data = self._search_json(
            r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+',
            end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode())

        formats, subtitles = [], {}
        # NOTE(review): only the 'Origin' entry survived in the reviewed text; the
        # numbering gaps on either side of it suggest more headers were lost - confirm
        headers = {
            'Origin': 'https://videos.sproutvideo.com',
        }

        # HLS extraction is fatal; only attempt it if the JSON data says it's available
        if traverse_obj(data, 'hls'):
            manifest_query = self._policy_to_qs(data, 'm')
            fragment_query = self._policy_to_qs(data, 't', as_string=True)
            key_query = self._policy_to_qs(data, 'k', as_string=True)

            formats.extend(self._extract_m3u8_formats(
                self._M3U8_URL_TMPL.format(**data), video_id, 'mp4',
                m3u8_id='hls', headers=headers, query=manifest_query))
            # Manifest, segment and key URLs each carry their own signed query
            for fmt in formats:
                fmt.update({
                    'url': update_url_query(fmt['url'], manifest_query),
                    'extra_param_to_segment_url': fragment_query,
                    'extra_param_to_key_url': key_query,
                })

        if downloads := traverse_obj(data, ('downloads', {dict.items}, lambda _, v: url_or_none(v[1]))):
            quality = qualities(self._QUALITIES)
            # Mark formats as video-only solely when the data explicitly says so
            acodec = 'none' if data.get('has_audio') is False else None
            # NOTE(review): the reviewed text has further gaps inside this dict;
            # additional fields may have been lost - confirm
            formats.extend([{
                'format_id': str(format_id),
                'url': format_url,
                'quality': quality(format_id),
                'acodec': acodec,
            } for format_id, format_url in downloads])

        for sub_data in traverse_obj(data, ('subtitleData', lambda _, v: url_or_none(v['src']))):
            subtitles.setdefault(sub_data.get('srclang', 'en'), []).append({
                'url': sub_data['src'],
            })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': headers,
            **traverse_obj(data, {
                'title': ('title', {str}),
                'duration': ('duration', {int_or_none}),
                'thumbnail': ('posterframe_url', {url_or_none}),
            }),
        }


class VidsIoIE(InfoExtractor):
    """Extractor for *.vids.io video pages, which embed a SproutVideo player."""

    _VALID_URL = r'https?://[\w-]+\.vids\.io/videos/(?P<id>[\da-f]+)/(?P<display_id>[\w-]+)'
    # NOTE(review): only the keys present in the reviewed text are listed below;
    # gaps in its line numbering suggest further expected fields were lost
    _TESTS = [{
        'url': 'https://how-to-video.vids.io/videos/799cd8b11c10efc1f0/how-to-video-live-streaming',
        'md5': '9bbbb2c0c0739eb163b80f87b8d77c9e',
        'info_dict': {
            'id': '799cd8b11c10efc1f0',
            'title': 'How to Video: Live Streaming',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Resolve a vids.io page to its SproutVideo embed URL.

        If the page answers with HTTP 403, the `videopassword` param is submitted
        together with the page's hidden form inputs before looking for the embed.

        Raises ExtractorError if a password is required but not given, if the
        password is rejected, or if no SproutVideo embed is found.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
        # A 403 response is tolerated here; it is treated below as "password required"
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=403)

        if urlh.status == 403:
            password = self.get_param('videopassword')
            if not password:
                raise ExtractorError(
                    'This video is password-protected; use the --video-password option', expected=True)
            try:
                webpage = self._download_webpage(
                    url, display_id, 'Submitting video password',
                    data=urlencode_postdata({
                        'password': password,
                        **self._hidden_inputs(webpage),
                    }))
                # Requests with user's session cookie `_sproutvideo_session` are now authorized
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                    raise ExtractorError('Incorrect password', expected=True)
                # Any other failure is not a password problem; do not swallow it
                raise

        if embed_url := next(SproutVideoIE._extract_embed_urls(url, webpage), None):
            return self.url_result(embed_url, SproutVideoIE, video_id)

        raise ExtractorError('Unable to extract any SproutVideo embed url')