[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / vidio.py
blob955a116472ed8ac72cffd22cf3d79b09ece98eba
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 clean_html,
5 format_field,
6 get_element_by_class,
7 int_or_none,
8 parse_iso8601,
9 smuggle_url,
10 str_or_none,
11 strip_or_none,
12 try_get,
13 unsmuggle_url,
14 urlencode_postdata,
# Shared login and API plumbing for the vidio.com extractors below.
class VidioBaseIE(InfoExtractor):
    _LOGIN_URL = 'https://www.vidio.com/users/login'
    _NETRC_MACHINE = 'vidio'

    def _perform_login(self, username, password):
        """Log in through the web login form unless a session already exists.

        Raises ExtractorError when the login POST is answered with HTTP 401;
        the message is taken from the error elements of the returned page
        when any are present.
        """
        def is_logged_in():
            # Non-fatal probe: a failed download is treated as "not logged in"
            res = self._download_json(
                'https://www.vidio.com/interactions.json', None, 'Checking if logged in', fatal=False) or {}
            return bool(res.get('current_user'))

        if is_logged_in():
            return

        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading log in page')

        # Carry over the form's hidden inputs and add the credentials
        login_form = self._form_hidden_inputs('login-form', login_page)
        login_form.update({
            'user[login]': username,
            'user[password]': password,
        })
        login_post, login_post_urlh = self._download_webpage_handle(
            self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(login_form), expected_status=[302, 401])

        if login_post_urlh.status != 401:
            return

        if get_element_by_class('onboarding-content-register-popup__title', login_post):
            raise ExtractorError(
                'Unable to log in: The provided email has not registered yet.', expected=True)

        # Either element may be absent; fall back to '' so the substring test
        # below cannot raise TypeError on None and the generic error is reached
        reason = (
            get_element_by_class('onboarding-form__general-error', login_post)
            or get_element_by_class('onboarding-modal__title', login_post)
            or '')
        if 'Akun terhubung ke' in reason:
            raise ExtractorError(
                'Unable to log in: Your account is linked to a social media account. '
                'Use --cookies to provide account credentials instead', expected=True)
        if reason:
            subreason = get_element_by_class('onboarding-modal__description-text', login_post) or ''
            raise ExtractorError(
                f'Unable to log in: {reason}. {clean_html(subreason)}', expected=True)
        raise ExtractorError('Unable to log in')

    def _initialize_pre_login(self):
        """Fetch the API key used by _call_api and store it on the instance."""
        # data=b'' sends an empty request body to the auth endpoint
        self._api_key = self._download_json(
            'https://www.vidio.com/auth', None, data=b'')['api_key']

    def _call_api(self, url, video_id, note=None):
        """Download JSON from `url`, sending the stored API key in the headers."""
        return self._download_json(url, video_id, note=note, headers={
            'Content-Type': 'application/vnd.api+json',
            'X-API-KEY': self._api_key,
        })
# Extractor for single videos on vidio.com (watch and embed pages).
class VidioIE(VidioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?vidio\.com/(watch|embed)/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015',
        'md5': 'abac81b1a205a8d94c609a473b5ea62a',
        'info_dict': {
            'id': '165683',
            'display_id': 'dj_ambred-booyah-live-2015',
            'ext': 'mp4',
            'title': 'DJ_AMBRED - Booyah (Live 2015)',
            'description': 'md5:27dc15f819b6a78a626490881adbadf8',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 149,
            'like_count': int,
            'uploader': 'TWELVE Pic',
            'timestamp': 1444902800,
            'upload_date': '20151015',
            'uploader_id': 'twelvepictures',
            'channel': 'Cover Music Video',
            'channel_id': '280236',
            'view_count': int,
            'dislike_count': int,
            'comment_count': int,
            'tags': 'count:3',
            'uploader_url': 'https://www.vidio.com/@twelvepictures',
        },
    }, {
        'url': 'https://www.vidio.com/watch/77949-south-korea-test-fires-missile-that-can-strike-all-of-the-north',
        'only_matching': True,
    }, {
        # Premier-exclusive video
        'url': 'https://www.vidio.com/watch/1550718-stand-by-me-doraemon',
        'only_matching': True,
    }, {
        # embed url from https://enamplus.liputan6.com/read/5033648/video-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah
        'url': 'https://www.vidio.com/embed/7115874-fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
        'info_dict': {
            'id': '7115874',
            'ext': 'mp4',
            'channel_id': '40172876',
            'comment_count': int,
            'uploader_id': 'liputan6',
            'view_count': int,
            'dislike_count': int,
            'upload_date': '20220804',
            'uploader': 'Liputan6.com',
            'display_id': 'fakta-temuan-suspek-cacar-monyet-di-jawa-tengah',
            'channel': 'ENAM PLUS 165',
            'timestamp': 1659605520,
            'title': 'Fakta Temuan Suspek Cacar Monyet di Jawa Tengah',
            'duration': 59,
            'like_count': int,
            'tags': ['monkeypox indonesia', 'cacar monyet menyebar', 'suspek cacar monyet di indonesia', 'fakta', 'hoax atau bukan?', 'jawa tengah'],
            'thumbnail': 'https://thumbor.prod.vidiocdn.com/83PN-_BKm5sS7emLtRxl506MLqQ=/640x360/filters:quality(70)/vidio-web-prod-video/uploads/video/image/7115874/fakta-suspek-cacar-monyet-di-jawa-tengah-24555a.jpg',
            'uploader_url': 'https://www.vidio.com/@liputan6',
            'description': 'md5:6d595a18d3b19ee378e335a6f288d5ac',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for one video from the videos API response.

        Premium videos take their stream URLs from interactions_stream.json;
        all other videos use the HLS URL of the first clip in the API data.
        """
        match = self._match_valid_url(url).groupdict()
        video_id, display_id = match.get('id'), match.get('display_id')
        data = self._call_api('https://api.vidio.com/videos/' + video_id, display_id)
        video = data['videos'][0]
        title = video['title'].strip()

        if video.get('is_premium'):
            sources = self._download_json(
                f'https://www.vidio.com/interactions_stream.json?video_id={video_id}&type=videos',
                display_id, note='Downloading premier API JSON')
            if not (sources.get('source') or sources.get('source_dash')):
                self.raise_login_required('This video is only available for registered users with the appropriate subscription')

            formats, subs = [], {}
            if sources.get('source'):
                hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles(
                    sources['source'], display_id, 'mp4', 'm3u8_native')
                formats.extend(hls_formats)
                self._merge_subtitles(hls_subs, target=subs)
            if sources.get('source_dash'):  # TODO: Find video example with source_dash
                dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
                    sources['source_dash'], display_id, 'dash')
                formats.extend(dash_formats)
                # Merge per language; dict.update() would drop the HLS tracks
                # of every language that the DASH manifest also lists
                self._merge_subtitles(dash_subs, target=subs)
        else:
            hls_url = data['clips'][0]['hls_url']
            formats, subs = self._extract_m3u8_formats_and_subtitles(
                hls_url, display_id, 'mp4', 'm3u8_native')

        def get_first(name):
            # First entry of the pluralised top-level list, or {} if missing
            return try_get(data, lambda y: y[name + 's'][0], dict) or {}

        def get_count(name):
            # Counters are stored on the video under a 'total_' prefix
            return int_or_none(video.get('total_' + name))

        channel = get_first('channel')
        user = get_first('user')
        username = user.get('username')

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': strip_or_none(video.get('description')),
            'thumbnail': video.get('image_url_medium'),
            'duration': int_or_none(video.get('duration')),
            'like_count': get_count('likes'),
            'formats': formats,
            'subtitles': subs,
            'uploader': user.get('name'),
            'timestamp': parse_iso8601(video.get('created_at')),
            'uploader_id': username,
            'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
            'channel': channel.get('name'),
            'channel_id': str_or_none(channel.get('id')),
            'view_count': get_count('view_count'),
            'dislike_count': get_count('dislikes'),
            'comment_count': get_count('comments'),
            'tags': video.get('tag_list'),
        }
# Extractor for vidio.com "premier" pages, which group videos into playlists.
class VidioPremierIE(VidioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?vidio\.com/premier/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.vidio.com/premier/2885/badai-pasti-berlalu',
        'playlist_mincount': 14,
    }, {
        # Series with both free and premier-exclusive videos
        'url': 'https://www.vidio.com/premier/2567/sosmed',
        'only_matching': True,
    }]

    def _playlist_entries(self, playlist_url, display_id):
        """Yield a url_result per video, following the API's 'next' links."""
        page_number = 0
        next_url = playlist_url
        while next_url:
            page_number += 1
            page = self._call_api(
                next_url, display_id, f'Downloading API JSON page {page_number}')
            for item in page.get('data', []):
                watch_url = item['links']['watchpage']
                yield self.url_result(watch_url, 'Vidio', item['id'])
            # A missing 'next' link ends the pagination
            next_url = try_get(page, lambda x: x['links']['next'])

    def _real_extract(self, url):
        """Return one playlist (smuggled API URL) or a playlist of playlists."""
        url, idata = unsmuggle_url(url, {})
        playlist_id, display_id = self._match_valid_url(url).groups()

        api_url = idata.get('url')
        if api_url:
            # Smuggled data contains an API URL. Download only that playlist
            smuggled_id = idata['id']
            entries = self._playlist_entries(api_url, smuggled_id)
            return self.playlist_result(
                entries, playlist_id=smuggled_id, playlist_title=idata.get('title'))

        playlist_data = self._call_api(
            f'https://api.vidio.com/content_profiles/{playlist_id}/playlists', display_id)

        def smuggle_playlist(data):
            # Point back at this same page URL, carrying the playlist's API
            # URL, id and name so the branch above handles it
            return smuggle_url(url, {
                'url': data['relationships']['videos']['links']['related'],
                'id': data['id'],
                'title': try_get(data, lambda x: x['attributes']['name']),
            })

        return self.playlist_from_matches(
            playlist_data.get('data', []), playlist_id=playlist_id, ie=self.ie_key(),
            getter=smuggle_playlist)
# Extractor for vidio.com livestreams.
class VidioLiveIE(VidioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?vidio\.com/live/(?P<id>\d+)-(?P<display_id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.vidio.com/live/204-sctv',
        'info_dict': {
            'id': '204',
            'title': 'SCTV',
            'uploader': 'SCTV',
            'uploader_id': 'sctv',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        # Premier-exclusive livestream
        'url': 'https://www.vidio.com/live/6362-tvn',
        'only_matching': True,
    }, {
        # DRM premier-exclusive livestream
        'url': 'https://www.vidio.com/live/6299-bein-1',
        'only_matching': True,
    }]

    def _extract_tokenized_hls_formats(self, m3u8_url, video_id, display_id):
        """Fetch a stream token and extract HLS formats from the tokenized URL."""
        token_json = self._download_json(
            f'https://www.vidio.com/live/{video_id}/tokens',
            display_id, note='Downloading HLS token JSON', data=b'')
        # The token is appended as the query string of the manifest URL
        return self._extract_m3u8_formats(
            m3u8_url + '?' + token_json.get('token', ''), display_id, 'mp4', 'm3u8_native')

    def _real_extract(self, url):
        """Build the info dict for one livestream from its detail API response.

        Premium streams take their HLS URL from interactions_stream.json;
        other streams use the URLs found in the stream metadata. Only HLS
        sources are extracted.
        """
        video_id, display_id = self._match_valid_url(url).groups()
        stream_data = self._call_api(
            f'https://www.vidio.com/api/livestreamings/{video_id}/detail', display_id)
        stream_meta = stream_data['livestreamings'][0]
        # try_get tolerates a missing, null or empty 'users' list, where
        # indexing the result of .get('users', [{}]) would raise
        user = try_get(stream_data, lambda x: x['users'][0], dict) or {}

        title = stream_meta.get('title')
        username = user.get('username')

        formats = []
        if stream_meta.get('is_drm') and not self.get_param('allow_unplayable_formats'):
            self.report_drm(video_id)

        if stream_meta.get('is_premium'):
            sources = self._download_json(
                f'https://www.vidio.com/interactions_stream.json?video_id={video_id}&type=livestreamings',
                display_id, note='Downloading premier API JSON')
            if not (sources.get('source') or sources.get('source_dash')):
                self.raise_login_required('This video is only available for registered users with the appropriate subscription')

            if str_or_none(sources.get('source')):
                formats.extend(self._extract_tokenized_hls_formats(
                    sources['source'], video_id, display_id))
            # NOTE: 'source_dash' is not extracted
        else:
            if stream_meta.get('stream_token_url'):
                formats.extend(self._extract_tokenized_hls_formats(
                    stream_meta['stream_token_url'], video_id, display_id))
            # NOTE: 'stream_dash_url' is not extracted
            if stream_meta.get('stream_url'):
                formats.extend(self._extract_m3u8_formats(
                    stream_meta['stream_url'], display_id, 'mp4', 'm3u8_native'))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'is_live': True,
            'description': strip_or_none(stream_meta.get('description')),
            'thumbnail': stream_meta.get('image'),
            'like_count': int_or_none(stream_meta.get('like')),
            'dislike_count': int_or_none(stream_meta.get('dislike')),
            'formats': formats,
            'uploader': user.get('name'),
            'timestamp': parse_iso8601(stream_meta.get('start_time')),
            'uploader_id': username,
            'uploader_url': format_field(username, None, 'https://www.vidio.com/@%s'),
        }