5 from .gigya
import GigyaBaseIE
6 from ..networking
.exceptions
import HTTPError
13 get_element_html_by_class
,
29 class VRTBaseIE(GigyaBaseIE
):
32 'platform': 'desktop',
37 'device': 'undefined (undefined)',
43 'name': 'VRT web player',
44 'version': '2.7.4-prod-2023-04-19T06:05:45',
47 # From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js
48 _JWT_KEY_ID
= '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w='
49 _JWT_SIGNING_KEY
= 'b5f500d55cb44715107249ccd8a5c0136cfb2788dbb71b90a4f142423bacaf38' # -dev
50 # player-stag.vrt.be key: d23987504521ae6fbf2716caca6700a24bb1579477b43c84e146b279de5ca595
51 # player.vrt.be key: 2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae
53 def _extract_formats_and_subtitles(self
, data
, video_id
):
54 if traverse_obj(data
, 'drm'):
55 self
.report_drm(video_id
)
57 formats
, subtitles
= [], {}
58 for target
in traverse_obj(data
, ('targetUrls', lambda _
, v
: url_or_none(v
['url']) and v
['type'])):
59 format_type
= target
['type'].upper()
60 format_url
= target
['url']
61 if format_type
in ('HLS', 'HLS_AES'):
62 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
63 format_url
, video_id
, 'mp4', m3u8_id
=format_type
, fatal
=False)
65 self
._merge
_subtitles
(subs
, target
=subtitles
)
66 elif format_type
== 'HDS':
67 formats
.extend(self
._extract
_f
4m
_formats
(
68 format_url
, video_id
, f4m_id
=format_type
, fatal
=False))
69 elif format_type
== 'MPEG_DASH':
70 fmts
, subs
= self
._extract
_mpd
_formats
_and
_subtitles
(
71 format_url
, video_id
, mpd_id
=format_type
, fatal
=False)
73 self
._merge
_subtitles
(subs
, target
=subtitles
)
74 elif format_type
== 'HSS':
75 fmts
, subs
= self
._extract
_ism
_formats
_and
_subtitles
(
76 format_url
, video_id
, ism_id
='mss', fatal
=False)
78 self
._merge
_subtitles
(subs
, target
=subtitles
)
81 'format_id': format_type
,
85 for sub
in traverse_obj(data
, ('subtitleUrls', lambda _
, v
: v
['url'] and v
['type'] == 'CLOSED')):
86 subtitles
.setdefault('nl', []).append({'url': sub
['url']})
88 return formats
, subtitles
90 def _call_api(self
, video_id
, client
='null', id_token
=None, version
='v2'):
91 player_info
= {'exp': (round(time
.time(), 3) + 900), **self
._PLAYER
_INFO
}
92 player_token
= self
._download
_json
(
93 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2/tokens',
94 video_id
, 'Downloading player token', headers
={
95 **self
.geo_verification_headers(),
96 'Content-Type': 'application/json',
98 'identityToken': id_token
or {},
99 'playerInfo': jwt_encode_hs256(player_info
, self
._JWT
_SIGNING
_KEY
, headers
={
100 'kid': self
._JWT
_KEY
_ID
,
102 }, separators
=(',', ':')).encode())['vrtPlayerToken']
104 return self
._download
_json
(
105 f
'https://media-services-public.vrt.be/media-aggregator/{version}/media-items/{video_id}',
106 video_id
, 'Downloading API JSON', query
={
107 'vrtPlayerToken': player_token
,
109 }, expected_status
=400)
112 class VRTIE(VRTBaseIE
):
113 IE_DESC
= 'VRT NWS, Flanders News, Flandern Info and Sporza'
114 _VALID_URL
= r
'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
116 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
118 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
120 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
121 'description': 'md5:6fd85f999b2d1841aa5568f4bf02c3ff',
123 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/2d914d61-7710-11e9-abcc-02b7b76bf47f.jpg',
125 'params': {'skip_download': 'm3u8'},
127 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
129 'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
131 'title': 'De Belgian Cats zijn klaar voor het EK',
132 'description': 'Video: De Belgian Cats zijn klaar voor het EK mét Ann Wauters | basketbal, sport in het journaal',
134 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/11c0dba3-770e-11e9-abcc-02b7b76bf47f.jpg',
136 'params': {'skip_download': 'm3u8'},
139 'vrt.be/vrtnws': 'vrtnieuws',
140 'sporza.be': 'sporza',
143 def _real_extract(self
, url
):
144 site
, display_id
= self
._match
_valid
_url
(url
).groups()
145 webpage
= self
._download
_webpage
(url
, display_id
)
146 attrs
= extract_attributes(get_element_html_by_class('vrtvideo', webpage
) or '')
148 asset_id
= attrs
.get('data-video-id') or attrs
['data-videoid']
149 publication_id
= traverse_obj(attrs
, 'data-publication-id', 'data-publicationid')
151 asset_id
= f
'{publication_id}${asset_id}'
152 client
= traverse_obj(attrs
, 'data-client-code', 'data-client') or self
._CLIENT
_MAP
[site
]
154 data
= self
._call
_api
(asset_id
, client
)
155 formats
, subtitles
= self
._extract
_formats
_and
_subtitles
(data
, asset_id
)
157 description
= self
._html
_search
_meta
(
158 ['og:description', 'twitter:description', 'description'], webpage
)
159 if description
== '…':
165 'subtitles': subtitles
,
166 'description': description
,
167 'thumbnail': url_or_none(attrs
.get('data-posterimage')),
168 'duration': float_or_none(attrs
.get('data-duration'), 1000),
169 '_old_archive_ids': [make_archive_id('Canvas', asset_id
)],
170 **traverse_obj(data
, {
171 'title': ('title', {str}
),
172 'description': ('shortDescription', {str}
),
173 'duration': ('duration', {float_or_none(scale
=1000)}),
174 'thumbnail': ('posterImageUrl', {url_or_none}
),
179 class VrtNUIE(VRTBaseIE
):
181 _VALID_URL
= r
'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
183 # CONTENT_IS_AGE_RESTRICTED
184 'url': 'https://www.vrt.be/vrtnu/a-z/de-ideale-wereld/2023-vj/de-ideale-wereld-d20230116/',
186 'id': 'pbs-pub-855b00a8-6ce2-4032-ac4f-1fcf3ae78524$vid-d2243aa1-ec46-4e34-a55b-92568459906f',
189 'description': 'Satirisch actualiteitenmagazine met Ella Leyers. Tom Waes is te gast.',
190 'timestamp': 1673905125,
191 'release_timestamp': 1673905125,
192 'series': 'De ideale wereld',
193 'season_id': '1672830988794',
194 'episode': 'Aflevering 1',
196 'episode_id': '1672830988861',
197 'display_id': 'de-ideale-wereld-d20230116',
200 'thumbnail': 'https://images.vrt.be/orig/2023/01/10/1bb39cb3-9115-11ed-b07d-02b7b76bf47f.jpg',
201 'release_date': '20230116',
202 'upload_date': '20230116',
206 'url': 'https://www.vrt.be/vrtnu/a-z/buurman--wat-doet-u-nu-/6/buurman--wat-doet-u-nu--s6-trailer/',
208 'id': 'pbs-pub-ad4050eb-d9e5-48c2-9ec8-b6c355032361$vid-0465537a-34a8-4617-8352-4d8d983b4eee',
210 'title': 'Trailer seizoen 6 \'Buurman, wat doet u nu?\'',
211 'description': 'md5:197424726c61384b4e5c519f16c0cf02',
212 'timestamp': 1652940000,
213 'release_timestamp': 1652940000,
214 'series': 'Buurman, wat doet u nu?',
215 'season': 'Seizoen 6',
217 'season_id': '1652344200907',
218 'episode': 'Aflevering 0',
220 'episode_id': '1652951873524',
221 'display_id': 'buurman--wat-doet-u-nu--s6-trailer',
224 'thumbnail': 'https://images.vrt.be/orig/2022/05/23/3c234d21-da83-11ec-b07d-02b7b76bf47f.jpg',
225 'release_date': '20220519',
226 'upload_date': '20220519',
228 'params': {'skip_download': 'm3u8'},
230 _NETRC_MACHINE
= 'vrtnu'
231 _authenticated
= False
233 def _perform_login(self
, username
, password
):
234 auth_info
= self
._gigya
_login
({
235 'APIKey': '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy',
236 'targetEnv': 'jssdk',
238 'password': password
,
239 'authMode': 'cookie',
242 if auth_info
.get('errorDetails'):
243 raise ExtractorError(f
'Unable to login. VrtNU said: {auth_info["errorDetails"]}', expected
=True)
245 # Sometimes authentication fails for no good reason, retry
246 for retry
in self
.RetryManager():
247 if retry
.attempt
> 1:
250 self
._request
_webpage
(
251 'https://token.vrt.be/vrtnuinitlogin', None, note
='Requesting XSRF Token',
252 errnote
='Could not get XSRF Token', query
={
254 'destination': 'https://www.vrt.be/vrtnu/',
256 self
._request
_webpage
(
257 'https://login.vrt.be/perform_login', None,
258 note
='Performing login', errnote
='Login failed',
259 query
={'client_id': 'vrtnu-site'}, data
=urlencode_postdata({
260 'UID': auth_info
['UID'],
261 'UIDSignature': auth_info
['UIDSignature'],
262 'signatureTimestamp': auth_info
['signatureTimestamp'],
263 '_csrf': self
._get
_cookies
('https://login.vrt.be').get('OIDCXSRF').value
,
265 except ExtractorError
as e
:
266 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 401:
271 self
._authenticated
= True
273 def _real_extract(self
, url
):
274 display_id
= self
._match
_id
(url
)
275 parsed_url
= urllib
.parse
.urlparse(url
)
276 details
= self
._download
_json
(
277 f
'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.rstrip("/")}.model.json',
278 display_id
, 'Downloading asset JSON', 'Unable to download asset JSON')['details']
280 watch_info
= traverse_obj(details
, (
281 'actions', lambda _
, v
: v
['type'] == 'watch-episode', {dict}
), get_all
=False) or {}
282 video_id
= join_nonempty(
283 'episodePublicationId', 'episodeVideoId', delim
='$', from_dict
=watch_info
)
284 if '$' not in video_id
:
285 raise ExtractorError('Unable to extract video ID')
287 vrtnutoken
= self
._download
_json
(
288 'https://token.vrt.be/refreshtoken', video_id
, note
='Retrieving vrtnutoken',
289 errnote
='Token refresh failed')['vrtnutoken'] if self
._authenticated
else None
291 video_info
= self
._call
_api
(video_id
, 'vrtnu-web@PROD', vrtnutoken
)
293 if 'title' not in video_info
:
294 code
= video_info
.get('code')
295 if code
in ('AUTHENTICATION_REQUIRED', 'CONTENT_IS_AGE_RESTRICTED'):
296 self
.raise_login_required(code
, method
='password')
297 elif code
in ('INVALID_LOCATION', 'CONTENT_AVAILABLE_ONLY_IN_BE'):
298 self
.raise_geo_restricted(countries
=['BE'])
299 elif code
== 'CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS_AND_EXPATS':
300 if not self
._authenticated
:
301 self
.raise_login_required(code
, method
='password')
302 self
.raise_geo_restricted(countries
=['BE'])
303 raise ExtractorError(code
, expected
=True)
305 formats
, subtitles
= self
._extract
_formats
_and
_subtitles
(video_info
, video_id
)
308 **traverse_obj(details
, {
310 'description': ('description', {clean_html}
),
311 'timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}
),
312 'release_timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}
),
313 'series': ('data', 'program', 'title'),
314 'season': ('data', 'season', 'title', 'value'),
315 'season_number': ('data', 'season', 'title', 'raw', {int_or_none}
),
316 'season_id': ('data', 'season', 'id', {str_or_none}
),
317 'episode': ('data', 'episode', 'number', 'value', {str_or_none}
),
318 'episode_number': ('data', 'episode', 'number', 'raw', {int_or_none}
),
319 'episode_id': ('data', 'episode', 'id', {str_or_none}
),
320 'age_limit': ('data', 'episode', 'age', 'raw', {parse_age_limit}
),
323 'display_id': display_id
,
326 'duration': float_or_none(video_info
.get('duration'), 1000),
327 'thumbnail': url_or_none(video_info
.get('posterImageUrl')),
328 'subtitles': subtitles
,
329 '_old_archive_ids': [make_archive_id('Canvas', video_id
)],
333 class KetnetIE(VRTBaseIE
):
334 _VALID_URL
= r
'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
336 'url': 'https://www.ketnet.be/kijken/m/meisjes/6/meisjes-s6a5',
338 'id': 'pbs-pub-39f8351c-a0a0-43e6-8394-205d597d6162$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
341 'episode': 'Reeks 6: Week 5',
344 'timestamp': 1685251800,
345 'upload_date': '20230528',
347 'params': {'skip_download': 'm3u8'},
350 def _real_extract(self
, url
):
351 display_id
= self
._match
_id
(url
)
353 video
= self
._download
_json
(
354 'https://senior-bff.ketnet.be/graphql', display_id
, query
={
356 video(id: "content/ketnet/nl/%s.model.json") {
367 }''' % display_id
, # noqa: UP031
370 video_id
= urllib
.parse
.unquote(video
['mediaReference'])
371 data
= self
._call
_api
(video_id
, 'ketnet@PROD', version
='v1')
372 formats
, subtitles
= self
._extract
_formats
_and
_subtitles
(data
, video_id
)
377 'subtitles': subtitles
,
378 '_old_archive_ids': [make_archive_id('Canvas', video_id
)],
379 **traverse_obj(video
, {
380 'title': ('titleVideodetail', {str}
),
381 'description': ('description', {str}
),
382 'thumbnail': ('thumbnail', {url_or_none}
),
383 'timestamp': ('publicationDate', {parse_iso8601}
),
384 'series': ('programTitle', {str}
),
385 'season': ('seasonTitle', {str}
),
386 'episode': ('subtitleVideodetail', {str}
),
387 'episode_number': ('episodeNr', {int_or_none}
),
392 class DagelijkseKostIE(VRTBaseIE
):
393 IE_DESC
= 'dagelijksekost.een.be'
394 _VALID_URL
= r
'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
396 'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
398 'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
400 'title': 'Hachis parmentier met witloof',
401 'description': 'md5:9960478392d87f63567b5b117688cdc5',
402 'display_id': 'hachis-parmentier-met-witloof',
404 'params': {'skip_download': 'm3u8'},
407 def _real_extract(self
, url
):
408 display_id
= self
._match
_id
(url
)
409 webpage
= self
._download
_webpage
(url
, display_id
)
410 video_id
= self
._html
_search
_regex
(
411 r
'data-url=(["\'])(?P
<id>(?
:(?
!\
1).)+)\
1', webpage, 'video
id', group='id')
413 data = self._call_api(video_id, 'dako
@prod', version='v1
')
414 formats, subtitles = self._extract_formats_and_subtitles(data, video_id)
419 'subtitles
': subtitles,
420 'display_id
': display_id,
421 'title
': strip_or_none(get_element_by_class(
422 'dish
-metadata__title
', webpage) or self._html_search_meta('twitter
:title
', webpage)),
423 'description
': clean_html(get_element_by_class(
424 'dish
-description
', webpage)) or self._html_search_meta(
425 ['description
', 'twitter
:description
', 'og
:description
'], webpage),
426 '_old_archive_ids
': [make_archive_id('Canvas
', video_id)],
430 class Radio1BeIE(VRTBaseIE):
431 _VALID_URL = r'https?
://radio1\
.be
/(?
:lees|luister
/select
)/(?P
<id>[\w
/-]+)'
433 'url
': 'https
://radio1
.be
/luister
/select
/de
-ochtend
/komt
-n
-va
-volgend
-jaar
-op
-in-wallonie
',
435 'id': 'eb6c22e9
-544f
-44f4
-af39
-cf8cccd29e22
',
436 'title
': 'Komt N
-VA volgend jaar op
in Wallonië?
',
437 'display_id
': 'de
-ochtend
/komt
-n
-va
-volgend
-jaar
-op
-in-wallonie
',
438 'description
': 'md5
:b374ea1c9302f38362df9dea1931468e
',
439 'thumbnail
': r're
:https?
://cds\
.vrt\
.radio
/[^
/#\?&]+',
441 'playlist_mincount': 1,
443 'url': 'https://radio1.be/lees/europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza?view=web',
445 'id': '5d47f102-dbdb-4fa0-832b-26c1870311f2',
446 'title': 'Europese Unie wil "onmiddellijke humanitaire pauze" en "duurzaam staakt-het-vuren" in Gaza',
447 'description': 'md5:1aad1fae7d39edeffde5d3e67d276b64',
448 'thumbnail': r
're:https?://cds\.vrt\.radio/[^/#\?&]+',
449 'display_id': 'europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza',
451 'playlist_mincount': 1,
454 def _extract_video_entries(self
, next_js_data
, display_id
):
455 video_data
= traverse_obj(
456 next_js_data
, ((None, ('paragraphs', ...)), {lambda x
: x
if x
['mediaReference'] else None}))
457 for data
in video_data
:
458 media_reference
= data
['mediaReference']
459 formats
, subtitles
= self
._extract
_formats
_and
_subtitles
(
460 self
._call
_api
(media_reference
), display_id
)
463 'id': media_reference
,
465 'subtitles': subtitles
,
466 **traverse_obj(data
, {
467 'title': ('title', {str}
),
468 'description': ('body', {clean_html}
),
472 def _real_extract(self
, url
):
473 display_id
= self
._match
_id
(url
)
474 webpage
= self
._download
_webpage
(url
, display_id
)
475 next_js_data
= self
._search
_nextjs
_data
(webpage
, display_id
)['props']['pageProps']['item']
477 return self
.playlist_result(
478 self
._extract
_video
_entries
(next_js_data
, display_id
), **merge_dicts(traverse_obj(
481 'title': ('title', {str}
),
482 'description': (('description', 'content'), {clean_html}
),
483 }), get_all
=False), {
484 'display_id': display_id
,
485 'title': self
._html
_search
_meta
(['name', 'og:title', 'twitter:title'], webpage
),
486 'description': self
._html
_search
_meta
(['description', 'og:description', 'twitter:description'], webpage
),
487 'thumbnail': self
._html
_search
_meta
(['og:image', 'twitter:image'], webpage
),