6 from .gigya
import GigyaBaseIE
7 from ..networking
.exceptions
import HTTPError
14 get_element_html_by_class
,
30 class VRTBaseIE(GigyaBaseIE
):
33 'platform': 'desktop',
38 'device': 'undefined (undefined)',
44 'name': 'VRT web player',
45 'version': '2.7.4-prod-2023-04-19T06:05:45',
48 # From https://player.vrt.be/vrtnws/js/main.js & https://player.vrt.be/ketnet/js/main.8cdb11341bcb79e4cd44.js
49 _JWT_KEY_ID
= '0-0Fp51UZykfaiCJrfTE3+oMI8zvDteYfPtR+2n1R+z8w='
50 _JWT_SIGNING_KEY
= 'b5f500d55cb44715107249ccd8a5c0136cfb2788dbb71b90a4f142423bacaf38' # -dev
51 # player-stag.vrt.be key: d23987504521ae6fbf2716caca6700a24bb1579477b43c84e146b279de5ca595
52 # player.vrt.be key: 2a9251d782700769fb856da5725daf38661874ca6f80ae7dc2b05ec1a81a24ae
def _extract_formats_and_subtitles(self, data, video_id):
    """Build the format list and subtitle map from a media API response.

    Reads three keys of `data`: 'drm' (reported via `report_drm` when
    truthy), 'targetUrls' (one stream per entry) and 'subtitleUrls'.

    Returns a `(formats, subtitles)` tuple: a list of format dicts and a
    dict mapping language code to a list of subtitle dicts.
    """
    if traverse_obj(data, 'drm'):
        self.report_drm(video_id)

    formats, subtitles = [], {}
    # Only targets with a valid URL and a non-empty type are considered
    for target in traverse_obj(data, ('targetUrls', lambda _, v: url_or_none(v['url']) and v['type'])):
        format_type = target['type'].upper()
        format_url = target['url']

        # Each branch fills fmts/subs; they are accumulated once below so
        # no branch can forget to add its results
        fmts, subs = [], {}
        if format_type in ('HLS', 'HLS_AES'):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                format_url, video_id, 'mp4', m3u8_id=format_type, fatal=False)
        elif format_type == 'HDS':
            fmts = self._extract_f4m_formats(
                format_url, video_id, f4m_id=format_type, fatal=False)
        elif format_type == 'MPEG_DASH':
            fmts, subs = self._extract_mpd_formats_and_subtitles(
                format_url, video_id, mpd_id=format_type, fatal=False)
        elif format_type == 'HSS':
            fmts, subs = self._extract_ism_formats_and_subtitles(
                format_url, video_id, ism_id='mss', fatal=False)
        else:
            # Unknown stream type: expose the URL as a single direct format
            fmts = [{
                'format_id': format_type,
                'url': format_url,
            }]

        formats.extend(fmts)
        self._merge_subtitles(subs, target=subtitles)

    # Subtitles of type 'CLOSED' are always filed under the 'nl' language key
    for sub in traverse_obj(data, ('subtitleUrls', lambda _, v: v['url'] and v['type'] == 'CLOSED')):
        subtitles.setdefault('nl', []).append({'url': sub['url']})

    return formats, subtitles
def _call_api(self, video_id, client='null', id_token=None, version='v2'):
    """Fetch the media-aggregator JSON for `video_id`.

    First requests a player token by POSTing a signed `playerInfo` JWT
    (plus the optional `id_token`), then queries the media-items endpoint
    of the given API `version` with that token and `client`.

    The second request accepts HTTP 400 so that callers can inspect the
    JSON error body instead of getting an exception.
    """
    import json  # function-scope import keeps this block self-contained

    # The JWT payload expires 900 seconds from now
    player_info = {'exp': (round(time.time(), 3) + 900), **self._PLAYER_INFO}
    player_jwt = jwt_encode_hs256(player_info, self._JWT_SIGNING_KEY, headers={
        'kid': self._JWT_KEY_ID,
    })
    # json.dumps() cannot serialize bytes; accept either return type
    if isinstance(player_jwt, bytes):
        player_jwt = player_jwt.decode()

    player_token = self._download_json(
        'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v2/tokens',
        video_id, 'Downloading player token', headers={
            **self.geo_verification_headers(),
            'Content-Type': 'application/json',
        }, data=json.dumps({
            'identityToken': id_token or {},
            'playerInfo': player_jwt,
        }, separators=(',', ':')).encode())['vrtPlayerToken']

    return self._download_json(
        f'https://media-services-public.vrt.be/media-aggregator/{version}/media-items/{video_id}',
        video_id, 'Downloading API JSON', query={
            'vrtPlayerToken': player_token,
            'client': client,
        }, expected_status=400)
113 class VRTIE(VRTBaseIE
):
114 IE_DESC
= 'VRT NWS, Flanders News, Flandern Info and Sporza'
115 _VALID_URL
= r
'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
117 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
119 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
121 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
122 'description': 'md5:6fd85f999b2d1841aa5568f4bf02c3ff',
124 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/2d914d61-7710-11e9-abcc-02b7b76bf47f.jpg',
126 'params': {'skip_download': 'm3u8'},
128 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
130 'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
132 'title': 'De Belgian Cats zijn klaar voor het EK',
133 'description': 'Video: De Belgian Cats zijn klaar voor het EK mét Ann Wauters | basketbal, sport in het journaal',
135 'thumbnail': 'https://images.vrt.be/orig/2019/05/15/11c0dba3-770e-11e9-abcc-02b7b76bf47f.jpg',
137 'params': {'skip_download': 'm3u8'},
140 'vrt.be/vrtnws': 'vrtnieuws',
141 'sporza.be': 'sporza',
def _real_extract(self, url):
    """Extract the video embedded in a VRT NWS / Sporza article page.

    The asset, publication and client identifiers are read from the
    attributes of the page's 'vrtvideo' element; formats and subtitles
    come from the media API. Metadata from the API response overrides the
    values scraped from the page.

    Raises ExtractorError when the page carries no video ID.
    """
    site, display_id = self._match_valid_url(url).groups()
    webpage = self._download_webpage(url, display_id)
    attrs = extract_attributes(get_element_html_by_class('vrtvideo', webpage) or '')

    # attrs is empty when the page has no player element; fail with a clear
    # message rather than a bare KeyError
    asset_id = attrs.get('data-video-id') or attrs.get('data-videoid')
    if not asset_id:
        raise ExtractorError('Unable to extract video ID')
    publication_id = traverse_obj(attrs, 'data-publication-id', 'data-publicationid')
    if publication_id:
        asset_id = f'{publication_id}${asset_id}'
    # Fall back to the per-site client code when the page does not name one
    client = traverse_obj(attrs, 'data-client-code', 'data-client') or self._CLIENT_MAP[site]

    data = self._call_api(asset_id, client)
    formats, subtitles = self._extract_formats_and_subtitles(data, asset_id)

    description = self._html_search_meta(
        ['og:description', 'twitter:description', 'description'], webpage)
    # A lone ellipsis is a placeholder, not a real description
    if description == '…':
        description = None

    return {
        'id': asset_id,
        'formats': formats,
        'subtitles': subtitles,
        'description': description,
        'thumbnail': url_or_none(attrs.get('data-posterimage')),
        'duration': float_or_none(attrs.get('data-duration'), 1000),
        '_old_archive_ids': [make_archive_id('Canvas', asset_id)],
        # Unpacked last so API-provided fields take precedence over page data
        **traverse_obj(data, {
            'title': ('title', {str}),
            'description': ('shortDescription', {str}),
            'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
            'thumbnail': ('posterImageUrl', {url_or_none}),
        }),
    }
180 class VrtNUIE(VRTBaseIE
):
182 _VALID_URL
= r
'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
184 # CONTENT_IS_AGE_RESTRICTED
185 'url': 'https://www.vrt.be/vrtnu/a-z/de-ideale-wereld/2023-vj/de-ideale-wereld-d20230116/',
187 'id': 'pbs-pub-855b00a8-6ce2-4032-ac4f-1fcf3ae78524$vid-d2243aa1-ec46-4e34-a55b-92568459906f',
190 'description': 'Satirisch actualiteitenmagazine met Ella Leyers. Tom Waes is te gast.',
191 'timestamp': 1673905125,
192 'release_timestamp': 1673905125,
193 'series': 'De ideale wereld',
194 'season_id': '1672830988794',
195 'episode': 'Aflevering 1',
197 'episode_id': '1672830988861',
198 'display_id': 'de-ideale-wereld-d20230116',
201 'thumbnail': 'https://images.vrt.be/orig/2023/01/10/1bb39cb3-9115-11ed-b07d-02b7b76bf47f.jpg',
202 'release_date': '20230116',
203 'upload_date': '20230116',
207 'url': 'https://www.vrt.be/vrtnu/a-z/buurman--wat-doet-u-nu-/6/buurman--wat-doet-u-nu--s6-trailer/',
209 'id': 'pbs-pub-ad4050eb-d9e5-48c2-9ec8-b6c355032361$vid-0465537a-34a8-4617-8352-4d8d983b4eee',
211 'title': 'Trailer seizoen 6 \'Buurman, wat doet u nu?\'',
212 'description': 'md5:197424726c61384b4e5c519f16c0cf02',
213 'timestamp': 1652940000,
214 'release_timestamp': 1652940000,
215 'series': 'Buurman, wat doet u nu?',
216 'season': 'Seizoen 6',
218 'season_id': '1652344200907',
219 'episode': 'Aflevering 0',
221 'episode_id': '1652951873524',
222 'display_id': 'buurman--wat-doet-u-nu--s6-trailer',
225 'thumbnail': 'https://images.vrt.be/orig/2022/05/23/3c234d21-da83-11ec-b07d-02b7b76bf47f.jpg',
226 'release_date': '20220519',
227 'upload_date': '20220519',
229 'params': {'skip_download': 'm3u8'},
231 _NETRC_MACHINE
= 'vrtnu'
232 _authenticated
= False
234 def _perform_login(self
, username
, password
):
235 auth_info
= self
._gigya
_login
({
236 'APIKey': '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy',
237 'targetEnv': 'jssdk',
239 'password': password
,
240 'authMode': 'cookie',
243 if auth_info
.get('errorDetails'):
244 raise ExtractorError(f
'Unable to login. VrtNU said: {auth_info["errorDetails"]}', expected
=True)
246 # Sometimes authentication fails for no good reason, retry
247 for retry
in self
.RetryManager():
248 if retry
.attempt
> 1:
251 self
._request
_webpage
(
252 'https://token.vrt.be/vrtnuinitlogin', None, note
='Requesting XSRF Token',
253 errnote
='Could not get XSRF Token', query
={
255 'destination': 'https://www.vrt.be/vrtnu/',
257 self
._request
_webpage
(
258 'https://login.vrt.be/perform_login', None,
259 note
='Performing login', errnote
='Login failed',
260 query
={'client_id': 'vrtnu-site'}, data
=urlencode_postdata({
261 'UID': auth_info
['UID'],
262 'UIDSignature': auth_info
['UIDSignature'],
263 'signatureTimestamp': auth_info
['signatureTimestamp'],
264 '_csrf': self
._get
_cookies
('https://login.vrt.be').get('OIDCXSRF').value
,
266 except ExtractorError
as e
:
267 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 401:
272 self
._authenticated
= True
274 def _real_extract(self
, url
):
275 display_id
= self
._match
_id
(url
)
276 parsed_url
= urllib
.parse
.urlparse(url
)
277 details
= self
._download
_json
(
278 f
'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path.rstrip("/")}.model.json',
279 display_id
, 'Downloading asset JSON', 'Unable to download asset JSON')['details']
281 watch_info
= traverse_obj(details
, (
282 'actions', lambda _
, v
: v
['type'] == 'watch-episode', {dict}
), get_all
=False) or {}
283 video_id
= join_nonempty(
284 'episodePublicationId', 'episodeVideoId', delim
='$', from_dict
=watch_info
)
285 if '$' not in video_id
:
286 raise ExtractorError('Unable to extract video ID')
288 vrtnutoken
= self
._download
_json
(
289 'https://token.vrt.be/refreshtoken', video_id
, note
='Retrieving vrtnutoken',
290 errnote
='Token refresh failed')['vrtnutoken'] if self
._authenticated
else None
292 video_info
= self
._call
_api
(video_id
, 'vrtnu-web@PROD', vrtnutoken
)
294 if 'title' not in video_info
:
295 code
= video_info
.get('code')
296 if code
in ('AUTHENTICATION_REQUIRED', 'CONTENT_IS_AGE_RESTRICTED'):
297 self
.raise_login_required(code
, method
='password')
298 elif code
in ('INVALID_LOCATION', 'CONTENT_AVAILABLE_ONLY_IN_BE'):
299 self
.raise_geo_restricted(countries
=['BE'])
300 elif code
== 'CONTENT_AVAILABLE_ONLY_FOR_BE_RESIDENTS_AND_EXPATS':
301 if not self
._authenticated
:
302 self
.raise_login_required(code
, method
='password')
303 self
.raise_geo_restricted(countries
=['BE'])
304 raise ExtractorError(code
, expected
=True)
306 formats
, subtitles
= self
._extract
_formats
_and
_subtitles
(video_info
, video_id
)
309 **traverse_obj(details
, {
311 'description': ('description', {clean_html}
),
312 'timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}
),
313 'release_timestamp': ('data', 'episode', 'onTime', 'raw', {parse_iso8601}
),
314 'series': ('data', 'program', 'title'),
315 'season': ('data', 'season', 'title', 'value'),
316 'season_number': ('data', 'season', 'title', 'raw', {int_or_none}
),
317 'season_id': ('data', 'season', 'id', {str_or_none}
),
318 'episode': ('data', 'episode', 'number', 'value', {str_or_none}
),
319 'episode_number': ('data', 'episode', 'number', 'raw', {int_or_none}
),
320 'episode_id': ('data', 'episode', 'id', {str_or_none}
),
321 'age_limit': ('data', 'episode', 'age', 'raw', {parse_age_limit}
),
324 'display_id': display_id
,
327 'duration': float_or_none(video_info
.get('duration'), 1000),
328 'thumbnail': url_or_none(video_info
.get('posterImageUrl')),
329 'subtitles': subtitles
,
330 '_old_archive_ids': [make_archive_id('Canvas', video_id
)],
334 class KetnetIE(VRTBaseIE
):
335 _VALID_URL
= r
'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
337 'url': 'https://www.ketnet.be/kijken/m/meisjes/6/meisjes-s6a5',
339 'id': 'pbs-pub-39f8351c-a0a0-43e6-8394-205d597d6162$vid-5e306921-a9aa-4fa9-9f39-5b82c8f1028e',
342 'episode': 'Reeks 6: Week 5',
345 'timestamp': 1685251800,
346 'upload_date': '20230528',
348 'params': {'skip_download': 'm3u8'},
351 def _real_extract(self
, url
):
352 display_id
= self
._match
_id
(url
)
354 video
= self
._download
_json
(
355 'https://senior-bff.ketnet.be/graphql', display_id
, query
={
357 video(id: "content/ketnet/nl/%s.model.json") {
368 }''' % display_id
, # noqa: UP031
371 video_id
= urllib
.parse
.unquote(video
['mediaReference'])
372 data
= self
._call
_api
(video_id
, 'ketnet@PROD', version
='v1')
373 formats
, subtitles
= self
._extract
_formats
_and
_subtitles
(data
, video_id
)
378 'subtitles': subtitles
,
379 '_old_archive_ids': [make_archive_id('Canvas', video_id
)],
380 **traverse_obj(video
, {
381 'title': ('titleVideodetail', {str}
),
382 'description': ('description', {str}
),
383 'thumbnail': ('thumbnail', {url_or_none}
),
384 'timestamp': ('publicationDate', {parse_iso8601}
),
385 'series': ('programTitle', {str}
),
386 'season': ('seasonTitle', {str}
),
387 'episode': ('subtitleVideodetail', {str}
),
388 'episode_number': ('episodeNr', {int_or_none}
),
393 class DagelijkseKostIE(VRTBaseIE
):
394 IE_DESC
= 'dagelijksekost.een.be'
395 _VALID_URL
= r
'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
397 'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
399 'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
401 'title': 'Hachis parmentier met witloof',
402 'description': 'md5:9960478392d87f63567b5b117688cdc5',
403 'display_id': 'hachis-parmentier-met-witloof',
405 'params': {'skip_download': 'm3u8'},
def _real_extract(self, url):
    """Extract the recipe video from a dagelijksekost.een.be dish page.

    The media ID is taken from the page's quoted `data-url` attribute and
    resolved through the media API (client 'dako@prod', API version 'v1').
    Title and description are scraped from the page, with meta tags as
    fallback.
    """
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)
    # Backreference \1 matches whichever quote character opened the attribute
    video_id = self._html_search_regex(
        r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', group='id')

    data = self._call_api(video_id, 'dako@prod', version='v1')
    formats, subtitles = self._extract_formats_and_subtitles(data, video_id)

    return {
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        'display_id': display_id,
        'title': strip_or_none(get_element_by_class(
            'dish-metadata__title', webpage) or self._html_search_meta('twitter:title', webpage)),
        'description': clean_html(get_element_by_class(
            'dish-description', webpage)) or self._html_search_meta(
            ['description', 'twitter:description', 'og:description'], webpage),
        '_old_archive_ids': [make_archive_id('Canvas', video_id)],
    }
431 class Radio1BeIE(VRTBaseIE):
432 _VALID_URL = r'https?
://radio1\
.be
/(?
:lees|luister
/select
)/(?P
<id>[\w
/-]+)'
434 'url
': 'https
://radio1
.be
/luister
/select
/de
-ochtend
/komt
-n
-va
-volgend
-jaar
-op
-in-wallonie
',
436 'id': 'eb6c22e9
-544f
-44f4
-af39
-cf8cccd29e22
',
437 'title
': 'Komt N
-VA volgend jaar op
in Wallonië?
',
438 'display_id
': 'de
-ochtend
/komt
-n
-va
-volgend
-jaar
-op
-in-wallonie
',
439 'description
': 'md5
:b374ea1c9302f38362df9dea1931468e
',
440 'thumbnail
': r're
:https?
://cds\
.vrt\
.radio
/[^
/#\?&]+',
442 'playlist_mincount': 1,
444 'url': 'https://radio1.be/lees/europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza?view=web',
446 'id': '5d47f102-dbdb-4fa0-832b-26c1870311f2',
447 'title': 'Europese Unie wil "onmiddellijke humanitaire pauze" en "duurzaam staakt-het-vuren" in Gaza',
448 'description': 'md5:1aad1fae7d39edeffde5d3e67d276b64',
449 'thumbnail': r
're:https?://cds\.vrt\.radio/[^/#\?&]+',
450 'display_id': 'europese-unie-wil-onmiddellijke-humanitaire-pauze-en-duurzaam-staakt-het-vuren-in-gaza',
452 'playlist_mincount': 1,
def _extract_video_entries(self, next_js_data, display_id):
    """Yield one entry dict per media reference found in `next_js_data`.

    Both the item itself and each element of its 'paragraphs' are
    inspected; only those with a truthy 'mediaReference' produce an entry.
    The media reference doubles as the entry ID.
    """
    # `None` selects the top-level item, ('paragraphs', ...) every paragraph
    video_data = traverse_obj(
        next_js_data, ((None, ('paragraphs', ...)), {lambda x: x if x['mediaReference'] else None}))
    for data in video_data:
        media_reference = data['mediaReference']
        formats, subtitles = self._extract_formats_and_subtitles(
            self._call_api(media_reference), display_id)

        yield {
            'id': media_reference,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(data, {
                'title': ('title', {str}),
                'description': ('body', {clean_html}),
            }),
        }
473 def _real_extract(self
, url
):
474 display_id
= self
._match
_id
(url
)
475 webpage
= self
._download
_webpage
(url
, display_id
)
476 next_js_data
= self
._search
_nextjs
_data
(webpage
, display_id
)['props']['pageProps']['item']
478 return self
.playlist_result(
479 self
._extract
_video
_entries
(next_js_data
, display_id
), **merge_dicts(traverse_obj(
482 'title': ('title', {str}
),
483 'description': (('description', 'content'), {clean_html}
),
484 }), get_all
=False), {
485 'display_id': display_id
,
486 'title': self
._html
_search
_meta
(['name', 'og:title', 'twitter:title'], webpage
),
487 'description': self
._html
_search
_meta
(['description', 'og:description', 'twitter:description'], webpage
),
488 'thumbnail': self
._html
_search
_meta
(['og:image', 'twitter:image'], webpage
),