5 from .common
import InfoExtractor
19 class NPOIE(InfoExtractor
):
21 IE_DESC
= 'npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl'
29 (?:ntr|npostart)\.nl/(?:[^/]+/){2,}|
30 omroepwnl\.nl/video/fragment/[^/]+__|
31 (?:zapp|npo3)\.nl/(?:[^/]+/){2,}
38 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
39 'md5': '4b3f9c429157ec4775f2c9cb7b911016',
41 'id': 'VPWON_1220719',
44 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
45 'upload_date': '20140622',
47 'skip': 'Video was removed',
49 'url': 'http://www.npo.nl/de-mega-mike-mega-thomas-show/27-02-2009/VARA_101191800',
50 'md5': 'da50a5787dbfc1603c4ad80f31c5120b',
52 'id': 'VARA_101191800',
54 'title': 'De Mega Mike & Mega Thomas show: The best of.',
55 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4',
56 'upload_date': '20090227',
59 'skip': 'Video was removed',
61 'url': 'http://www.npo.nl/tegenlicht/25-02-2013/VPWON_1169289',
62 'md5': '1b279c0547f6b270e014c576415268c5',
64 'id': 'VPWON_1169289',
66 'title': 'Zwart geld: de toekomst komt uit Afrika',
67 'description': 'md5:dffaf3d628a9c36f78ca48d834246261',
68 'upload_date': '20130225',
71 'series': 'Tegenlicht',
72 'timestamp': 1361822340,
73 'thumbnail': 'https://images.npo.nl/tile/1280x720/142854.jpg',
74 'episode': 'Zwart geld: de toekomst komt uit Afrika',
78 'url': 'http://www.npo.nl/de-nieuwe-mens-deel-1/21-07-2010/WO_VPRO_043706',
80 'id': 'WO_VPRO_043706',
82 'title': 'De nieuwe mens - Deel 1',
83 'description': 'md5:518ae51ba1293ffb80d8d8ce90b74e4b',
85 'episode': 'De nieuwe mens - Deel 1',
86 'thumbnail': 'https://images.npo.nl/tile/1280x720/6289.jpg',
87 'timestamp': 1279716057,
88 'series': 'De nieuwe mens - Deel 1',
89 'upload_date': '20100721',
92 'skip_download': True,
96 'url': 'http://www.npo.nl/hoe-gaat-europa-verder-na-parijs/10-01-2015/WO_NOS_762771',
98 'id': 'WO_NOS_762771',
100 'title': 'Hoe gaat Europa verder na Parijs?',
103 'skip_download': True,
105 'skip': 'Video was removed',
107 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content',
109 'id': 'VPWON_1233944',
111 'title': 'Aap, poot, pies',
112 'description': 'md5:4b46b1b9553b4c036a04d2a532a137e6',
113 'upload_date': '20150508',
115 'episode': 'Aap, poot, pies',
116 'thumbnail': 'https://images.poms.omroep.nl/image/s1280/c1280x720/608118.jpg',
117 'timestamp': 1431064200,
118 'series': 'Aap, poot, pies',
121 'skip_download': True,
124 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698',
126 'id': 'POW_00996502',
128 'title': '''"Dit is wel een 'landslide'..."''',
129 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8',
130 'upload_date': '20150508',
134 'skip_download': True,
136 'skip': 'Video was removed',
139 'url': 'http://www.npo.nl/jouw-stad-rotterdam/29-01-2017/RBX_FUNX_6683215/RBX_FUNX_7601437',
141 'id': 'RBX_FUNX_6683215',
143 'title': 'Jouw Stad Rotterdam',
144 'description': 'md5:db251505244f097717ec59fabc372d9f',
147 'skip_download': True,
149 'skip': 'Video was removed',
151 'url': 'http://www.zapp.nl/de-bzt-show/gemist/KN_1687547',
152 'only_matching': True,
154 'url': 'http://www.zapp.nl/de-bzt-show/filmpjes/POMS_KN_7315118',
155 'only_matching': True,
157 'url': 'http://www.zapp.nl/beste-vrienden-quiz/extra-video-s/WO_NTR_1067990',
158 'only_matching': True,
160 'url': 'https://www.npo3.nl/3onderzoekt/16-09-2015/VPWON_1239870',
161 'only_matching': True,
164 'url': 'npo:LI_NL1_4188102',
165 'only_matching': True,
167 'url': 'http://www.npo.nl/radio-gaga/13-06-2017/BNN_101383373',
168 'only_matching': True,
170 'url': 'https://www.zapp.nl/1803-skelterlab/instructie-video-s/740-instructievideo-s/POMS_AT_11736927',
171 'only_matching': True,
173 'url': 'https://www.npostart.nl/broodje-gezond-ei/28-05-2018/KN_1698996',
174 'only_matching': True,
176 'url': 'https://npo.nl/KN_1698996',
177 'only_matching': True,
179 'url': 'https://www.npo3.nl/the-genius/21-11-2022/VPWON_1341105',
181 'id': 'VPWON_1341105',
184 'series': 'The Genius',
185 'description': 'md5:db02f1456939ca63f7c408f858044e94',
186 'title': 'The Genius',
187 'timestamp': 1669062000,
189 'episode': 'The Genius',
190 'thumbnail': 'https://images.npo.nl/tile/1280x720/1827650.jpg',
192 'upload_date': '20221121',
195 'skip_download': True,
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL that NPOLiveIE, NPORadioIE or NPORadioFragmentIE reports as
    suitable is rejected here; for every other URL the decision is
    delegated to the parent class.
    """
    # NOTE(review): ``cls`` together with zero-argument ``super()`` requires
    # a classmethod; confirm the decorator is not already present just above.
    more_specific_extractors = (NPOLiveIE, NPORadioIE, NPORadioFragmentIE)
    if any(ie.suitable(url) for ie in more_specific_extractors):
        return False
    return super().suitable(url)
205 def _real_extract(self
, url
):
206 video_id
= self
._match
_id
(url
)
207 if urllib
.parse
.urlparse(url
).netloc
in ['www.ntr.nl', 'ntr.nl']:
208 player
= self
._download
_json
(
209 f
'https://www.ntr.nl/ajax/player/embed/{video_id}', video_id
,
210 'Downloading player JSON', query
={
211 'parameters[elementId]': f
'npo{random.randint(0, 999)}',
212 'parameters[sterReferralUrl]': url
,
213 'parameters[autoplay]': 0,
216 self
._request
_webpage
(
217 'https://www.npostart.nl/api/token', video_id
,
218 'Downloading token', headers
={
220 'X-Requested-With': 'XMLHttpRequest',
222 player
= self
._download
_json
(
223 f
'https://www.npostart.nl/player/{video_id}', video_id
,
224 'Downloading player JSON', data
=urlencode_postdata({
230 'x-xsrf-token': try_call(lambda: urllib
.parse
.unquote(
231 self
._get
_cookies
('https://www.npostart.nl')['XSRF-TOKEN'].value
)),
234 player_token
= player
['token']
239 for profile
in ('hls', 'dash-widevine', 'dash-playready', 'smooth'):
240 streams
= self
._download
_json
(
241 f
'https://start-player.npo.nl/video/{video_id}/streams',
242 video_id
, f
'Downloading {profile} profile JSON', fatal
=False,
245 'quality': 'npoplus',
246 'tokenId': player_token
,
247 'streamType': 'broadcast',
248 }, data
=b
'') # endpoint requires POST
251 stream
= streams
.get('stream')
252 if not isinstance(stream
, dict):
254 stream_url
= url_or_none(stream
.get('src'))
255 if not stream_url
or stream_url
in format_urls
:
257 format_urls
.add(stream_url
)
258 if stream
.get('protection') is not None or stream
.get('keySystemOptions') is not None:
261 stream_type
= stream
.get('type')
262 stream_ext
= determine_ext(stream_url
)
263 if stream_type
== 'application/dash+xml' or stream_ext
== 'mpd':
264 formats
.extend(self
._extract
_mpd
_formats
(
265 stream_url
, video_id
, mpd_id
='dash', fatal
=False))
266 elif stream_type
== 'application/vnd.apple.mpegurl' or stream_ext
== 'm3u8':
267 formats
.extend(self
._extract
_m
3u8_formats
(
268 stream_url
, video_id
, ext
='mp4',
269 entry_protocol
='m3u8_native', m3u8_id
='hls', fatal
=False))
270 elif re
.search(r
'\.isml?/Manifest', stream_url
):
271 formats
.extend(self
._extract
_ism
_formats
(
272 stream_url
, video_id
, ism_id
='mss', fatal
=False))
279 if not self
.get_param('allow_unplayable_formats') and drm
:
280 self
.report_drm(video_id
)
288 embed_url
= url_or_none(player
.get('embedUrl'))
290 webpage
= self
._download
_webpage
(
291 embed_url
, video_id
, 'Downloading embed page', fatal
=False)
293 video
= self
._parse
_json
(
295 r
'\bvideo\s*=\s*({.+?})\s*;', webpage
, 'video',
296 default
='{}'), video_id
)
298 title
= video
.get('episodeTitle')
300 subtitles_list
= video
.get('subtitles')
301 if isinstance(subtitles_list
, list):
302 for cc
in subtitles_list
:
303 cc_url
= url_or_none(cc
.get('src'))
306 lang
= str_or_none(cc
.get('language')) or 'nl'
307 subtitles
.setdefault(lang
, []).append({
312 'description': video
.get('description'),
313 'thumbnail': url_or_none(
314 video
.get('still_image_url') or video
.get('orig_image_url')),
315 'duration': int_or_none(video
.get('duration')),
316 'timestamp': unified_timestamp(video
.get('broadcastDate')),
317 'creator': video
.get('channel'),
318 'series': video
.get('title'),
320 'episode_number': int_or_none(video
.get('episodeNumber')),
321 'subtitles': subtitles
,
327 class NPOLiveIE(InfoExtractor
):
328 IE_NAME
= 'npo.nl:live'
329 _VALID_URL
= r
'https?://(?:www\.)?npo(?:start)?\.nl/live(?:/(?P<id>[^/?#&]+))?'
332 'url': 'http://www.npo.nl/live/npo-1',
334 'id': 'LI_NL1_4188102',
335 'display_id': 'npo-1',
337 'title': 're:^NPO 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
341 'skip_download': True,
344 'url': 'http://www.npo.nl/live',
345 'only_matching': True,
347 'url': 'https://www.npostart.nl/live/npo-1',
348 'only_matching': True,
351 def _real_extract(self
, url
):
352 display_id
= self
._match
_id
(url
) or 'npo-1'
354 webpage
= self
._download
_webpage
(url
, display_id
)
356 live_id
= self
._search
_regex
(
357 [r
'media-id="([^"]+)"', r
'data-prid="([^"]+)"'], webpage
, 'live id')
360 '_type': 'url_transparent',
361 'url': f
'npo:{live_id}',
362 'ie_key': NPOIE
.ie_key(),
364 'display_id': display_id
,
368 class NPORadioIE(InfoExtractor
):
369 IE_NAME
= 'npo.nl:radio'
370 _VALID_URL
= r
'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)'
373 'url': 'http://www.npo.nl/radio/radio-1',
377 'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
381 'skip_download': True,
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle *url*.

    A URL that NPORadioFragmentIE reports as suitable is rejected here;
    for every other URL the decision is delegated to the parent class.
    """
    # NOTE(review): ``cls`` together with zero-argument ``super()`` requires
    # a classmethod; confirm the decorator is not already present just above.
    if NPORadioFragmentIE.suitable(url):
        return False
    return super().suitable(url)
390 def _html_get_attribute_regex(attribute
):
391 return rf
'{attribute}\s*=\s*\'([^\']+)\''
393 def _real_extract(self
, url
):
394 video_id
= self
._match
_id
(url
)
396 webpage
= self
._download
_webpage
(url
, video_id
)
398 title
= self
._html
_search
_regex
(
399 self
._html
_get
_attribute
_regex
('data-channel'), webpage
, 'title')
401 stream
= self
._parse
_json
(
402 self
._html
_search
_regex
(self
._html
_get
_attribute
_regex
('data-streams'), webpage
, 'data-streams'),
405 codec
= stream
.get('codec')
409 'url': stream
['url'],
417 class NPORadioFragmentIE(InfoExtractor
):
418 IE_NAME
= 'npo.nl:radio:fragment'
419 _VALID_URL
= r
'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'
422 'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
423 'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
427 'title': 'Jubileumconcert Willeke Alberti',
431 def _real_extract(self
, url
):
432 audio_id
= self
._match
_id
(url
)
434 webpage
= self
._download
_webpage
(url
, audio_id
)
436 title
= self
._html
_search
_regex
(
437 rf
'href="/radio/[^/]+/fragment/{audio_id}" title="([^"]+)"',
440 audio_url
= self
._search
_regex
(
441 r
"data-streams='([^']+)'", webpage
, 'audio url')
450 class NPODataMidEmbedIE(InfoExtractor
): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
451 def _real_extract(self
, url
):
452 display_id
= self
._match
_id
(url
)
453 webpage
= self
._download
_webpage
(url
, display_id
)
454 video_id
= self
._search
_regex
(
455 r
'data-mid=(["\'])(?P
<id>(?
:(?
!\
1).)+)\
1', webpage, 'video_id
', group='id')
457 '_type
': 'url_transparent
',
459 'url
': f'npo
:{video_id}
',
460 'display_id
': display_id,
464 class SchoolTVIE(NPODataMidEmbedIE):
466 _VALID_URL = r'https?
://(?
:www\
.)?schooltv\
.nl
/video
/(?P
<id>[^
/?
#&]+)'
469 'url': 'http://www.schooltv.nl/video/ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam/',
471 'id': 'WO_NTR_429477',
472 'display_id': 'ademhaling-de-hele-dag-haal-je-adem-maar-wat-gebeurt-er-dan-eigenlijk-in-je-lichaam',
473 'title': 'Ademhaling: De hele dag haal je adem. Maar wat gebeurt er dan eigenlijk in je lichaam?',
475 'description': 'md5:abfa0ff690adb73fd0297fd033aaa631',
478 # Skip because of m3u8 download
479 'skip_download': True,
484 class HetKlokhuisIE(NPODataMidEmbedIE
):
485 IE_NAME
= 'hetklokhuis'
486 _VALID_URL
= r
'https?://(?:www\.)?hetklokhuis\.nl/[^/]+/\d+/(?P<id>[^/?#&]+)'
489 'url': 'http://hetklokhuis.nl/tv-uitzending/3471/Zwaartekrachtsgolven',
491 'id': 'VPWON_1260528',
492 'display_id': 'Zwaartekrachtsgolven',
494 'title': 'Het Klokhuis: Zwaartekrachtsgolven',
495 'description': 'md5:c94f31fb930d76c2efa4a4a71651dd48',
496 'upload_date': '20170223',
499 'skip_download': True,
class NPOPlaylistBaseIE(NPOIE):  # XXX: Do not subclass from concrete IE
    """Shared playlist extraction for pages that embed several NPO videos.

    The code reads two attributes from ``self`` that are not defined in this
    class, so subclasses are expected to supply them:

    * ``_PLAYLIST_ENTRY_RE`` - regex whose matches are the playlist entries
      (either a bare id or an absolute URL).
    * ``_PLAYLIST_TITLE_RE`` - regex (or sequence of regexes) for the title.
    """

    def _real_extract(self, url):
        """Download the page at *url* and return a playlist of its videos."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # NOTE(review): the ``entries = [...]`` wrapper is restored from the
        # use of ``entries`` in the return statement - confirm against the
        # canonical file.
        # Entries that start with ``http`` are passed through as URLs; all
        # others are wrapped in the ``npo:`` scheme. orderedSet drops
        # repeated matches.
        entries = [
            self.url_result(f'npo:{video_id}' if not video_id.startswith('http') else video_id)
            for video_id in orderedSet(re.findall(self._PLAYLIST_ENTRY_RE, webpage))
        ]

        # Fall back to the Open Graph title when the title regex finds nothing.
        playlist_title = self._html_search_regex(
            self._PLAYLIST_TITLE_RE, webpage, 'playlist title',
            default=None) or self._og_search_title(webpage)

        return self.playlist_result(entries, playlist_id, playlist_title)
522 class VPROIE(NPOPlaylistBaseIE
):
524 _VALID_URL
= r
'https?://(?:www\.)?(?:(?:tegenlicht\.)?vpro|2doc)\.nl/(?:[^/]+/)*(?P<id>[^/]+)\.html'
525 _PLAYLIST_TITLE_RE
= (r
'<h1[^>]+class=["\'].*?
\bmedia
-platform
-title
\b.*?
["\'][^>]*>([^<]+)',
526 r'<h5[^>]+class=["\'].*?
\bmedia
-platform
-subtitle
\b.*?
["\'][^>]*>([^<]+)')
527 _PLAYLIST_ENTRY_RE = r'data-media-id="([^
"]+)"'
531 'url
': 'http
://tegenlicht
.vpro
.nl
/afleveringen
/2012-2013/de
-toekomst
-komt
-uit
-afrika
.html
',
532 'md5
': 'f8065e4e5a7824068ed3c7e783178f2c
',
534 'id': 'VPWON_1169289
',
536 'title
': 'De toekomst komt uit Afrika
',
537 'description
': 'md5
:52cf4eefbc96fffcbdc06d024147abea
',
538 'upload_date
': '20130225',
540 'skip
': 'Video gone
',
543 'url
': 'http
://www
.vpro
.nl
/programmas
/2doc
/2015/sergio
-herman
.html
',
545 'id': 'sergio
-herman
',
546 'title
': 'sergio herman
: fucking perfect
',
551 # playlist with youtube embed
552 'url
': 'http
://www
.vpro
.nl
/programmas
/2doc
/2015/education
-education
.html
',
554 'id': 'education
-education
',
555 'title
': 'education education
',
560 'url
': 'http
://www
.2doc
.nl
/documentaires
/series
/2doc
/2015/oktober
/de
-tegenprestatie
.html
',
562 'id': 'de
-tegenprestatie
',
563 'title
': 'De Tegenprestatie
',
567 'url
': 'http
://www
.2doc
.nl
/speel~VARA_101375237~mh17
-het
-verdriet
-van
-nederland~
.html
',
569 'id': 'VARA_101375237
',
571 'title
': 'MH17
: Het verdriet van Nederland
',
572 'description
': 'md5
:09e1a37c1fdb144621e22479691a9f18
',
573 'upload_date
': '20150716',
576 # Skip because of m3u8 download
577 'skip_download
': True,
583 class WNLIE(NPOPlaylistBaseIE):
585 _VALID_URL = r'https?
://(?
:www\
.)?omroepwnl\
.nl
/video
/detail
/(?P
<id>[^
/]+)__\d
+'
586 _PLAYLIST_TITLE_RE = r'(?s
)<h1
[^
>]+class="subject"[^
>]*>(.+?
)</h1
>'
587 _PLAYLIST_ENTRY_RE = r'<a
[^
>]+href
="([^"]+)"[^>]+class="js
-mid
"[^>]*>Deel \d+'
590 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
592 'id': 'vandaag-de-dag-6-mei',
593 'title': 'Vandaag de Dag 6 mei',
599 class AndereTijdenIE(NPOPlaylistBaseIE):
600 IE_NAME = 'anderetijden'
601 _VALID_URL = r'https?://(?:www\.)?anderetijden\.nl/programma/(?:[^/]+/)+(?P<id>[^/?#&]+)'
602 _PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class=["\'].*?
\bpage
-title
\b.*?
["\'][^>]*>(.+?)</h1>'
603 _PLAYLIST_ENTRY_RE = r'<figure[^>]+class=["\']episode
-container episode
-page
["\'][^>]+data-prid=["\'](.+?
)["\']'
606 'url': 'http://anderetijden.nl/programma/1/Andere-Tijden/aflevering/676/Duitse-soldaten-over-de-Slag-bij-Arnhem',
608 'id': 'Duitse-soldaten-over-de-Slag-bij-Arnhem',
609 'title': 'Duitse soldaten over de Slag bij Arnhem',