import re

from .common import InfoExtractor
# NOTE(review): helper names below are exactly those used in this file; the
# ``..utils`` module path is assumed from the package layout — confirm.
from ..utils import (
    NO_DEFAULT,
    ExtractorError,
    determine_ext,
    extract_attributes,
    float_or_none,
    int_or_none,
    join_nonempty,
    merge_dicts,
    parse_codecs,
    qualities,
    traverse_obj,
    try_get,
    unified_timestamp,
    update_url_query,
    url_or_none,
    urljoin,
)
class ZDFBaseIE(InfoExtractor):
    """Shared helpers for the ZDF extractors (API access, subtitles, formats)."""

    _GEO_COUNTRIES = ['DE']
    # Quality labels handed to ``qualities()`` in ``_extract_format`` to rank
    # a format by the position of its label in this tuple.
    _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')

    def _call_api(self, url, video_id, item, api_token=None, referrer=None):
        """Download a JSON document from *url*.

        ``Api-Auth`` and ``Referer`` headers are only sent when *api_token*
        and *referrer* are given, so anonymous calls do not transmit
        ``Bearer None``. *item* is only used in the progress message.
        """
        headers = {}
        if api_token:
            headers['Api-Auth'] = f'Bearer {api_token}'
        if referrer:
            headers['Referer'] = referrer
        return self._download_json(
            url, video_id, f'Downloading JSON {item}', headers=headers)

    @staticmethod
    def _extract_subtitles(src):
        """Return ``{lang: [{'url': ...}]}`` built from ``src['captions']``.

        Captions without a valid ``uri`` are skipped; a caption without a
        ``language`` key is filed under ``'deu'``.
        """
        subtitles = {}
        for caption in try_get(src, lambda x: x['captions'], list) or []:
            subtitle_url = url_or_none(caption.get('uri'))
            if subtitle_url:
                lang = caption.get('language', 'deu')
                subtitles.setdefault(lang, []).append({
                    'url': subtitle_url,
                })
        return subtitles

    def _extract_format(self, video_id, formats, format_urls, meta):
        """Append the format(s) described by *meta* to *formats* (in place).

        *format_urls* is a set of URLs already handled; it is consulted to
        skip duplicates and updated in place. *meta* is a dict with at least
        ``url`` and optionally ``mimeType``, ``mimeCodec``, ``type``,
        ``quality``, ``class`` and ``language``.
        """
        format_url = url_or_none(meta.get('url'))
        if not format_url or format_url in format_urls:
            return
        format_urls.add(format_url)

        mime_type, ext = meta.get('mimeType'), determine_ext(format_url)
        if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
            new_formats = self._extract_m3u8_formats(
                format_url, video_id, 'mp4', m3u8_id='hls',
                entry_protocol='m3u8_native', fatal=False)
        elif mime_type == 'application/f4m+xml' or ext == 'f4m':
            new_formats = self._extract_f4m_formats(
                update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
        # NOTE(review): the condition guarding the DASH branch is not present
        # in the damaged source; ``ext == 'mpd'`` is assumed — confirm.
        elif ext == 'mpd':
            new_formats = self._extract_mpd_formats(
                format_url, video_id, mpd_id='dash', fatal=False)
        else:
            # Plain progressive download: derive codecs from ``mimeCodec`` or,
            # failing that, from a ``<vcodec>_<acodec>_<ext>`` style ``type``.
            f = parse_codecs(meta.get('mimeCodec'))
            if not f and meta.get('type'):
                data = meta['type'].split('_')
                if try_get(data, lambda x: x[2]) == ext:
                    f = {'vcodec': data[0], 'acodec': data[1]}
            f.update({
                'url': format_url,
                'format_id': join_nonempty('http', meta.get('type'), meta.get('quality')),
                'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)),
            })
            new_formats = [f]

        # Stamp every produced format with the track-level metadata.
        formats.extend(merge_dicts(f, {
            'format_note': join_nonempty('quality', 'class', from_dict=meta, delim=', '),
            'language': meta.get('language'),
            'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1,
            'quality': qualities(self._QUALITIES)(meta.get('quality')),
        }) for f in new_formats)

    def _extract_ptmd(self, ptmd_url, video_id, api_token, referrer):
        """Fetch the PTMD metadata document and turn it into an info dict.

        Walks ``priorityList -> formitaeten -> qualities -> audio.tracks`` and
        feeds every track to ``_extract_format``. Malformed levels (anything
        that is not a list) are skipped instead of aborting the extraction.
        """
        ptmd = self._call_api(
            ptmd_url, video_id, 'metadata', api_token, referrer)

        content_id = ptmd.get('basename') or ptmd_url.split('/')[-1]

        formats = []
        track_uris = set()
        for p in ptmd['priorityList']:
            formitaeten = p.get('formitaeten')
            if not isinstance(formitaeten, list):
                continue
            for f in formitaeten:
                f_qualities = f.get('qualities')
                if not isinstance(f_qualities, list):
                    continue
                for quality in f_qualities:
                    tracks = try_get(quality, lambda x: x['audio']['tracks'], list)
                    if not tracks:
                        continue
                    for track in tracks:
                        self._extract_format(
                            content_id, formats, track_uris, {
                                'url': track.get('uri'),
                                'type': f.get('type'),
                                'mimeType': f.get('mimeType'),
                                'quality': quality.get('quality'),
                                'class': track.get('class'),
                                'language': track.get('language'),
                            })

        # The raw value is divided by 1000 (presumably milliseconds -> seconds).
        duration = float_or_none(try_get(
            ptmd, lambda x: x['attributes']['duration']['value']), scale=1000)

        return {
            'extractor_key': ZDFIE.ie_key(),
            'id': content_id,
            'duration': duration,
            'formats': formats,
            'subtitles': self._extract_subtitles(ptmd),
            '_format_sort_fields': ('tbr', 'res', 'quality', 'language_preference'),
        }

    def _extract_player(self, webpage, video_id, fatal=True):
        """Parse the JSON stored in the page's ``data-zdfplayer-jsb`` attribute.

        With ``fatal=False`` a missing attribute yields an empty dict rather
        than an error.
        """
        return self._parse_json(
            self._search_regex(
                r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
                'player JSON', default='{}' if not fatal else NO_DEFAULT,
                group='json'),
            video_id)
class ZDFIE(ZDFBaseIE):
    """Extractor for single video pages (``*.html``) on www.zdf.de."""

    _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
    # NOTE(review): the list/dict nesting of the test cases was rebuilt from
    # the surviving key/value lines; only values present in the damaged source
    # are listed — confirm against upstream before relying on them.
    _TESTS = [{
        # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
        'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
        'md5': '34ec321e7eb34231fd88616c65c92db0',
        'info_dict': {
            'id': '210222_phx_nachgehakt_corona_protest',
            'title': 'Wohin führt der Protest in der Pandemie?',
            'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
            'timestamp': 1613948400,
            'upload_date': '20210221',
        },
        'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
    }, {
        # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
        'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
        'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
        'info_dict': {
            'id': '141007_ab18_10wochensommer_film',
            'title': 'Ab 18! - 10 Wochen Sommer',
            'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
            'timestamp': 1608604200,
            'upload_date': '20201222',
        },
        'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
    }, {
        'url': 'https://www.zdf.de/nachrichten/heute-journal/heute-journal-vom-30-12-2021-100.html',
        'info_dict': {
            'id': '211230_sendung_hjo',
            'description': 'md5:47dff85977bde9fb8cba9e9c9b929839',
            'upload_date': '20211230',
            'thumbnail': 'md5:e65f459f741be5455c952cd820eb188e',
            'title': 'heute journal vom 30.12.2021',
            'timestamp': 1640897100,
        },
        'skip': 'No longer available: "Diese Seite wurde leider nicht gefunden"',
    }, {
        'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
        'info_dict': {
            'id': '151025_magie_farben2_tex',
            'title': 'Die Magie der Farben (2/2)',
            'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
            'timestamp': 1465021200,
            'upload_date': '20160604',
            'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806',
        },
    }, {
        'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
        'md5': '57af4423db0455a3975d2dc4578536bc',
        'info_dict': {
            'id': 'video_funk_1770473',
            'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
            'title': 'Alles ist verzaubert',
            'timestamp': 1635520560,
            'upload_date': '20211029',
            'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-102~1920x1080?cb=1663848412907',
        },
    }, {
        # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
        'url': 'https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html',
        'only_matching': True,
    }, {
        # Same as https://www.3sat.de/film/spielfilm/der-hauptmann-100.html
        'url': 'https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html',
        'only_matching': True,
    }, {
        # Same as https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
        'url': 'https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html',
        'only_matching': True,
    }, {
        'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
        'only_matching': True,
    }, {
        'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
        'only_matching': True,
    }, {
        'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
        'only_matching': True,
    }, {
        'url': 'https://www.zdf.de/arte/todliche-flucht/page-video-artede-toedliche-flucht-16-100.html',
        'info_dict': {
            'id': 'video_artede_083871-001-A',
            'title': 'Tödliche Flucht (1/6)',
            'description': 'md5:e34f96a9a5f8abd839ccfcebad3d5315',
            'timestamp': 1641355200,
            'upload_date': '20220105',
        },
        'skip': 'No longer available "Diese Seite wurde leider nicht gefunden"',
    }, {
        'url': 'https://www.zdf.de/serien/soko-stuttgart/das-geld-anderer-leute-100.html',
        'info_dict': {
            'id': '191205_1800_sendung_sok8',
            'title': 'Das Geld anderer Leute',
            'description': 'md5:cb6f660850dc5eb7d1ab776ea094959d',
            'timestamp': 1675160100,
            'upload_date': '20230131',
            'thumbnail': 'https://epg-image.zdf.de/fotobase-webdelivery/images/e2d7e55a-09f0-424e-ac73-6cac4dd65f35?layout=2400x1350',
        },
    }, {
        'url': 'https://www.zdf.de/dokumentation/terra-x/unser-gruener-planet-wuesten-doku-100.html',
        'info_dict': {
            'id': '220605_dk_gruener_planet_wuesten_tex',
            'title': 'Unser grüner Planet - Wüsten',
            'description': 'md5:4fc647b6f9c3796eea66f4a0baea2862',
            'timestamp': 1654450200,
            'upload_date': '20220605',
            'format_note': 'uhd, main',
            'thumbnail': 'https://www.zdf.de/assets/saguaro-kakteen-102~3840x2160?cb=1655910690796',
        },
    }]

    def _extract_entry(self, url, player, content, video_id):
        """Build the info dict for one video from its *content* API document.

        *url* is the base against which the relative PTMD path is resolved and
        is also sent as referrer; ``player['apiToken']`` authorises the PTMD
        request. Raises ``ExtractorError`` when no PTMD path can be found.
        """
        title = content.get('title') or content['teaserHeadline']

        t = content['mainVideoContent']['http://zdf.de/rels/target']
        # The PTMD link may sit under ``streams.default`` or directly on the
        # target, under either of two relation names; take the first hit.
        ptmd_path = traverse_obj(t, (
            (('streams', 'default'), None),
            ('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template'),
        ), get_all=False)
        if not ptmd_path:
            raise ExtractorError('Could not extract ptmd_path')

        info = self._extract_ptmd(
            urljoin(url, ptmd_path.replace('{playerId}', 'android_native_5')), video_id, player['apiToken'], url)

        thumbnails = []
        layouts = try_get(
            content, lambda x: x['teaserImageRef']['layouts'], dict)
        if layouts:
            for layout_key, layout_url in layouts.items():
                layout_url = url_or_none(layout_url)
                if not layout_url:
                    continue
                thumbnail = {
                    'url': layout_url,
                    'format_id': layout_key,
                }
                # Layout keys may embed the size as ``<width>x<height>``.
                mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
                if mobj:
                    thumbnail.update({
                        'width': int(mobj.group('width')),
                        'height': int(mobj.group('height')),
                    })
                thumbnails.append(thumbnail)

        # A sentinel carrying the total duration closes the last chapter. It is
        # added to a new list so the API response itself is left untouched.
        chapter_marks = [
            *(t.get('streamAnchorTag') or []),
            {'anchorOffset': int_or_none(t.get('duration'))},
        ]
        chapters = [{
            'start_time': chap.get('anchorOffset'),
            'end_time': next_chap.get('anchorOffset'),
            'title': chap.get('anchorLabel'),
        } for chap, next_chap in zip(chapter_marks, chapter_marks[1:])]

        return merge_dicts(info, {
            'title': title,
            'description': content.get('leadParagraph') or content.get('teasertext'),
            'duration': int_or_none(t.get('duration')),
            'timestamp': unified_timestamp(content.get('editorialDate')),
            'thumbnails': thumbnails,
            'chapters': chapters or None,
        })

    def _extract_regular(self, url, player, video_id):
        """Extract via the content API advertised in the page's player JSON."""
        content = self._call_api(
            player['content'], video_id, 'content', player['apiToken'], url)
        # The content API URL (not the page URL) is the base for the PTMD path.
        return self._extract_entry(player['content'], player, content, video_id)

    def _extract_mobile(self, video_id):
        """Fallback extraction through the mobile ``mediathekV2`` endpoint.

        Raises ``ExtractorError`` when the response carries no ``formitaeten``
        list, instead of failing later on a missing ``document``.
        """
        video = self._download_json(
            f'https://zdf-cdn.live.cellular.de/mediathekV2/document/{video_id}',
            video_id)

        formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
        if not formitaeten:
            raise ExtractorError(f'No formats found for {video_id} in mobile API response')
        document = video['document']

        title = document['titel']
        content_id = document['basename']

        formats = []
        format_urls = set()
        for f in formitaeten:
            self._extract_format(content_id, formats, format_urls, f)

        thumbnails = []
        teaser_bild = document.get('teaserBild')
        if isinstance(teaser_bild, dict):
            for thumbnail_key, thumbnail in teaser_bild.items():
                thumbnail_url = try_get(
                    thumbnail, lambda x: x['url'], str)
                if thumbnail_url:
                    thumbnails.append({
                        'url': thumbnail_url,
                        'id': thumbnail_key,
                        'width': int_or_none(thumbnail.get('width')),
                        'height': int_or_none(thumbnail.get('height')),
                    })

        return {
            'id': content_id,
            'title': title,
            'description': document.get('beschreibung'),
            'duration': int_or_none(document.get('length')),
            'timestamp': unified_timestamp(document.get('date')) or unified_timestamp(
                try_get(video, lambda x: x['meta']['editorialDate'], str)),
            'thumbnails': thumbnails,
            'subtitles': self._extract_subtitles(document),
            'formats': formats,
        }

    def _real_extract(self, url):
        """Try the web player first; fall back to the mobile API otherwise."""
        video_id = self._match_id(url)

        # Both steps are non-fatal so that a missing page or player block
        # leads to the mobile fallback rather than an error.
        webpage = self._download_webpage(url, video_id, fatal=False)
        if webpage:
            player = self._extract_player(webpage, video_id, fatal=False)
            if player:
                return self._extract_regular(url, player, video_id)

        return self._extract_mobile(video_id)
class ZDFChannelIE(ZDFBaseIE):
    """Extractor for channel/overview pages on www.zdf.de (playlists)."""

    _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    # NOTE(review): nesting rebuilt from the surviving key/value lines; the
    # 'planet-e' id is derived from the URL's last path segment — confirm.
    _TESTS = [{
        'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
        'info_dict': {
            'id': 'das-aktuelle-sportstudio',
            'title': 'das aktuelle sportstudio',
        },
        'playlist_mincount': 18,
    }, {
        'url': 'https://www.zdf.de/dokumentation/planet-e',
        'info_dict': {
            'id': 'planet-e',
            'title': 'planet e.',
        },
        'playlist_mincount': 50,
    }, {
        'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
        'info_dict': {
            'id': 'aktenzeichen-xy-ungeloest',
            'title': 'Aktenzeichen XY... ungelöst',
            'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://www.zdf.de/filme/taunuskrimi/',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to ``ZDFIE`` for any URL it accepts (this pattern is broader)."""
        return False if ZDFIE.suitable(url) else super().suitable(url)

    def _og_search_title(self, webpage, fatal=False):
        """Return the og:title with a trailing `` - ZDF`` / `` | ZDFmediathek`` removed."""
        title = super()._og_search_title(webpage, fatal=fatal)
        return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None

    def _real_extract(self, url):
        """Return a playlist of the videos linked from the channel page.

        With ``noplaylist`` set, only the first linked video is returned (if
        there is one); otherwise every linked entry not marked ``novideo``
        becomes a playlist entry.
        """
        channel_id = self._match_id(url)

        webpage = self._download_webpage(url, channel_id)

        # ``matches`` is a one-shot iterator: anything consumed in the
        # --no-playlist branch is not seen again by the playlist below.
        matches = re.finditer(
            rf'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>{ZDFIE._VALID_URL})\1''',
            webpage)

        if self._downloader.params.get('noplaylist', False):
            entry = next(
                (self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
                None)
            self.to_screen('Downloading just the main video because of --no-playlist')
            if entry:
                return entry
        else:
            self.to_screen(f'Downloading playlist {channel_id} - add --no-playlist to download just the main video')

        def check_video(m):
            # Look up the anchor whose data-target-id equals this entry's
            # plusbar id and reject entries flagged as having no video.
            v_ref = self._search_regex(
                r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["']){}\2[^>]*>)'''.format(m.group('p_id')),
                webpage, 'check id', default='')
            v_ref = extract_attributes(v_ref)
            return v_ref.get('data-target-video-type') != 'novideo'

        return self.playlist_from_matches(
            (m.group('url') for m in matches if check_video(m)),
            channel_id, self._og_search_title(webpage, fatal=False))