7 import xml
.etree
.ElementTree
9 from .common
import InfoExtractor
10 from ..networking
import HEADRequest
31 class CBCIE(InfoExtractor
):
33 _VALID_URL
= r
'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
36 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
37 'md5': '97e24d09672fc4cf56256d6faa6c25bc',
41 'title': 'Don Cherry – All-Stars',
42 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
43 'timestamp': 1454463000,
44 'upload_date': '20160203',
45 'uploader': 'CBCC-NEW',
47 'skip': 'Geo-restricted to Canada',
49 # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
50 'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
51 'md5': '162adfa070274b144f4fdc3c3b8207db',
55 'title': '22 Minutes Update: What Not To Wear Quebec',
56 'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
57 'upload_date': '20131025',
58 'uploader': 'CBCC-NEW',
59 'timestamp': 1382717907,
61 'skip': 'No longer available',
63 # with clipId, feed only available via tpfeed.cbc.ca
64 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
65 'md5': '0274a90b51a9b4971fe005c63f592f12',
69 'title': 'Robin Williams freestyles on 90 Minutes Live',
70 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
71 'upload_date': '19780210',
72 'uploader': 'CBCC-NEW',
73 'timestamp': 255977160,
75 'skip': '404 Not Found',
78 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
80 'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
84 'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
85 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
86 'upload_date': '20160201',
87 'timestamp': 1454342820,
88 'uploader': 'CBCC-NEW',
91 'md5': '415a0e3f586113894174dfb31aa5bb1a',
95 'title': 'Fly like an eagle!',
96 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
97 'upload_date': '20150315',
98 'timestamp': 1426443984,
99 'uploader': 'CBCC-NEW',
102 'skip': 'Geo-restricted to Canada',
104 # multiple CBC.APP.Caffeine.initInstance(...)
105 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
107 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME: actual title includes " | CBC News"
108 'id': 'dog-indoor-exercise-winter-1.3928238',
109 'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
111 'playlist_mincount': 6,
def suitable(cls, url):
    """Report whether this extractor should handle ``url``.

    Any URL accepted by ``CBCPlayerIE`` is declined here; every other URL
    is decided by the parent class's ``suitable``.
    """
    if CBCPlayerIE.suitable(url):
        return False
    return super().suitable(url)
118 def _extract_player_init(self
, player_init
, display_id
):
119 player_info
= self
._parse
_json
(player_init
, display_id
, js_to_json
)
120 media_id
= player_info
.get('mediaId')
122 clip_id
= player_info
['clipId']
123 feed
= self
._download
_json
(
124 f
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={{:mpsReleases}}{{{clip_id}}}',
125 clip_id
, fatal
=False)
127 media_id
= try_get(feed
, lambda x
: x
['entries'][0]['guid'], str)
129 media_id
= self
._download
_json
(
130 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id
,
131 clip_id
)['entries'][0]['id'].split('/')[-1]
132 return self
.url_result(f
'cbcplayer:{media_id}', 'CBCPlayer', media_id
)
134 def _real_extract(self
, url
):
135 display_id
= self
._match
_id
(url
)
136 webpage
= self
._download
_webpage
(url
, display_id
)
137 title
= (self
._og
_search
_title
(webpage
, default
=None)
138 or self
._html
_search
_meta
('twitter:title', webpage
, 'title', default
=None)
139 or self
._html
_extract
_title
(webpage
))
141 self
._extract
_player
_init
(player_init
, display_id
)
142 for player_init
in re
.findall(r
'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage
)]
145 r
'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
146 r
'<div[^>]+\bid=["\']player
-(\d
+)',
147 r'guid
["\']\s*:\s*["\'](\d
+)'):
148 media_ids.extend(re.findall(media_id_re, webpage))
150 self.url_result(f'cbcplayer
:{media_id}
', 'CBCPlayer
', media_id)
151 for media_id in orderedSet(media_ids)])
152 return self.playlist_result(
153 entries, display_id, strip_or_none(title),
154 self._og_search_description(webpage))
157 class CBCPlayerIE(InfoExtractor):
158 IE_NAME = 'cbc
.ca
:player
'
159 _VALID_URL = r'(?
:cbcplayer
:|https?
://(?
:www\
.)?cbc\
.ca
/(?
:player
/play
/(?
:video
/)?|i
/caffeine
/syndicate
/\?mediaId
=))(?P
<id>(?
:\d\
.)?\d
+)'
160 _GEO_COUNTRIES = ['CA
']
162 'url
': 'http
://www
.cbc
.ca
/player
/play
/2683190193',
163 'md5
': '64d25f841ddf4ddb28a235338af32e2c
',
167 'title
': 'Gerry Runs a Sweat Shop
',
168 'description
': 'md5
:b457e1c01e8ff408d9d801c1c2cd29b0
',
169 'timestamp
': 1455071400,
170 'upload_date
': '20160210',
171 'uploader
': 'CBCC
-NEW
',
173 'skip
': 'Geo
-restricted to Canada
and no longer available
',
175 'url
': 'http
://www
.cbc
.ca
/i
/caffeine
/syndicate
/?mediaId
=2657631896',
176 'md5
': 'e5e708c34ae6fca156aafe17c43e8b75
',
180 'title
': 'CBC Montreal
is organizing its first ever community hackathon
!',
181 'description
': 'md5
:dd3b692f0a139b0369943150bd1c46a9
',
182 'timestamp
': 1425704400,
183 'upload_date
': '20150307',
184 'thumbnail
': 'https
://i
.cbc
.ca
/ais
/1.2985700,1717262248558/full
/max/0/default
.jpg
',
187 'categories
': ['All
in a Weekend Montreal
'],
189 'location
': 'Quebec
',
190 'series
': 'All
in a Weekend Montreal
',
191 'season
': 'Season
2015',
192 'season_number
': 2015,
193 'media_type
': 'Excerpt
',
197 'url
': 'http
://www
.cbc
.ca
/i
/caffeine
/syndicate
/?mediaId
=2164402062',
201 'title
': 'Cancer survivor four times over
',
202 'description
': 'Tim Mayer has beaten three different forms of cancer four times
in five years
.',
203 'timestamp
': 1320410746,
204 'upload_date
': '20111104',
205 'thumbnail
': 'https
://i
.cbc
.ca
/ais
/1.1711287,1717139372111/full
/max/0/default
.jpg
',
208 'series
': 'CBC News
: Windsor at
6:00',
209 'categories
': ['Windsor
'],
210 'location
': 'Windsor
',
211 'tags
': ['Cancer
', 'News
/Canada
/Windsor
', 'Windsor
'],
212 'media_type
': 'Excerpt
',
215 'params
': {'skip_download
': 'm3u8
'},
217 # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
218 'url
': 'https
://www
.cbc
.ca
/player
/play
/1.2985700',
219 'md5
': 'e5e708c34ae6fca156aafe17c43e8b75
',
223 'title
': 'CBC Montreal
is organizing its first ever community hackathon
!',
224 'description
': 'The modern technology we tend to depend on so heavily
, is never without it
\'s share of hiccups
and headaches
. Next weekend
- CBC Montreal will be getting members of the public
for its first Hackathon
.',
225 'timestamp
': 1425704400,
226 'upload_date
': '20150307',
227 'thumbnail
': 'https
://i
.cbc
.ca
/ais
/1.2985700,1717262248558/full
/max/0/default
.jpg
',
230 'categories
': ['All
in a Weekend Montreal
'],
232 'location
': 'Quebec
',
233 'series
': 'All
in a Weekend Montreal
',
234 'season
': 'Season
2015',
235 'season_number
': 2015,
236 'media_type
': 'Excerpt
',
240 'url
': 'https
://www
.cbc
.ca
/player
/play
/1.1711287',
244 'title
': 'Cancer survivor four times over
',
245 'description
': 'Tim Mayer has beaten three different forms of cancer four times
in five years
.',
246 'timestamp
': 1320410746,
247 'upload_date
': '20111104',
248 'thumbnail
': 'https
://i
.cbc
.ca
/ais
/1.1711287,1717139372111/full
/max/0/default
.jpg
',
251 'series
': 'CBC News
: Windsor at
6:00',
252 'categories
': ['Windsor
'],
253 'location
': 'Windsor
',
254 'tags
': ['Cancer
', 'News
/Canada
/Windsor
', 'Windsor
'],
255 'media_type
': 'Excerpt
',
258 'params
': {'skip_download
': 'm3u8
'},
261 # These broadcasts expire after ~1 month, can find new test URL here:
262 # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
263 'url
': 'https
://www
.cbc
.ca
/player
/play
/video
/9.6424403',
264 'md5
': '8025909eaffcf0adf59922904def9a5e
',
268 'title
': 'The National | N
.W
.T
. wildfire emergency
',
269 'description
': 'md5
:ada33d36d1df69347ed575905bfd496c
',
270 'timestamp
': 1718589600,
271 'duration
': 2692.833,
274 'name
': 'English Captions
',
275 'url
': 'https
://cbchls
.akamaized
.net
/delivery
/news
-shows
/2024/06/17/NAT_JUN16
-00-55-00/NAT_JUN16_cc
.vtt
',
278 'thumbnail
': 'https
://i
.cbc
.ca
/ais
/6272b5c6
-5e78
-4c05
-915d
-0e36672e33d1
,1714756287822/full
/max/0/default
.jpg
',
279 'chapters
': 'count
:5',
280 'upload_date
': '20240617',
281 'categories
': ['News
', 'The National
', 'The National Latest Broadcasts
'],
282 'series
': 'The National
- Full Show
',
283 'tags
': ['The National
'],
284 'location
': 'Canada
',
285 'media_type
': 'Full Program
',
289 'url
': 'https
://www
.cbc
.ca
/player
/play
/video
/1.7194274',
290 'md5
': '188b96cf6bdcb2540e178a6caa957128
',
294 'title
': '#TheMoment a rare white spirit moose was spotted in Alberta',
295 'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
296 'timestamp': 1714788791,
298 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
299 'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg',
301 'categories': 'count:3',
302 'series': 'The National',
304 'location': 'Canada',
305 'media_type': 'Excerpt',
306 'upload_date': '20240504',
310 'url': 'https://www.cbc.ca/player/play/video/9.6427282',
314 'title': 'Men\'s Soccer - Argentina vs Morocco',
315 'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.',
316 'series': 'CBC Sports',
317 'media_type': 'Event Coverage',
318 'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg',
319 'timestamp': 1721825400.0,
320 'upload_date': '20240724',
324 'tags': ['2024 Paris Olympic Games'],
325 'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'],
326 'location': 'Canada',
328 'params': {'skip_download': 'm3u8'},
330 'url': 'https://www.cbc.ca/player/play/video/9.6459530',
331 'md5': '6c1bb76693ab321a2e99c347a1d5ecbc',
335 'title': 'Parts of Jasper incinerated as wildfire rages',
336 'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962',
337 'series': 'The National',
338 'media_type': 'Excerpt',
339 'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg',
340 'timestamp': 1721964091.012,
341 'upload_date': '20240726',
346 'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '],
349 'url': 'https://www.cbc.ca/player/play/video/9.6420651',
350 'md5': '71a850c2c6ee5e912de169f5311bb533',
354 'title': 'Is it a breath of fresh air? Measuring air quality in Edmonton',
355 'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3',
356 'series': 'CBC News Edmonton',
357 'media_type': 'Excerpt',
358 'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg',
359 'timestamp': 1718220065.768,
360 'upload_date': '20240612',
364 'categories': ['News', 'Edmonton'],
366 'location': 'Edmonton',
369 'url': 'cbcplayer:1.7159484',
370 'only_matching': True,
372 'url': 'cbcplayer:2164402062',
373 'only_matching': True,
375 'url': 'http://www.cbc.ca/player/play/2657631896',
376 'only_matching': True,
def _parse_param(self, asset_data, name):
    """Return the string ``value`` of the ``params`` entry called ``name``.

    The lookup is delegated to ``traverse_obj``: entries found under the
    ``params`` key of ``asset_data`` are filtered on
    ``entry['name'] == name`` and their ``value`` field is kept only when
    it is a ``str``.
    NOTE(review): the trailing ``any`` presumably collapses the matches to
    the first hit (or ``None`` when nothing matches) -- confirm against
    ``traverse_obj``.
    """
    def is_requested(_, entry):
        return entry['name'] == name

    lookup_path = ('params', is_requested, 'value', {str}, any)
    return traverse_obj(asset_data, lookup_path)
382 def _real_extract(self
, url
):
383 video_id
= self
._match
_id
(url
)
384 webpage
= self
._download
_webpage
(f
'https://www.cbc.ca/player/play/{video_id}', video_id
)
385 data
= self
._search
_json
(
386 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', video_id
)['video']['currentClip']
387 assets
= traverse_obj(
388 data
, ('media', 'assets', lambda _
, v
: url_or_none(v
['key']) and v
['type']))
390 if not assets
and (media_id
:= traverse_obj(data
, ('mediaId', {str}
))):
391 # XXX: Deprecated; CBC is migrating off of ThePlatform
393 '_type': 'url_transparent',
394 'ie_key': 'ThePlatform',
396 f
'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', {
397 'force_smil_url': True,
400 '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
403 is_live
= traverse_obj(data
, ('media', 'streamType', {str}
)) == 'Live'
404 formats
, subtitles
= [], {}
406 for sub
in traverse_obj(data
, ('media', 'textTracks', lambda _
, v
: url_or_none(v
['src']))):
407 subtitles
.setdefault(sub
.get('language') or 'und', []).append({
409 'name': sub
.get('label'),
413 asset_key
= asset
['key']
414 asset_type
= asset
['type']
415 if asset_type
!= 'medianet':
416 self
.report_warning(f
'Skipping unsupported asset type "{asset_type}": {asset_key}')
418 asset_data
= self
._download
_json
(asset_key
, video_id
, f
'Downloading {asset_type} JSON')
419 ext
= mimetype2ext(self
._parse
_param
(asset_data
, 'contentType'))
421 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
422 asset_data
['url'], video_id
, 'mp4', m3u8_id
='hls', live
=is_live
)
424 # Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available
426 self
._merge
_subtitles
(subs
, target
=subtitles
)
427 if is_live
or not fmts
:
429 # Check for direct https mp4 format
430 best_video_fmt
= traverse_obj(fmts
, (
431 lambda _
, v
: v
.get('vcodec') != 'none' and v
['tbr'], all
,
432 {functools
.partial(sorted, key
=lambda x
: x
['tbr'])}, -1, {dict}
)) or {}
433 base_url
= self
._search
_regex
(
434 r
'(https?://[^?#]+?/)hdntl=', best_video_fmt
.get('url'), 'base url', default
=None)
435 if not base_url
or '/live/' in base_url
:
437 mp4_url
= base_url
+ replace_extension(url_basename(best_video_fmt
['url']), 'mp4')
438 if self
._request
_webpage
(
439 HEADRequest(mp4_url
), video_id
, 'Checking for https format',
440 errnote
=False, fatal
=False):
444 'format_id': 'https-mp4',
446 'manifest_url': None,
451 'url': asset_data
['url'],
453 'vcodec': 'none' if self
._parse
_param
(asset_data
, 'mediaType') == 'audio' else None,
456 chapters
= traverse_obj(data
, (
457 'media', 'chapters', lambda _
, v
: float(v
['startTime']) is not None, {
458 'start_time': ('startTime', {functools
.partial(float_or_none
, scale
=1000)}),
459 'end_time': ('endTime', {functools
.partial(float_or_none
, scale
=1000)}),
460 'title': ('name', {str}
),
462 # Filter out pointless single chapters with start_time==0 and no end_time
463 if len(chapters
) == 1 and not (chapters
[0].get('start_time') or chapters
[0].get('end_time')):
467 **traverse_obj(data
, {
468 'title': ('title', {str}
),
469 'description': ('description', {str.strip
}),
470 'thumbnail': ('image', 'url', {url_or_none}
, {functools
.partial(update_url
, query
=None)}),
471 'timestamp': ('publishedAt', {functools
.partial(float_or_none
, scale
=1000)}),
472 'media_type': ('media', 'clipType', {str}
),
473 'series': ('showName', {str}
),
474 'season_number': ('media', 'season', {int_or_none}
),
475 'duration': ('media', 'duration', {float_or_none}
, {lambda x
: None if is_live
else x
}),
476 'location': ('media', 'region', {str}
),
477 'tags': ('tags', ..., 'name', {str}
),
478 'genres': ('media', 'genre', all
),
479 'categories': ('categories', ..., 'name', {str}
),
483 'subtitles': subtitles
,
484 'chapters': chapters
,
489 class CBCPlayerPlaylistIE(InfoExtractor
):
490 IE_NAME
= 'cbc.ca:player:playlist'
491 _VALID_URL
= r
'https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/)(?P<id>[^?#]+)'
493 'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast',
494 'playlist_mincount': 25,
496 'id': 'news/tv shows/the national/latest broadcast',
499 'url': 'https://www.cbc.ca/player/news/Canada/North',
500 'playlist_mincount': 25,
502 'id': 'news/canada/north',
506 def _real_extract(self
, url
):
507 playlist_id
= urllib
.parse
.unquote(self
._match
_id
(url
)).lower()
508 webpage
= self
._download
_webpage
(url
, playlist_id
)
509 json_content
= self
._search
_json
(
510 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', playlist_id
)
513 for video_id
in traverse_obj(json_content
, (
514 'video', 'clipsByCategory', lambda k
, _
: k
.lower() == playlist_id
, 'items', ..., 'id',
516 yield self
.url_result(f
'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE
)
518 return self
.playlist_result(entries(), playlist_id
)
521 class CBCGemIE(InfoExtractor
):
522 IE_NAME
= 'gem.cbc.ca'
523 _VALID_URL
= r
'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
525 # This is a normal, public, TV show video
526 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
527 'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
529 'id': 'schitts-creek/s06e01',
531 'title': 'Smoke Signals',
532 'description': 'md5:929868d20021c924020641769eb3e7f1',
533 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
535 'categories': ['comedy'],
536 'series': 'Schitt\'s Creek',
537 'season': 'Season 6',
539 'episode': 'Smoke Signals',
541 'episode_id': 'schitts-creek/s06e01',
543 'params': {'format': 'bv'},
544 'skip': 'Geo-restricted to Canada',
546 # This video requires an account in the browser, but works fine in yt-dlp
547 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
548 'md5': '297a9600f554f2258aed01514226a697',
550 'id': 'schitts-creek/s01e01',
552 'title': 'The Cup Runneth Over',
553 'description': 'md5:9bca14ea49ab808097530eb05a29e797',
554 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
555 'series': 'Schitt\'s Creek',
557 'season': 'Season 1',
559 'episode': 'The Cup Runneth Over',
560 'episode_id': 'schitts-creek/s01e01',
562 'categories': ['comedy'],
564 'params': {'format': 'bv'},
565 'skip': 'Geo-restricted to Canada',
567 'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
568 'only_matching': True,
571 _GEO_COUNTRIES
= ['CA']
572 _TOKEN_API_KEY
= '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
573 _NETRC_MACHINE
= 'cbcgem'
576 def _new_claims_token(self
, email
, password
):
579 'password': password
,
581 headers
= {'content-type': 'application/json'}
582 query
= {'apikey': self
._TOKEN
_API
_KEY
}
583 resp
= self
._download
_json
('https://api.loginradius.com/identity/v2/auth/login',
584 None, data
=data
, headers
=headers
, query
=query
)
585 access_token
= resp
['access_token']
588 'access_token': access_token
,
589 'apikey': self
._TOKEN
_API
_KEY
,
592 resp
= self
._download
_json
('https://cloud-api.loginradius.com/sso/jwt/api/token',
593 None, headers
=headers
, query
=query
)
594 sig
= resp
['signature']
596 data
= json
.dumps({'jwt': sig
}).encode()
597 headers
= {'content-type': 'application/json', 'ott-device-type': 'web'}
598 resp
= self
._download
_json
('https://services.radio-canada.ca/ott/cbc-api/v2/token',
599 None, data
=data
, headers
=headers
, expected_status
=426)
600 cbc_access_token
= resp
['accessToken']
602 headers
= {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token
}
603 resp
= self
._download
_json
('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
604 None, headers
=headers
, expected_status
=426)
605 return resp
['claimsToken']
def _get_claims_token_expiry(self):
    """Return the ``exp`` value stored in the cached claims token.

    The claims token is treated as a JWT: its second dot-separated segment
    is base64url-decoded, parsed as JSON, and the ``exp`` field (a Unix
    timestamp for when the token expires) is returned.
    """
    payload_segment = self._claims_token.split('.')[1]
    # '==' is appended so that a segment without padding still decodes.
    decoded_payload = base64.urlsafe_b64decode(f'{payload_segment}==')
    claims = json.loads(decoded_payload)
    return claims['exp']
def claims_token_expired(self):
    """Return True when the claims token has expired or is about to.

    The expiry timestamp from ``_get_claims_token_expiry`` is compared with
    the current time; fewer than 10 seconds of remaining lifetime (which
    includes any token already past its expiry) counts as expired.
    """
    seconds_left = self._get_claims_token_expiry() - time.time()
    return seconds_left < 10
def claims_token_valid(self):
    """Return True when a claims token is held and has not expired.

    A missing token (``None``) is reported as invalid without consulting
    ``claims_token_expired``.
    """
    if self._claims_token is None:
        return False
    return not self.claims_token_expired()
def _get_claims_token(self, email, password):
    """Return a usable claims token, requesting a new one when needed.

    If ``claims_token_valid`` reports the held token as usable it is
    returned unchanged. Otherwise a token is obtained from
    ``_new_claims_token(email, password)``, kept on the instance, and
    written to the cache under the ``_NETRC_MACHINE`` section with the key
    ``'claims_token'``.
    """
    if self.claims_token_valid():
        return self._claims_token

    fresh_token = self._new_claims_token(email, password)
    self._claims_token = fresh_token
    self.cache.store(self._NETRC_MACHINE, 'claims_token', fresh_token)
    return fresh_token
629 def _real_initialize(self
):
630 if self
.claims_token_valid():
632 self
._claims
_token
= self
.cache
.load(self
._NETRC
_MACHINE
, 'claims_token')
634 def _find_secret_formats(self
, formats
, video_id
):
635 """ Find a valid video url and convert it to the secret variant """
636 base_format
= next((f
for f
in formats
if f
.get('vcodec') != 'none'), None)
640 base_url
= re
.sub(r
'(Manifest\(.*?),filter=[\w-]+(.*?\))', r
'\1\2', base_format
['url'])
641 url
= re
.sub(r
'(Manifest\(.*?),format=[\w-]+(.*?\))', r
'\1\2', base_url
)
643 secret_xml
= self
._download
_xml
(url
, video_id
, note
='Downloading secret XML', fatal
=False)
644 if not isinstance(secret_xml
, xml
.etree
.ElementTree
.Element
):
647 for child
in secret_xml
:
648 if child
.attrib
.get('Type') != 'video':
650 for video_quality
in child
:
651 bitrate
= int_or_none(video_quality
.attrib
.get('Bitrate'))
652 if not bitrate
or 'Index' not in video_quality
.attrib
:
654 height
= int_or_none(video_quality
.attrib
.get('MaxHeight'))
658 'format_id': join_nonempty('sec', height
),
659 # Note: \g<1> is necessary instead of \1 since bitrate is a number
660 'url': re
.sub(r
'(QualityLevels\()\d+(\))', fr
'\g<1>{bitrate}\2', base_url
),
661 'width': int_or_none(video_quality
.attrib
.get('MaxWidth')),
662 'tbr': bitrate
/ 1000.0,
666 def _real_extract(self
, url
):
667 video_id
= self
._match
_id
(url
)
668 video_info
= self
._download
_json
(
669 f
'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}',
670 video_id
, expected_status
=426)
672 email
, password
= self
._get
_login
_info
()
673 if email
and password
:
674 claims_token
= self
._get
_claims
_token
(email
, password
)
675 headers
= {'x-claims-token': claims_token
}
678 m3u8_info
= self
._download
_json
(video_info
['playSession']['url'], video_id
, headers
=headers
)
679 m3u8_url
= m3u8_info
.get('url')
681 if m3u8_info
.get('errorCode') == 1:
682 self
.raise_geo_restricted(countries
=['CA'])
683 elif m3u8_info
.get('errorCode') == 35:
684 self
.raise_login_required(method
='password')
685 elif m3u8_info
.get('errorCode') != 0:
686 raise ExtractorError(f
'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
688 formats
= self
._extract
_m
3u8_formats
(m3u8_url
, video_id
, m3u8_id
='hls')
689 self
._remove
_duplicate
_formats
(formats
)
690 formats
.extend(self
._find
_secret
_formats
(formats
, video_id
))
693 if fmt
.get('vcodec') == 'none':
694 if fmt
.get('ext') is None:
696 if fmt
.get('acodec') is None:
697 fmt
['acodec'] = 'mp4a.40.2'
699 # Put described audio at the beginning of the list, so that it
700 # isn't chosen by default, as most people won't want it.
701 if 'descriptive' in fmt
['format_id'].lower():
702 fmt
['preference'] = -2
706 'title': video_info
['title'],
707 'description': video_info
.get('description'),
708 'thumbnail': video_info
.get('image'),
709 'series': video_info
.get('series'),
710 'season_number': video_info
.get('season'),
711 'season': f
'Season {video_info.get("season")}',
712 'episode_number': video_info
.get('episode'),
713 'episode': video_info
.get('title'),
714 'episode_id': video_id
,
715 'duration': video_info
.get('duration'),
716 'categories': [video_info
.get('category')],
718 'release_timestamp': video_info
.get('airDate'),
719 'timestamp': video_info
.get('availableDate'),
723 class CBCGemPlaylistIE(InfoExtractor
):
724 IE_NAME
= 'gem.cbc.ca:playlist'
725 _VALID_URL
= r
'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
727 # TV show playlist, all public videos
728 'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
729 'playlist_count': 16,
731 'id': 'schitts-creek/s06',
733 'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
734 'series': 'Schitt\'s Creek',
736 'season': 'Season 6',
737 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
740 'url': 'https://gem.cbc.ca/schitts-creek/s06',
741 'only_matching': True,
743 _API_BASE
= 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
745 def _real_extract(self
, url
):
746 match
= self
._match
_valid
_url
(url
)
747 season_id
= match
.group('id')
748 show
= match
.group('show')
749 show_info
= self
._download
_json
(self
._API
_BASE
+ show
, season_id
, expected_status
=426)
750 season
= int(match
.group('season'))
752 season_info
= next((s
for s
in show_info
['seasons'] if s
.get('season') == season
), None)
754 if season_info
is None:
755 raise ExtractorError(f
'Couldn\'t find season {season} of {show}')
758 for episode
in season_info
['assets']:
760 '_type': 'url_transparent',
762 'url': 'https://gem.cbc.ca/media/' + episode
['id'],
764 'title': episode
.get('title'),
765 'description': episode
.get('description'),
766 'thumbnail': episode
.get('image'),
767 'series': episode
.get('series'),
768 'season_number': episode
.get('season'),
769 'season': season_info
['title'],
770 'season_id': season_info
.get('id'),
771 'episode_number': episode
.get('episode'),
772 'episode': episode
.get('title'),
773 'episode_id': episode
['id'],
774 'duration': episode
.get('duration'),
775 'categories': [episode
.get('category')],
779 tn_uri
= season_info
.get('image')
780 # the-national was observed to use a "data:image/png;base64"
781 # URI for their 'image' value. The image was 1x1, and is
782 # probably just a placeholder, so it is ignored.
783 if tn_uri
is not None and not tn_uri
.startswith('data:'):
790 'title': season_info
['title'],
791 'description': season_info
.get('description'),
792 'thumbnail': thumbnail
,
793 'series': show_info
.get('title'),
794 'season_number': season_info
.get('season'),
795 'season': season_info
['title'],
799 class CBCGemLiveIE(InfoExtractor
):
800 IE_NAME
= 'gem.cbc.ca:live'
801 _VALID_URL
= r
'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
804 'url': 'https://gem.cbc.ca/live/920604739687',
807 'description': 'The live TV channel and local programming from Ottawa',
808 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
809 'live_status': 'is_live',
810 'id': 'AyqZwxRqh8EH',
812 'release_timestamp': 1492106160,
813 'release_date': '20170413',
814 'uploader': 'CBCC-NEW',
816 'skip': 'Live might have ended',
819 'url': 'https://gem.cbc.ca/live/44',
824 'title': r
're:^Ottawa [0-9\-: ]+',
825 'description': 'The live TV channel and local programming from Ottawa',
826 'live_status': 'is_live',
827 'thumbnail': r
're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
829 'params': {'skip_download': True},
830 'skip': 'Live might have ended',
833 'url': 'https://gem.cbc.ca/live-event/10835',
838 'title': r
're:^The National \| Biden’s trip wraps up, Paltrow testifies, Bird flu [0-9\-: ]+',
839 'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
840 'live_status': 'is_live',
841 'thumbnail': r
're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
842 'release_timestamp': 1679706000,
843 'release_date': '20230325',
845 'params': {'skip_download': True},
846 'skip': 'Live might have ended',
848 { # event replay (medianetlive)
849 'url': 'https://gem.cbc.ca/live-event/42314',
850 'md5': '297a9600f554f2258aed01514226a697',
854 'live_status': 'was_live',
855 'title': 'Women\'s Soccer - Canada vs New Zealand',
856 'description': 'md5:36200e5f1a70982277b5a6ecea86155d',
857 'thumbnail': r
're:https://.+default\.jpg',
858 'release_timestamp': 1721917200,
859 'release_date': '20240725',
861 'params': {'skip_download': True},
862 'skip': 'Replay might no longer be available',
864 { # event replay (medianetlive)
865 'url': 'https://gem.cbc.ca/live-event/43273',
866 'only_matching': True,
869 _GEO_COUNTRIES
= ['CA']
871 def _real_extract(self
, url
):
872 video_id
= self
._match
_id
(url
)
873 webpage
= self
._download
_webpage
(url
, video_id
)
874 video_info
= self
._search
_nextjs
_data
(webpage
, video_id
)['props']['pageProps']['data']
876 # Three types of video_info JSON: info in root, freeTv stream/item, event replay
877 if not video_info
.get('formattedIdMedia'):
878 if traverse_obj(video_info
, ('event', 'key')) == video_id
:
879 video_info
= video_info
['event']
881 video_info
= traverse_obj(video_info
, (
882 ('freeTv', ('streams', ...)), 'items',
883 lambda _
, v
: v
['key'].partition('-')[0] == video_id
, any
)) or {}
885 video_stream_id
= video_info
.get('formattedIdMedia')
886 if not video_stream_id
:
887 raise ExtractorError(
888 'Couldn\'t find video metadata, maybe this livestream is now offline', expected
=True)
890 live_status
= 'was_live' if video_info
.get('isVodEnabled') else 'is_live'
891 release_timestamp
= traverse_obj(video_info
, ('airDate', {parse_iso8601}
))
893 if live_status
== 'is_live' and release_timestamp
and release_timestamp
> time
.time():
895 live_status
= 'is_upcoming'
896 self
.raise_no_formats('This livestream has not yet started', expected
=True)
898 stream_data
= self
._download
_json
(
899 'https://services.radio-canada.ca/media/validation/v2/', video_id
, query
={
900 'appCode': 'medianetlive',
901 'connectionType': 'hd',
902 'deviceType': 'ipad',
903 'idMedia': video_stream_id
,
904 'multibitrate': 'true',
907 'manifestType': 'desktop',
909 formats
= self
._extract
_m
3u8_formats
(
910 stream_data
['url'], video_id
, 'mp4', live
=live_status
== 'is_live')
915 'live_status': live_status
,
916 'release_timestamp': release_timestamp
,
917 **traverse_obj(video_info
, {
918 'title': ('title', {str}
),
919 'description': ('description', {str}
),
920 'thumbnail': ('images', 'card', 'url'),