8 from .common
import InfoExtractor
9 from ..networking
import HEADRequest
29 class CBCIE(InfoExtractor
):
31 _VALID_URL
= r
'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
34 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
35 'md5': '97e24d09672fc4cf56256d6faa6c25bc',
39 'title': 'Don Cherry – All-Stars',
40 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
41 'timestamp': 1454463000,
42 'upload_date': '20160203',
43 'uploader': 'CBCC-NEW',
45 'skip': 'Geo-restricted to Canada',
47 # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
48 'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
49 'md5': '162adfa070274b144f4fdc3c3b8207db',
53 'title': '22 Minutes Update: What Not To Wear Quebec',
54 'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
55 'upload_date': '20131025',
56 'uploader': 'CBCC-NEW',
57 'timestamp': 1382717907,
59 'skip': 'No longer available',
61 # with clipId, feed only available via tpfeed.cbc.ca
62 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
63 'md5': '0274a90b51a9b4971fe005c63f592f12',
67 'title': 'Robin Williams freestyles on 90 Minutes Live',
68 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
69 'upload_date': '19780210',
70 'uploader': 'CBCC-NEW',
71 'timestamp': 255977160,
73 'skip': '404 Not Found',
76 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
78 'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
82 'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
83 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
84 'upload_date': '20160201',
85 'timestamp': 1454342820,
86 'uploader': 'CBCC-NEW',
89 'md5': '415a0e3f586113894174dfb31aa5bb1a',
93 'title': 'Fly like an eagle!',
94 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
95 'upload_date': '20150315',
96 'timestamp': 1426443984,
97 'uploader': 'CBCC-NEW',
100 'skip': 'Geo-restricted to Canada',
102 # multiple CBC.APP.Caffeine.initInstance(...)
103 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
105 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME: actual title includes " | CBC News"
106 'id': 'dog-indoor-exercise-winter-1.3928238',
107 'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
109 'playlist_mincount': 6,
113 def suitable(cls
, url
):
114 return False if CBCPlayerIE
.suitable(url
) else super().suitable(url
)
116 def _extract_player_init(self
, player_init
, display_id
):
117 player_info
= self
._parse
_json
(player_init
, display_id
, js_to_json
)
118 media_id
= player_info
.get('mediaId')
120 clip_id
= player_info
['clipId']
121 feed
= self
._download
_json
(
122 f
'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={{:mpsReleases}}{{{clip_id}}}',
123 clip_id
, fatal
=False)
125 media_id
= try_get(feed
, lambda x
: x
['entries'][0]['guid'], str)
127 media_id
= self
._download
_json
(
128 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id
,
129 clip_id
)['entries'][0]['id'].split('/')[-1]
130 return self
.url_result(f
'cbcplayer:{media_id}', 'CBCPlayer', media_id
)
132 def _real_extract(self
, url
):
133 display_id
= self
._match
_id
(url
)
134 webpage
= self
._download
_webpage
(url
, display_id
)
135 title
= (self
._og
_search
_title
(webpage
, default
=None)
136 or self
._html
_search
_meta
('twitter:title', webpage
, 'title', default
=None)
137 or self
._html
_extract
_title
(webpage
))
139 self
._extract
_player
_init
(player_init
, display_id
)
140 for player_init
in re
.findall(r
'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage
)]
143 r
'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
144 r
'<div[^>]+\bid=["\']player
-(\d
+)',
145 r'guid
["\']\s*:\s*["\'](\d
+)'):
146 media_ids.extend(re.findall(media_id_re, webpage))
148 self.url_result(f'cbcplayer
:{media_id}
', 'CBCPlayer
', media_id)
149 for media_id in orderedSet(media_ids)])
150 return self.playlist_result(
151 entries, display_id, strip_or_none(title),
152 self._og_search_description(webpage))
155 class CBCPlayerIE(InfoExtractor):
156 IE_NAME = 'cbc
.ca
:player
'
157 _VALID_URL = r'(?
:cbcplayer
:|https?
://(?
:www\
.)?cbc\
.ca
/(?
:player
/play
/(?
:video
/)?|i
/caffeine
/syndicate
/\?mediaId
=))(?P
<id>(?
:\d\
.)?\d
+)'
158 _GEO_COUNTRIES = ['CA
']
160 'url
': 'http
://www
.cbc
.ca
/player
/play
/2683190193',
161 'md5
': '64d25f841ddf4ddb28a235338af32e2c
',
165 'title
': 'Gerry Runs a Sweat Shop
',
166 'description
': 'md5
:b457e1c01e8ff408d9d801c1c2cd29b0
',
167 'timestamp
': 1455071400,
168 'upload_date
': '20160210',
169 'uploader
': 'CBCC
-NEW
',
171 'skip
': 'Geo
-restricted to Canada
and no longer available
',
173 'url
': 'http
://www
.cbc
.ca
/i
/caffeine
/syndicate
/?mediaId
=2657631896',
174 'md5
': 'e5e708c34ae6fca156aafe17c43e8b75
',
178 'title
': 'CBC Montreal
is organizing its first ever community hackathon
!',
179 'description
': 'md5
:dd3b692f0a139b0369943150bd1c46a9
',
180 'timestamp
': 1425704400,
181 'upload_date
': '20150307',
182 'thumbnail
': 'https
://i
.cbc
.ca
/ais
/1.2985700,1717262248558/full
/max/0/default
.jpg
',
185 'categories
': ['All
in a Weekend Montreal
'],
187 'location
': 'Quebec
',
188 'series
': 'All
in a Weekend Montreal
',
189 'season
': 'Season
2015',
190 'season_number
': 2015,
191 'media_type
': 'Excerpt
',
195 'url
': 'http
://www
.cbc
.ca
/i
/caffeine
/syndicate
/?mediaId
=2164402062',
199 'title
': 'Cancer survivor four times over
',
200 'description
': 'Tim Mayer has beaten three different forms of cancer four times
in five years
.',
201 'timestamp
': 1320410746,
202 'upload_date
': '20111104',
203 'thumbnail
': 'https
://i
.cbc
.ca
/ais
/1.1711287,1717139372111/full
/max/0/default
.jpg
',
206 'series
': 'CBC News
: Windsor at
6:00',
207 'categories
': ['Windsor
'],
208 'location
': 'Windsor
',
209 'tags
': ['Cancer
', 'News
/Canada
/Windsor
', 'Windsor
'],
210 'media_type
': 'Excerpt
',
213 'params
': {'skip_download
': 'm3u8
'},
215 # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
216 'url
': 'https
://www
.cbc
.ca
/player
/play
/1.2985700',
217 'md5
': 'e5e708c34ae6fca156aafe17c43e8b75
',
221 'title
': 'CBC Montreal
is organizing its first ever community hackathon
!',
222 'description
': 'The modern technology we tend to depend on so heavily
, is never without it
\'s share of hiccups
and headaches
. Next weekend
- CBC Montreal will be getting members of the public
for its first Hackathon
.',
223 'timestamp
': 1425704400,
224 'upload_date
': '20150307',
225 'thumbnail
': 'https
://i
.cbc
.ca
/ais
/1.2985700,1717262248558/full
/max/0/default
.jpg
',
228 'categories
': ['All
in a Weekend Montreal
'],
230 'location
': 'Quebec
',
231 'series
': 'All
in a Weekend Montreal
',
232 'season
': 'Season
2015',
233 'season_number
': 2015,
234 'media_type
': 'Excerpt
',
238 'url
': 'https
://www
.cbc
.ca
/player
/play
/1.1711287',
242 'title
': 'Cancer survivor four times over
',
243 'description
': 'Tim Mayer has beaten three different forms of cancer four times
in five years
.',
244 'timestamp
': 1320410746,
245 'upload_date
': '20111104',
246 'thumbnail
': 'https
://i
.cbc
.ca
/ais
/1.1711287,1717139372111/full
/max/0/default
.jpg
',
249 'series
': 'CBC News
: Windsor at
6:00',
250 'categories
': ['Windsor
'],
251 'location
': 'Windsor
',
252 'tags
': ['Cancer
', 'News
/Canada
/Windsor
', 'Windsor
'],
253 'media_type
': 'Excerpt
',
256 'params
': {'skip_download
': 'm3u8
'},
259 # These broadcasts expire after ~1 month, can find new test URL here:
260 # https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
261 'url
': 'https
://www
.cbc
.ca
/player
/play
/video
/9.6424403',
262 'md5
': '8025909eaffcf0adf59922904def9a5e
',
266 'title
': 'The National | N
.W
.T
. wildfire emergency
',
267 'description
': 'md5
:ada33d36d1df69347ed575905bfd496c
',
268 'timestamp
': 1718589600,
269 'duration
': 2692.833,
272 'name
': 'English Captions
',
273 'url
': 'https
://cbchls
.akamaized
.net
/delivery
/news
-shows
/2024/06/17/NAT_JUN16
-00-55-00/NAT_JUN16_cc
.vtt
',
276 'thumbnail
': 'https
://i
.cbc
.ca
/ais
/6272b5c6
-5e78
-4c05
-915d
-0e36672e33d1
,1714756287822/full
/max/0/default
.jpg
',
277 'chapters
': 'count
:5',
278 'upload_date
': '20240617',
279 'categories
': ['News
', 'The National
', 'The National Latest Broadcasts
'],
280 'series
': 'The National
- Full Show
',
281 'tags
': ['The National
'],
282 'location
': 'Canada
',
283 'media_type
': 'Full Program
',
287 'url
': 'https
://www
.cbc
.ca
/player
/play
/video
/1.7194274',
288 'md5
': '188b96cf6bdcb2540e178a6caa957128
',
292 'title
': '#TheMoment a rare white spirit moose was spotted in Alberta',
293 'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
294 'timestamp': 1714788791,
296 'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
297 'thumbnail': 'https://i.cbc.ca/ais/1.7194274,1717224990425/full/max/0/default.jpg',
299 'categories': 'count:3',
300 'series': 'The National',
302 'location': 'Canada',
303 'media_type': 'Excerpt',
304 'upload_date': '20240504',
308 'url': 'https://www.cbc.ca/player/play/video/9.6427282',
312 'title': 'Men\'s Soccer - Argentina vs Morocco',
313 'description': 'Argentina faces Morocco on the football pitch at Saint Etienne Stadium.',
314 'series': 'CBC Sports',
315 'media_type': 'Event Coverage',
316 'thumbnail': 'https://i.cbc.ca/ais/a4c5c0c2-99fa-4bd3-8061-5a63879c1b33,1718828053500/full/max/0/default.jpg',
317 'timestamp': 1721825400.0,
318 'upload_date': '20240724',
322 'tags': ['2024 Paris Olympic Games'],
323 'categories': ['Olympics Summer Soccer', 'Summer Olympics Replays', 'Summer Olympics Soccer Replays'],
324 'location': 'Canada',
326 'params': {'skip_download': 'm3u8'},
328 'url': 'https://www.cbc.ca/player/play/video/9.6459530',
329 'md5': '6c1bb76693ab321a2e99c347a1d5ecbc',
333 'title': 'Parts of Jasper incinerated as wildfire rages',
334 'description': 'md5:6f1caa8d128ad3f629257ef5fecf0962',
335 'series': 'The National',
336 'media_type': 'Excerpt',
337 'thumbnail': 'https://i.cbc.ca/ais/507c0086-31a2-494d-96e4-bffb1048d045,1721953984375/full/max/0/default.jpg',
338 'timestamp': 1721964091.012,
339 'upload_date': '20240726',
344 'categories': ['News (FAST)', 'News', 'The National', 'TV News Shows', 'The National '],
347 'url': 'https://www.cbc.ca/player/play/video/9.6420651',
348 'md5': '71a850c2c6ee5e912de169f5311bb533',
352 'title': 'Is it a breath of fresh air? Measuring air quality in Edmonton',
353 'description': 'md5:3922b92cc8b69212d739bd9dd095b1c3',
354 'series': 'CBC News Edmonton',
355 'media_type': 'Excerpt',
356 'thumbnail': 'https://i.cbc.ca/ais/73c4ab9c-7ad4-46ee-bb9b-020fdc01c745,1718214547576/full/max/0/default.jpg',
357 'timestamp': 1718220065.768,
358 'upload_date': '20240612',
362 'categories': ['News', 'Edmonton'],
364 'location': 'Edmonton',
367 'url': 'cbcplayer:1.7159484',
368 'only_matching': True,
370 'url': 'cbcplayer:2164402062',
371 'only_matching': True,
373 'url': 'http://www.cbc.ca/player/play/2657631896',
374 'only_matching': True,
377 def _parse_param(self
, asset_data
, name
):
378 return traverse_obj(asset_data
, ('params', lambda _
, v
: v
['name'] == name
, 'value', {str}
, any
))
380 def _real_extract(self
, url
):
381 video_id
= self
._match
_id
(url
)
382 webpage
= self
._download
_webpage
(f
'https://www.cbc.ca/player/play/{video_id}', video_id
)
383 data
= self
._search
_json
(
384 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', video_id
)['video']['currentClip']
385 assets
= traverse_obj(
386 data
, ('media', 'assets', lambda _
, v
: url_or_none(v
['key']) and v
['type']))
388 if not assets
and (media_id
:= traverse_obj(data
, ('mediaId', {str}
))):
389 # XXX: Deprecated; CBC is migrating off of ThePlatform
391 '_type': 'url_transparent',
392 'ie_key': 'ThePlatform',
394 f
'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{media_id}?mbr=true&formats=MPEG4,FLV,MP3', {
395 'force_smil_url': True,
398 '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS
401 is_live
= traverse_obj(data
, ('media', 'streamType', {str}
)) == 'Live'
402 formats
, subtitles
= [], {}
404 for sub
in traverse_obj(data
, ('media', 'textTracks', lambda _
, v
: url_or_none(v
['src']))):
405 subtitles
.setdefault(sub
.get('language') or 'und', []).append({
407 'name': sub
.get('label'),
411 asset_key
= asset
['key']
412 asset_type
= asset
['type']
413 if asset_type
!= 'medianet':
414 self
.report_warning(f
'Skipping unsupported asset type "{asset_type}": {asset_key}')
416 asset_data
= self
._download
_json
(asset_key
, video_id
, f
'Downloading {asset_type} JSON')
417 ext
= mimetype2ext(self
._parse
_param
(asset_data
, 'contentType'))
419 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
420 asset_data
['url'], video_id
, 'mp4', m3u8_id
='hls', live
=is_live
)
422 # Avoid slow/error-prone webvtt-over-m3u8 if direct https vtt is available
424 self
._merge
_subtitles
(subs
, target
=subtitles
)
425 if is_live
or not fmts
:
427 # Check for direct https mp4 format
428 best_video_fmt
= traverse_obj(fmts
, (
429 lambda _
, v
: v
.get('vcodec') != 'none' and v
['tbr'], all
,
430 {functools
.partial(sorted, key
=lambda x
: x
['tbr'])}, -1, {dict}
)) or {}
431 base_url
= self
._search
_regex
(
432 r
'(https?://[^?#]+?/)hdntl=', best_video_fmt
.get('url'), 'base url', default
=None)
433 if not base_url
or '/live/' in base_url
:
435 mp4_url
= base_url
+ replace_extension(url_basename(best_video_fmt
['url']), 'mp4')
436 if self
._request
_webpage
(
437 HEADRequest(mp4_url
), video_id
, 'Checking for https format',
438 errnote
=False, fatal
=False):
442 'format_id': 'https-mp4',
444 'manifest_url': None,
449 'url': asset_data
['url'],
451 'vcodec': 'none' if self
._parse
_param
(asset_data
, 'mediaType') == 'audio' else None,
454 chapters
= traverse_obj(data
, (
455 'media', 'chapters', lambda _
, v
: float(v
['startTime']) is not None, {
456 'start_time': ('startTime', {float_or_none(scale
=1000)}),
457 'end_time': ('endTime', {float_or_none(scale
=1000)}),
458 'title': ('name', {str}
),
460 # Filter out pointless single chapters with start_time==0 and no end_time
461 if len(chapters
) == 1 and not (chapters
[0].get('start_time') or chapters
[0].get('end_time')):
465 **traverse_obj(data
, {
466 'title': ('title', {str}
),
467 'description': ('description', {str.strip
}),
468 'thumbnail': ('image', 'url', {url_or_none}
, {update_url(query
=None)}),
469 'timestamp': ('publishedAt', {float_or_none(scale
=1000)}),
470 'media_type': ('media', 'clipType', {str}
),
471 'series': ('showName', {str}
),
472 'season_number': ('media', 'season', {int_or_none}
),
473 'duration': ('media', 'duration', {float_or_none}
, {lambda x
: None if is_live
else x
}),
474 'location': ('media', 'region', {str}
),
475 'tags': ('tags', ..., 'name', {str}
),
476 'genres': ('media', 'genre', all
),
477 'categories': ('categories', ..., 'name', {str}
),
481 'subtitles': subtitles
,
482 'chapters': chapters
,
487 class CBCPlayerPlaylistIE(InfoExtractor
):
488 IE_NAME
= 'cbc.ca:player:playlist'
489 _VALID_URL
= r
'https?://(?:www\.)?cbc\.ca/(?:player/)(?!play/)(?P<id>[^?#]+)'
491 'url': 'https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast',
492 'playlist_mincount': 25,
494 'id': 'news/tv shows/the national/latest broadcast',
497 'url': 'https://www.cbc.ca/player/news/Canada/North',
498 'playlist_mincount': 25,
500 'id': 'news/canada/north',
504 def _real_extract(self
, url
):
505 playlist_id
= urllib
.parse
.unquote(self
._match
_id
(url
)).lower()
506 webpage
= self
._download
_webpage
(url
, playlist_id
)
507 json_content
= self
._search
_json
(
508 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', playlist_id
)
511 for video_id
in traverse_obj(json_content
, (
512 'video', 'clipsByCategory', lambda k
, _
: k
.lower() == playlist_id
, 'items', ..., 'id',
514 yield self
.url_result(f
'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE
)
516 return self
.playlist_result(entries(), playlist_id
)
519 class CBCGemIE(InfoExtractor
):
520 IE_NAME
= 'gem.cbc.ca'
521 _VALID_URL
= r
'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
523 # This is a normal, public, TV show video
524 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
526 'id': 'schitts-creek/s06e01',
528 'title': 'Smoke Signals',
529 'description': 'md5:929868d20021c924020641769eb3e7f1',
530 'thumbnail': r
're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
532 'categories': ['comedy'],
533 'series': 'Schitt\'s Creek',
534 'season': 'Season 6',
536 'episode': 'Smoke Signals',
538 'episode_id': 'schitts-creek/s06e01',
539 'upload_date': '20210618',
540 'timestamp': 1623988800,
541 'release_date': '20200107',
542 'release_timestamp': 1578427200,
544 'params': {'format': 'bv'},
546 # This video requires an account in the browser, but works fine in yt-dlp
547 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
549 'id': 'schitts-creek/s01e01',
551 'title': 'The Cup Runneth Over',
552 'description': 'md5:9bca14ea49ab808097530eb05a29e797',
553 'thumbnail': r
're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg',
554 'series': 'Schitt\'s Creek',
556 'season': 'Season 1',
558 'episode': 'The Cup Runneth Over',
559 'episode_id': 'schitts-creek/s01e01',
561 'categories': ['comedy'],
562 'upload_date': '20210617',
563 'timestamp': 1623902400,
564 'release_date': '20151124',
565 'release_timestamp': 1448323200,
567 'params': {'format': 'bv'},
569 'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
570 'only_matching': True,
573 _GEO_COUNTRIES
= ['CA']
574 _TOKEN_API_KEY
= '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
575 _NETRC_MACHINE
= 'cbcgem'
578 def _new_claims_token(self
, email
, password
):
581 'password': password
,
583 headers
= {'content-type': 'application/json'}
584 query
= {'apikey': self
._TOKEN
_API
_KEY
}
585 resp
= self
._download
_json
('https://api.loginradius.com/identity/v2/auth/login',
586 None, data
=data
, headers
=headers
, query
=query
)
587 access_token
= resp
['access_token']
590 'access_token': access_token
,
591 'apikey': self
._TOKEN
_API
_KEY
,
594 resp
= self
._download
_json
('https://cloud-api.loginradius.com/sso/jwt/api/token',
595 None, headers
=headers
, query
=query
)
596 sig
= resp
['signature']
598 data
= json
.dumps({'jwt': sig
}).encode()
599 headers
= {'content-type': 'application/json', 'ott-device-type': 'web'}
600 resp
= self
._download
_json
('https://services.radio-canada.ca/ott/cbc-api/v2/token',
601 None, data
=data
, headers
=headers
, expected_status
=426)
602 cbc_access_token
= resp
['accessToken']
604 headers
= {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token
}
605 resp
= self
._download
_json
('https://services.radio-canada.ca/ott/cbc-api/v2/profile',
606 None, headers
=headers
, expected_status
=426)
607 return resp
['claimsToken']
609 def _get_claims_token_expiry(self
):
611 # JWT is decoded here and 'exp' field is extracted
612 # It is a Unix timestamp for when the token expires
613 b64_data
= self
._claims
_token
.split('.')[1]
614 data
= base64
.urlsafe_b64decode(b64_data
+ '==')
615 return json
.loads(data
)['exp']
617 def claims_token_expired(self
):
618 exp
= self
._get
_claims
_token
_expiry
()
619 # It will expire in less than 10 seconds, or has already expired
620 return exp
- time
.time() < 10
622 def claims_token_valid(self
):
623 return self
._claims
_token
is not None and not self
.claims_token_expired()
625 def _get_claims_token(self
, email
, password
):
626 if not self
.claims_token_valid():
627 self
._claims
_token
= self
._new
_claims
_token
(email
, password
)
628 self
.cache
.store(self
._NETRC
_MACHINE
, 'claims_token', self
._claims
_token
)
629 return self
._claims
_token
631 def _real_initialize(self
):
632 if self
.claims_token_valid():
634 self
._claims
_token
= self
.cache
.load(self
._NETRC
_MACHINE
, 'claims_token')
636 def _real_extract(self
, url
):
637 video_id
= self
._match
_id
(url
)
638 video_info
= self
._download
_json
(
639 f
'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}',
640 video_id
, expected_status
=426)
642 email
, password
= self
._get
_login
_info
()
643 if email
and password
:
644 claims_token
= self
._get
_claims
_token
(email
, password
)
645 headers
= {'x-claims-token': claims_token
}
648 m3u8_info
= self
._download
_json
(video_info
['playSession']['url'], video_id
, headers
=headers
)
650 if m3u8_info
.get('errorCode') == 1:
651 self
.raise_geo_restricted(countries
=['CA'])
652 elif m3u8_info
.get('errorCode') == 35:
653 self
.raise_login_required(method
='password')
654 elif m3u8_info
.get('errorCode') != 0:
655 raise ExtractorError(f
'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
657 formats
= self
._extract
_m
3u8_formats
(
658 m3u8_info
['url'], video_id
, 'mp4', m3u8_id
='hls', query
={'manifestType': ''})
659 self
._remove
_duplicate
_formats
(formats
)
662 if fmt
.get('vcodec') == 'none':
663 if fmt
.get('ext') is None:
665 if fmt
.get('acodec') is None:
666 fmt
['acodec'] = 'mp4a.40.2'
668 # Put described audio at the beginning of the list, so that it
669 # isn't chosen by default, as most people won't want it.
670 if 'descriptive' in fmt
['format_id'].lower():
671 fmt
['preference'] = -2
675 'episode_id': video_id
,
677 **traverse_obj(video_info
, {
678 'title': ('title', {str}
),
679 'episode': ('title', {str}
),
680 'description': ('description', {str}
),
681 'thumbnail': ('image', {url_or_none}
),
682 'series': ('series', {str}
),
683 'season_number': ('season', {int_or_none}
),
684 'episode_number': ('episode', {int_or_none}
),
685 'duration': ('duration', {int_or_none}
),
686 'categories': ('category', {str}
, all
),
687 'release_timestamp': ('airDate', {int_or_none(scale
=1000)}),
688 'timestamp': ('availableDate', {int_or_none(scale
=1000)}),
693 class CBCGemPlaylistIE(InfoExtractor
):
694 IE_NAME
= 'gem.cbc.ca:playlist'
695 _VALID_URL
= r
'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
697 # TV show playlist, all public videos
698 'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
699 'playlist_count': 16,
701 'id': 'schitts-creek/s06',
703 'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
704 'series': 'Schitt\'s Creek',
706 'season': 'Season 6',
707 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
710 'url': 'https://gem.cbc.ca/schitts-creek/s06',
711 'only_matching': True,
713 _API_BASE
= 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
715 def _real_extract(self
, url
):
716 match
= self
._match
_valid
_url
(url
)
717 season_id
= match
.group('id')
718 show
= match
.group('show')
719 show_info
= self
._download
_json
(self
._API
_BASE
+ show
, season_id
, expected_status
=426)
720 season
= int(match
.group('season'))
722 season_info
= next((s
for s
in show_info
['seasons'] if s
.get('season') == season
), None)
724 if season_info
is None:
725 raise ExtractorError(f
'Couldn\'t find season {season} of {show}')
728 for episode
in season_info
['assets']:
730 '_type': 'url_transparent',
732 'url': 'https://gem.cbc.ca/media/' + episode
['id'],
734 'title': episode
.get('title'),
735 'description': episode
.get('description'),
736 'thumbnail': episode
.get('image'),
737 'series': episode
.get('series'),
738 'season_number': episode
.get('season'),
739 'season': season_info
['title'],
740 'season_id': season_info
.get('id'),
741 'episode_number': episode
.get('episode'),
742 'episode': episode
.get('title'),
743 'episode_id': episode
['id'],
744 'duration': episode
.get('duration'),
745 'categories': [episode
.get('category')],
749 tn_uri
= season_info
.get('image')
750 # the-national was observed to use a "data:image/png;base64"
751 # URI for their 'image' value. The image was 1x1, and is
752 # probably just a placeholder, so it is ignored.
753 if tn_uri
is not None and not tn_uri
.startswith('data:'):
760 'title': season_info
['title'],
761 'description': season_info
.get('description'),
762 'thumbnail': thumbnail
,
763 'series': show_info
.get('title'),
764 'season_number': season_info
.get('season'),
765 'season': season_info
['title'],
769 class CBCGemLiveIE(InfoExtractor
):
770 IE_NAME
= 'gem.cbc.ca:live'
771 _VALID_URL
= r
'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
774 'url': 'https://gem.cbc.ca/live/920604739687',
777 'description': 'The live TV channel and local programming from Ottawa',
778 'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
779 'live_status': 'is_live',
780 'id': 'AyqZwxRqh8EH',
782 'release_timestamp': 1492106160,
783 'release_date': '20170413',
784 'uploader': 'CBCC-NEW',
786 'skip': 'Live might have ended',
789 'url': 'https://gem.cbc.ca/live/44',
794 'title': r
're:^Ottawa [0-9\-: ]+',
795 'description': 'The live TV channel and local programming from Ottawa',
796 'live_status': 'is_live',
797 'thumbnail': r
're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
799 'params': {'skip_download': True},
800 'skip': 'Live might have ended',
803 'url': 'https://gem.cbc.ca/live-event/10835',
808 'title': r
're:^The National \| Biden’s trip wraps up, Paltrow testifies, Bird flu [0-9\-: ]+',
809 'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
810 'live_status': 'is_live',
811 'thumbnail': r
're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
812 'release_timestamp': 1679706000,
813 'release_date': '20230325',
815 'params': {'skip_download': True},
816 'skip': 'Live might have ended',
818 { # event replay (medianetlive)
819 'url': 'https://gem.cbc.ca/live-event/42314',
820 'md5': '297a9600f554f2258aed01514226a697',
824 'live_status': 'was_live',
825 'title': 'Women\'s Soccer - Canada vs New Zealand',
826 'description': 'md5:36200e5f1a70982277b5a6ecea86155d',
827 'thumbnail': r
're:https://.+default\.jpg',
828 'release_timestamp': 1721917200,
829 'release_date': '20240725',
831 'params': {'skip_download': True},
832 'skip': 'Replay might no longer be available',
834 { # event replay (medianetlive)
835 'url': 'https://gem.cbc.ca/live-event/43273',
836 'only_matching': True,
839 _GEO_COUNTRIES
= ['CA']
841 def _real_extract(self
, url
):
842 video_id
= self
._match
_id
(url
)
843 webpage
= self
._download
_webpage
(url
, video_id
)
844 video_info
= self
._search
_nextjs
_data
(webpage
, video_id
)['props']['pageProps']['data']
846 # Three types of video_info JSON: info in root, freeTv stream/item, event replay
847 if not video_info
.get('formattedIdMedia'):
848 if traverse_obj(video_info
, ('event', 'key')) == video_id
:
849 video_info
= video_info
['event']
851 video_info
= traverse_obj(video_info
, (
852 ('freeTv', ('streams', ...)), 'items',
853 lambda _
, v
: v
['key'].partition('-')[0] == video_id
, any
)) or {}
855 video_stream_id
= video_info
.get('formattedIdMedia')
856 if not video_stream_id
:
857 raise ExtractorError(
858 'Couldn\'t find video metadata, maybe this livestream is now offline', expected
=True)
860 live_status
= 'was_live' if video_info
.get('isVodEnabled') else 'is_live'
861 release_timestamp
= traverse_obj(video_info
, ('airDate', {parse_iso8601}
))
863 if live_status
== 'is_live' and release_timestamp
and release_timestamp
> time
.time():
865 live_status
= 'is_upcoming'
866 self
.raise_no_formats('This livestream has not yet started', expected
=True)
868 stream_data
= self
._download
_json
(
869 'https://services.radio-canada.ca/media/validation/v2/', video_id
, query
={
870 'appCode': 'medianetlive',
871 'connectionType': 'hd',
872 'deviceType': 'ipad',
873 'idMedia': video_stream_id
,
874 'multibitrate': 'true',
877 'manifestType': 'desktop',
879 formats
= self
._extract
_m
3u8_formats
(
880 stream_data
['url'], video_id
, 'mp4', live
=live_status
== 'is_live')
885 'live_status': live_status
,
886 'release_timestamp': release_timestamp
,
887 **traverse_obj(video_info
, {
888 'title': ('title', {str}
),
889 'description': ('description', {str}
),
890 'thumbnail': ('images', 'card', 'url'),