[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / radiocanada.py
blob950b9ec068a862f3ae7649c04e8840c76dd1941f
1 from .common import InfoExtractor
2 from ..networking.exceptions import HTTPError
3 from ..utils import (
4 ExtractorError,
5 determine_ext,
6 int_or_none,
7 unified_strdate,
class RadioCanadaIE(InfoExtractor):
    """Extractor for media served through the Radio-Canada media API.

    Matches both the internal ``radiocanada:<app_code>:<id>`` scheme and
    ``ici.radio-canada.ca/widgets/mediaconsole/<app_code>/<id>`` URLs.
    """

    IE_NAME = 'radiocanada'
    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
            'info_dict': {
                'id': '7184272',
                'ext': 'mp4',
                'title': 'Le parcours du tireur capté sur vidéo',
                'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
                'upload_date': '20141023',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            # empty Title
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
            'info_dict': {
                'id': '7754998',
                'ext': 'mp4',
                'title': 'letelejournal22h',
                'description': 'INTEGRALE WEB 22H-TJ',
                'upload_date': '20170720',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            # with protectionType but not actually DRM protected
            'url': 'radiocanada:toutv:140872',
            'info_dict': {
                'id': '140872',
                'title': 'Épisode 1',
                'series': 'District 31',
            },
            'only_matching': True,
        },
    ]
    _GEO_COUNTRIES = ['CA']
    # Both default to None and are never assigned inside this class; when
    # truthy they are forwarded to the API as `access_token` / `claims`.
    # NOTE(review): presumably set by related extractors after login - confirm.
    _access_token = None
    _claims = None

    def _call_api(self, path, video_id=None, app_code=None, query=None):
        """Call a media API endpoint and return its decoded JSON response.

        `path` is appended to ``https://services.radio-canada.ca/media/``.
        `query` holds extra query parameters; it is copied, so the caller's
        mapping is never modified. When `video_id` is truthy, `appCode` and
        `idMedia` are added; when an access token is set, it is added too.

        Raises an expected ExtractorError carrying the API's own message for
        HTTP 401/422 responses that provide one. Any other failure (including
        401/422 responses without a usable message) is re-raised unchanged.
        """
        # Later keys win, matching the precedence of the successive updates:
        # fixed keys override caller-supplied ones.
        query = {
            **(query or {}),
            'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
            'output': 'json',
        }
        if video_id:
            query.update({
                'appCode': app_code,
                'idMedia': video_id,
            })
        if self._access_token:
            query['access_token'] = self._access_token
        try:
            return self._download_json(
                'https://services.radio-canada.ca/media/' + path, video_id, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status in (401, 422):
                # Non-fatal parse: an unparsable or unexpected error body must
                # not mask the original HTTP error.
                data = self._parse_json(
                    e.cause.response.read().decode(), None, fatal=False)
                error = None
                if isinstance(data, dict):
                    error_message = data.get('errorMessage')
                    error = data.get('error_description') or (
                        error_message.get('text') if isinstance(error_message, dict) else None)
                if error:
                    raise ExtractorError(error, expected=True)
            raise

    def _extract_info(self, app_code, video_id):
        """Fetch metadata and formats for one media item and return its info dict.

        Queries the `meta/v1/index.ashx` endpoint for the metadata list, then
        `validation/v2/` for the stream URL, which is expanded as an m3u8
        manifest.

        Raises a geo-restriction or login-required error when the validation
        endpoint answers with the corresponding message, and an expected
        ExtractorError for any other message returned without a stream URL.
        """
        metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']

        def get_meta(name):
            # First non-empty `text` among the entries named `name`, else None.
            for meta in metas:
                if meta.get('name') == name:
                    text = meta.get('text')
                    if text:
                        return text
            return None

        # protectionType does not necessarily mean the video is DRM protected (see
        # https://github.com/ytdl-org/youtube-dl/pull/18609).
        if get_meta('protectionType'):
            self.report_warning('This video is probably DRM protected.')

        query = {
            'connectionType': 'hd',
            'deviceType': 'ipad',
            'multibitrate': 'true',
        }
        if self._claims:
            query['claims'] = self._claims
        v_data = self._call_api('validation/v2/', video_id, app_code, query)
        v_url = v_data.get('url')
        if not v_url:
            error = v_data['message']
            if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
                # The helper raises by itself; no outer `raise` is needed.
                self.raise_geo_restricted(error, self._GEO_COUNTRIES)
            if error == 'Le contenu sélectionné est disponible seulement en premium':
                self.raise_login_required(error)
            raise ExtractorError(
                f'{self.IE_NAME} said: {error}', expected=True)
        formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')

        subtitles = {}
        closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
        if closed_caption_url:
            subtitles['fr'] = [{
                'url': closed_caption_url,
                'ext': determine_ext(closed_caption_url, 'vtt'),
            }]

        return {
            'id': video_id,
            'title': get_meta('Title') or get_meta('AV-nomEmission'),
            'description': get_meta('Description') or get_meta('ShortDescription'),
            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
            'duration': int_or_none(get_meta('length')),
            'series': get_meta('Emission'),
            # Look the values up in the metadata; converting the literal meta
            # names themselves always produced None.
            'season_number': int_or_none(get_meta('SrcSaison')),
            'episode_number': int_or_none(get_meta('SrcEpisode')),
            'upload_date': unified_strdate(get_meta('Date')),
            'subtitles': subtitles,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Split `url` into (app_code, id) via `_VALID_URL` and extract that media."""
        return self._extract_info(*self._match_valid_url(url).groups())
class RadioCanadaAudioVideoIE(InfoExtractor):
    # Handles ici.radio-canada.ca pages whose path contains `media-<id>` by
    # delegating to the internal `radiocanada:medianet:<id>` URL scheme.
    IE_NAME = 'radiocanada:audiovideo'
    _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
            'info_dict': {
                'id': '7527184',
                'ext': 'mp4',
                'title': 'Barack Obama au Vietnam',
                'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
                'upload_date': '20160523',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Only the numeric media id is needed; the app code is always `medianet`.
        media_id = self._match_id(url)
        target = f'radiocanada:medianet:{media_id}'
        return self.url_result(target)