yt_dlp/extractor/radiocanada.py

   1 from .common import InfoExtractor
   2 from ..networking.exceptions import HTTPError
   3 from ..utils import (
   4     determine_ext,
   5     ExtractorError,
   6     int_or_none,
   7     unified_strdate,
   8 )
   9
  10
  11 class RadioCanadaIE(InfoExtractor):
  12     IE_NAME = 'radiocanada'
  13     _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
  14     _TESTS = [
  15         {
  16             'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
  17             'info_dict': {
  18                 'id': '7184272',
  19                 'ext': 'mp4',
  20                 'title': 'Le parcours du tireur capté sur vidéo',
  21                 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
  22                 'upload_date': '20141023',
  23             },
  24             'params': {
  25                 # m3u8 download
  26                 'skip_download': True,
  27             }
  28         },
  29         {
  30             # empty Title
  31             'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
  32             'info_dict': {
  33                 'id': '7754998',
  34                 'ext': 'mp4',
  35                 'title': 'letelejournal22h',
  36                 'description': 'INTEGRALE WEB 22H-TJ',
  37                 'upload_date': '20170720',
  38             },
  39             'params': {
  40                 # m3u8 download
  41                 'skip_download': True,
  42             },
  43         },
  44         {
  45             # with protectionType but not actually DRM protected
  46             'url': 'radiocanada:toutv:140872',
  47             'info_dict': {
  48                 'id': '140872',
  49                 'title': 'Épisode 1',
  50                 'series': 'District 31',
  51             },
  52             'only_matching': True,
  53         }
  54     ]
  55     _GEO_COUNTRIES = ['CA']
  56     _access_token = None
  57     _claims = None
  58
  59     def _call_api(self, path, video_id=None, app_code=None, query=None):
  60         if not query:
  61             query = {}
  62         query.update({
  63             'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
  64             'output': 'json',
  65         })
  66         if video_id:
  67             query.update({
  68                 'appCode': app_code,
  69                 'idMedia': video_id,
  70             })
  71         if self._access_token:
  72             query['access_token'] = self._access_token
  73         try:
  74             return self._download_json(
  75                 'https://services.radio-canada.ca/media/' + path, video_id, query=query)
  76         except ExtractorError as e:
  77             if isinstance(e.cause, HTTPError) and e.cause.status in (401, 422):
  78                 data = self._parse_json(e.cause.response.read().decode(), None)
  79                 error = data.get('error_description') or data['errorMessage']['text']
  80                 raise ExtractorError(error, expected=True)
  81             raise
  82
  83     def _extract_info(self, app_code, video_id):
  84         metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
  85
  86         def get_meta(name):
  87             for meta in metas:
  88                 if meta.get('name') == name:
  89                     text = meta.get('text')
  90                     if text:
  91                         return text
  92
  93         # protectionType does not necessarily mean the video is DRM protected (see
  94         # https://github.com/ytdl-org/youtube-dl/pull/18609).
  95         if get_meta('protectionType'):
  96             self.report_warning('This video is probably DRM protected.')
  97
  98         query = {
  99             'connectionType': 'hd',
 100             'deviceType': 'ipad',
 101             'multibitrate': 'true',
 102         }
 103         if self._claims:
 104             query['claims'] = self._claims
 105         v_data = self._call_api('validation/v2/', video_id, app_code, query)
 106         v_url = v_data.get('url')
 107         if not v_url:
 108             error = v_data['message']
 109             if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
 110                 raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
 111             if error == 'Le contenu sélectionné est disponible seulement en premium':
 112                 self.raise_login_required(error)
 113             raise ExtractorError(
 114                 '%s said: %s' % (self.IE_NAME, error), expected=True)
 115         formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
 116
 117         subtitles = {}
 118         closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
 119         if closed_caption_url:
 120             subtitles['fr'] = [{
 121                 'url': closed_caption_url,
 122                 'ext': determine_ext(closed_caption_url, 'vtt'),
 123             }]
 124
 125         return {
 126             'id': video_id,
 127             'title': get_meta('Title') or get_meta('AV-nomEmission'),
 128             'description': get_meta('Description') or get_meta('ShortDescription'),
 129             'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
 130             'duration': int_or_none(get_meta('length')),
 131             'series': get_meta('Emission'),
 132             'season_number': int_or_none('SrcSaison'),
 133             'episode_number': int_or_none('SrcEpisode'),
 134             'upload_date': unified_strdate(get_meta('Date')),
 135             'subtitles': subtitles,
 136             'formats': formats,
 137         }
 138
 139     def _real_extract(self, url):
 140         return self._extract_info(*self._match_valid_url(url).groups())
 141
 142
 143 class RadioCanadaAudioVideoIE(InfoExtractor):
 144     IE_NAME = 'radiocanada:audiovideo'
 145     _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
 146     _TESTS = [{
 147         'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
 148         'info_dict': {
 149             'id': '7527184',
 150             'ext': 'mp4',
 151             'title': 'Barack Obama au Vietnam',
 152             'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
 153             'upload_date': '20160523',
 154         },
 155         'params': {
 156             # m3u8 download
 157             'skip_download': True,
 158         },
 159     }, {
 160         'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
 161         'only_matching': True,
 162     }]
 163
 164     def _real_extract(self, url):
 165         return self.url_result('radiocanada:medianet:%s' % self._match_id(url))