yt_dlp/extractor/radlive.py

   1 import json
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     ExtractorError,
   6     format_field,
   7     traverse_obj,
   8     try_get,
   9     unified_timestamp,
  10 )
  11
  12
  13 class RadLiveIE(InfoExtractor):
  14     IE_NAME = 'radlive'
  15     _VALID_URL = r'https?://(?:www\.)?rad\.live/content/(?P<content_type>feature|episode)/(?P<id>[a-f0-9-]+)'
  16     _TESTS = [{
  17         'url': 'https://rad.live/content/feature/dc5acfbc-761b-4bec-9564-df999905116a',
  18         'md5': '6219d5d31d52de87d21c9cf5b7cb27ff',
  19         'info_dict': {
  20             'id': 'dc5acfbc-761b-4bec-9564-df999905116a',
  21             'ext': 'mp4',
  22             'title': 'Deathpact - Digital Mirage 2 [Full Set]',
  23             'language': 'en',
  24             'thumbnail': 'https://static.12core.net/cb65ae077a079c68380e38f387fbc438.png',
  25             'description': '',
  26             'release_timestamp': 1600185600.0,
  27             'channel': 'Proximity',
  28             'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009',
  29             'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009',
  30         },
  31     }, {
  32         'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
  33         'md5': '40b2175f347592125d93e9a344080125',
  34         'info_dict': {
  35             'id': 'bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
  36             'ext': 'mp4',
  37             'title': 'E01: Bad Jokes 1',
  38             'language': 'en',
  39             'thumbnail': 'https://lsp.littlstar.com/channels/WHISTLE/BAD_JOKES/SEASON_1/BAD_JOKES_101/poster.jpg',
  40             'description': 'Bad Jokes - Champions, Adam Pally, Super Troopers, Team Edge and 2Hype',
  41             'episode': 'E01: Bad Jokes 1',
  42             'episode_number': 1,
  43             'episode_id': '336',
  44         },
  45     }]
  46
  47     def _real_extract(self, url):
  48         content_type, video_id = self._match_valid_url(url).groups()
  49
  50         webpage = self._download_webpage(url, video_id)
  51
  52         content_info = json.loads(self._search_regex(
  53             r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
  54             webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
  55         video_info = content_info[content_type]
  56
  57         if not video_info:
  58             raise ExtractorError('Unable to extract video info, make sure the URL is valid')
  59
  60         formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id)
  61
  62         data = video_info.get('structured_data', {})
  63
  64         release_date = unified_timestamp(traverse_obj(data, ('releasedEvent', 'startDate')))
  65         channel = next(iter(content_info.get('channels', [])), {})
  66         channel_id = channel.get('lrn', '').split(':')[-1] or None
  67
  68         result = {
  69             'id': video_id,
  70             'title': video_info['title'],
  71             'formats': formats,
  72             'language': traverse_obj(data, ('potentialAction', 'target', 'inLanguage')),
  73             'thumbnail': traverse_obj(data, ('image', 'contentUrl')),
  74             'description': data.get('description'),
  75             'release_timestamp': release_date,
  76             'channel': channel.get('name'),
  77             'channel_id': channel_id,
  78             'channel_url': format_field(channel_id, None, 'https://rad.live/content/channel/%s'),
  79
  80         }
  81         if content_type == 'episode':
  82             result.update({
  83                 # TODO: Get season number when downloading single episode
  84                 'episode': video_info.get('title'),
  85                 'episode_number': video_info.get('number'),
  86                 'episode_id': video_info.get('id'),
  87             })
  88
  89         return result
  90
  91
  92 class RadLiveSeasonIE(RadLiveIE):  # XXX: Do not subclass from concrete IE
  93     IE_NAME = 'radlive:season'
  94     _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)'
  95     _TESTS = [{
  96         'url': 'https://rad.live/content/season/08a290f7-c9ef-4e22-9105-c255995a2e75',
  97         'md5': '40b2175f347592125d93e9a344080125',
  98         'info_dict': {
  99             'id': '08a290f7-c9ef-4e22-9105-c255995a2e75',
 100             'title': 'Bad Jokes - Season 1',
 101         },
 102         'playlist_mincount': 5,
 103     }]
 104
 105     @classmethod
 106     def suitable(cls, url):
 107         return False if RadLiveIE.suitable(url) else super().suitable(url)
 108
 109     def _real_extract(self, url):
 110         season_id = self._match_id(url)
 111         webpage = self._download_webpage(url, season_id)
 112
 113         content_info = json.loads(self._search_regex(
 114             r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
 115             webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
 116         video_info = content_info['season']
 117
 118         entries = [{
 119             '_type': 'url_transparent',
 120             'id': episode['structured_data']['url'].split('/')[-1],
 121             'url': episode['structured_data']['url'],
 122             'series': try_get(content_info, lambda x: x['series']['title']),
 123             'season': video_info['title'],
 124             'season_number': video_info.get('number'),
 125             'season_id': video_info.get('id'),
 126             'ie_key': RadLiveIE.ie_key(),
 127         } for episode in video_info['episodes']]
 128
 129         return self.playlist_result(entries, season_id, video_info.get('title'))
 130
 131
 132 class RadLiveChannelIE(RadLiveIE):  # XXX: Do not subclass from concrete IE
 133     IE_NAME = 'radlive:channel'
 134     _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)'
 135     _TESTS = [{
 136         'url': 'https://rad.live/content/channel/5c4d8df4-6fa0-413c-81e3-873479b49274',
 137         'md5': '625156a08b7f2b0b849f234e664457ac',
 138         'info_dict': {
 139             'id': '5c4d8df4-6fa0-413c-81e3-873479b49274',
 140             'title': 'Whistle Sports',
 141         },
 142         'playlist_mincount': 7,
 143     }]
 144
 145     _QUERY = '''
 146 query WebChannelListing ($lrn: ID!) {
 147   channel (id:$lrn) {
 148     name
 149     features {
 150       structured_data
 151     }
 152   }
 153 }'''
 154
 155     @classmethod
 156     def suitable(cls, url):
 157         return False if RadLiveIE.suitable(url) else super().suitable(url)
 158
 159     def _real_extract(self, url):
 160         channel_id = self._match_id(url)
 161
 162         graphql = self._download_json(
 163             'https://content.mhq.12core.net/graphql', channel_id,
 164             headers={'Content-Type': 'application/json'},
 165             data=json.dumps({
 166                 'query': self._QUERY,
 167                 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'},
 168             }).encode())
 169
 170         data = traverse_obj(graphql, ('data', 'channel'))
 171         if not data:
 172             raise ExtractorError('Unable to extract video info, make sure the URL is valid')
 173
 174         entries = [{
 175             '_type': 'url_transparent',
 176             'url': feature['structured_data']['url'],
 177             'ie_key': RadLiveIE.ie_key(),
 178         } for feature in data['features']]
 179
 180         return self.playlist_result(entries, channel_id, data.get('name'))