[ie/soundcloud] Various fixes (#11820)
[yt-dlp.git] / yt_dlp / extractor / globalplayer.py
blob3d4a9304ca371c89752730b0b2be8071992756c6
1 from .common import InfoExtractor
2 from ..utils import (
3 clean_html,
4 join_nonempty,
5 parse_duration,
6 str_or_none,
7 traverse_obj,
8 unified_strdate,
9 unified_timestamp,
10 urlhandle_detect_ext,
class GlobalPlayerBaseIE(InfoExtractor):
    """Shared helpers for the Global Player extractors defined in this module."""

    def _get_page_props(self, url, video_id):
        """Download `url` and return the `props` -> `pageProps` mapping of its Next.js data."""
        page_html = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(page_html, video_id)
        return nextjs_data['props']['pageProps']

    def _request_ext(self, url, video_id):
        """Request `url` and detect the file extension from the response handle."""
        # Server rejects HEAD requests
        stream_handle = self._request_webpage(
            url, video_id, note='Determining source extension')
        return urlhandle_detect_ext(stream_handle)

    def _extract_audio(self, episode, series):
        """Build an audio-only info dict from `episode` and its parent `series`.

        Episode fields are merged after the series fields, so on a shared key
        (e.g. 'thumbnail') the episode value takes precedence.
        """
        series_fields = traverse_obj(series, {
            'series': 'title',
            'series_id': 'id',
            'thumbnail': 'imageUrl',
            'uploader': 'itunesAuthor',  # podcasts only
        })
        # get_all=False: for 'timestamp', the first of the alternative keys that yields a value wins
        episode_fields = traverse_obj(episode, {
            'id': 'id',
            'description': ('description', {clean_html}),
            'duration': ('duration', {parse_duration}),
            'thumbnail': 'imageUrl',
            'url': 'streamUrl',
            'timestamp': (('pubDate', 'startDate'), {unified_timestamp}),
            'title': 'title',
        }, get_all=False)

        return {
            'vcodec': 'none',
            **series_fields,
            **episode_fields,
        }
class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
    """Extractor for Global Player live station pages (`/live/<brand>/<region>`)."""

    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
        'info_dict': {
            'id': '2mx1E',
            'ext': 'aac',
            'display_id': 'smoothchill-uk',
            'title': 're:^Smooth Chill.+$',
            'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
            'description': 'Music To Chill To',
            'live_status': 'is_live',
        },
    }, {
        # national station
        'url': 'https://www.globalplayer.com/live/heart/uk/',
        'info_dict': {
            'id': '2mwx4',
            'ext': 'aac',
            'description': 'turn up the feel good!',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            'live_status': 'is_live',
            'title': 're:^Heart UK.+$',
            'display_id': 'heart-uk',
        },
    }, {
        # regional variation
        'url': 'https://www.globalplayer.com/live/heart/london/',
        'info_dict': {
            'id': 'AMqg',
            'ext': 'aac',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            'title': 're:^Heart London.+$',
            'live_status': 'is_live',
            'display_id': 'heart-london',
            'description': 'turn up the feel good!',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the live station found in the page props of `url`."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        station = page_props['station']
        stream_url = station['streamUrl']

        # Values are computed in the same order the result dict lists them
        station_id = station['id']
        display_id = join_nonempty('brandSlug', 'slug', from_dict=station)
        if not display_id:
            display_id = station.get('legacyStationPrefix')
        ext = self._request_ext(stream_url, video_id)
        # get_all=False: 'title' takes the first of 'name' / 'brandName' that yields a value
        station_fields = traverse_obj(station, {
            'title': (('name', 'brandName'), {str_or_none}),
            'description': 'tagline',
            'thumbnail': 'brandLogo',
        }, get_all=False)

        return {
            'id': station_id,
            'display_id': display_id,
            'url': stream_url,
            'ext': ext,
            'vcodec': 'none',
            'is_live': True,
            **station_fields,
        }
class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
    """Extractor for Global Player "live playlist" pages (`/playlists/<id>`)."""

    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
    _TESTS = [{
        # "live playlist"
        'url': 'https://www.globalplayer.com/playlists/8bLk/',
        'info_dict': {
            'id': '8bLk',
            'ext': 'aac',
            'live_status': 'is_live',
            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
            'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
            'title': 're:^Classic FM Hall of Fame.+$',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the live stream described by the page's `playlistData`."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        playlist_data = page_props['playlistData']
        stream_url = playlist_data['streamUrl']

        ext = self._request_ext(stream_url, video_id)
        playlist_fields = traverse_obj(playlist_data, {
            'title': 'title',
            'description': 'description',
            'thumbnail': 'image',
        })

        return {
            'id': video_id,
            'url': stream_url,
            'ext': ext,
            'vcodec': 'none',
            'is_live': True,
            **playlist_fields,
        }
class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
    """Playlist extractor for Global Player podcast and radio catchup series pages."""

    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
        'playlist_mincount': 5,
        'info_dict': {
            'id': '42KuaM',
            'title': 'Filthy Ritual',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'categories': ['Society & Culture', 'True Crime'],
            'uploader': 'Global',
            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
        'playlist_mincount': 3,
        'info_dict': {
            'id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist of the series' episodes that have both an id and a stream URL."""
        mobj = self._match_valid_url(url)
        video_id, podcast = mobj.group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        # The 'podcast' group only matches for /podcasts/ URLs; otherwise this is a catchup page
        series = props['podcastInfo' if podcast else 'catchupInfo']

        playable_episodes = traverse_obj(
            series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))
        entries = [self._extract_audio(item, series) for item in playable_episodes]
        categories = traverse_obj(series, ('categories', ..., 'name')) or None
        series_fields = traverse_obj(series, {
            'description': 'description',
            'thumbnail': 'imageUrl',
            'title': 'title',
            'uploader': 'itunesAuthor',  # podcasts only
        })

        return {
            '_type': 'playlist',
            'id': video_id,
            'entries': entries,
            'categories': categories,
            **series_fields,
        }
class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
    """Extractor for a single Global Player podcast or radio catchup episode page."""

    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
        'info_dict': {
            'id': '7DrfNnE',
            'ext': 'mp3',
            'title': 'Filthy Ritual - Trailer',
            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'duration': 225.0,
            'timestamp': 1681254900,
            'series': 'Filthy Ritual',
            'series_id': '42KuaM',
            'upload_date': '20230411',
            'uploader': 'Global',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
        'info_dict': {
            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
            'ext': 'm4a',
            'timestamp': 1682056800,
            'series': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
            'upload_date': '20230421',
            'series_id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'duration': 10800.0,
        },
    }]

    def _real_extract(self, url):
        """Return the audio info dict for the episode found in the page props of `url`."""
        mobj = self._match_valid_url(url)
        video_id, podcast = mobj.group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        # The 'podcast' group only matches for /podcasts/ URLs; otherwise this is a catchup episode
        episode = props['podcastEpisode' if podcast else 'catchupEpisode']

        # Parent series metadata is read from 'podcast', falling back to 'show';
        # an empty dict is used when neither yields a dict
        series = traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {}
        return self._extract_audio(episode, series)
class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
    """Extractor for Global Player video pages (`/videos/<id>`)."""

    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
        'info_dict': {
            'id': '2JsSZ7Gm2uP',
            'ext': 'mp4',
            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
            'upload_date': '20230420',
            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict built from the page's `videoData` mapping."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        video_data = page_props['videoData']

        video_fields = traverse_obj(video_data, {
            'url': 'url',
            'thumbnail': ('image', 'url'),
            'title': 'title',
            'upload_date': ('publish_date', {unified_strdate}),
            'description': 'description',
        })

        return {
            'id': video_id,
            **video_fields,
        }