import itertools
import json

from .art19 import Art19IE
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    int_or_none,
    make_archive_id,
    parse_iso8601,
    smuggle_url,
    try_call,
    unsmuggle_url,
    update_url_query,
    url_or_none,
    urljoin,
)
from ..utils.traversal import traverse_obj

_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'


class NebulaBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'watchnebula'
    _token = _api_token = None

    def _perform_login(self, username, password):
        try:
            response = self._download_json(
                'https://nebula.tv/auth/login/', None,
                'Logging in to Nebula', 'Login failed',
                data=json.dumps({'email': username, 'password': password}).encode(),
                headers={'content-type': 'application/json'})
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                raise ExtractorError('Login failed: Invalid username or password', expected=True)
            raise
        self._api_token = traverse_obj(response, ('key', {str}))
        if not self._api_token:
            raise ExtractorError('Login failed: No token')

    def _call_api(self, *args, **kwargs):
        if self._token:
            kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'
        try:
            return self._download_json(*args, **kwargs)
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403):
                raise
            self.to_screen(
                f'Reauthorizing with Nebula and retrying, because last API call resulted in error {e.cause.status}')
            self._real_initialize()
            if self._token:
                kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'
            return self._download_json(*args, **kwargs)
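
    # Note: _call_api retries exactly once. On a 401/403 it refreshes the
    # bearer token via _real_initialize() and repeats the request; any other
    # HTTP error, or a failure of the retried request, propagates to the caller.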

    def _real_initialize(self):
        if not self._api_token:
            self._api_token = try_call(
                lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)
        self._token = self._download_json(
            'https://users.api.nebula.app/api/v1/authorization/', None,
            headers={'Authorization': f'Token {self._api_token}'} if self._api_token else None,
            note='Authorizing to Nebula', data=b'')['token']
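
    # Auth flow: the long-lived API token comes either from _perform_login()
    # (the 'key' field of the login response) or from the 'nebula_auth.apiToken'
    # cookie, and is then exchanged at users.api.nebula.app for the short-lived
    # bearer token that subsequent API requests send.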

    def _extract_formats(self, content_id, slug):
        for retry in (False, True):
            try:
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/manifest.m3u8',
                    slug, 'mp4', query={
                        'token': self._token,
                        'app_version': '23.10.0',
                        'platform': 'ios',
                    })
                return {'formats': fmts, 'subtitles': subs}
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                    self.raise_login_required()
                if not retry and isinstance(e.cause, HTTPError) and e.cause.status == 403:
                    self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
                    self._real_initialize()
                    continue
                raise
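
    # The manifest URL is derived from the content id, which appears elsewhere
    # in this module to be of the form '<type>:<uuid>' (note the partition(':')
    # in _extract_video_metadata). Splitting on ':' and appending 's' yields
    # pluralized endpoints such as .../video_episodes/<id>/manifest.m3u8,
    # matching the video_episodes/ listing endpoint used by the playlist
    # extractors below.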

    def _extract_video_metadata(self, episode):
        channel_url = traverse_obj(
            episode, (('channel_slug', 'class_slug'), {urljoin('https://nebula.tv/')}), get_all=False)
        return {
            'id': episode['id'].partition(':')[2],
            **traverse_obj(episode, {
                'display_id': 'slug',
                'title': 'title',
                'description': 'description',
                'timestamp': ('published_at', {parse_iso8601}),
                'duration': ('duration', {int_or_none}),
                'channel_id': 'channel_slug',
                'uploader_id': 'channel_slug',
                'channel': 'channel_title',
                'uploader': 'channel_title',
                'series': 'channel_title',
                'creator': 'channel_title',
                'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
                'episode_number': ('order', {int_or_none}),
                # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
                '_old_archive_ids': ('zype_id', {lambda x: [
                    make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}),
            }),
            'channel_url': channel_url,
            'uploader_url': channel_url,
        }


class NebulaIE(NebulaBaseIE):
    IE_NAME = 'nebula:video'
    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
        'info_dict': {
            'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
            'ext': 'mp4',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'md5:2aae3c4cfc5ee09a1ecdff0909618cf4',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
            'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'series': 'Lindsay Ellis',
            'display_id': 'that-time-disney-remade-beauty-and-the-beast',
            'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'creator': 'Lindsay Ellis',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            '_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': 'd05739cf6c38c09322422f696b569c23',
        'info_dict': {
            'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
            'ext': 'mp4',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering — The Logistics of D-Day',
            'channel_id': 'd-day',
            'uploader': 'Real Engineering — The Logistics of D-Day',
            'uploader_id': 'd-day',
            'series': 'Real Engineering — The Logistics of D-Day',
            'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'creator': 'Real Engineering — The Logistics of D-Day',
            'channel_url': 'https://nebula.tv/d-day',
            'uploader_url': 'https://nebula.tv/d-day',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            '_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
        'md5': 'ebe28a7ad822b9ee172387d860487868',
        'info_dict': {
            'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
            'ext': 'mp4',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
            'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
            'channel_url': 'https://nebula.tv/tom-scott-presents-money',
            'series': 'Tom Scott Presents: Money',
            'display_id': 'money-episode-1-the-draw',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            'creator': 'Tom Scott Presents: Money',
            '_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }, {
        'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
        'info_dict': {
            'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
            'ext': 'mp4',
            'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
            'title': 'Did the US Really Blow Up the NordStream Pipelines?',
            'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
            'upload_date': '20230223',
            'timestamp': 1677144070,
            'channel': 'TLDR News EU',
            'channel_id': 'tldrnewseu',
            'uploader': 'TLDR News EU',
            'uploader_id': 'tldrnewseu',
            'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'series': 'TLDR News EU',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            'creator': 'TLDR News EU',
            '_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        slug = self._match_id(url)
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('id'):
            return {
                'id': smuggled_data['id'],
                'display_id': slug,
                'title': slug,
                **self._extract_formats(smuggled_data['id'], slug),
            }

        metadata = self._call_api(
            f'https://content.api.nebula.app/content/videos/{slug}',
            slug, note='Fetching video metadata')
        return {
            **self._extract_video_metadata(metadata),
            **self._extract_formats(metadata['id'], slug),
        }
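
    # The playlist extractors below smuggle the full content id into the URL;
    # when it is present, _real_extract skips the per-video metadata request
    # and goes straight to _extract_formats, relying on the url_transparent
    # metadata passed along with each playlist entry.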


class NebulaClassIE(NebulaBaseIE):
    IE_NAME = 'nebula:media'
    _VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
        'info_dict': {
            'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
            'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'episode_number': 14,
            'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
            'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'episode': 'Episode 14',
            'title': 'Photos, Sculpture, and Video',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
        'info_dict': {
            'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
            'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
            'series_id': '335e8159-d663-491a-888f-1732285706ac',
            'modified_timestamp': 1599091504,
            'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
            'series': 'Extremities',
            'modified_date': '20200903',
            'upload_date': '20200902',
            'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
            'release_timestamp': 1571237958,
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'duration': 1546.05714,
            'timestamp': 1599085608,
            'release_date': '20191016',
        },
    }, {
        'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
        'info_dict': {
            'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
            'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
            'release_date': '20230304',
            'modified_date': '20230403',
            'series': 'The Layover',
            'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
            'modified_timestamp': 1680554566,
            'duration': 3130.46401,
            'release_timestamp': 1677943800,
            'title': 'The Layover — Episode 1',
            'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
            'upload_date': '20230303',
            'episode': 'Episode 1',
            'timestamp': 1677883672,
            'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
        },
    }]

    def _real_extract(self, url):
        slug, episode = self._match_valid_url(url).group('id', 'ep')
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('id'):
            return {
                'id': smuggled_data['id'],
                'display_id': slug,
                'title': slug,
                **self._extract_formats(smuggled_data['id'], slug),
            }

        metadata = self._call_api(
            f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
            slug, note='Fetching class/podcast metadata')
        content_type = metadata.get('type')
        if content_type == 'lesson':
            return {
                **self._extract_video_metadata(metadata),
                **self._extract_formats(metadata['id'], slug),
            }
        elif content_type == 'podcast_episode':
            episode_url = metadata['episode_url']
            if not episode_url and metadata.get('premium'):
                self.raise_login_required()

            if Art19IE.suitable(episode_url):
                return self.url_result(episode_url, Art19IE)
            return traverse_obj(metadata, {
                'id': ('id', {str}),
                'url': ('episode_url', {url_or_none}),
                'title': ('title', {str}),
                'description': ('description', {str}),
                'timestamp': ('published_at', {parse_iso8601}),
                'duration': ('duration', {int_or_none}),
                'channel_id': ('channel_id', {str}),
                'channel': ('channel_title', {str}),
                'thumbnail': ('assets', 'regular', {url_or_none}),
            })

        raise ExtractorError(f'Unexpected content type {content_type!r}')


class NebulaSubscriptionsIE(NebulaBaseIE):
    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://nebula.tv/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
    }]

    def _generate_playlist_entries(self):
        next_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
            'following': 'true',
            'include': 'engagement',
            'ordering': '-published_at',
        })
        for page_num in itertools.count(1):
            channel = self._call_api(
                next_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
            for episode in channel['results']:
                metadata = self._extract_video_metadata(episode)
                yield self.url_result(smuggle_url(
                    f'https://nebula.tv/videos/{metadata["display_id"]}',
                    {'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
            next_url = channel.get('next')
            if not next_url:
                break

    def _real_extract(self, url):
        return self.playlist_result(self._generate_playlist_entries(), 'myshows')


class NebulaChannelIE(NebulaBaseIE):
    IE_NAME = 'nebula:channel'
    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://nebula.tv/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
    }, {
        'url': 'https://nebula.tv/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nebula.tv/johnnyharris',
        'info_dict': {
            'id': 'johnnyharris',
            'title': 'Johnny Harris',
            'description': 'I make videos about maps and many other things.',
        },
        'playlist_mincount': 90,
    }, {
        'url': 'https://nebula.tv/copyright-for-fun-and-profit',
        'info_dict': {
            'id': 'copyright-for-fun-and-profit',
            'title': 'Copyright for Fun and Profit',
            'description': 'md5:6690248223eed044a9f11cd5a24f9742',
        },
        'playlist_count': 23,
    }, {
        'url': 'https://nebula.tv/trussissuespodcast',
        'info_dict': {
            'id': 'trussissuespodcast',
            'title': 'The TLDR News Podcast',
            'description': 'md5:a08c4483bc0b705881d3e0199e721385',
        },
        'playlist_mincount': 80,
    }]

    def _generate_playlist_entries(self, collection_id, collection_slug):
        next_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
        for page_num in itertools.count(1):
            episodes = self._call_api(next_url, collection_slug, note=f'Retrieving channel page {page_num}')
            for episode in episodes['results']:
                metadata = self._extract_video_metadata(episode)
                yield self.url_result(smuggle_url(
                    episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}',
                    {'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
            next_url = episodes.get('next')
            if not next_url:
                break

    def _generate_class_entries(self, channel):
        for lesson in channel['lessons']:
            metadata = self._extract_video_metadata(lesson)
            yield self.url_result(smuggle_url(
                lesson.get('share_url') or f'https://nebula.tv/{metadata["class_slug"]}/{metadata["slug"]}',
                {'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)

    def _generate_podcast_entries(self, collection_id, collection_slug):
        next_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
        for page_num in itertools.count(1):
            episodes = self._call_api(next_url, collection_slug, note=f'Retrieving podcast page {page_num}')

            for episode in traverse_obj(episodes, ('results', lambda _, v: url_or_none(v['share_url']))):
                yield self.url_result(episode['share_url'], NebulaClassIE)
            next_url = episodes.get('next')
            if not next_url:
                break

    def _real_extract(self, url):
        collection_slug = self._match_id(url)
        channel = self._call_api(
            f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
            collection_slug, note='Retrieving channel')

        if channel.get('type') == 'class':
            entries = self._generate_class_entries(channel)
        elif channel.get('type') == 'podcast_channel':
            entries = self._generate_podcast_entries(channel['id'], collection_slug)
        else:
            entries = self._generate_playlist_entries(channel['id'], collection_slug)

        return self.playlist_result(
            entries=entries,
            playlist_id=collection_slug,
            playlist_title=channel.get('title'),
            playlist_description=channel.get('description'))
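

# A minimal usage sketch (not part of the extractor itself), assuming yt-dlp is
# installed: the extractors above are exercised through the normal embedding
# API. The URL is simply the first test URL from NebulaIE._TESTS; credentials
# would come from --netrc (machine 'watchnebula') or browser cookies.
#
#     import yt_dlp
#
#     with yt_dlp.YoutubeDL() as ydl:
#         info = ydl.extract_info(
#             'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
#             download=False)
#         print(info['id'], info['title'])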