[ie/crunchyroll] Remove initial state extraction (#7632)
[yt-dlp3.git] / yt_dlp / extractor / crunchyroll.py
blob ee34aced5555a861a9afa1c9ae0e8eefe9630a84
1 import base64
3 from .common import InfoExtractor
4 from ..networking.exceptions import HTTPError
5 from ..utils import (
6 ExtractorError,
7 float_or_none,
8 format_field,
9 int_or_none,
10 join_nonempty,
11 parse_age_limit,
12 parse_count,
13 parse_iso8601,
14 qualities,
15 remove_start,
16 time_seconds,
17 traverse_obj,
18 url_or_none,
19 urlencode_postdata,
class CrunchyrollBaseIE(InfoExtractor):
    # Web origin: used for the `etp_rt` cookie lookup, the token endpoint
    # (`/auth/v1/token`) and every request made through `_call_base_api`
    _BASE_URL = 'https://www.crunchyroll.com'
    # Host of the API used by `_perform_login` (session id + login requests)
    _API_BASE = 'https://api.crunchyroll.com'
    _NETRC_MACHINE = 'crunchyroll'
    # Cached `Authorization` header for API requests; assigned by `_update_auth`
    _AUTH_HEADERS = None
    # Path segment inserted into `/content/v2/<endpoint>/` by `_call_api`;
    # left as None here and assigned by subclasses
    _API_ENDPOINT = None
    # Cached `Basic ...` header sent to the token endpoint; built lazily by `_update_auth`
    _BASIC_AUTH = None
    # Indexed with the boolean `is_logged_in`: [0] when not logged in, [1] when logged in
    _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
    # Language code taken from the URL -> value sent as the `locale` query parameter.
    # The empty string is the entry used when the URL has no language segment.
    _LOCALE_LOOKUP = {
        'ar': 'ar-SA',
        'de': 'de-DE',
        '': 'en-US',
        'es': 'es-419',
        'es-es': 'es-ES',
        'fr': 'fr-FR',
        'it': 'it-IT',
        'pt-br': 'pt-BR',
        'pt-pt': 'pt-PT',
        'ru': 'ru-RU',
        'hi': 'hi-IN',
    }
@property
def is_logged_in(self):
    """Whether an ``etp_rt`` cookie is present for the Crunchyroll base URL."""
    site_cookies = self._get_cookies(self._BASE_URL)
    return bool(site_cookies.get('etp_rt'))
def _perform_login(self, username, password):
    """Log in with username/password unless the ``etp_rt`` cookie already exists.

    First requests a session id, then posts the credentials together with
    that session id. Raises ExtractorError if either response does not
    report ``code == 'ok'`` or if the login did not leave an ``etp_rt``
    cookie behind.
    """
    if self.is_logged_in:
        return

    session_query = {
        'sess_id': 1,
        'device_id': 'whatvalueshouldbeforweb',
        'device_type': 'com.crunchyroll.static',
        'access_token': 'giKq5eY27ny3cqz',
        'referer': f'{self._BASE_URL}/welcome/login'
    }
    upsell_response = self._download_json(
        f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
        query=session_query)
    if upsell_response['code'] != 'ok':
        raise ExtractorError('Could not get session id')

    credentials = {
        'account': username,
        'password': password,
        'session_id': upsell_response['data']['session_id']
    }
    login_response = self._download_json(
        f'{self._API_BASE}/login.1.json', None, 'Logging in',
        data=urlencode_postdata(credentials))
    if login_response['code'] != 'ok':
        raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)

    # The login only counts if it actually left the cookie the rest of the code relies on
    if not self.is_logged_in:
        raise ExtractorError('Login succeeded but did not set etp_rt cookie')
def _update_auth(self):
    """Make sure a current API token is cached on ``CrunchyrollBaseIE``.

    Does nothing while cached headers exist and the stored refresh time is
    still in the future. Otherwise requests a token from
    ``/auth/v1/token`` and stores the resulting ``Authorization`` header
    and the next refresh time on the base class, so all subclasses share it.

    Raises ExtractorError with a Cloudflare hint when the token request
    fails with HTTP 403; any other failure is re-raised unchanged.
    """
    shared = CrunchyrollBaseIE
    # `_AUTH_REFRESH` is only assigned together with `_AUTH_HEADERS`, so it
    # must not be read before the headers check has passed
    if shared._AUTH_HEADERS and shared._AUTH_REFRESH > time_seconds():
        return

    if not shared._BASIC_AUTH:
        # A bool indexes the tuple: False -> [0], True -> [1]
        cx_api_param = self._CLIENT_ID[self.is_logged_in]
        self.write_debug(f'Using cxApiParam={cx_api_param}')
        encoded_client = base64.b64encode(f'{cx_api_param}:'.encode()).decode()
        shared._BASIC_AUTH = 'Basic ' + encoded_client

    grant_type = 'client_id'
    if self.is_logged_in:
        grant_type = 'etp_rt_cookie'

    try:
        auth_response = self._download_json(
            f'{self._BASE_URL}/auth/v1/token', None,
            note=f'Authenticating with grant_type={grant_type}',
            headers={'Authorization': shared._BASIC_AUTH},
            data=f'grant_type={grant_type}'.encode())
    except ExtractorError as error:
        blocked = isinstance(error.cause, HTTPError) and error.cause.status == 403
        if not blocked:
            raise
        raise ExtractorError(
            'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
            'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
            'and your browser\'s User-Agent (with --user-agent)', expected=True)

    shared._AUTH_HEADERS = {
        'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token'],
    }
    # Refresh 10 seconds before the reported lifetime ends (300 if none is reported)
    lifetime = traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300)
    shared._AUTH_REFRESH = time_seconds(seconds=lifetime - 10)
def _locale_from_language(self, language):
    """Return the API locale for a URL language code, or None if unknown.

    The ``metadata`` extractor argument (read for ``CrunchyrollBetaIE``)
    takes precedence when given. Otherwise *language* is looked up in
    ``_LOCALE_LOOKUP``.

    *language* is the ``lang`` group of the matching ``_VALID_URL``. The
    extractors in this file capture it in two shapes: without a trailing
    slash and ``None`` when absent, or with a trailing slash and ``''``
    when absent. Both are normalized here so that e.g. ``'de'`` and
    ``'de/'`` resolve to the same locale and a missing language resolves
    to the ``''`` entry.
    """
    config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
    if config_locale:
        return config_locale[0]

    # Lookup keys are lowercase and carry no slash
    normalized = (language or '').rstrip('/').lower()
    return self._LOCALE_LOOKUP.get(normalized)
def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
    """Download JSON from *endpoint* on ``_BASE_URL`` with the cached auth headers.

    A leading slash is added to *endpoint* when missing. *query* is copied
    before the ``locale`` parameter is added, so the caller's dict (and the
    shared default) is never modified. ``locale`` is only sent when
    ``_locale_from_language(lang)`` returns a value.
    """
    self._update_auth()

    api_path = endpoint if endpoint.startswith('/') else f'/{endpoint}'

    params = query.copy()
    locale = self._locale_from_language(lang)
    if locale:
        params['locale'] = locale

    return self._download_json(
        f'{self._BASE_URL}{api_path}', internal_id, note or f'Calling API: {api_path}',
        headers=CrunchyrollBaseIE._AUTH_HEADERS, query=params)
def _call_api(self, path, internal_id, lang, note='api', query={}):
    """Call ``/content/v2/<_API_ENDPOINT>/<path>`` and return the parsed JSON.

    *path* may already carry the ``/content/v2/<_API_ENDPOINT>/`` prefix.
    Returns None when the request fails with HTTP 404; other request
    errors propagate. Raises ExtractorError when the response is empty.
    """
    prefix = f'/content/v2/{self._API_ENDPOINT}/'
    full_path = path if path.startswith(prefix) else f'{prefix}{path}'

    try:
        result = self._call_base_api(
            full_path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
    except ExtractorError as error:
        not_found = isinstance(error.cause, HTTPError) and error.cause.status == 404
        if not not_found:
            raise
        return None

    if result:
        return result
    raise ExtractorError(f'Unexpected response when downloading {note} JSON')
def _extract_formats(self, stream_response, display_id=None):
    """Build the format list from a streams API response.

    Only stream types named by the ``format`` extractor argument
    (default ``adaptive_hls``) are considered. The ``hardsub`` extractor
    argument selects the hardsub languages whose HLS manifests are
    downloaded in full; for other HLS streams only a single placeholder
    format from ``_m3u8_meta_format`` is added.
    """
    requested_formats = self._configuration_arg('format') or ['adaptive_hls']
    # Keyed by hardsub locale ('' = no hardsub). Because the stream type is not
    # part of the key, a later stream with the same hardsub locale replaces an
    # earlier one.
    available_formats = {}
    for stream_type, streams in traverse_obj(
            stream_response, (('streams', ('data', 0)), {dict.items}, ...)):
        if stream_type not in requested_formats:
            continue
        # Streams without a usable `url` are skipped by the filter
        for stream in traverse_obj(streams, lambda _, v: v['url']):
            hardsub_lang = stream.get('hardsub_locale') or ''
            format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
            available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])

    # 'none' on the command line means "no hardsub", stored as '' like above
    requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
    if '' in available_formats and 'all' not in requested_hardsubs:
        full_format_langs = set(requested_hardsubs)
        self.to_screen(
            'To get all formats of a hardsub language, use '
            '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
            'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
            only_once=True)
    else:
        # 'all' requested, or no hardsub-free stream exists: fully extract every language
        full_format_langs = set(map(str.lower, available_formats))

    audio_locale = traverse_obj(stream_response, ((None, 'meta'), 'audio_locale'), get_all=False)
    # NOTE(review): the list is reversed before ranking, presumably so that
    # hardsubs requested first get the highest `quality` - confirm against `qualities`
    hardsub_preference = qualities(requested_hardsubs[::-1])
    formats = []
    for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
        if stream_type.endswith('hls'):
            if hardsub_lang.lower() in full_format_langs:
                adaptive_formats = self._extract_m3u8_formats(
                    stream_url, display_id, 'mp4', m3u8_id=format_id,
                    fatal=False, note=f'Downloading {format_id} HLS manifest')
            else:
                # Not requested in full: add one format without downloading the manifest here
                adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
        elif stream_type.endswith('dash'):
            adaptive_formats = self._extract_mpd_formats(
                stream_url, display_id, mpd_id=format_id,
                fatal=False, note=f'Downloading {format_id} MPD manifest')
        else:
            self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
            continue
        for f in adaptive_formats:
            # Only formats that carry audio get the audio language
            if f.get('acodec') != 'none':
                f['language'] = audio_locale
            f['quality'] = hardsub_preference(hardsub_lang.lower())
        formats.extend(adaptive_formats)

    return formats
def _extract_subtitles(self, data):
    """Map each subtitle locale in *data* to a one-element list of ``{'url', 'ext'}``.

    Subtitles are read from both ``data['subtitles']`` and
    ``data['meta']['subtitles']``; if a locale occurs more than once, the
    entry seen last is kept.
    """
    subtitle_items = traverse_obj(data, ((None, 'meta'), 'subtitles', {dict.items}, ...))
    return {
        locale: [traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})]
        for locale, subtitle in subtitle_items
    }
class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
    _API_ENDPOINT = 'cms'
    # Time after which the signed policy must be fetched again; None until first use
    _CMS_EXPIRY = None

    def _call_cms_api_signed(self, path, internal_id, lang, note='api'):
        """Call the signed ``/cms/v2<bucket>/<path>`` API and return the parsed JSON.

        The signing parameters (policy, signature, key pair id), the bucket
        and their expiry are fetched from ``index/v2`` when missing or
        expired and are cached on ``CrunchyrollCmsBaseIE``. *path* may
        already start with ``/cms/v2``, in which case it is used as is.
        """
        cache = CrunchyrollCmsBaseIE
        expiry = cache._CMS_EXPIRY
        if not expiry or expiry <= time_seconds():
            signed = self._call_base_api('index/v2', None, lang, 'Retrieving signed policy')['cms_web']
            cache._CMS_QUERY = {
                'Policy': signed['policy'],
                'Signature': signed['signature'],
                'Key-Pair-Id': signed['key_pair_id'],
            }
            cache._CMS_BUCKET = signed['bucket']
            # Treat the policy as expired 10 seconds early
            cache._CMS_EXPIRY = parse_iso8601(signed['expires']) - 10

        signed_path = path
        if not signed_path.startswith('/cms/v2'):
            signed_path = f'/cms/v2{cache._CMS_BUCKET}/{signed_path}'

        return self._call_base_api(
            signed_path, internal_id, lang, f'Downloading {note} JSON (signed cms)',
            query=cache._CMS_QUERY)
class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
    IE_NAME = 'crunchyroll'
    # `lang` captures the language code WITHOUT its trailing slash and is None
    # when the URL has no language segment. `concert` and `musicvideo` watch
    # URLs are excluded by the negative lookahead.
    _VALID_URL = r'''(?x)
        https?://(?:beta\.|www\.)?crunchyroll\.com/
        (?:(?P<lang>\w{2}(?:-\w{2})?)/)?
        watch/(?!concert|musicvideo)(?P<id>\w+)'''
    _TESTS = [{
        # Premium only
        'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
        'info_dict': {
            'id': 'GY2P1Q98Y',
            'ext': 'mp4',
            'duration': 1380.241,
            'timestamp': 1459632600,
            'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
            'title': 'World Trigger Episode 73 – To the Future',
            'upload_date': '20160402',
            'series': 'World Trigger',
            'series_id': 'GR757DMKY',
            'season': 'World Trigger',
            'season_id': 'GR9P39NJ6',
            'season_number': 1,
            'episode': 'To the Future',
            'episode_number': 73,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'chapters': 'count:2',
            'age_limit': 14,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
    }, {
        # Premium only
        'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
        'info_dict': {
            'id': 'GYE5WKQGR',
            'ext': 'mp4',
            'duration': 366.459,
            'timestamp': 1476788400,
            'description': 'md5:74b67283ffddd75f6e224ca7dc031e76',
            'title': 'SHELTER – Porter Robinson presents Shelter the Animation',
            'upload_date': '20161018',
            'series': 'SHELTER',
            'series_id': 'GYGG09WWY',
            'season': 'SHELTER',
            'season_id': 'GR09MGK4R',
            'season_number': 1,
            'episode': 'Porter Robinson presents Shelter the Animation',
            'episode_number': 0,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'age_limit': 14,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://www.crunchyroll.com/watch/GJWU2VKK3/cherry-blossom-meeting-and-a-coming-blizzard',
        'info_dict': {
            'id': 'GJWU2VKK3',
            'ext': 'mp4',
            'duration': 1420.054,
            'description': 'md5:2d1c67c0ec6ae514d9c30b0b99a625cd',
            'title': 'The Ice Guy and His Cool Female Colleague Episode 1 – Cherry Blossom Meeting and a Coming Blizzard',
            'series': 'The Ice Guy and His Cool Female Colleague',
            'series_id': 'GW4HM75NP',
            'season': 'The Ice Guy and His Cool Female Colleague',
            'season_id': 'GY9PC21VE',
            'season_number': 1,
            'episode': 'Cherry Blossom Meeting and a Coming Blizzard',
            'episode_number': 1,
            'chapters': 'count:2',
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'timestamp': 1672839000,
            'upload_date': '20230104',
            'age_limit': 14,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/GM8F313NQ',
        'info_dict': {
            'id': 'GM8F313NQ',
            'ext': 'mp4',
            'title': 'Garakowa -Restore the World-',
            'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
            'duration': 3996.104,
            'age_limit': 13,
            'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
        'info_dict': {
            'id': 'G62PEZ2E6',
            'description': 'md5:8d2f8b6b9dd77d87810882e7d2ee5608',
            'age_limit': 13,
            'duration': 65.138,
            'title': 'Garakowa -Restore the World-',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
        'only_matching': True,
    }, {
        'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
        'only_matching': True,
    }]
    # We want to support lazy playlist filtering and movie listings cannot be inside a playlist
    _RETURN_TYPE = 'video'
def _real_extract(self, url):
    """Extract an episode or movie, or build a playlist for a movie listing.

    The object is looked up through the unsigned content API. Episodes and
    movies are turned into a single result with formats, subtitles, an
    optional intro chapter and like/dislike counts. A movie listing is
    returned as a playlist of its movies, or as a redirect to its first
    movie when a playlist is not wanted.

    Raises ExtractorError when the object cannot be found, has an unknown
    type, is premium-only while already logged in, or has no streams link.
    For premium-only content without a login, ``raise_login_required`` is
    called instead.
    """
    lang, internal_id = self._match_valid_url(url).group('lang', 'id')
    # _VALID_URL captures `lang` without its trailing slash and leaves it None
    # when the URL has no language segment, so it cannot be placed directly in
    # front of `watch/` (that would give `/dewatch/...` or `/Nonewatch/...`)
    lang_prefix = f'{lang.rstrip("/")}/' if lang else ''

    # We need to use unsigned API call to allow ratings query string
    response = traverse_obj(self._call_api(
        f'objects/{internal_id}', internal_id, lang, 'object info', {'ratings': 'true'}), ('data', 0, {dict}))
    if not response:
        raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)

    object_type = response.get('type')
    if object_type == 'episode':
        result = self._transform_episode_response(response)

    elif object_type == 'movie':
        result = self._transform_movie_response(response)

    elif object_type == 'movie_listing':
        first_movie_id = traverse_obj(response, ('movie_listing_metadata', 'first_movie_id'))
        if not self._yes_playlist(internal_id, first_movie_id):
            return self.url_result(
                f'{self._BASE_URL}/{lang_prefix}watch/{first_movie_id}', CrunchyrollBetaIE, first_movie_id)

        def entries():
            movies = self._call_api(f'movie_listings/{internal_id}/movies', internal_id, lang, 'movie list')
            for movie_response in traverse_obj(movies, ('data', ...)):
                yield self.url_result(
                    f'{self._BASE_URL}/{lang_prefix}watch/{movie_response["id"]}',
                    CrunchyrollBetaIE, **self._transform_movie_response(movie_response))

        return self.playlist_result(entries(), **self._transform_movie_response(response))

    else:
        raise ExtractorError(f'Unknown object type {object_type}')

    # There might be multiple audio languages for one object (`<object>_metadata.versions`),
    # so we need to get the id from `streams_link` instead or we dont know which language to choose
    streams_link = response.get('streams_link')
    if not streams_link and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
        message = f'This {object_type} is for premium members only'
        if self.is_logged_in:
            raise ExtractorError(message, expected=True)
        self.raise_login_required(message)
    if not streams_link:
        # Without this, the missing link would only surface further down as an
        # AttributeError on `None.startswith`
        raise ExtractorError(f'No streams link found for {object_type} {internal_id}')

    # We need go from unsigned to signed api to avoid getting soft banned
    stream_response = self._call_cms_api_signed(remove_start(
        streams_link, '/content/v2/cms/'), internal_id, lang, 'stream info')
    result['formats'] = self._extract_formats(stream_response, internal_id)
    result['subtitles'] = self._extract_subtitles(stream_response)

    # if no intro chapter is available, a 403 without usable data is returned
    intro_chapter = self._download_json(
        f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
        internal_id, note='Downloading chapter info', fatal=False, errnote=False)
    if isinstance(intro_chapter, dict):
        result['chapters'] = [{
            'title': 'Intro',
            'start_time': float_or_none(intro_chapter.get('startTime')),
            'end_time': float_or_none(intro_chapter.get('endTime')),
        }]

    def calculate_count(item):
        # The count is given as a displayed number plus an optional unit suffix
        return parse_count(''.join((item['displayed'], item.get('unit') or '')))

    result.update(traverse_obj(response, ('rating', {
        'like_count': ('up', {calculate_count}),
        'dislike_count': ('down', {calculate_count}),
    })))

    return result
@staticmethod
def _transform_episode_response(data):
    """Convert an episode object from the API into an info dict.

    Episode metadata is read from ``data['episode_metadata']`` when that is
    a dict, otherwise from *data* itself. The title has the form
    ``<season title> Episode <n> – <episode title>``.
    """
    metadata = traverse_obj(data, (('episode_metadata', None), {dict}), get_all=False) or {}

    def unescape_newlines(text):
        return text.replace(r'\r\n', '\n')

    def milliseconds_to_seconds(value):
        return float_or_none(value, 1000)

    info = {'id': data['id']}

    season_and_number = '%s%s' % (
        format_field(metadata, 'season_title'),
        format_field(metadata, 'episode', ' Episode %s'))
    info['title'] = ' \u2013 '.join((season_and_number, format_field(data, 'title')))

    info.update(traverse_obj(data, {
        'episode': ('title', {str}),
        'description': ('description', {str}, {unescape_newlines}),
        'thumbnails': ('images', 'thumbnail', ..., ..., {
            'url': ('source', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
        }),
    }))
    info.update(traverse_obj(metadata, {
        'duration': ('duration_ms', {milliseconds_to_seconds}),
        'timestamp': ('upload_date', {parse_iso8601}),
        'series': ('series_title', {str}),
        'series_id': ('series_id', {str}),
        'season': ('season_title', {str}),
        'season_id': ('season_id', {str}),
        'season_number': ('season_number', ({int}, {float_or_none})),
        'episode_number': ('sequence_number', ({int}, {float_or_none})),
        'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
        'language': ('audio_locale', {str}),
    }, get_all=False))
    return info
@staticmethod
def _transform_movie_response(data):
    """Convert a movie or movie listing object from the API into an info dict.

    Metadata is read from ``movie_metadata`` or ``movie_listing_metadata``
    when one of them is a dict, otherwise from *data* itself.
    """
    metadata = traverse_obj(
        data, (('movie_metadata', 'movie_listing_metadata', None), {dict}), get_all=False) or {}

    def unescape_newlines(text):
        return text.replace(r'\r\n', '\n')

    def milliseconds_to_seconds(value):
        return float_or_none(value, 1000)

    info = {'id': data['id']}
    info.update(traverse_obj(data, {
        'title': ('title', {str}),
        'description': ('description', {str}, {unescape_newlines}),
        'thumbnails': ('images', 'thumbnail', ..., ..., {
            'url': ('source', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
        }),
    }))
    info.update(traverse_obj(metadata, {
        'duration': ('duration_ms', {milliseconds_to_seconds}),
        'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
    }))
    return info
class CrunchyrollBetaShowIE(CrunchyrollCmsBaseIE):
    IE_NAME = 'crunchyroll:playlist'
    # `lang` captures the language code INCLUDING its trailing slash, and is
    # the empty string when the URL has no language segment
    _VALID_URL = r'''(?x)
        https?://(?:beta\.|www\.)?crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        series/(?P<id>\w+)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
        'info_dict': {
            'id': 'GY19NQ2QR',
            'title': 'Girl Friend BETA',
            'description': 'md5:99c1b22ee30a74b536a8277ced8eb750',
            # XXX: `thumbnail` does not get set from `thumbnails` in playlist
            # 'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'age_limit': 14,
        },
        'playlist_mincount': 10,
    }, {
        'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR',
        'only_matching': True,
    }]
def _real_extract(self, url):
    """Return a playlist with every episode of every season of a series.

    Series metadata comes from the unsigned content API. Seasons and
    episodes are listed lazily through the signed CMS API, and each
    episode becomes a URL result handled by ``CrunchyrollBetaIE``.
    """
    lang, internal_id = self._match_valid_url(url).group('lang', 'id')

    def unescape_newlines(text):
        return text.replace(r'\r\n', '\n')

    def entries():
        seasons_response = self._call_cms_api_signed(
            f'seasons?series_id={internal_id}', internal_id, lang, 'seasons')
        for season in traverse_obj(seasons_response, ('items', ..., {dict})):
            season_id = season["id"]
            episodes_response = self._call_cms_api_signed(
                f'episodes?season_id={season_id}', season_id, lang, 'episode list')
            for episode_response in traverse_obj(episodes_response, ('items', ..., {dict})):
                episode_info = CrunchyrollBetaIE._transform_episode_response(episode_response)
                yield self.url_result(
                    f'{self._BASE_URL}/{lang}watch/{episode_response["id"]}',
                    CrunchyrollBetaIE, **episode_info)

    series_response = self._call_api(f'series/{internal_id}', internal_id, lang, 'series')
    series_info = traverse_obj(series_response, ('data', 0, {
        'title': ('title', {str}),
        'description': ('description', {unescape_newlines}),
        'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
        'thumbnails': ('images', ..., ..., ..., {
            'url': ('source', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
        })
    }))
    return self.playlist_result(entries(), internal_id, **series_info)
class CrunchyrollMusicIE(CrunchyrollBaseIE):
    IE_NAME = 'crunchyroll:music'
    # `lang` captures the language code including its trailing slash ('' when
    # absent); `type` is either `concert` or `musicvideo`
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
        'info_dict': {
            'ext': 'mp4',
            'id': 'MV5B02C79',
            'display_id': 'egaono-hana',
            'title': 'Egaono Hana',
            'track': 'Egaono Hana',
            'artist': 'Goose house',
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'genre': ['J-Pop'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
        'info_dict': {
            'ext': 'mp4',
            'id': 'MV88BB7F2C',
            'display_id': 'crossing-field',
            'title': 'Crossing Field',
            'track': 'Crossing Field',
            'artist': 'LiSA',
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'genre': ['Anime'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
        'info_dict': {
            'ext': 'mp4',
            'id': 'MC2E2AC135',
            'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
            'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
            'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
            'artist': 'LiSA',
            'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
            'description': 'md5:747444e7e6300907b7a43f0a0503072e',
            'genre': ['J-Pop'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
        'only_matching': True,
    }, {
        'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
        'only_matching': True,
    }, {
        'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
        'only_matching': True,
    }]
    # Requests made through `_call_api` go to `/content/v2/music/...`
    _API_ENDPOINT = 'music'
def _real_extract(self, url):
    """Extract a concert or music video.

    Raises ExtractorError when the object cannot be found, is premium-only
    while already logged in, or has no streams link. For premium-only
    content without a login, ``raise_login_required`` is called instead.
    """
    lang, internal_id, object_type = self._match_valid_url(url).group('lang', 'id', 'type')
    # URL type -> (API collection, note shown while downloading)
    path, name = {
        'concert': ('concerts', 'concert info'),
        'musicvideo': ('music_videos', 'music video info'),
    }[object_type]
    response = traverse_obj(self._call_api(f'{path}/{internal_id}', internal_id, lang, name), ('data', 0, {dict}))
    if not response:
        raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)

    streams_link = response.get('streams_link')
    if not streams_link and response.get('isPremiumOnly'):
        message = f'This {response.get("type") or "media"} is for premium members only'
        if self.is_logged_in:
            raise ExtractorError(message, expected=True)
        self.raise_login_required(message)
    if not streams_link:
        # Without this, the missing link would only surface inside `_call_api`
        # as an AttributeError on `None.startswith`
        raise ExtractorError(f'No streams link found for {object_type} {internal_id}')

    result = self._transform_music_response(response)
    stream_response = self._call_api(streams_link, internal_id, lang, 'stream info')
    result['formats'] = self._extract_formats(stream_response, internal_id)

    return result
@staticmethod
def _transform_music_response(data):
    """Convert a concert or music video object from the API into an info dict.

    ``title`` and ``track`` are both taken from the object's title. An
    empty description is dropped.
    """
    def clean_description(text):
        # An empty string becomes None so the field is left out
        return text.replace(r'\r\n', '\n') or None

    info = {'id': data['id']}
    info.update(traverse_obj(data, {
        'display_id': 'slug',
        'title': 'title',
        'track': 'title',
        'artist': ('artist', 'name'),
        'description': ('description', {str}, {clean_description}),
        'thumbnails': ('images', ..., ..., {
            'url': ('source', {url_or_none}),
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
        }),
        'genre': ('genres', ..., 'displayValue'),
        'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
    }))
    return info
class CrunchyrollArtistIE(CrunchyrollBaseIE):
    IE_NAME = 'crunchyroll:artist'
    # `lang` captures the language code including its trailing slash ('' when
    # absent); the artist id is exactly 10 word characters
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?crunchyroll\.com/
        (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
        artist/(?P<id>\w{10})'''
    _TESTS = [{
        'url': 'https://www.crunchyroll.com/artist/MA179CB50D',
        'info_dict': {
            'id': 'MA179CB50D',
            'title': 'LiSA',
            'genre': ['J-Pop', 'Anime', 'Rock'],
            'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
        },
        'playlist_mincount': 83,
    }, {
        'url': 'https://www.crunchyroll.com/artist/MA179CB50D/lisa',
        'only_matching': True,
    }]
    # Requests made through `_call_api` go to `/content/v2/music/...`
    _API_ENDPOINT = 'music'
def _real_extract(self, url):
    """Return a playlist of an artist's concerts and music videos.

    Each entry is a URL result handled by ``CrunchyrollMusicIE``. The
    language segment of the artist URL is carried over to the entry URLs.

    Raises ExtractorError when no artist data is returned (``_call_api``
    yields None on HTTP 404); otherwise the missing data would only show up
    as a TypeError in ``_transform_artist_response``.
    """
    lang, internal_id = self._match_valid_url(url).group('lang', 'id')
    response = traverse_obj(self._call_api(
        f'artists/{internal_id}', internal_id, lang, 'artist info'), ('data', 0, {dict}))
    if not response:
        raise ExtractorError(
            f'No artist with id {internal_id} could be found (possibly region locked?)', expected=True)

    # `lang` already ends with a slash when present (see _VALID_URL)
    lang_prefix = lang or ''

    def entries():
        for attribute, path in [('concerts', 'concert'), ('videos', 'musicvideo')]:
            # A separate name avoids shadowing the artist's `internal_id`
            for media_id in traverse_obj(response, (attribute, ...)):
                yield self.url_result(
                    f'{self._BASE_URL}/{lang_prefix}watch/{path}/{media_id}', CrunchyrollMusicIE, media_id)

    return self.playlist_result(entries(), **self._transform_artist_response(response))
636 @staticmethod
637 def _transform_artist_response(data):
638 return {
639 'id': data['id'],
640 **traverse_obj(data, {
641 'title': 'name',
642 'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n')}),
643 'thumbnails': ('images', ..., ..., {
644 'url': ('source', {url_or_none}),
645 'width': ('width', {int_or_none}),
646 'height': ('height', {int_or_none}),
648 'genre': ('genres', ..., 'displayValue'),