[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / bandlab.py
blob64aa2ba70d79a5d82082de54d33aec7b3734df50
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 float_or_none,
5 format_field,
6 int_or_none,
7 parse_iso8601,
8 parse_qs,
9 truncate_string,
10 url_or_none,
12 from ..utils.traversal import traverse_obj, value
class BandlabBaseIE(InfoExtractor):
    """Shared API access and metadata mapping for the BandLab extractors."""

    def _call_api(self, endpoint, asset_id, **kwargs):
        """Download ``/api/v1.3/<endpoint>/<asset_id>`` as JSON.

        A ``headers`` keyword argument, if given, is merged over the default
        web-client headers; every other keyword argument is forwarded
        unchanged to ``_download_json``.
        """
        extra_headers = kwargs.pop('headers', None) or {}
        request_headers = {
            'accept': 'application/json',
            'referer': 'https://www.bandlab.com/',
            'x-client-id': 'BandLab-Web',
            'x-client-version': '10.1.124',
            # Caller-supplied headers come last so they override the defaults
            **extra_headers,
        }
        api_url = f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}'
        return self._download_json(api_url, asset_id, headers=request_headers, **kwargs)

    def _parse_revision(self, revision_data, url=None):
        """Build an audio-only info dict from a revision JSON object.

        ``url`` is offered as the first ``webpage_url`` candidate; a URL
        derived from the revision ID is the second candidate.
        """
        info = {
            'vcodec': 'none',
            'media_type': 'revision',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        # Traversed fields are applied last, so they win over the static ones
        info.update(traverse_obj(revision_data, {
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
            'id': (('revisionId', 'id'), {str}, any),
            'title': ('song', 'name', {str}),
            'track': ('song', 'name', {str}),
            'url': ('mixdown', 'file', {url_or_none}),
            'thumbnail': ('song', 'picture', 'url', {url_or_none}),
            'description': ('description', {str}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
            'duration': ('mixdown', 'duration', {float_or_none}),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'genres': ('genres', ..., 'name', {str}),
        }))
        return info

    def _parse_track(self, track_data, url=None):
        """Build an audio-only info dict from a post whose payload is under ``track``.

        ``url`` is offered as the first ``webpage_url`` candidate; a post URL
        derived from the ID is the second candidate.
        """
        info = {
            'vcodec': 'none',
            'media_type': 'track',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        info.update(traverse_obj(track_data, {
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
            'id': (('revisionId', 'id'), {str}, any),
            'url': ('track', 'sample', 'audioUrl', {url_or_none}),
            'title': ('track', 'name', {str}),
            'track': ('track', 'name', {str}),
            'description': ('caption', {str}),
            'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('track', 'sample', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
        }))
        return info

    def _parse_video(self, video_data, url=None):
        """Build an info dict from a post whose payload is under ``video``.

        The title is taken from the caption, with newlines replaced by spaces,
        and then passed through ``truncate_string(left=50)``.
        """
        info = {
            'media_type': 'video',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        info.update(traverse_obj(video_data, {
            'id': ('id', {str}),
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
            'url': ('video', 'url', {url_or_none}),
            'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
            'description': ('caption', {str}),
            'thumbnail': ('video', 'picture', 'url', {url_or_none}),
            'view_count': ('video', 'counters', 'plays', {int_or_none}),
            'like_count': ('video', 'counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('video', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
        }))
        return info
class BandlabIE(BandlabBaseIE):
    """Extractor for single BandLab tracks, posts, revisions and track embeds."""

    # The host dot is escaped so that only the literal "bandlab.com" matches;
    # an unescaped "." would also accept hosts such as "bandlabxcom".
    _VALID_URL = [
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    # Embedded players are detected through the second (embed) URL pattern
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171963,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # Same track as above but post URL
        'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171973,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # SharedKey Example
        'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
        'md5': '15174b57c44440e2a2008be9cae00250',
        'info_dict': {
            'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Other'],
            'uploader_id': 'user8353034818103753',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
            'timestamp': 1709625771,
            'track': 'PodcastMaerchen4b',
            'duration': 468.14,
            'view_count': int,
            'description': 'Podcast: Neues aus der Märchenwelt',
            'like_count': int,
            'upload_date': '20240305',
            'uploader': 'Erna Wageneder',
            'title': 'PodcastMaerchen4b',
            'media_type': 'revision',
        },
    }, {
        # Different Revision selected
        'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
        'md5': '74e055ef9325d63f37088772fbfe4454',
        'info_dict': {
            'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
            'ext': 'm4a',
            'timestamp': 1588273294,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
            'description': 'Final Revision.',
            'title': 'Replay ( Instrumental)',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
            'view_count': int,
            'comment_count': int,
            'track': 'Replay ( Instrumental)',
            'genres': ['Rock'],
            'upload_date': '20200430',
            'like_count': int,
            'duration': 279.43,
            'media_type': 'revision',
        },
    }, {
        # Video
        'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
        'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
        'info_dict': {
            'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
            'ext': 'mp4',
            'duration': 44.705,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
            'comment_count': int,
            'title': 'backing vocals',
            'uploader_id': 'marliashya',
            'uploader': 'auraa',
            'like_count': int,
            'description': 'backing vocals',
            'media_type': 'video',
        },
    }, {
        # Embed Example
        'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
        'info_dict': {
            'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Electronic'],
            'uploader': 'Charlie Henson',
            'timestamp': 1587328674,
            'upload_date': '20200419',
            'view_count': int,
            'track': 'Positronic Meltdown',
            'duration': 318.55,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
            'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
            'uploader_id': 'microfreaks',
            'title': 'Positronic Meltdown',
            'like_count': int,
            'media_type': 'revision',
        },
    }, {
        # Track without revisions available
        'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
        'md5': 'f05d68a3769952c2d9257c473e14c15f',
        'info_dict': {
            'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
            'ext': 'm4a',
            'track': 'insame',
            'like_count': int,
            'duration': 84.03,
            'title': 'insame',
            'view_count': int,
            'comment_count': int,
            'uploader': 'Sorakime',
            'uploader_id': 'sorakime',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
            'timestamp': 1691162128,
            'upload_date': '20230804',
            'media_type': 'track',
        },
    }, {
        'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://phantomluigi.github.io/',
        'info_dict': {
            'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
            'ext': 'm4a',
            'view_count': int,
            'upload_date': '20240913',
            'uploader_id': 'phantommusicofficial',
            'timestamp': 1726194897,
            'uploader': 'Phantom',
            'comment_count': int,
            'genres': ['Progresive Rock'],
            'description': 'md5:a38cd668f7a2843295ef284114f18429',
            'duration': 225.23,
            'like_count': int,
            'title': 'Vermilion Pt. 2 (Cover)',
            'track': 'Vermilion Pt. 2 (Cover)',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
            'media_type': 'revision',
        },
    }]

    def _real_extract(self, url):
        """Resolve a track/post/revision/embed URL to a single info dict.

        The revision ID is taken from the ``revId``/``id`` query parameter, or
        from the path for ``/revision/`` URLs. Otherwise the post is fetched
        and either its revision is used or, when it has none, it is parsed as
        a ``Video`` or ``Track`` post.

        Raises ExtractorError for a revision-less post of any other type.
        """
        display_id, url_type = self._match_valid_url(url).group('id', 'url_type')

        qs = parse_qs(url)
        revision_id = traverse_obj(qs, (('revId', 'id'), 0, any))
        if url_type == 'revision':
            # For /revision/ URLs the path component is the revision ID itself
            revision_id = display_id

        revision_data = None
        if not revision_id:
            post_data = self._call_api(
                'posts', display_id, note='Downloading post data',
                query=traverse_obj(qs, {'sharedKey': ('sharedKey', 0)}))

            revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any))
            revision_data = traverse_obj(post_data, ('revision', {dict}))

            if not revision_data and not revision_id:
                # No revision attached: the post itself carries the media
                post_type = post_data.get('type')
                if post_type == 'Video':
                    return self._parse_video(post_data, url=url)
                if post_type == 'Track':
                    return self._parse_track(post_data, url=url)
                raise ExtractorError(f'Could not extract data for post type {post_type!r}')

        if not revision_data:
            # Only the ID is known so far; fetch the full revision object
            revision_data = self._call_api(
                'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})

        return self._parse_revision(revision_data, url=url)
class BandlabPlaylistIE(BandlabBaseIE):
    """Extractor for BandLab albums, collections and collection embeds."""

    # The host dot is escaped so that only the literal "bandlab.com" matches;
    # an unescaped "." would also accept hosts such as "bandlabxcom".
    _VALID_URL = [
        r'https?://(?:www\.)?bandlab\.com/(?:[\w]+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    # Embedded players are detected through the second (embed) URL pattern
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'info_dict': {
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
            'release_date': '20221003',
            'title': 'Remnants',
            'album': 'Remnants',
            'like_count': int,
            'album_type': 'LP',
            'description': 'A collection of some feel good, rock hits.',
            'comment_count': int,
            'view_count': int,
            'id': '89b79ea6-de42-ed11-b495-00224845aac7',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
        'info_dict': {
            'id': '955102d4-1040-ef11-86c3-000d3a42581b',
            'timestamp': 1720762659,
            'view_count': int,
            'title': 'My Shit 🖤',
            'uploader_id': 'slytheband',
            'uploader': '𝓢𝓛𝓨',
            'upload_date': '20240712',
            'like_count': int,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
        },
        'playlist_count': 15,
    }, {
        # Embeds can contain both albums and collections with the same URL pattern. This is an album
        'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
        'info_dict': {
            'id': '12cc6f7f-951b-ee11-907c-00224844f303',
            'release_date': '20230706',
            'description': 'This is a collection of songs I created when I had an Amiga computer.',
            'view_count': int,
            'title': 'Mark Salud The Amiga Collection',
            'uploader_id': 'mssirmooth1962',
            'comment_count': int,
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
            'like_count': int,
            'uploader': 'Mark Salud',
            'album': 'Mark Salud The Amiga Collection',
            'album_type': 'LP',
        },
        'playlist_count': 24,
    }, {
        # Tracks without revision id
        'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
        'info_dict': {
            'like_count': int,
            'uploader_id': 'sorakime',
            'comment_count': int,
            'uploader': 'Sorakime',
            'view_count': int,
            'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
            'release_date': '20230812',
            'title': 'Art',
            'album': 'Art',
            'album_type': 'Album',
            'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'only_matching': True,
    }]

    def _entries(self, album_data):
        """Yield one info dict per post in ``album_data['posts']``.

        Posts without a truthy ``type`` are filtered out; posts of an
        unrecognised type are skipped with a warning.
        """
        for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])):
            post_type = post['type']
            if post_type == 'Revision':
                yield self._parse_revision(post.get('revision'))
            elif post_type == 'Track':
                yield self._parse_track(post)
            elif post_type == 'Video':
                yield self._parse_video(post)
            else:
                self.report_warning(f'Skipping unknown post type: "{post_type}"')

    def _real_extract(self, url):
        """Download an album or collection and return it as a playlist result.

        Embed URLs do not say which kind they refer to, so both endpoints are
        tried in turn and the first response without an ``errorCode`` wins.

        Raises ExtractorError when no endpoint returns usable data.
        """
        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')

        endpoints = {
            'albums': ['albums'],
            'collections': ['collections'],
            'embed': ['collections', 'albums'],
        }.get(playlist_type)

        playlist_data = None
        for endpoint in endpoints:
            playlist_data = self._call_api(
                endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
                fatal=False, expected_status=404)
            # With fatal=False a failed download does not yield a dict, so check
            # truthiness before calling .get() on the result
            if playlist_data and not playlist_data.get('errorCode'):
                playlist_type = endpoint
                break

        if not playlist_data:
            raise ExtractorError('Could not download playlist data')
        if error_code := playlist_data.get('errorCode'):
            raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')

        return self.playlist_result(
            self._entries(playlist_data), playlist_id,
            **traverse_obj(playlist_data, {
                'title': ('name', {str}),
                'description': ('description', {str}),
                'uploader': ('creator', 'name', {str}),
                'uploader_id': ('creator', 'username', {str}),
                'timestamp': ('createdOn', {parse_iso8601}),
                'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
                'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
                'like_count': ('counters', 'likes', {int_or_none}),
                'comment_count': ('counters', 'comments', {int_or_none}),
                'view_count': ('counters', 'plays', {int_or_none}),
            }),
            # Album-specific fields are only added when the albums endpoint answered
            **(traverse_obj(playlist_data, {
                'album': ('name', {str}),
                'album_type': ('type', {str}),
            }) if playlist_type == 'albums' else {}))