5 from .common
import InfoExtractor
22 class BandcampIE(InfoExtractor
):
23 _VALID_URL
= r
'https?://(?P<uploader>[^/]+)\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
24 _EMBED_REGEX
= [r
'<meta property="og:url"[^>]*?content="(?P<url>.*?bandcamp\.com.*?)"']
26 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
27 'md5': 'c557841d5e50261777a6585648adf439',
31 'title': 'youtube-dl "\'/\\ä↭ - youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭',
33 'uploader': 'youtube-dl "\'/\\ä↭',
34 'upload_date': '20121129',
35 'timestamp': 1354224127,
36 'track': 'youtube-dl "\'/\\ä↭ - youtube-dl test song "\'/\\ä↭',
37 'album_artist': 'youtube-dl "\'/\\ä↭',
38 'track_id': '1812978515',
39 'artist': 'youtube-dl "\'/\\ä↭',
40 'uploader_url': 'https://youtube-dl.bandcamp.com',
41 'uploader_id': 'youtube-dl',
42 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
44 'skip': 'There is a limit of 200 free downloads / month for the test song',
47 'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
51 'acodec': r
're:[fa]lac',
52 'title': 'Ben Prunty - Lanius (Battle)',
53 'thumbnail': r
're:^https?://.*\.jpg$',
54 'uploader': 'Ben Prunty',
55 'timestamp': 1396508491,
56 'upload_date': '20140403',
57 'release_timestamp': 1396483200,
58 'release_date': '20140403',
60 'track': 'Lanius (Battle)',
62 'track_id': '2650410135',
63 'artist': 'Ben Prunty',
64 'album_artist': 'Ben Prunty',
65 'album': 'FTL: Advanced Edition Soundtrack',
66 'uploader_url': 'https://benprunty.bandcamp.com',
67 'uploader_id': 'benprunty',
70 # no free download, mp3 128
71 'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
72 'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
76 'title': 'Mastodon - Hail to Fire',
77 'thumbnail': r
're:^https?://.*\.jpg$',
78 'uploader': 'Mastodon',
79 'timestamp': 1322005399,
80 'upload_date': '20111122',
81 'release_timestamp': 1076112000,
82 'release_date': '20040207',
84 'track': 'Hail to Fire',
86 'track_id': '2584466013',
88 'album_artist': 'Mastodon',
89 'album': 'Call of the Mastodon',
90 'uploader_url': 'https://relapsealumni.bandcamp.com',
91 'uploader_id': 'relapsealumni',
94 # track from compilation album (artist/album_artist difference)
95 'url': 'https://diskotopia.bandcamp.com/track/safehouse',
96 'md5': '19c5337bca1428afa54129f86a2f6a69',
100 'title': 'submerse - submerse - Safehouse',
101 'thumbnail': r
're:^https?://.*\.jpg$',
102 'uploader': 'submerse',
103 'timestamp': 1480779297,
104 'upload_date': '20161203',
105 'release_timestamp': 1481068800,
106 'release_date': '20161207',
108 'track': 'submerse - Safehouse',
110 'track_id': '1978174799',
111 'artist': 'submerse',
112 'album_artist': 'Diskotopia',
113 'album': 'DSK F/W 2016-2017 Free Compilation',
114 'uploader_url': 'https://diskotopia.bandcamp.com',
115 'uploader_id': 'diskotopia',
119 def _extract_data_attr(self
, webpage
, video_id
, attr
='tralbum', fatal
=True):
120 return self
._parse
_json
(self
._html
_search
_regex
(
121 rf
'data-{attr}=(["\'])({{.+?}})\1', webpage
,
122 attr
+ ' data', group
=2), video_id
, fatal
=fatal
)
124 def _real_extract(self
, url
):
125 title
, uploader
= self
._match
_valid
_url
(url
).group('id', 'uploader')
126 webpage
= self
._download
_webpage
(url
, title
)
127 tralbum
= self
._extract
_data
_attr
(webpage
, title
)
128 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
136 track_info
= try_get(tralbum
, lambda x
: x
['trackinfo'][0], dict)
138 file_
= track_info
.get('file')
139 if isinstance(file_
, dict):
140 for format_id
, format_url
in file_
.items():
141 if not url_or_none(format_url
):
143 ext
, abr_str
= format_id
.split('-', 1)
145 'format_id': format_id
,
146 'url': self
._proto
_relative
_url
(format_url
, 'http:'),
150 'abr': int_or_none(abr_str
),
152 track
= track_info
.get('title')
153 track_id
= str_or_none(
154 track_info
.get('track_id') or track_info
.get('id'))
155 track_number
= int_or_none(track_info
.get('track_num'))
156 duration
= float_or_none(track_info
.get('duration'))
158 embed
= self
._extract
_data
_attr
(webpage
, title
, 'embed', False)
159 current
= tralbum
.get('current') or {}
160 artist
= embed
.get('artist') or current
.get('artist') or tralbum
.get('artist')
161 album_artist
= self
._html
_search
_regex
(
162 r
'<h3 class="albumTitle">[\S\s]*?by\s*<span>\s*<a href="[^>]+">\s*([^>]+?)\s*</a>',
163 webpage
, 'album artist', fatal
=False)
164 timestamp
= unified_timestamp(
165 current
.get('publish_date') or tralbum
.get('album_publish_date'))
167 download_link
= tralbum
.get('freeDownloadPage')
169 track_id
= str(tralbum
['id'])
171 download_webpage
= self
._download
_webpage
(
172 download_link
, track_id
, 'Downloading free downloads page')
174 blob
= self
._extract
_data
_attr
(download_webpage
, track_id
, 'blob')
177 blob
, (lambda x
: x
['digital_items'][0],
178 lambda x
: x
['download_items'][0]), dict)
180 downloads
= info
.get('downloads')
181 if isinstance(downloads
, dict):
183 track
= info
.get('title')
185 artist
= info
.get('artist')
187 thumbnail
= info
.get('thumb_url')
189 download_formats
= {}
190 download_formats_list
= blob
.get('download_formats')
191 if isinstance(download_formats_list
, list):
192 for f
in blob
['download_formats']:
193 name
, ext
= f
.get('name'), f
.get('file_extension')
194 if all(isinstance(x
, str) for x
in (name
, ext
)):
195 download_formats
[name
] = ext
.strip('.')
197 for format_id
, f
in downloads
.items():
198 format_url
= f
.get('url')
201 # Stat URL generation algorithm is reverse engineered from
202 # download_*_bundle_*.js
203 stat_url
= update_url_query(
204 format_url
.replace('/download/', '/statdownload/'), {
205 '.rand': int(time
.time() * 1000 * random
.random()),
207 format_id
= f
.get('encoding_name') or format_id
208 stat
= self
._download
_json
(
209 stat_url
, track_id
, f
'Downloading {format_id} JSON',
210 transform_source
=lambda s
: s
[s
.index('{'):s
.rindex('}') + 1],
214 retry_url
= url_or_none(stat
.get('retry_url'))
218 'url': self
._proto
_relative
_url
(retry_url
, 'http:'),
219 'ext': download_formats
.get(format_id
),
220 'format_id': format_id
,
221 'format_note': f
.get('description'),
222 'filesize': parse_filesize(f
.get('size_mb')),
224 'acodec': format_id
.split('-')[0],
227 title
= f
'{artist} - {track}' if artist
else track
230 duration
= float_or_none(self
._html
_search
_meta
(
231 'duration', webpage
, default
=None))
236 'thumbnail': thumbnail
,
238 'uploader_id': uploader
,
239 'uploader_url': f
'https://{uploader}.bandcamp.com',
240 'timestamp': timestamp
,
241 'release_timestamp': unified_timestamp(tralbum
.get('album_release_date')),
242 'duration': duration
,
244 'track_number': track_number
,
245 'track_id': track_id
,
247 'album': embed
.get('album_title'),
248 'album_artist': album_artist
,
253 class BandcampAlbumIE(BandcampIE
): # XXX: Do not subclass from concrete IE
254 IE_NAME
= 'Bandcamp:album'
255 _VALID_URL
= r
'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)'
258 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
261 'md5': '39bc1eded3476e927c724321ddf116cf',
265 'title': 'Blazo - Intro',
266 'timestamp': 1311756226,
267 'upload_date': '20110727',
272 'md5': '1a2c32e2691474643e912cc6cd4bffaa',
276 'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
277 'timestamp': 1311757238,
278 'upload_date': '20110727',
284 'title': 'Jazz Format Mixtape vol.1',
285 'id': 'jazz-format-mixtape-vol-1',
286 'uploader_id': 'blazo',
291 'skip': 'Bandcamp imposes download limits.',
293 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
295 'title': 'Hierophany of the Open Grave',
296 'uploader_id': 'nightbringer',
297 'id': 'hierophany-of-the-open-grave',
299 'playlist_mincount': 9,
301 # with escaped quote in title
302 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
304 'title': '"Entropy" EP',
305 'uploader_id': 'jstrecords',
307 'description': 'md5:0ff22959c943622972596062f2f366a5',
309 'playlist_mincount': 3,
311 # not all tracks have songs
312 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
314 'id': 'we-are-the-plague',
315 'title': 'WE ARE THE PLAGUE',
316 'uploader_id': 'insulters',
317 'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
323 def suitable(cls
, url
):
325 if BandcampWeeklyIE
.suitable(url
) or BandcampIE
.suitable(url
)
326 else super().suitable(url
))
328 def _real_extract(self
, url
):
329 uploader_id
, album_id
= self
._match
_valid
_url
(url
).groups()
330 playlist_id
= album_id
or uploader_id
331 webpage
= self
._download
_webpage
(url
, playlist_id
)
332 tralbum
= self
._extract
_data
_attr
(webpage
, playlist_id
)
333 track_info
= tralbum
.get('trackinfo')
335 raise ExtractorError('The page doesn\'t contain any tracks')
336 # Only tracks with duration info have songs
339 urljoin(url
, t
['title_link']), BandcampIE
.ie_key(),
340 str_or_none(t
.get('track_id') or t
.get('id')), t
.get('title'))
342 if t
.get('duration')]
344 current
= tralbum
.get('current') or {}
348 'uploader_id': uploader_id
,
350 'title': current
.get('title'),
351 'description': current
.get('about'),
356 class BandcampWeeklyIE(BandcampIE
): # XXX: Do not subclass from concrete IE
357 IE_NAME
= 'Bandcamp:weekly'
358 _VALID_URL
= r
'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
360 'url': 'https://bandcamp.com/?show=224',
361 'md5': 'b00df799c733cf7e0c567ed187dea0fd',
365 'title': 'BC Weekly April 4th 2017 - Magic Moments',
366 'description': 'md5:5d48150916e8e02d030623a48512c874',
368 'release_date': '20170404',
369 'series': 'Bandcamp Weekly',
370 'episode': 'Magic Moments',
377 'url': 'https://bandcamp.com/?blah/blah@&show=228',
378 'only_matching': True,
381 def _real_extract(self
, url
):
382 show_id
= self
._match
_id
(url
)
383 webpage
= self
._download
_webpage
(url
, show_id
)
385 blob
= self
._extract
_data
_attr
(webpage
, show_id
, 'blob')
387 show
= blob
['bcw_data'][show_id
]
390 for format_id
, format_url
in show
['audio_stream'].items():
391 if not url_or_none(format_url
):
393 for known_ext
in KNOWN_EXTENSIONS
:
394 if known_ext
in format_id
:
400 'format_id': format_id
,
406 title
= show
.get('audio_title') or 'Bandcamp Weekly'
407 subtitle
= show
.get('subtitle')
409 title
+= f
' - {subtitle}'
414 'description': show
.get('desc') or show
.get('short_desc'),
415 'duration': float_or_none(show
.get('audio_duration')),
417 'release_date': unified_strdate(show
.get('published_date')),
418 'series': 'Bandcamp Weekly',
419 'episode': show
.get('subtitle'),
420 'episode_id': show_id
,
425 class BandcampUserIE(InfoExtractor
):
426 IE_NAME
= 'Bandcamp:user'
427 _VALID_URL
= r
'https?://(?!www\.)(?P<id>[^.]+)\.bandcamp\.com(?:/music)?/?(?:[#?]|$)'
430 # Type 1 Bandcamp user page.
431 'url': 'https://adrianvonziegler.bandcamp.com',
433 'id': 'adrianvonziegler',
434 'title': 'Discography of adrianvonziegler',
436 'playlist_mincount': 23,
438 # Bandcamp user page with only one album
439 'url': 'http://dotscale.bandcamp.com',
442 'title': 'Discography of dotscale',
446 # Type 2 Bandcamp user page.
447 'url': 'https://nightcallofficial.bandcamp.com',
449 'id': 'nightcallofficial',
450 'title': 'Discography of nightcallofficial',
454 'url': 'https://steviasphere.bandcamp.com/music',
455 'playlist_mincount': 47,
457 'id': 'steviasphere',
458 'title': 'Discography of steviasphere',
461 'url': 'https://coldworldofficial.bandcamp.com/music',
462 'playlist_mincount': 10,
464 'id': 'coldworldofficial',
465 'title': 'Discography of coldworldofficial',
468 'url': 'https://nuclearwarnowproductions.bandcamp.com/music',
469 'playlist_mincount': 399,
471 'id': 'nuclearwarnowproductions',
472 'title': 'Discography of nuclearwarnowproductions',
476 def _real_extract(self
, url
):
477 uploader
= self
._match
_id
(url
)
478 webpage
= self
._download
_webpage
(url
, uploader
)
480 discography_data
= (re
.findall(r
'<li data-item-id=["\'][^
>]+>\s
*<a href
=["\'](?![^"\'/]*?
/merch
)([^
"\']+)', webpage)
481 or re.findall(r'<div[^>]+trackTitle["\'][^
"\']+["\']([^
"\']+)', webpage))
483 return self.playlist_from_matches(
484 discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))