yt_dlp/extractor/jamendo.py

   1 import hashlib
   2 import random
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     clean_html,
   7     int_or_none,
   8     try_get,
   9 )
  10
  11
  12 class JamendoIE(InfoExtractor):
  13     _VALID_URL = r'''(?x)
  14                     https?://
  15                         (?:
  16                             licensing\.jamendo\.com/[^/]+|
  17                             (?:www\.)?jamendo\.com
  18                         )
  19                         /track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))?
  20                     '''
  21     _TESTS = [{
  22         'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
  23         'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
  24         'info_dict': {
  25             'id': '196219',
  26             'display_id': 'stories-from-emona-i',
  27             'ext': 'flac',
  28             # 'title': 'Maya Filipič - Stories from Emona I',
  29             'title': 'Stories from Emona I',
  30             'artist': 'Maya Filipič',
  31             'album': 'Between two worlds',
  32             'track': 'Stories from Emona I',
  33             'duration': 210,
  34             'thumbnail': 'https://usercontent.jamendo.com?type=album&id=29279&width=300&trackid=196219',
  35             'timestamp': 1217438117,
  36             'upload_date': '20080730',
  37             'license': 'by-nc-nd',
  38             'view_count': int,
  39             'like_count': int,
  40             'average_rating': int,
  41             'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
  42         },
  43     }, {
  44         'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
  45         'only_matching': True,
  46     }]
  47
  48     def _call_api(self, resource, resource_id, fatal=True):
  49         path = f'/api/{resource}s'
  50         rand = str(random.random())
  51         return self._download_json(
  52             'https://www.jamendo.com' + path, resource_id, fatal=fatal, query={
  53                 'id[]': resource_id,
  54             }, headers={
  55                 'X-Jam-Call': f'${hashlib.sha1((path + rand).encode()).hexdigest()}*{rand}~',
  56             })[0]
  57
  58     def _real_extract(self, url):
  59         track_id, display_id = self._match_valid_url(url).groups()
  60         # webpage = self._download_webpage(
  61         #     'https://www.jamendo.com/track/' + track_id, track_id)
  62         # models = self._parse_json(self._html_search_regex(
  63         #     r"data-bundled-models='([^']+)",
  64         #     webpage, 'bundled models'), track_id)
  65         # track = models['track']['models'][0]
  66         track = self._call_api('track', track_id)
  67         title = track_name = track['name']
  68         # get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
  69         # artist = get_model('artist')
  70         # artist_name = artist.get('name')
  71         # if artist_name:
  72         #     title = '%s - %s' % (artist_name, title)
  73         # album = get_model('album')
  74         artist = self._call_api('artist', track.get('artistId'), fatal=False)
  75         album = self._call_api('album', track.get('albumId'), fatal=False)
  76
  77         formats = [{
  78             'url': f'https://{sub_domain}.jamendo.com/?trackid={track_id}&format={format_id}&from=app-97dab294',
  79             'format_id': format_id,
  80             'ext': ext,
  81             'quality': quality,
  82         } for quality, (format_id, sub_domain, ext) in enumerate((
  83             ('mp31', 'mp3l', 'mp3'),
  84             ('mp32', 'mp3d', 'mp3'),
  85             ('ogg1', 'ogg', 'ogg'),
  86             ('flac', 'flac', 'flac'),
  87         ))]
  88
  89         urls = []
  90         thumbnails = []
  91         for covers in (track.get('cover') or {}).values():
  92             for cover_id, cover_url in covers.items():
  93                 if not cover_url or cover_url in urls:
  94                     continue
  95                 urls.append(cover_url)
  96                 size = int_or_none(cover_id.lstrip('size'))
  97                 thumbnails.append({
  98                     'id': cover_id,
  99                     'url': cover_url,
 100                     'width': size,
 101                     'height': size,
 102                 })
 103
 104         tags = []
 105         for tag in (track.get('tags') or []):
 106             tag_name = tag.get('name')
 107             if not tag_name:
 108                 continue
 109             tags.append(tag_name)
 110
 111         stats = track.get('stats') or {}
 112         video_license = track.get('licenseCC') or []
 113
 114         return {
 115             'id': track_id,
 116             'display_id': display_id,
 117             'thumbnails': thumbnails,
 118             'title': title,
 119             'description': track.get('description'),
 120             'duration': int_or_none(track.get('duration')),
 121             'artist': artist.get('name'),
 122             'track': track_name,
 123             'album': album.get('name'),
 124             'formats': formats,
 125             'license': '-'.join(video_license) if video_license else None,
 126             'timestamp': int_or_none(track.get('dateCreated')),
 127             'view_count': int_or_none(stats.get('listenedAll')),
 128             'like_count': int_or_none(stats.get('favorited')),
 129             'average_rating': int_or_none(stats.get('averageNote')),
 130             'tags': tags,
 131         }
 132
 133
 134 class JamendoAlbumIE(JamendoIE):  # XXX: Do not subclass from concrete IE
 135     _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
 136     _TESTS = [{
 137         'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
 138         'info_dict': {
 139             'id': '121486',
 140             'title': 'Duck On Cover',
 141             'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239',
 142         },
 143         'playlist': [{
 144             'md5': 'e1a2fcb42bda30dfac990212924149a8',
 145             'info_dict': {
 146                 'id': '1032333',
 147                 'ext': 'flac',
 148                 'title': 'Warmachine',
 149                 'artist': 'Shearer',
 150                 'track': 'Warmachine',
 151                 'timestamp': 1368089771,
 152                 'upload_date': '20130509',
 153                 'view_count': int,
 154                 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032333',
 155                 'duration': 190,
 156                 'license': 'by',
 157                 'album': 'Duck On Cover',
 158                 'average_rating': 4,
 159                 'tags': ['rock', 'drums', 'bass', 'world', 'punk', 'neutral'],
 160                 'like_count': int,
 161             },
 162         }, {
 163             'md5': '1f358d7b2f98edfe90fd55dac0799d50',
 164             'info_dict': {
 165                 'id': '1032330',
 166                 'ext': 'flac',
 167                 'title': 'Without Your Ghost',
 168                 'artist': 'Shearer',
 169                 'track': 'Without Your Ghost',
 170                 'timestamp': 1368089771,
 171                 'upload_date': '20130509',
 172                 'duration': 192,
 173                 'tags': ['rock', 'drums', 'bass', 'world', 'punk'],
 174                 'album': 'Duck On Cover',
 175                 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032330',
 176                 'view_count': int,
 177                 'average_rating': 4,
 178                 'license': 'by',
 179                 'like_count': int,
 180             },
 181         }],
 182         'params': {
 183             'playlistend': 2,
 184         },
 185     }]
 186
 187     def _real_extract(self, url):
 188         album_id = self._match_id(url)
 189         album = self._call_api('album', album_id)
 190         album_name = album.get('name')
 191
 192         entries = []
 193         for track in (album.get('tracks') or []):
 194             track_id = track.get('id')
 195             if not track_id:
 196                 continue
 197             track_id = str(track_id)
 198             entries.append({
 199                 '_type': 'url_transparent',
 200                 'url': 'https://www.jamendo.com/track/' + track_id,
 201                 'ie_key': JamendoIE.ie_key(),
 202                 'id': track_id,
 203                 'album': album_name,
 204             })
 205
 206         return self.playlist_result(
 207             entries, album_id, album_name,
 208             clean_html(try_get(album, lambda x: x['description']['en'], str)))