[ie/soundcloud] Various fixes (#11820)
[yt-dlp.git] / yt_dlp / extractor / mx3.py
blob5c42f4d156bd76fc06d31bd5392e3349cbc25a96
1 import re
3 from .common import InfoExtractor
4 from ..networking import HEADRequest
5 from ..utils import (
6 get_element_by_class,
7 int_or_none,
8 try_call,
9 url_or_none,
10 urlhandle_detect_ext,
12 from ..utils.traversal import traverse_obj
class Mx3BaseIE(InfoExtractor):
    """Common extraction logic for the mx3.ch family of sites.

    Concrete extractors supply ``_DOMAIN`` and build their ``_VALID_URL``
    from ``_VALID_URL_TMPL``.
    """

    _VALID_URL_TMPL = r'https?://(?:www\.)?%s/t/(?P<id>\w+)'

    # Candidate endpoints, relative to ``/tracks/<id>/``, that are probed for
    # availability. 'url' is replaced by the absolute URL for every candidate
    # that turns out to exist.
    _FORMATS = [
        {'url': 'player_asset', 'format_id': 'default', 'quality': 0},
        {'url': 'player_asset?quality=hd', 'format_id': 'hd', 'quality': 1},
        {'url': 'download', 'format_id': 'download', 'quality': 2},
        {'url': 'player_asset?quality=source', 'format_id': 'source', 'quality': 2},
    ]

    def _extract_formats(self, track_id):
        """Probe each entry of ``_FORMATS`` with a HEAD request.

        Returns a list with one format dict per endpoint that answered with
        status 200. Each dict is the ``_FORMATS`` entry with 'url' made
        absolute, plus 'ext' (from ``urlhandle_detect_ext``) and 'filesize'
        (from the ``Content-Length`` header, ``None`` if not an integer).
        """
        tracks_root = f'https://{self._DOMAIN}/tracks/{track_id}'
        available = []
        for candidate in self._FORMATS:
            probe_url = f'{tracks_root}/{candidate["url"]}'
            # Non-fatal request with 404 declared as an expected status:
            # a missing format must not abort the extraction.
            response = self._request_webpage(
                HEADRequest(probe_url), track_id, fatal=False, expected_status=404,
                note=f'Checking for format {candidate["format_id"]}')
            if not response or response.status != 200:
                continue
            entry = dict(candidate)
            entry['url'] = probe_url
            entry['ext'] = urlhandle_detect_ext(response)
            entry['filesize'] = int_or_none(response.headers.get('Content-Length'))
            available.append(entry)
        return available

    def _real_extract(self, url):
        """Assemble the info dict for a single track page.

        Combines fields scraped from the HTML page with fields taken from the
        track's ``.json`` document (downloaded non-fatally).
        """
        track_id = self._match_id(url)
        webpage = self._download_webpage(url, track_id)
        details_html = get_element_by_class('single-more-info', webpage)
        metadata = self._download_json(
            f'https://{self._DOMAIN}/t/{track_id}.json', track_id, fatal=False)

        def info_field(label):
            # Content of the <dd> that follows the <dt> whose text is `label`,
            # searched inside the 'single-more-info' element only.
            field_pattern = rf'<dt[^>]*>\s*{label}\s*</dt>\s*<dd[^>]*>(.*?)</dd>'
            return self._html_search_regex(
                field_pattern, details_html, label, default=None, flags=re.DOTALL)

        info = {
            'id': track_id,
            'formats': self._extract_formats(track_id),
            'genre': self._html_search_regex(
                r'<div\b[^>]+class="single-band-genre"[^>]*>([^<]+)</div>',
                webpage, 'genre', default=None),
            'release_year': int_or_none(info_field('Year of creation')),
            'description': info_field('Description'),
            # NOTE(review): `list` is passed as a second callable; presumably
            # it is the fallback that yields [] when the 'Tag' field is absent
            # (the tests expect an empty list) - confirm against try_call.
            'tags': try_call(lambda: info_field('Tag').split(', '), list),
        }
        # Fields from the JSON document are merged last, after the scraped ones.
        info.update(traverse_obj(metadata, {
            'title': ('title', {str}),
            'artist': (('performer_name', 'artist'), {str}),
            'album_artist': ('artist', {str}),
            'composer': ('composer_name', {str}),
            'thumbnail': (('picture_url_xlarge', 'picture_url'), {url_or_none}),
        }, get_all=False))
        return info
class Mx3IE(Mx3BaseIE):
    """Extractor for track pages (``/t/<id>``) on mx3.ch."""

    _DOMAIN = 'mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)

    # One entry per test track: page URL, expected md5 and expected metadata.
    _TESTS = [
        {
            'url': 'https://mx3.ch/t/1Cru',
            'md5': '7ba09e9826b4447d4e1ce9d69e0e295f',
            'info_dict': {
                'id': '1Cru',
                'ext': 'wav',
                'artist': 'Godina',
                'album_artist': 'Tortue Tortue',
                'composer': 'Olivier Godinat',
                'genre': 'Rock',
                'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/4643/square_xlarge/1-s-envoler-1.jpg?1630272813',
                'title': "S'envoler",
                'release_year': 2021,
                'tags': [],
            },
        },
        {
            'url': 'https://mx3.ch/t/1LIY',
            'md5': '48293cb908342547827f963a5a2e9118',
            'info_dict': {
                'id': '1LIY',
                'ext': 'mov',
                'artist': 'Tania Kimfumu',
                'album_artist': 'The Broots',
                'composer': 'Emmanuel Diserens',
                'genre': 'Electro',
                'thumbnail': 'https://mx3.ch/pictures/mx3/file/0110/0003/video_xlarge/frame_0000.png?1686963670',
                'title': 'The Broots-Larytta remix "Begging For Help"',
                'release_year': 2023,
                'tags': ['the broots', 'cassata records', 'larytta'],
                'description': '"Begging for Help" Larytta Remix Official Video\nRealized By Kali Donkilie in 2023',
            },
        },
        {
            'url': 'https://mx3.ch/t/1C6E',
            'md5': '1afcd578493ddb8e5008e94bb6d97e25',
            'info_dict': {
                'id': '1C6E',
                'ext': 'wav',
                'artist': 'Alien Bubblegum',
                'album_artist': 'Alien Bubblegum',
                'composer': 'Alien Bubblegum',
                'genre': 'Punk',
                'thumbnail': 'https://mx3.ch/pictures/mx3/file/0101/1551/square_xlarge/pandora-s-box-cover-with-title.png?1627054733',
                'title': 'Wide Awake',
                'release_year': 2021,
                'tags': ['alien bubblegum', 'bubblegum', 'alien', 'pop punk', 'poppunk'],
            },
        },
    ]
class Mx3NeoIE(Mx3BaseIE):
    """Extractor for track pages (``/t/<id>``) on neo.mx3.ch."""

    _DOMAIN = 'neo.mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)

    # Single test track: page URL, expected md5 and expected metadata.
    _TESTS = [
        {
            'url': 'https://neo.mx3.ch/t/1hpd',
            'md5': '6d9986bbae5cac3296ec8813bf965eb2',
            'info_dict': {
                'id': '1hpd',
                'ext': 'wav',
                'artist': 'Baptiste Lopez',
                'album_artist': 'Kammerorchester Basel',
                'composer': 'Jannik Giger',
                'genre': 'Composition, Orchestra',
                'title': 'Troisième œil. Für Kammerorchester (2023)',
                'thumbnail': 'https://neo.mx3.ch/pictures/neo/file/0000/0241/square_xlarge/kammerorchester-basel-group-photo-2_c_-lukasz-rajchert.jpg?1560341252',
                'release_year': 2023,
                'tags': [],
            },
        },
    ]
class Mx3VolksmusikIE(Mx3BaseIE):
    """Extractor for track pages (``/t/<id>``) on volksmusik.mx3.ch."""

    _DOMAIN = 'volksmusik.mx3.ch'
    _VALID_URL = Mx3BaseIE._VALID_URL_TMPL % re.escape(_DOMAIN)

    # Single test track: page URL, expected md5 and expected metadata.
    _TESTS = [
        {
            'url': 'https://volksmusik.mx3.ch/t/Zx',
            'md5': 'dd967a7b0c1ef898f3e072cf9c2eae3c',
            'info_dict': {
                'id': 'Zx',
                'ext': 'mp3',
                'artist': 'Ländlerkapelle GrischArt',
                'album_artist': 'Ländlerkapelle GrischArt',
                'composer': 'Urs Glauser',
                'genre': 'Instrumental, Graubünden',
                'title': 'Chämilouf',
                'thumbnail': 'https://volksmusik.mx3.ch/pictures/vxm/file/0000/3815/square_xlarge/grischart1.jpg?1450530120',
                'release_year': 2012,
                'tags': [],
            },
        },
    ]