[cleanup] Make more playlist entries lazy (#11763)
[yt-dlp.git] / yt_dlp / extractor / ximalaya.py
blob02bf6a7bebda2f4b17f133dbcad54aecc7c9490a
1 import base64
2 import math
3 import time
5 from .common import InfoExtractor
6 from .videa import VideaIE
7 from ..utils import (
8 InAdvancePagedList,
9 int_or_none,
10 str_or_none,
11 traverse_obj,
12 try_call,
13 update_url_query,
class XimalayaBaseIE(InfoExtractor):
    """Common base class shared by the Ximalaya extractors in this module."""

    # Country list exposed to the InfoExtractor base class; only CN is listed.
    _GEO_COUNTRIES = ['CN']
class XimalayaIE(XimalayaBaseIE):
    """Extractor for a single Ximalaya sound (track) page.

    Track metadata is read from ``m.ximalaya.com/tracks/<id>.json``. Tracks
    flagged ``is_paid`` additionally get a ``vip`` format built from the
    ``mpay.ximalaya.com`` pay endpoint; the regular ``play_path_*`` URLs are
    added whenever they are present in the metadata.
    """

    IE_NAME = 'ximalaya'
    IE_DESC = '喜马拉雅FM'
    _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:(?P<uid>\d+)/)?sound/(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.ximalaya.com/sound/47740352/',
            'info_dict': {
                'id': '47740352',
                'ext': 'm4a',
                'uploader': '小彬彬爱听书',
                'uploader_id': '61425525',
                'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
                'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
                'description': 'contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。',
                'thumbnail': r're:^https?://.*\.jpg',
                'thumbnails': [
                    {
                        'name': 'cover_url',
                        'url': r're:^https?://.*\.jpg',
                    },
                    {
                        'name': 'cover_url_142',
                        'url': r're:^https?://.*\.jpg',
                        'width': 180,
                        'height': 180,
                    },
                ],
                'categories': ['其他'],
                'duration': 93,
                'view_count': int,
                'like_count': int,
            },
        },
        {
            'url': 'http://m.ximalaya.com/61425525/sound/47740352/',
            'info_dict': {
                'id': '47740352',
                'ext': 'm4a',
                'uploader': '小彬彬爱听书',
                'uploader_id': '61425525',
                'uploader_url': 'http://www.ximalaya.com/zhubo/61425525/',
                'title': '261.唐诗三百首.卷八.送孟浩然之广陵.李白',
                'description': 'contains:《送孟浩然之广陵》\n作者:李白\n故人西辞黄鹤楼,烟花三月下扬州。\n孤帆远影碧空尽,惟见长江天际流。',
                'thumbnail': r're:^https?://.*\.jpg',
                'thumbnails': [
                    {
                        'name': 'cover_url',
                        'url': r're:^https?://.*\.jpg',
                    },
                    {
                        'name': 'cover_url_142',
                        'url': r're:^https?://.*\.jpg',
                        'width': 180,
                        'height': 180,
                    },
                ],
                'categories': ['人文'],
                'duration': 93,
                'view_count': int,
                'like_count': int,
            },
        },
        {
            # VIP-restricted audio
            'url': 'https://www.ximalaya.com/sound/562111701',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _decrypt_filename(file_id, seed):
        """Decode the obfuscated ``fileId`` of a paid track into a URL path.

        A shuffled alphabet is derived from ``seed`` with a linear
        congruential step; ``file_id`` is a ``*``-separated list of indices
        into that alphabet. Non-decimal parts are ignored. The result always
        starts with ``/``.
        """
        cgstr = ''
        key = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890'
        # The loop iterates over the ORIGINAL alphabet (the iterator is bound
        # before ``key`` is rebound), so it runs once per original character
        # while ``key`` shrinks by one character each round.
        for _ in key:
            seed = float(int(211 * seed + 30031) % 65536)
            r = int(seed / 65536 * len(key))
            cgstr += key[r]
            key = key.replace(key[r], '')
        parts = file_id.split('*')
        filename = ''.join(cgstr[int(part)] for part in parts if part.isdecimal())
        if not filename.startswith('/'):
            filename = '/' + filename
        return filename

    @staticmethod
    def _decrypt_url_params(encrypted_params):
        """Decrypt the base64/RC4 encoded ``ep`` value of a paid track.

        Returns the ``(sign, token, timestamp)`` fields, i.e. items 1-3 of the
        ``-``-separated plaintext.
        """
        params = VideaIE.rc4(
            base64.b64decode(encrypted_params), 'xkt3a41psizxrh9l').split('-')
        # sign, token, timestamp
        return params[1], params[2], params[3]

    def _extract_vip_format(self, audio_id, scheme):
        """Build the ``vip`` format dict for a paid track from the pay endpoint.

        When the resulting URL contains ``_preview_`` a warning is reported
        and the format is marked as a low-preference sample.
        """
        ts = int(time.time())
        vip_info = self._download_json(
            f'{scheme}://mpay.ximalaya.com/mobile/track/pay/{audio_id}/{ts}',
            audio_id, 'Downloading VIP info json', 'Unable to download VIP info file',
            query={'device': 'pc', 'isBackend': 'true', '_': ts})
        filename = self._decrypt_filename(vip_info['fileId'], vip_info['seed'])
        sign, token, timestamp = self._decrypt_url_params(vip_info['ep'])
        vip_url = update_url_query(
            f'{vip_info["domain"]}/download/{vip_info["apiVersion"]}{filename}', {
                'sign': sign,
                'token': token,
                'timestamp': timestamp,
                'buy_key': vip_info['buyKey'],
                'duration': vip_info['duration'],
            })
        fmt = {
            'format_id': 'vip',
            'url': vip_url,
            'vcodec': 'none',
        }
        if '_preview_' in vip_url:
            self.report_warning(
                f'This track requires a VIP account. Using a sample instead. {self._login_hint()}')
            fmt.update({
                'format_note': 'Sample',
                'preference': -10,
                **traverse_obj(vip_info, {
                    'filesize': ('sampleLength', {int_or_none}),
                    'duration': ('sampleDuration', {int_or_none}),
                }),
            })
        else:
            fmt.update(traverse_obj(vip_info, {
                'filesize': ('totalLength', {int_or_none}),
                'duration': ('duration', {int_or_none}),
            }))

        # Bitrate derived from size and duration; try_call is relied on to
        # yield None when either value is missing.
        # NOTE(review): presumably filesize is in bytes and duration in
        # seconds, which would make this kbit/s - confirm.
        fmt['abr'] = try_call(lambda: fmt['filesize'] * 8 / fmt['duration'] / 1024)
        return fmt

    def _real_extract(self, url):
        """Return the info dict for the track addressed by ``url``."""
        # Keep talking to the API over the same scheme the user supplied.
        scheme = 'https' if url.startswith('https') else 'http'

        audio_id = self._match_id(url)
        audio_info = self._download_json(
            f'{scheme}://m.ximalaya.com/tracks/{audio_id}.json', audio_id,
            'Downloading info json', 'Unable to download info file')

        formats = []
        # NOTE: VIP-restricted audio
        if audio_info.get('is_paid'):
            formats.append(self._extract_vip_format(audio_id, scheme))

        formats.extend([{
            'format_id': f'{bps}k',
            'url': audio_info[k],
            'abr': bps,
            'vcodec': 'none',
        } for bps, k in ((24, 'play_path_32'), (64, 'play_path_64')) if audio_info.get(k)])

        thumbnails = []
        for k in audio_info:
            # Cover picture keys look like 'cover_url', 'cover_url_142'.
            # Entries without a URL are skipped: a thumbnail with no URL is
            # of no use to the caller.
            if k.startswith('cover_url') and audio_info[k]:
                thumbnail = {'name': k, 'url': audio_info[k]}
                if k == 'cover_url_142':
                    thumbnail['width'] = 180
                    thumbnail['height'] = 180
                thumbnails.append(thumbnail)

        audio_uploader_id = audio_info.get('uid')

        # Normalise Windows line endings; try_call is relied on to yield None
        # when 'intro' is missing or not a string.
        audio_description = try_call(
            lambda: audio_info['intro'].replace('\r\n\r\n\r\n ', '\n').replace('\r\n', '\n'))

        return {
            'id': audio_id,
            'uploader': audio_info.get('nickname'),
            'uploader_id': str_or_none(audio_uploader_id),
            'uploader_url': f'{scheme}://www.ximalaya.com/zhubo/{audio_uploader_id}/' if audio_uploader_id else None,
            'title': audio_info['title'],
            'thumbnails': thumbnails,
            'description': audio_description,
            'categories': list(filter(None, [audio_info.get('category_name')])),
            'duration': audio_info.get('duration'),
            'view_count': audio_info.get('play_count'),
            'like_count': audio_info.get('favorites_count'),
            'formats': formats,
        }
class XimalayaAlbumIE(XimalayaBaseIE):
    """Extractor for a Ximalaya album, returned as a paged playlist of tracks."""

    IE_NAME = 'ximalaya:album'
    IE_DESC = '喜马拉雅FM 专辑'
    _VALID_URL = r'https?://(?:www\.|m\.)?ximalaya\.com/(?:\d+/)?album/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.ximalaya.com/61425525/album/5534601/',
        'info_dict': {
            'title': '唐诗三百首(含赏析)',
            'id': '5534601',
        },
        'playlist_mincount': 323,
    }, {
        'url': 'https://www.ximalaya.com/album/6912905',
        'info_dict': {
            'title': '埃克哈特《修炼当下的力量》',
            'id': '6912905',
        },
        'playlist_mincount': 41,
    }]

    def _real_extract(self, url):
        """Return a playlist result whose entries are fetched page by page."""
        album_id = self._match_id(url)

        # Page 1 is fetched eagerly: it carries the totals needed to size the
        # paged list and the album title.
        initial_page = self._fetch_page(album_id, 1)
        track_total = initial_page['trackTotalCount']
        per_page = initial_page['pageSize']
        total_pages = math.ceil(track_total / per_page)

        def entries_for(idx):
            # idx is zero-based while the API is one-based; index 0 reuses the
            # page that has already been downloaded.
            if idx:
                page = self._fetch_page(album_id, idx + 1)
            else:
                page = initial_page
            return self._get_entries(page)

        paged_entries = InAdvancePagedList(entries_for, total_pages, per_page)
        album_title = traverse_obj(
            initial_page, ('tracks', 0, 'albumTitle'), expected_type=str)

        return self.playlist_result(paged_entries, album_id, album_title)

    def _fetch_page(self, playlist_id, page_idx):
        """Download one page of the album track list and return its ``data`` member."""
        response = self._download_json(
            'https://www.ximalaya.com/revision/album/v1/getTracksList',
            playlist_id, note=f'Downloading tracks list page {page_idx}',
            query={'albumId': playlist_id, 'pageNum': page_idx})
        return response['data']

    def _get_entries(self, page_data):
        """Yield a URL result (handled by XimalayaIE) for each track in ``page_data``."""
        yield from (
            self.url_result(
                self._proto_relative_url(f'//www.ximalaya.com{track["url"]}'),
                XimalayaIE, track.get('trackId'), track.get('title'))
            for track in page_data['tracks'])