yt_dlp/extractor/qqmusic.py

   1 import base64
   2 import functools
   3 import json
   4 import random
   5 import time
   6
   7 from .common import InfoExtractor
   8 from ..utils import (
   9     ExtractorError,
  10     OnDemandPagedList,
  11     clean_html,
  12     int_or_none,
  13     join_nonempty,
  14     js_to_json,
  15     str_or_none,
  16     strip_jsonp,
  17     traverse_obj,
  18     unescapeHTML,
  19     url_or_none,
  20     urljoin,
  21 )
  22
  23
  24 class QQMusicBaseIE(InfoExtractor):
  25     def _get_cookie(self, key, default=None):
  26         return getattr(self._get_cookies('https://y.qq.com').get(key), 'value', default)
  27
  28     def _get_g_tk(self):
  29         n = 5381
  30         for c in self._get_cookie('qqmusic_key', ''):
  31             n += (n << 5) + ord(c)
  32         return n & 2147483647
  33
  34     def _get_uin(self):
  35         return int_or_none(self._get_cookie('uin')) or 0
  36
  37     @property
  38     def is_logged_in(self):
  39         return bool(self._get_uin() and self._get_cookie('fqm_pvqid'))
  40
  41     # Reference: m_r_GetRUin() in top_player.js
  42     # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
  43     @staticmethod
  44     def _m_r_get_ruin():
  45         cur_ms = int(time.time() * 1000) % 1000
  46         return int(round(random.random() * 2147483647) * cur_ms % 1E10)
  47
  48     def _download_init_data(self, url, mid, fatal=True):
  49         webpage = self._download_webpage(url, mid, fatal=fatal)
  50         return self._search_json(r'window\.__INITIAL_DATA__\s*=', webpage,
  51                                  'init data', mid, transform_source=js_to_json, fatal=fatal)
  52
  53     def _make_fcu_req(self, req_dict, mid, headers={}, **kwargs):
  54         return self._download_json(
  55             'https://u.y.qq.com/cgi-bin/musicu.fcg', mid, data=json.dumps({
  56                 'comm': {
  57                     'cv': 0,
  58                     'ct': 24,
  59                     'format': 'json',
  60                     'uin': self._get_uin(),
  61                 },
  62                 **req_dict,
  63             }, separators=(',', ':')).encode(), headers=headers, **kwargs)
  64
  65
  66 class QQMusicIE(QQMusicBaseIE):
  67     IE_NAME = 'qqmusic'
  68     IE_DESC = 'QQ音乐'
  69     _VALID_URL = r'https?://y\.qq\.com/n/ryqq/songDetail/(?P<id>[0-9A-Za-z]+)'
  70     _TESTS = [{
  71         'url': 'https://y.qq.com/n/ryqq/songDetail/004Ti8rT003TaZ',
  72         'md5': 'd7adc5c438d12e2cb648cca81593fd47',
  73         'info_dict': {
  74             'id': '004Ti8rT003TaZ',
  75             'ext': 'mp3',
  76             'title': '永夜のパレード (永夜的游行)',
  77             'album': '幻想遊園郷 -Fantastic Park-',
  78             'release_date': '20111230',
  79             'duration': 281,
  80             'creators': ['ケーキ姫', 'JUMA'],
  81             'genres': ['Pop'],
  82             'description': 'md5:b5261f3d595657ae561e9e6aee7eb7d9',
  83             'size': 4501244,
  84             'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
  85             'subtitles': 'count:1',
  86         },
  87     }, {
  88         'url': 'https://y.qq.com/n/ryqq/songDetail/004295Et37taLD',
  89         'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
  90         'info_dict': {
  91             'id': '004295Et37taLD',
  92             'ext': 'mp3',
  93             'title': '可惜没如果',
  94             'album': '新地球 - 人 (Special Edition)',
  95             'release_date': '20150129',
  96             'duration': 298,
  97             'creators': ['林俊杰'],
  98             'genres': ['Pop'],
  99             'description': 'md5:f568421ff618d2066e74b65a04149c4e',
 100             'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
 101         },
 102         'skip': 'premium member only',
 103     }, {
 104         'note': 'There is no mp3-320 version of this song.',
 105         'url': 'https://y.qq.com/n/ryqq/songDetail/004MsGEo3DdNxV',
 106         'md5': '028aaef1ae13d8a9f4861a92614887f9',
 107         'info_dict': {
 108             'id': '004MsGEo3DdNxV',
 109             'ext': 'mp3',
 110             'title': '如果',
 111             'album': '新传媒电视连续剧金曲系列II',
 112             'release_date': '20050626',
 113             'duration': 220,
 114             'creators': ['李季美'],
 115             'genres': [],
 116             'description': 'md5:fc711212aa623b28534954dc4bd67385',
 117             'size': 3535730,
 118             'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
 119         },
 120     }, {
 121         'note': 'lyrics not in .lrc format',
 122         'url': 'https://y.qq.com/n/ryqq/songDetail/001JyApY11tIp6',
 123         'info_dict': {
 124             'id': '001JyApY11tIp6',
 125             'ext': 'mp3',
 126             'title': 'Shadows Over Transylvania',
 127             'release_date': '19970225',
 128             'creator': 'Dark Funeral',
 129             'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
 130             'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
 131         },
 132         'params': {'skip_download': True},
 133         'skip': 'no longer available',
 134     }]
 135
 136     _FORMATS = {
 137         'F000': {'name': 'flac', 'prefix': 'F000', 'ext': 'flac', 'preference': 60},
 138         'A000': {'name': 'ape', 'prefix': 'A000', 'ext': 'ape', 'preference': 50},
 139         'M800': {'name': '320mp3', 'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
 140         'M500': {'name': '128mp3', 'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
 141         'C400': {'name': '96aac', 'prefix': 'C400', 'ext': 'm4a', 'preference': 20, 'abr': 96},
 142         'C200': {'name': '48aac', 'prefix': 'C200', 'ext': 'm4a', 'preference': 20, 'abr': 48},
 143     }
 144
 145     def _real_extract(self, url):
 146         mid = self._match_id(url)
 147
 148         init_data = self._download_init_data(url, mid, fatal=False)
 149         info_data = self._make_fcu_req({'info': {
 150             'module': 'music.pf_song_detail_svr',
 151             'method': 'get_song_detail_yqq',
 152             'param': {
 153                 'song_mid': mid,
 154                 'song_type': 0,
 155             },
 156         }}, mid, note='Downloading song info')['info']['data']['track_info']
 157
 158         media_mid = info_data['file']['media_mid']
 159
 160         data = self._make_fcu_req({
 161             'req_1': {
 162                 'module': 'vkey.GetVkeyServer',
 163                 'method': 'CgiGetVkey',
 164                 'param': {
 165                     'guid': str(self._m_r_get_ruin()),
 166                     'songmid': [mid] * len(self._FORMATS),
 167                     'songtype': [0] * len(self._FORMATS),
 168                     'uin': str(self._get_uin()),
 169                     'loginflag': 1,
 170                     'platform': '20',
 171                     'filename': [f'{f["prefix"]}{media_mid}.{f["ext"]}' for f in self._FORMATS.values()],
 172                 },
 173             },
 174             'req_2': {
 175                 'module': 'music.musichallSong.PlayLyricInfo',
 176                 'method': 'GetPlayLyricInfo',
 177                 'param': {'songMID': mid},
 178             },
 179         }, mid, note='Downloading formats and lyric', headers=self.geo_verification_headers())
 180
 181         code = traverse_obj(data, ('req_1', 'code', {int}))
 182         if code != 0:
 183             raise ExtractorError(f'Failed to download format info, error code {code or "unknown"}')
 184         formats = []
 185         for media_info in traverse_obj(data, (
 186             'req_1', 'data', 'midurlinfo', lambda _, v: v['songmid'] == mid and v['purl']),
 187         ):
 188             format_key = traverse_obj(media_info, ('filename', {str}, {lambda x: x[:4]}))
 189             format_info = self._FORMATS.get(format_key) or {}
 190             format_id = format_info.get('name')
 191             formats.append({
 192                 'url': urljoin('https://dl.stream.qqmusic.qq.com', media_info['purl']),
 193                 'format': format_id,
 194                 'format_id': format_id,
 195                 'size': traverse_obj(info_data, ('file', f'size_{format_id}', {int_or_none})),
 196                 'quality': format_info.get('preference'),
 197                 'abr': format_info.get('abr'),
 198                 'ext': format_info.get('ext'),
 199                 'vcodec': 'none',
 200             })
 201
 202         if not formats and not self.is_logged_in:
 203             self.raise_login_required()
 204
 205         if traverse_obj(data, ('req_2', 'code')):
 206             self.report_warning(f'Failed to download lyric, error {data["req_2"]["code"]!r}')
 207         lrc_content = traverse_obj(data, ('req_2', 'data', 'lyric', {lambda x: base64.b64decode(x).decode('utf-8')}))
 208
 209         info_dict = {
 210             'id': mid,
 211             'formats': formats,
 212             **traverse_obj(info_data, {
 213                 'title': ('title', {str}),
 214                 'album': ('album', 'title', {str}, {lambda x: x or None}),
 215                 'release_date': ('time_public', {lambda x: x.replace('-', '') or None}),
 216                 'creators': ('singer', ..., 'name', {str}),
 217                 'alt_title': ('subtitle', {str}, {lambda x: x or None}),
 218                 'duration': ('interval', {int_or_none}),
 219             }),
 220             **traverse_obj(init_data, ('detail', {
 221                 'thumbnail': ('picurl', {url_or_none}),
 222                 'description': ('info', 'intro', 'content', ..., 'value', {str}),
 223                 'genres': ('info', 'genre', 'content', ..., 'value', {str}, all),
 224             }), get_all=False),
 225         }
 226         if lrc_content:
 227             info_dict['subtitles'] = {'origin': [{'ext': 'lrc', 'data': lrc_content}]}
 228             info_dict['description'] = join_nonempty(info_dict.get('description'), lrc_content, delim='\n')
 229         return info_dict
 230
 231
 232 class QQMusicSingerIE(QQMusicBaseIE):
 233     IE_NAME = 'qqmusic:singer'
 234     IE_DESC = 'QQ音乐 - 歌手'
 235     _VALID_URL = r'https?://y\.qq\.com/n/ryqq/singer/(?P<id>[0-9A-Za-z]+)'
 236     _TESTS = [{
 237         'url': 'https://y.qq.com/n/ryqq/singer/001BLpXF2DyJe2',
 238         'info_dict': {
 239             'id': '001BLpXF2DyJe2',
 240             'title': '林俊杰',
 241             'description': 'md5:10624ce73b06fa400bc846f59b0305fa',
 242             'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
 243         },
 244         'playlist_mincount': 100,
 245     }, {
 246         'url': 'https://y.qq.com/n/ryqq/singer/000Q00f213YzNV',
 247         'info_dict': {
 248             'id': '000Q00f213YzNV',
 249             'title': '桃几OvO',
 250             'description': '小破站小唱见~希望大家喜欢听我唱歌~！',
 251             'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
 252         },
 253         'playlist_count': 12,
 254         'playlist': [{
 255             'info_dict': {
 256                 'id': '0016cvsy02mmCl',
 257                 'ext': 'mp3',
 258                 'title': '群青',
 259                 'album': '桃几2021年翻唱集',
 260                 'release_date': '20210913',
 261                 'duration': 248,
 262                 'creators': ['桃几OvO'],
 263                 'genres': ['Pop'],
 264                 'description': 'md5:4296005a04edcb5cdbe0889d5055a7ae',
 265                 'size': 3970822,
 266                 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
 267             },
 268         }],
 269     }]
 270
 271     _PAGE_SIZE = 50
 272
 273     def _fetch_page(self, mid, page_size, page_num):
 274         data = self._make_fcu_req({'req_1': {
 275             'module': 'music.web_singer_info_svr',
 276             'method': 'get_singer_detail_info',
 277             'param': {
 278                 'sort': 5,
 279                 'singermid': mid,
 280                 'sin': page_num * page_size,
 281                 'num': page_size,
 282             }}}, mid, note=f'Downloading page {page_num}')
 283         yield from traverse_obj(data, ('req_1', 'data', 'songlist', ..., {lambda x: self.url_result(
 284             f'https://y.qq.com/n/ryqq/songDetail/{x["mid"]}', QQMusicIE, x['mid'], x.get('title'))}))
 285
 286     def _real_extract(self, url):
 287         mid = self._match_id(url)
 288         init_data = self._download_init_data(url, mid, fatal=False)
 289
 290         return self.playlist_result(
 291             OnDemandPagedList(functools.partial(self._fetch_page, mid, self._PAGE_SIZE), self._PAGE_SIZE),
 292             mid, **traverse_obj(init_data, ('singerDetail', {
 293                 'title': ('basic_info', 'name', {str}),
 294                 'description': ('ex_info', 'desc', {str}),
 295                 'thumbnail': ('pic', 'pic', {url_or_none}),
 296             })))
 297
 298
 299 class QQPlaylistBaseIE(InfoExtractor):
 300     def _extract_entries(self, info_json, path):
 301         for song in traverse_obj(info_json, path):
 302             song_mid = song.get('songmid')
 303             if not song_mid:
 304                 continue
 305             yield self.url_result(
 306                 f'https://y.qq.com/n/ryqq/songDetail/{song_mid}',
 307                 QQMusicIE, song_mid, song.get('songname'))
 308
 309
 310 class QQMusicAlbumIE(QQPlaylistBaseIE):
 311     IE_NAME = 'qqmusic:album'
 312     IE_DESC = 'QQ音乐 - 专辑'
 313     _VALID_URL = r'https?://y\.qq\.com/n/ryqq/albumDetail/(?P<id>[0-9A-Za-z]+)'
 314
 315     _TESTS = [{
 316         'url': 'https://y.qq.com/n/ryqq/albumDetail/000gXCTb2AhRR1',
 317         'info_dict': {
 318             'id': '000gXCTb2AhRR1',
 319             'title': '我们都是这样长大的',
 320             'description': 'md5:179c5dce203a5931970d306aa9607ea6',
 321         },
 322         'playlist_count': 4,
 323     }, {
 324         'url': 'https://y.qq.com/n/ryqq/albumDetail/002Y5a3b3AlCu3',
 325         'info_dict': {
 326             'id': '002Y5a3b3AlCu3',
 327             'title': '그리고…',
 328             'description': 'md5:a48823755615508a95080e81b51ba729',
 329         },
 330         'playlist_count': 8,
 331     }]
 332
 333     def _real_extract(self, url):
 334         mid = self._match_id(url)
 335
 336         album_json = self._download_json(
 337             'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg',
 338             mid, 'Download album page',
 339             query={'albummid': mid, 'format': 'json'})['data']
 340
 341         entries = self._extract_entries(album_json, ('list', ...))
 342
 343         return self.playlist_result(entries, mid, **traverse_obj(album_json, {
 344             'title': ('name', {str}),
 345             'description': ('desc', {str.strip}),
 346         }))
 347
 348
 349 class QQMusicToplistIE(QQPlaylistBaseIE):
 350     IE_NAME = 'qqmusic:toplist'
 351     IE_DESC = 'QQ音乐 - 排行榜'
 352     _VALID_URL = r'https?://y\.qq\.com/n/ryqq/toplist/(?P<id>[0-9]+)'
 353
 354     _TESTS = [{
 355         'url': 'https://y.qq.com/n/ryqq/toplist/123',
 356         'info_dict': {
 357             'id': '123',
 358             'title': r're:美国热门音乐榜 \d{4}-\d{2}-\d{2}',
 359             'description': '美国热门音乐榜，每周一更新。',
 360         },
 361         'playlist_count': 95,
 362     }, {
 363         'url': 'https://y.qq.com/n/ryqq/toplist/3',
 364         'info_dict': {
 365             'id': '3',
 366             'title': r're:巅峰榜·欧美 \d{4}-\d{2}-\d{2}',
 367             'description': 'md5:4def03b60d3644be4c9a36f21fd33857',
 368         },
 369         'playlist_count': 100,
 370     }, {
 371         'url': 'https://y.qq.com/n/ryqq/toplist/106',
 372         'info_dict': {
 373             'id': '106',
 374             'title': r're:韩国Mnet榜 \d{4}-\d{2}-\d{2}',
 375             'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
 376         },
 377         'playlist_count': 50,
 378     }]
 379
 380     def _real_extract(self, url):
 381         list_id = self._match_id(url)
 382
 383         toplist_json = self._download_json(
 384             'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id,
 385             note='Download toplist page',
 386             query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
 387
 388         return self.playlist_result(
 389             self._extract_entries(toplist_json, ('songlist', ..., 'data')), list_id,
 390             playlist_title=join_nonempty(*traverse_obj(
 391                 toplist_json, ((('topinfo', 'ListName'), 'update_time'), None)), delim=' '),
 392             playlist_description=traverse_obj(toplist_json, ('topinfo', 'info')))
 393
 394
 395 class QQMusicPlaylistIE(QQPlaylistBaseIE):
 396     IE_NAME = 'qqmusic:playlist'
 397     IE_DESC = 'QQ音乐 - 歌单'
 398     _VALID_URL = r'https?://y\.qq\.com/n/ryqq/playlist/(?P<id>[0-9]+)'
 399
 400     _TESTS = [{
 401         'url': 'https://y.qq.com/n/ryqq/playlist/1374105607',
 402         'info_dict': {
 403             'id': '1374105607',
 404             'title': '易入人心的华语民谣',
 405             'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪，就是这样的简单才易入人心。',
 406         },
 407         'playlist_count': 20,
 408     }]
 409
 410     def _real_extract(self, url):
 411         list_id = self._match_id(url)
 412
 413         list_json = self._download_json(
 414             'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
 415             list_id, 'Download list page',
 416             query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
 417             transform_source=strip_jsonp, headers={'Referer': url})
 418         if not len(list_json.get('cdlist', [])):
 419             raise ExtractorError(join_nonempty(
 420                 'Unable to get playlist info',
 421                 join_nonempty('code', 'subcode', from_dict=list_json),
 422                 list_json.get('msg'), delim=': '))
 423
 424         entries = self._extract_entries(list_json, ('cdlist', 0, 'songlist', ...))
 425
 426         return self.playlist_result(entries, list_id, **traverse_obj(list_json, ('cdlist', 0, {
 427             'title': ('dissname', {str}),
 428             'description': ('desc', {unescapeHTML}, {clean_html}),
 429         })))
 430
 431
 432 class QQMusicVideoIE(QQMusicBaseIE):
 433     IE_NAME = 'qqmusic:mv'
 434     IE_DESC = 'QQ音乐 - MV'
 435     _VALID_URL = r'https?://y\.qq\.com/n/ryqq/mv/(?P<id>[0-9A-Za-z]+)'
 436
 437     _TESTS = [{
 438         'url': 'https://y.qq.com/n/ryqq/mv/002Vsarh3SVU8K',
 439         'info_dict': {
 440             'id': '002Vsarh3SVU8K',
 441             'ext': 'mp4',
 442             'title': 'The Chant (Extended Mix / Audio)',
 443             'description': '',
 444             'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
 445             'release_timestamp': 1688918400,
 446             'release_date': '20230709',
 447             'duration': 313,
 448             'creators': ['Duke Dumont'],
 449             'view_count': int,
 450         },
 451     }]
 452
 453     def _parse_url_formats(self, url_data):
 454         return traverse_obj(url_data, ('mp4', lambda _, v: v['freeflow_url'], {
 455             'url': ('freeflow_url', 0, {url_or_none}),
 456             'filesize': ('fileSize', {int_or_none}),
 457             'format_id': ('newFileType', {str_or_none}),
 458         }))
 459
 460     def _real_extract(self, url):
 461         video_id = self._match_id(url)
 462
 463         video_info = self._make_fcu_req({
 464             'mvInfo': {
 465                 'module': 'music.video.VideoData',
 466                 'method': 'get_video_info_batch',
 467                 'param': {
 468                     'vidlist': [video_id],
 469                     'required': [
 470                         'vid', 'type', 'sid', 'cover_pic', 'duration', 'singers',
 471                         'video_pay', 'hint', 'code', 'msg', 'name', 'desc',
 472                         'playcnt', 'pubdate', 'play_forbid_reason'],
 473                 },
 474             },
 475             'mvUrl': {
 476                 'module': 'music.stream.MvUrlProxy',
 477                 'method': 'GetMvUrls',
 478                 'param': {'vids': [video_id]},
 479             },
 480         }, video_id, headers=self.geo_verification_headers())
 481         if traverse_obj(video_info, ('mvInfo', 'data', video_id, 'play_forbid_reason')) == 3:
 482             self.raise_geo_restricted()
 483
 484         return {
 485             'id': video_id,
 486             'formats': self._parse_url_formats(traverse_obj(video_info, ('mvUrl', 'data', video_id))),
 487             **traverse_obj(video_info, ('mvInfo', 'data', video_id, {
 488                 'title': ('name', {str}),
 489                 'description': ('desc', {str}),
 490                 'thumbnail': ('cover_pic', {url_or_none}),
 491                 'release_timestamp': ('pubdate', {int_or_none}),
 492                 'duration': ('duration', {int_or_none}),
 493                 'creators': ('singers', ..., 'name', {str}),
 494                 'view_count': ('playcnt', {int_or_none}),
 495             })),
 496         }