yt_dlp/extractor/vk.py

   1 import collections
   2 import hashlib
   3 import re
   4
   5 from .common import InfoExtractor
   6 from .dailymotion import DailymotionIE
   7 from .odnoklassniki import OdnoklassnikiIE
   8 from .pladform import PladformIE
   9 from .sibnet import SibnetEmbedIE
  10 from .vimeo import VimeoIE
  11 from .youtube import YoutubeIE
  12 from ..utils import (
  13     ExtractorError,
  14     UserNotLive,
  15     clean_html,
  16     get_element_by_class,
  17     get_element_html_by_id,
  18     int_or_none,
  19     join_nonempty,
  20     parse_qs,
  21     parse_resolution,
  22     str_or_none,
  23     str_to_int,
  24     try_call,
  25     unescapeHTML,
  26     unified_timestamp,
  27     update_url_query,
  28     url_or_none,
  29     urlencode_postdata,
  30     urljoin,
  31 )
  32 from ..utils.traversal import require, traverse_obj
  33
  34
  35 class VKBaseIE(InfoExtractor):
  36     _NETRC_MACHINE = 'vk'
  37
  38     def _download_webpage_handle(self, url_or_request, video_id, *args, fatal=True, **kwargs):
  39         response = super()._download_webpage_handle(url_or_request, video_id, *args, fatal=fatal, **kwargs)
  40         challenge_url, cookie = response[1].url if response else '', None
  41         if challenge_url.startswith('https://vk.com/429.html?'):
  42             cookie = self._get_cookies(challenge_url).get('hash429')
  43         if not cookie:
  44             return response
  45
  46         hash429 = hashlib.md5(cookie.value.encode('ascii')).hexdigest()
  47         self._request_webpage(
  48             update_url_query(challenge_url, {'key': hash429}), video_id, fatal=fatal,
  49             note='Resolving WAF challenge', errnote='Failed to bypass WAF challenge')
  50         return super()._download_webpage_handle(url_or_request, video_id, *args, fatal=True, **kwargs)
  51
  52     def _perform_login(self, username, password):
  53         login_page, url_handle = self._download_webpage_handle(
  54             'https://vk.com', None, 'Downloading login page')
  55
  56         login_form = self._hidden_inputs(login_page)
  57
  58         login_form.update({
  59             'email': username.encode('cp1251'),
  60             'pass': password.encode('cp1251'),
  61         })
  62
  63         # vk serves two same remixlhk cookies in Set-Cookie header and expects
  64         # first one to be actually set
  65         self._apply_first_set_cookie_header(url_handle, 'remixlhk')
  66
  67         login_page = self._download_webpage(
  68             'https://vk.com/login', None,
  69             note='Logging in',
  70             data=urlencode_postdata(login_form))
  71
  72         if re.search(r'onLoginFailed', login_page):
  73             raise ExtractorError(
  74                 'Unable to login, incorrect username and/or password', expected=True)
  75
  76     def _download_payload(self, path, video_id, data, fatal=True):
  77         endpoint = f'https://vk.com/{path}.php'
  78         data['al'] = 1
  79         code, payload = self._download_json(
  80             endpoint, video_id, data=urlencode_postdata(data), fatal=fatal,
  81             headers={
  82                 'Referer': endpoint,
  83                 'X-Requested-With': 'XMLHttpRequest',
  84             })['payload']
  85         if code == '3':
  86             self.raise_login_required()
  87         elif code == '8':
  88             raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
  89         return payload
  90
  91
  92 class VKIE(VKBaseIE):
  93     IE_NAME = 'vk'
  94     IE_DESC = 'VK'
  95     _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk(?:(?:video)?\.ru|\.com)/video_ext\.php.+?)\1']
  96     _VALID_URL = r'''(?x)
  97                     https?://
  98                         (?:
  99                             (?:
 100                                 (?:(?:m|new)\.)?vk(?:(?:video)?\.ru|\.com)/video_|
 101                                 (?:www\.)?daxab\.com/
 102                             )
 103                             ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
 104                             (?:
 105                                 (?:(?:m|new)\.)?vk(?:(?:video)?\.ru|\.com)/(?:.+?\?.*?z=)?(?:video|clip)|
 106                                 (?:www\.)?daxab\.com/embed/
 107                             )
 108                             (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
 109                         )
 110                     '''
 111
 112     _TESTS = [
 113         {
 114             'url': 'https://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
 115             'info_dict': {
 116                 'id': '-77521_162222515',
 117                 'ext': 'mp4',
 118                 'title': 'ProtivoGunz - Хуёвая песня',
 119                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
 120                 'uploader_id': '39545378',
 121                 'duration': 195,
 122                 'timestamp': 1329049880,
 123                 'upload_date': '20120212',
 124                 'comment_count': int,
 125                 'like_count': int,
 126                 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
 127             },
 128             'params': {'skip_download': 'm3u8'},
 129         },
 130         {
 131             'url': 'https://vk.com/video205387401_165548505',
 132             'info_dict': {
 133                 'id': '205387401_165548505',
 134                 'ext': 'mp4',
 135                 'title': 'No name',
 136                 'uploader': 'Tom Cruise',
 137                 'uploader_id': '205387401',
 138                 'duration': 9,
 139                 'timestamp': 1374364108,
 140                 'upload_date': '20130720',
 141                 'comment_count': int,
 142                 'like_count': int,
 143                 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
 144             },
 145         },
 146         {
 147             'note': 'Embedded video',
 148             'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
 149             'info_dict': {
 150                 'id': '-77521_162222515',
 151                 'ext': 'mp4',
 152                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
 153                 'title': 'ProtivoGunz - Хуёвая песня',
 154                 'duration': 195,
 155                 'upload_date': '20120212',
 156                 'timestamp': 1329049880,
 157                 'uploader_id': '39545378',
 158                 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
 159             },
 160             'params': {'skip_download': 'm3u8'},
 161         },
 162         {
 163             'url': 'https://vk.com/video-93049196_456239755?list=ln-cBjJ7S4jYYx3ADnmDT',
 164             'info_dict': {
 165                 'id': '-93049196_456239755',
 166                 'ext': 'mp4',
 167                 'title': '8 серия (озвучка)',
 168                 'duration': 8383,
 169                 'comment_count': int,
 170                 'uploader': 'Dizi2021',
 171                 'like_count': int,
 172                 'timestamp': 1640162189,
 173                 'upload_date': '20211222',
 174                 'uploader_id': '-93049196',
 175                 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
 176             },
 177         },
 178         {
 179             'note': 'youtube embed',
 180             'url': 'https://vk.com/video276849682_170681728',
 181             'info_dict': {
 182                 'id': 'V3K4mi0SYkc',
 183                 'ext': 'mp4',
 184                 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
 185                 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
 186                 'duration': 179,
 187                 'upload_date': '20130117',
 188                 'uploader': "Children's Joy Foundation Inc.",
 189                 'uploader_id': '@CJFIofficial',
 190                 'view_count': int,
 191                 'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
 192                 'availability': 'public',
 193                 'like_count': int,
 194                 'live_status': 'not_live',
 195                 'playable_in_embed': True,
 196                 'channel': 'Children\'s Joy Foundation Inc.',
 197                 'uploader_url': 'https://www.youtube.com/@CJFIofficial',
 198                 'thumbnail': r're:https?://.+\.jpg$',
 199                 'tags': 'count:27',
 200                 'start_time': 0.0,
 201                 'categories': ['Nonprofits & Activism'],
 202                 'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
 203                 'channel_follower_count': int,
 204                 'age_limit': 0,
 205                 'timestamp': 1358394935,
 206             },
 207         },
 208         {
 209             'note': 'dailymotion embed',
 210             'url': 'https://vk.com/video-95168827_456239103?list=cca524a0f0d5557e16',
 211             'info_dict': {
 212                 'id': 'x8gfli0',
 213                 'ext': 'mp4',
 214                 'title': 'md5:45410f60ccd4b2760da98cb5fc777d70',
 215                 'description': 'md5:2e71c5c9413735cfa06cf1a166f16c84',
 216                 'uploader': 'Movies and cinema.',
 217                 'upload_date': '20221218',
 218                 'uploader_id': 'x1jdavv',
 219                 'timestamp': 1671387617,
 220                 'age_limit': 0,
 221                 'duration': 2918,
 222                 'like_count': int,
 223                 'view_count': int,
 224                 'thumbnail': r're:https?://.+x1080$',
 225                 'tags': list,
 226             },
 227             'skip': 'This video has been deleted and is no longer available.',
 228         },
 229         {
 230             'url': 'https://vk.com/clips-74006511?z=clip-74006511_456247211',
 231             'info_dict': {
 232                 'id': '-74006511_456247211',
 233                 'ext': 'mp4',
 234                 'comment_count': int,
 235                 'duration': 9,
 236                 'like_count': int,
 237                 'thumbnail': r're:https?://.+(?:\.jpg|getVideoPreview.*)$',
 238                 'timestamp': 1664995597,
 239                 'title': 'Clip by @madempress',
 240                 'upload_date': '20221005',
 241                 'uploader': 'Шальная Императрица',
 242                 'uploader_id': '-74006511',
 243             },
 244         },
 245         {
 246             # video key is extra_data not url\d+
 247             'url': 'https://vk.com/video-110305615_171782105',
 248             'md5': 'e13fcda136f99764872e739d13fac1d1',
 249             'info_dict': {
 250                 'id': '-110305615_171782105',
 251                 'ext': 'mp4',
 252                 'title': 'S-Dance, репетиции к The way show',
 253                 'uploader': 'THE WAY SHOW | 17 апреля',
 254                 'uploader_id': '-110305615',
 255                 'timestamp': 1454859345,
 256                 'upload_date': '20160207',
 257             },
 258             'skip': 'Removed',
 259         },
 260         {
 261             'note': 'finished live stream, postlive_mp4',
 262             'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
 263             'info_dict': {
 264                 'id': '-387766_456242764',
 265                 'ext': 'mp4',
 266                 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
 267                 'uploader': 'Игромания',
 268                 'duration': 5239,
 269                 'upload_date': '20160929',
 270                 'uploader_id': '-387766',
 271                 'timestamp': 1475137527,
 272                 'thumbnail': r're:https?://.+\.jpg$',
 273                 'comment_count': int,
 274                 'like_count': int,
 275             },
 276             'params': {
 277                 'skip_download': True,
 278             },
 279             'skip': 'No formats found',
 280         },
 281         {
 282             # live stream, hls and rtmp links, most likely already finished live
 283             # stream by the time you are reading this comment
 284             'url': 'https://vk.com/video-140332_456239111',
 285             'only_matching': True,
 286         },
 287         {
 288             # removed video, just testing that we match the pattern
 289             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
 290             'only_matching': True,
 291         },
 292         {
 293             # age restricted video, requires vk account credentials
 294             'url': 'https://vk.com/video205387401_164765225',
 295             'only_matching': True,
 296         },
 297         {
 298             # pladform embed
 299             'url': 'https://vk.com/video-76116461_171554880',
 300             'only_matching': True,
 301         },
 302         {
 303             'url': 'http://new.vk.com/video205387401_165548505',
 304             'only_matching': True,
 305         },
 306         {
 307             # This video is no longer available, because its author has been blocked.
 308             'url': 'https://vk.com/video-10639516_456240611',
 309             'only_matching': True,
 310         },
 311         {
 312             # The video is not available in your region.
 313             'url': 'https://vk.com/video-51812607_171445436',
 314             'only_matching': True,
 315         },
 316         {
 317             'url': 'https://vk.com/clip30014565_456240946',
 318             'only_matching': True,
 319         },
 320         {
 321             'url': 'https://vkvideo.ru/video-127553155_456242961',
 322             'only_matching': True,
 323         },
 324         {
 325             'url': 'https://vk.ru/video-220754053_456242564',
 326             'only_matching': True,
 327         },
 328     ]
 329
 330     def _real_extract(self, url):
 331         mobj = self._match_valid_url(url)
 332         video_id = mobj.group('videoid')
 333
 334         mv_data = {}
 335         if video_id:
 336             data = {
 337                 'act': 'show',
 338                 'video': video_id,
 339             }
 340             # Some videos (removed?) can only be downloaded with list id specified
 341             list_id = mobj.group('list_id')
 342             if list_id:
 343                 data['list'] = list_id
 344
 345             payload = self._download_payload('al_video', video_id, data)
 346             info_page = payload[1]
 347             opts = payload[-1]
 348             mv_data = opts.get('mvData') or {}
 349             player = opts.get('player') or {}
 350         else:
 351             video_id = '{}_{}'.format(mobj.group('oid'), mobj.group('id'))
 352
 353             info_page = self._download_webpage(
 354                 'https://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
 355
 356             error_message = self._html_search_regex(
 357                 [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
 358                     r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
 359                 info_page, 'error message', default=None)
 360             if error_message:
 361                 raise ExtractorError(error_message, expected=True)
 362
 363             if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
 364                 raise ExtractorError(
 365                     'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
 366                     expected=True)
 367
 368             ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
 369
 370             ERRORS = {
 371                 r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
 372                 ERROR_COPYRIGHT,
 373
 374                 r'>The video .*? was removed from public access by request of the copyright holder.<':
 375                 ERROR_COPYRIGHT,
 376
 377                 r'<!>Please log in or <':
 378                 'Video %s is only available for registered users, '
 379                 'use --username and --password options to provide account credentials.',
 380
 381                 r'<!>Unknown error':
 382                 'Video %s does not exist.',
 383
 384                 r'<!>Видео временно недоступно':
 385                 'Video %s is temporarily unavailable.',
 386
 387                 r'<!>Access denied':
 388                 'Access denied to video %s.',
 389
 390                 r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
 391                 'Video %s is no longer available, because its author has been blocked.',
 392
 393                 r'<!>This video is no longer available, because its author has been blocked.':
 394                 'Video %s is no longer available, because its author has been blocked.',
 395
 396                 r'<!>This video is no longer available, because it has been deleted.':
 397                 'Video %s is no longer available, because it has been deleted.',
 398
 399                 r'<!>The video .+? is not available in your region.':
 400                 'Video %s is not available in your region.',
 401             }
 402
 403             for error_re, error_msg in ERRORS.items():
 404                 if re.search(error_re, info_page):
 405                     raise ExtractorError(error_msg % video_id, expected=True)
 406
 407             player = self._parse_json(self._search_regex(
 408                 r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
 409                 info_page, 'player params'), video_id)
 410
 411         youtube_url = YoutubeIE._extract_url(info_page)
 412         if youtube_url:
 413             return self.url_result(youtube_url, YoutubeIE.ie_key())
 414
 415         vimeo_url = VimeoIE._extract_url(url, info_page)
 416         if vimeo_url is not None:
 417             return self.url_result(vimeo_url, VimeoIE.ie_key())
 418
 419         pladform_url = PladformIE._extract_url(info_page)
 420         if pladform_url:
 421             return self.url_result(pladform_url, PladformIE.ie_key())
 422
 423         m_rutube = re.search(
 424             r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
 425         if m_rutube is not None:
 426             rutube_url = self._proto_relative_url(
 427                 m_rutube.group(1).replace('\\', ''))
 428             return self.url_result(rutube_url)
 429
 430         dailymotion_url = next(DailymotionIE._extract_embed_urls(url, info_page), None)
 431         if dailymotion_url:
 432             return self.url_result(dailymotion_url, DailymotionIE.ie_key())
 433
 434         odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
 435         if odnoklassniki_url:
 436             return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
 437
 438         sibnet_url = next(SibnetEmbedIE._extract_embed_urls(url, info_page), None)
 439         if sibnet_url:
 440             return self.url_result(sibnet_url)
 441
 442         m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
 443         if m_opts:
 444             m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
 445             if m_opts_url:
 446                 opts_url = m_opts_url.group(1)
 447                 if opts_url.startswith('//'):
 448                     opts_url = 'https:' + opts_url
 449                 return self.url_result(opts_url)
 450
 451         data = player['params'][0]
 452         title = unescapeHTML(data['md_title'])
 453
 454         # 2 = live
 455         # 3 = post live (finished live)
 456         is_live = data.get('live') == 2
 457
 458         timestamp = unified_timestamp(self._html_search_regex(
 459             r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
 460             'upload date', default=None)) or int_or_none(data.get('date'))
 461
 462         view_count = str_to_int(self._search_regex(
 463             r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
 464             info_page, 'view count', default=None))
 465
 466         formats = []
 467         subtitles = {}
 468         for format_id, format_url in data.items():
 469             format_url = url_or_none(format_url)
 470             if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
 471                 continue
 472             if (format_id.startswith(('url', 'cache'))
 473                     or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
 474                 height = int_or_none(self._search_regex(
 475                     r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
 476                 formats.append({
 477                     'format_id': format_id,
 478                     'url': format_url,
 479                     'ext': 'mp4',
 480                     'source_preference': 1,
 481                     'height': height,
 482                 })
 483             elif format_id.startswith('hls') and format_id != 'hls_live_playback':
 484                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
 485                     format_url, video_id, 'mp4', 'm3u8_native',
 486                     m3u8_id=format_id, fatal=False, live=is_live)
 487                 formats.extend(fmts)
 488                 self._merge_subtitles(subs, target=subtitles)
 489             elif format_id.startswith('dash') and format_id not in ('dash_live_playback', 'dash_uni'):
 490                 fmts, subs = self._extract_mpd_formats_and_subtitles(
 491                     format_url, video_id, mpd_id=format_id, fatal=False)
 492                 formats.extend(fmts)
 493                 self._merge_subtitles(subs, target=subtitles)
 494             elif format_id == 'rtmp':
 495                 formats.append({
 496                     'format_id': format_id,
 497                     'url': format_url,
 498                     'ext': 'flv',
 499                 })
 500
 501         for sub in data.get('subs') or {}:
 502             subtitles.setdefault(sub.get('lang', 'en'), []).append({
 503                 'ext': sub.get('title', '.srt').split('.')[-1],
 504                 'url': url_or_none(sub.get('url')),
 505             })
 506
 507         return {
 508             'id': video_id,
 509             'formats': formats,
 510             'title': title,
 511             'thumbnail': data.get('jpg'),
 512             'uploader': data.get('md_author'),
 513             'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
 514             'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
 515             'timestamp': timestamp,
 516             'view_count': view_count,
 517             'like_count': int_or_none(mv_data.get('likes')),
 518             'comment_count': int_or_none(mv_data.get('commcount')),
 519             'is_live': is_live,
 520             'subtitles': subtitles,
 521             '_format_sort_fields': ('res', 'source'),
 522         }
 523
 524
 525 class VKUserVideosIE(VKBaseIE):
 526     IE_NAME = 'vk:uservideos'
 527     IE_DESC = "VK - User's Videos"
 528     _BASE_URL_RE = r'https?://(?:(?:m|new)\.)?vk(?:video\.ru|\.com/video)'
 529     _VALID_URL = [
 530         rf'{_BASE_URL_RE}/playlist/(?P<id>-?\d+_\d+)',
 531         rf'{_BASE_URL_RE}/(?P<id>@[^/?#]+)(?:/all)?/?(?!\?.*\bz=video)(?:[?#]|$)',
 532     ]
 533     _TESTS = [{
 534         'url': 'https://vk.com/video/@mobidevices',
 535         'info_dict': {
 536             'id': '-17892518_all',
 537         },
 538         'playlist_mincount': 1355,
 539     }, {
 540         'url': 'https://vk.com/video/@mobidevices?section=uploaded',
 541         'info_dict': {
 542             'id': '-17892518_uploaded',
 543         },
 544         'playlist_mincount': 182,
 545     }, {
 546         'url': 'https://vkvideo.ru/playlist/-204353299_426',
 547         'info_dict': {
 548             'id': '-204353299_playlist_426',
 549         },
 550         'playlist_mincount': 33,
 551     }, {
 552         'url': 'https://vk.com/video/@gorkyfilmstudio/all',
 553         'only_matching': True,
 554     }, {
 555         'url': 'https://vkvideo.ru/@mobidevices',
 556         'only_matching': True,
 557     }, {
 558         'url': 'https://vk.com/video/playlist/-174476437_2',
 559         'only_matching': True,
 560     }]
 561     _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
 562
 563     def _entries(self, page_id, section):
 564         video_list_json = self._download_payload('al_video', page_id, {
 565             'act': 'load_videos_silent',
 566             'offset': 0,
 567             'oid': page_id,
 568             'section': section,
 569         })[0][section]
 570         count = video_list_json['count']
 571         total = video_list_json['total']
 572         video_list = video_list_json['list']
 573
 574         while True:
 575             for video in video_list:
 576                 v = self._VIDEO._make(video[:2])
 577                 video_id = '%d_%d' % (v.owner_id, v.id)
 578                 yield self.url_result(
 579                     'https://vk.com/video' + video_id, VKIE.ie_key(), video_id)
 580             if count >= total:
 581                 break
 582             video_list_json = self._download_payload('al_video', page_id, {
 583                 'act': 'load_videos_silent',
 584                 'offset': count,
 585                 'oid': page_id,
 586                 'section': section,
 587             })[0][section]
 588             new_count = video_list_json['count']
 589             if not new_count:
 590                 self.to_screen(f'{page_id}: Skipping {total - count} unavailable videos')
 591                 break
 592             count += new_count
 593             video_list = video_list_json['list']
 594
 595     def _real_extract(self, url):
 596         u_id = self._match_id(url)
 597         webpage = self._download_webpage(url, u_id)
 598
 599         if u_id.startswith('@'):
 600             page_id = traverse_obj(
 601                 self._search_json(r'\bvar newCur\s*=', webpage, 'cursor data', u_id),
 602                 ('oid', {int}, {str_or_none}, {require('page id')}))
 603             section = traverse_obj(parse_qs(url), ('section', 0)) or 'all'
 604         else:
 605             page_id, _, section = u_id.partition('_')
 606             section = f'playlist_{section}'
 607
 608         playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
 609         return self.playlist_result(self._entries(page_id, section), f'{page_id}_{section}', playlist_title)
 610
 611
 612 class VKWallPostIE(VKBaseIE):
 613     IE_NAME = 'vk:wallpost'
 614     _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
 615     _TESTS = [{
 616         # public page URL, audio playlist
 617         'url': 'https://vk.com/bs.official?w=wall-23538238_35',
 618         'info_dict': {
 619             'id': '-23538238_35',
 620             'title': 'Black Shadow - Wall post -23538238_35',
 621             'description': 'md5:190c78f905a53e0de793d83933c6e67f',
 622         },
 623         'playlist': [{
 624             'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
 625             'info_dict': {
 626                 'id': '135220665_111806521',
 627                 'ext': 'm4a',
 628                 'title': 'Black Shadow - Слепое Верование',
 629                 'duration': 370,
 630                 'uploader': 'Black Shadow',
 631                 'artist': 'Black Shadow',
 632                 'track': 'Слепое Верование',
 633             },
 634         }, {
 635             'md5': '4cc7e804579122b17ea95af7834c9233',
 636             'info_dict': {
 637                 'id': '135220665_111802303',
 638                 'ext': 'm4a',
 639                 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
 640                 'duration': 423,
 641                 'uploader': 'Black Shadow',
 642                 'artist': 'Black Shadow',
 643                 'track': 'Война - Негасимое Бездны Пламя!',
 644             },
 645         }],
 646         'params': {
 647             'skip_download': True,
 648         },
 649     }, {
 650         # single YouTube embed with irrelevant reaction videos
 651         'url': 'https://vk.com/wall-32370614_7173954',
 652         'info_dict': {
 653             'id': '-32370614_7173954',
 654             'title': 'md5:9f93c405bbc00061d34007d78c75e3bc',
 655             'description': 'md5:953b811f26fa9f21ee5856e2ea8e68fc',
 656         },
 657         'playlist_count': 1,
 658     }, {
 659         # wall page URL
 660         'url': 'https://vk.com/wall-23538238_35',
 661         'only_matching': True,
 662     }, {
 663         # mobile wall page URL
 664         'url': 'https://m.vk.com/wall-23538238_35',
 665         'only_matching': True,
 666     }]
 667     _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
 668     _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
 669
 670     def _decode(self, enc):
 671         dec = ''
 672         e = n = 0
 673         for c in enc:
 674             r = self._BASE64_CHARS.index(c)
 675             cond = n % 4
 676             e = 64 * e + r if cond else r
 677             n += 1
 678             if cond:
 679                 dec += chr(255 & e >> (-2 * n & 6))
 680         return dec
 681
 682     def _unmask_url(self, mask_url, vk_id):
 683         if 'audio_api_unavailable' in mask_url:
 684             extra = mask_url.split('?extra=')[1].split('#')
 685             func, base = self._decode(extra[1]).split(chr(11))
 686             mask_url = list(self._decode(extra[0]))
 687             url_len = len(mask_url)
 688             indexes = [None] * url_len
 689             index = int(base) ^ vk_id
 690             for n in range(url_len - 1, -1, -1):
 691                 index = (url_len * (n + 1) ^ index + n) % url_len
 692                 indexes[n] = index
 693             for n in range(1, url_len):
 694                 c = mask_url[n]
 695                 index = indexes[url_len - 1 - n]
 696                 mask_url[n] = mask_url[index]
 697                 mask_url[index] = c
 698             mask_url = ''.join(mask_url)
 699         return mask_url
 700
 701     def _real_extract(self, url):
 702         post_id = self._match_id(url)
 703
 704         webpage = self._download_payload('wkview', post_id, {
 705             'act': 'show',
 706             'w': 'wall' + post_id,
 707         })[1]
 708
 709         uploader = clean_html(get_element_by_class('PostHeaderTitle__authorName', webpage))
 710
 711         entries = []
 712
 713         for audio in re.findall(r'data-audio="([^"]+)', webpage):
 714             audio = self._parse_json(unescapeHTML(audio), post_id)
 715             if not audio['url']:
 716                 continue
 717             title = unescapeHTML(audio.get('title'))
 718             artist = unescapeHTML(audio.get('artist'))
 719             entries.append({
 720                 'id': f'{audio["owner_id"]}_{audio["id"]}',
 721                 'title': join_nonempty(artist, title, delim=' - '),
 722                 'thumbnails': try_call(lambda: [{'url': u} for u in audio['coverUrl'].split(',')]),
 723                 'duration': int_or_none(audio.get('duration')),
 724                 'uploader': uploader,
 725                 'artist': artist,
 726                 'track': title,
 727                 'formats': [{
 728                     'url': audio['url'],
 729                     'ext': 'm4a',
 730                     'vcodec': 'none',
 731                     'acodec': 'mp3',
 732                     'container': 'm4a_dash',
 733                 }],
 734             })
 735
 736         entries.extend(self.url_result(urljoin(url, entry), VKIE) for entry in set(re.findall(
 737             r'<a[^>]+href=(?:["\'])(/video(?:-?[\d_]+)[^"\']*)',
 738             get_element_html_by_id('wl_post_body', webpage))))
 739
 740         return self.playlist_result(
 741             entries, post_id, join_nonempty(uploader, f'Wall post {post_id}', delim=' - '),
 742             clean_html(get_element_by_class('wall_post_text', webpage)))
 743
 744
 745 class VKPlayBaseIE(InfoExtractor):
 746     _BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vk(?:play|video)\.ru)/'
 747     _RESOLUTIONS = {
 748         'tiny': '256x144',
 749         'lowest': '426x240',
 750         'low': '640x360',
 751         'medium': '852x480',
 752         'high': '1280x720',
 753         'full_hd': '1920x1080',
 754         'quad_hd': '2560x1440',
 755     }
 756
 757     def _extract_from_initial_state(self, url, video_id, path):
 758         webpage = self._download_webpage(url, video_id)
 759         video_info = traverse_obj(self._search_json(
 760             r'<script[^>]+\bid="initial-state"[^>]*>', webpage, 'initial state', video_id),
 761             path, expected_type=dict)
 762         if not video_info:
 763             raise ExtractorError('Unable to extract video info from html inline initial state')
 764         return video_info
 765
 766     def _extract_formats(self, stream_info, video_id):
 767         formats = []
 768         for stream in traverse_obj(stream_info, (
 769                 'data', 0, 'playerUrls', lambda _, v: url_or_none(v['url']) and v['type'])):
 770             url = stream['url']
 771             format_id = str_or_none(stream['type'])
 772             if format_id in ('hls', 'live_hls', 'live_playback_hls') or '.m3u8' in url:
 773                 formats.extend(self._extract_m3u8_formats(url, video_id, m3u8_id=format_id, fatal=False))
 774             elif format_id == 'dash':
 775                 formats.extend(self._extract_mpd_formats(url, video_id, mpd_id=format_id, fatal=False))
 776             elif format_id in ('live_dash', 'live_playback_dash'):
 777                 self.write_debug(f'Not extracting unsupported format "{format_id}"')
 778             else:
 779                 formats.append({
 780                     'url': url,
 781                     'ext': 'mp4',
 782                     'format_id': format_id,
 783                     **parse_resolution(self._RESOLUTIONS.get(format_id)),
 784                 })
 785         return formats
 786
 787     def _extract_common_meta(self, stream_info):
 788         return traverse_obj(stream_info, {
 789             'id': ('id', {str_or_none}),
 790             'title': ('title', {str}),
 791             'release_timestamp': ('startTime', {int_or_none}),
 792             'thumbnail': ('previewUrl', {url_or_none}),
 793             'view_count': ('count', 'views', {int_or_none}),
 794             'like_count': ('count', 'likes', {int_or_none}),
 795             'categories': ('category', 'title', {str}, {lambda x: [x] if x else None}),
 796             'uploader': (('user', ('blog', 'owner')), 'nick', {str}),
 797             'uploader_id': (('user', ('blog', 'owner')), 'id', {str_or_none}),
 798             'duration': ('duration', {int_or_none}),
 799             'is_live': ('isOnline', {bool}),
 800             'concurrent_view_count': ('count', 'viewers', {int_or_none}),
 801         }, get_all=False)
 802
 803
 804 class VKPlayIE(VKPlayBaseIE):
 805     _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<username>[^/#?]+)/record/(?P<id>[\da-f-]+)'
 806     _TESTS = [{
 807         'url': 'https://vkplay.live/zitsmann/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da',
 808         'info_dict': {
 809             'id': 'f5e6e3b5-dc52-4d14-965d-0680dd2882da',
 810             'ext': 'mp4',
 811             'title': 'Atomic Heart (пробуем!) спасибо подписчику EKZO!',
 812             'uploader': 'ZitsmanN',
 813             'uploader_id': '13159830',
 814             'release_timestamp': 1683461378,
 815             'release_date': '20230507',
 816             'thumbnail': r're:https://[^/]+/public_video_stream/record/f5e6e3b5-dc52-4d14-965d-0680dd2882da/preview',
 817             'duration': 10608,
 818             'view_count': int,
 819             'like_count': int,
 820             'categories': ['Atomic Heart'],
 821         },
 822         'params': {'skip_download': 'm3u8'},
 823     }, {
 824         'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
 825         'only_matching': True,
 826     }, {
 827         'url': 'https://live.vkvideo.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
 828         'only_matching': True,
 829     }]
 830
 831     def _real_extract(self, url):
 832         username, video_id = self._match_valid_url(url).groups()
 833
 834         record_info = traverse_obj(self._download_json(
 835             f'https://api.vkplay.live/v1/blog/{username}/public_video_stream/record/{video_id}', video_id, fatal=False),
 836             ('data', 'record', {dict}))
 837         if not record_info:
 838             record_info = self._extract_from_initial_state(url, video_id, ('record', 'currentRecord', 'data'))
 839
 840         return {
 841             **self._extract_common_meta(record_info),
 842             'id': video_id,
 843             'formats': self._extract_formats(record_info, video_id),
 844         }
 845
 846
 847 class VKPlayLiveIE(VKPlayBaseIE):
 848     _VALID_URL = rf'{VKPlayBaseIE._BASE_URL_RE}(?P<id>[^/#?]+)/?(?:[#?]|$)'
 849     _TESTS = [{
 850         'url': 'https://vkplay.live/bayda',
 851         'info_dict': {
 852             'id': 'f02c321e-427b-408d-b12f-ae34e53e0ea2',
 853             'ext': 'mp4',
 854             'title': r're:эскапизм крута .*',
 855             'uploader': 'Bayda',
 856             'uploader_id': '12279401',
 857             'release_timestamp': 1687209962,
 858             'release_date': '20230619',
 859             'thumbnail': r're:https://[^/]+/public_video_stream/12279401/preview',
 860             'view_count': int,
 861             'concurrent_view_count': int,
 862             'like_count': int,
 863             'categories': ['EVE Online'],
 864             'live_status': 'is_live',
 865         },
 866         'skip': 'livestream',
 867         'params': {'skip_download': True},
 868     }, {
 869         'url': 'https://live.vkplay.ru/lebwa',
 870         'only_matching': True,
 871     }, {
 872         'url': 'https://live.vkvideo.ru/panterka',
 873         'only_matching': True,
 874     }]
 875
 876     def _real_extract(self, url):
 877         username = self._match_id(url)
 878
 879         stream_info = self._download_json(
 880             f'https://api.vkplay.live/v1/blog/{username}/public_video_stream', username, fatal=False)
 881         if not stream_info:
 882             stream_info = self._extract_from_initial_state(url, username, ('stream', 'stream', 'data', 'stream'))
 883
 884         formats = self._extract_formats(stream_info, username)
 885         if not formats and not traverse_obj(stream_info, ('isOnline', {bool})):
 886             raise UserNotLive(video_id=username)
 887
 888         return {
 889             **self._extract_common_meta(stream_info),
 890             'formats': formats,
 891         }