yt_dlp/extractor/bilibili.py

   1 import base64
   2 import functools
   3 import hashlib
   4 import itertools
   5 import json
   6 import math
   7 import re
   8 import time
   9 import urllib.parse
  10 import uuid
  11
  12 from .common import InfoExtractor, SearchInfoExtractor
  13 from ..dependencies import Cryptodome
  14 from ..networking.exceptions import HTTPError
  15 from ..utils import (
  16     ExtractorError,
  17     GeoRestrictedError,
  18     InAdvancePagedList,
  19     OnDemandPagedList,
  20     bool_or_none,
  21     clean_html,
  22     determine_ext,
  23     filter_dict,
  24     float_or_none,
  25     format_field,
  26     get_element_by_class,
  27     int_or_none,
  28     join_nonempty,
  29     make_archive_id,
  30     merge_dicts,
  31     mimetype2ext,
  32     parse_count,
  33     parse_qs,
  34     parse_resolution,
  35     qualities,
  36     smuggle_url,
  37     srt_subtitles_timecode,
  38     str_or_none,
  39     traverse_obj,
  40     unified_timestamp,
  41     unsmuggle_url,
  42     url_or_none,
  43     urlencode_postdata,
  44     variadic,
  45 )
  46
  47
  48 class BilibiliBaseIE(InfoExtractor):
  49     _HEADERS = {'Referer': 'https://www.bilibili.com/'}
  50     _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
  51     _WBI_KEY_CACHE_TIMEOUT = 30  # exact expire timeout is unclear, use 30s for one session
  52     _wbi_key_cache = {}
  53
  54     @property
  55     def is_logged_in(self):
  56         return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
  57
  58     def _check_missing_formats(self, play_info, formats):
  59         parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
  60         missing_formats = join_nonempty(*[
  61             traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
  62             for fmt in traverse_obj(play_info, (
  63                 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
  64         if missing_formats:
  65             self.to_screen(
  66                 f'Format(s) {missing_formats} are missing; you have to login or '
  67                 f'become a premium member to download them. {self._login_hint()}')
  68
  69     def extract_formats(self, play_info):
  70         format_names = {
  71             r['quality']: traverse_obj(r, 'new_description', 'display_desc')
  72             for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
  73         }
  74
  75         audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
  76         flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
  77         if flac_audio:
  78             audios.append(flac_audio)
  79         formats = [{
  80             'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
  81             'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
  82             'acodec': traverse_obj(audio, ('codecs', {str.lower})),
  83             'vcodec': 'none',
  84             'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
  85             'filesize': int_or_none(audio.get('size')),
  86             'format_id': str_or_none(audio.get('id')),
  87         } for audio in audios]
  88
  89         formats.extend({
  90             'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
  91             'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
  92             'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
  93             'width': int_or_none(video.get('width')),
  94             'height': int_or_none(video.get('height')),
  95             'vcodec': video.get('codecs'),
  96             'acodec': 'none' if audios else None,
  97             'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
  98             'tbr': float_or_none(video.get('bandwidth'), scale=1000),
  99             'filesize': int_or_none(video.get('size')),
 100             'quality': int_or_none(video.get('id')),
 101             'format_id': traverse_obj(
 102                 video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
 103                 ('id', {str_or_none}), get_all=False),
 104             'format': format_names.get(video.get('id')),
 105         } for video in traverse_obj(play_info, ('dash', 'video', ...)))
 106
 107         if formats:
 108             self._check_missing_formats(play_info, formats)
 109
 110         fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
 111             'url': ('url', {url_or_none}),
 112             'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
 113             'filesize': ('size', {int_or_none}),
 114         }))
 115         if fragments:
 116             formats.append({
 117                 'url': fragments[0]['url'],
 118                 'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
 119                 **({
 120                     'fragments': fragments,
 121                     'protocol': 'http_dash_segments',
 122                 } if len(fragments) > 1 else {}),
 123                 **traverse_obj(play_info, {
 124                     'quality': ('quality', {int_or_none}),
 125                     'format_id': ('quality', {str_or_none}),
 126                     'format_note': ('quality', {lambda x: format_names.get(x)}),
 127                     'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
 128                 }),
 129                 **parse_resolution(format_names.get(play_info.get('quality'))),
 130             })
 131         return formats
 132
 133     def _get_wbi_key(self, video_id):
 134         if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
 135             return self._wbi_key_cache['key']
 136
 137         session_data = self._download_json(
 138             'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
 139
 140         lookup = ''.join(traverse_obj(session_data, (
 141             'data', 'wbi_img', ('img_url', 'sub_url'),
 142             {lambda x: x.rpartition('/')[2].partition('.')[0]})))
 143
 144         # from getMixinKey() in the vendor js
 145         mixin_key_enc_tab = [
 146             46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
 147             33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
 148             61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
 149             36, 20, 34, 44, 52,
 150         ]
 151
 152         self._wbi_key_cache.update({
 153             'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
 154             'ts': time.time(),
 155         })
 156         return self._wbi_key_cache['key']
 157
 158     def _sign_wbi(self, params, video_id):
 159         params['wts'] = round(time.time())
 160         params = {
 161             k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
 162             for k, v in sorted(params.items())
 163         }
 164         query = urllib.parse.urlencode(params)
 165         params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
 166         return params
 167
 168     def _download_playinfo(self, bvid, cid, headers=None, qn=None):
 169         params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
 170         if qn:
 171             params['qn'] = qn
 172         return self._download_json(
 173             'https://api.bilibili.com/x/player/wbi/playurl', bvid,
 174             query=self._sign_wbi(params, bvid), headers=headers,
 175             note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
 176
 177     def json2srt(self, json_data):
 178         srt_data = ''
 179         for idx, line in enumerate(json_data.get('body') or []):
 180             srt_data += (f'{idx + 1}\n'
 181                          f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
 182                          f'{line["content"]}\n\n')
 183         return srt_data
 184
 185     def _get_subtitles(self, video_id, cid, aid=None):
 186         subtitles = {
 187             'danmaku': [{
 188                 'ext': 'xml',
 189                 'url': f'https://comment.bilibili.com/{cid}.xml',
 190             }],
 191         }
 192
 193         video_info = self._download_json(
 194             'https://api.bilibili.com/x/player/v2', video_id,
 195             query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
 196             note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
 197         if traverse_obj(video_info, ('data', 'need_login_subtitle')):
 198             self.report_warning(
 199                 f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
 200         for s in traverse_obj(video_info, (
 201                 'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
 202             subtitles.setdefault(s['lan'], []).append({
 203                 'ext': 'srt',
 204                 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
 205             })
 206         return subtitles
 207
 208     def _get_chapters(self, aid, cid):
 209         chapters = aid and cid and self._download_json(
 210             'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
 211             note='Extracting chapters', fatal=False, headers=self._HEADERS)
 212         return traverse_obj(chapters, ('data', 'view_points', ..., {
 213             'title': 'content',
 214             'start_time': 'from',
 215             'end_time': 'to',
 216         })) or None
 217
 218     def _get_comments(self, aid):
 219         for idx in itertools.count(1):
 220             replies = traverse_obj(
 221                 self._download_json(
 222                     f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
 223                     aid, note=f'Extracting comments from page {idx}', fatal=False),
 224                 ('data', 'replies'))
 225             if not replies:
 226                 return
 227             for children in map(self._get_all_children, replies):
 228                 yield from children
 229
 230     def _get_all_children(self, reply):
 231         yield {
 232             'author': traverse_obj(reply, ('member', 'uname')),
 233             'author_id': traverse_obj(reply, ('member', 'mid')),
 234             'id': reply.get('rpid'),
 235             'text': traverse_obj(reply, ('content', 'message')),
 236             'timestamp': reply.get('ctime'),
 237             'parent': reply.get('parent') or 'root',
 238         }
 239         for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
 240             yield from children
 241
 242     def _get_episodes_from_season(self, ss_id, url):
 243         season_info = self._download_json(
 244             'https://api.bilibili.com/pgc/web/season/section', ss_id,
 245             note='Downloading season info', query={'season_id': ss_id},
 246             headers={'Referer': url, **self.geo_verification_headers()})
 247
 248         for entry in traverse_obj(season_info, (
 249                 'result', 'main_section', 'episodes',
 250                 lambda _, v: url_or_none(v['share_url']) and v['id'])):
 251             yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
 252
 253     def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
 254         cid_edges = cid_edges or {}
 255         division_data = self._download_json(
 256             'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
 257             query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
 258             note=f'Extracting divisions from edge {edge_id}')
 259         edges.setdefault(edge_id, {}).update(
 260             traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
 261                 'title': ('title', {str}),
 262                 'cid': ('cid', {int_or_none}),
 263             }), get_all=False))
 264
 265         edges[edge_id].update(traverse_obj(division_data, ('data', {
 266             'title': ('title', {str}),
 267             'choices': ('edges', 'questions', ..., 'choices', ..., {
 268                 'edge_id': ('id', {int_or_none}),
 269                 'cid': ('cid', {int_or_none}),
 270                 'text': ('option', {str}),
 271             }),
 272         })))
 273         # use dict to combine edges that use the same video section (same cid)
 274         cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
 275         for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
 276             if choice['edge_id'] not in edges:
 277                 edges[choice['edge_id']] = {'cid': choice['cid']}
 278                 self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
 279         return cid_edges
 280
 281     def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
 282         graph_version = traverse_obj(
 283             self._download_json(
 284                 'https://api.bilibili.com/x/player/wbi/v2', video_id,
 285                 'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
 286             ('data', 'interaction', 'graph_version', {int_or_none}))
 287         cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
 288         for cid, edges in cid_edges.items():
 289             play_info = self._download_playinfo(video_id, cid, headers=headers)
 290             yield {
 291                 **metainfo,
 292                 'id': f'{video_id}_{cid}',
 293                 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
 294                 'formats': self.extract_formats(play_info),
 295                 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
 296                 'duration': float_or_none(play_info.get('timelength'), scale=1000),
 297                 'subtitles': self.extract_subtitles(video_id, cid),
 298             }
 299
 300
 301 class BiliBiliIE(BilibiliBaseIE):
 302     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
 303
 304     _TESTS = [{
 305         'url': 'https://www.bilibili.com/video/BV13x41117TL',
 306         'info_dict': {
 307             'id': 'BV13x41117TL',
 308             'title': '阿滴英文｜英文歌分享#6 "Closer',
 309             'ext': 'mp4',
 310             'description': '滴妹今天唱Closer給你聽! 有史以来，被推最多次也是最久的歌曲，其实歌词跟我原本想像差蛮多的，不过还是好听！ 微博@阿滴英文',
 311             'uploader_id': '65880958',
 312             'uploader': '阿滴英文',
 313             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 314             'duration': 554.117,
 315             'tags': list,
 316             'comment_count': int,
 317             'upload_date': '20170301',
 318             'timestamp': 1488353834,
 319             'like_count': int,
 320             'view_count': int,
 321             '_old_archive_ids': ['bilibili 8903802_part1'],
 322         },
 323     }, {
 324         'note': 'old av URL version',
 325         'url': 'http://www.bilibili.com/video/av1074402/',
 326         'info_dict': {
 327             'id': 'BV11x411K7CN',
 328             'ext': 'mp4',
 329             'title': '【金坷垃】金泡沫',
 330             'uploader': '菊子桑',
 331             'uploader_id': '156160',
 332             'duration': 308.36,
 333             'upload_date': '20140420',
 334             'timestamp': 1397983878,
 335             'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
 336             'like_count': int,
 337             'comment_count': int,
 338             'view_count': int,
 339             'tags': list,
 340             'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
 341             '_old_archive_ids': ['bilibili 1074402_part1'],
 342         },
 343         'params': {'skip_download': True},
 344     }, {
 345         'note': 'Anthology',
 346         'url': 'https://www.bilibili.com/video/BV1bK411W797',
 347         'info_dict': {
 348             'id': 'BV1bK411W797',
 349             'title': '物语中的人物是如何吐槽自己的OP的',
 350         },
 351         'playlist_count': 18,
 352         'playlist': [{
 353             'info_dict': {
 354                 'id': 'BV1bK411W797_p1',
 355                 'ext': 'mp4',
 356                 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
 357                 'tags': 'count:10',
 358                 'timestamp': 1589601697,
 359                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 360                 'uploader': '打牌还是打桩',
 361                 'uploader_id': '150259984',
 362                 'like_count': int,
 363                 'comment_count': int,
 364                 'upload_date': '20200516',
 365                 'view_count': int,
 366                 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
 367                 'duration': 90.314,
 368                 '_old_archive_ids': ['bilibili 498159642_part1'],
 369             },
 370         }],
 371     }, {
 372         'note': 'Specific page of Anthology',
 373         'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
 374         'info_dict': {
 375             'id': 'BV1bK411W797_p1',
 376             'ext': 'mp4',
 377             'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
 378             'tags': 'count:10',
 379             'timestamp': 1589601697,
 380             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 381             'uploader': '打牌还是打桩',
 382             'uploader_id': '150259984',
 383             'like_count': int,
 384             'comment_count': int,
 385             'upload_date': '20200516',
 386             'view_count': int,
 387             'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
 388             'duration': 90.314,
 389             '_old_archive_ids': ['bilibili 498159642_part1'],
 390         },
 391     }, {
 392         'url': 'https://www.bilibili.com/video/av8903802/',
 393         'info_dict': {
 394             'id': 'BV13x41117TL',
 395             'ext': 'mp4',
 396             'title': '阿滴英文｜英文歌分享#6 "Closer',
 397             'upload_date': '20170301',
 398             'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
 399             'timestamp': 1488353834,
 400             'uploader_id': '65880958',
 401             'uploader': '阿滴英文',
 402             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 403             'duration': 554.117,
 404             'tags': list,
 405             'comment_count': int,
 406             'view_count': int,
 407             'like_count': int,
 408             '_old_archive_ids': ['bilibili 8903802_part1'],
 409         },
 410         'params': {
 411             'skip_download': True,
 412         },
 413     }, {
 414         'note': 'video has chapter',
 415         'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
 416         'info_dict': {
 417             'id': 'BV1vL411G7N7',
 418             'ext': 'mp4',
 419             'title': '如何为你的B站视频添加进度条分段',
 420             'timestamp': 1634554558,
 421             'upload_date': '20211018',
 422             'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
 423             'tags': list,
 424             'uploader': '爱喝咖啡的当麻',
 425             'duration': 669.482,
 426             'uploader_id': '1680903',
 427             'chapters': 'count:6',
 428             'comment_count': int,
 429             'view_count': int,
 430             'like_count': int,
 431             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 432             '_old_archive_ids': ['bilibili 463665680_part1'],
 433         },
 434         'params': {'skip_download': True},
 435     }, {
 436         'note': 'video redirects to festival page',
 437         'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
 438         'info_dict': {
 439             'id': 'BV1wP4y1P72h',
 440             'ext': 'mp4',
 441             'title': '牛虎年相交之际，一首传统民族打击乐《牛斗虎》祝大家新春快乐，虎年大吉！【bilibili音乐虎闹新春】',
 442             'timestamp': 1643947497,
 443             'upload_date': '20220204',
 444             'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
 445             'uploader': '叨叨冯聊音乐',
 446             'duration': 246.719,
 447             'uploader_id': '528182630',
 448             'view_count': int,
 449             'like_count': int,
 450             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 451             '_old_archive_ids': ['bilibili 893839363_part1'],
 452         },
 453     }, {
 454         'note': 'newer festival video',
 455         'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
 456         'info_dict': {
 457             'id': 'BV1ay4y1d77f',
 458             'ext': 'mp4',
 459             'title': '【崩坏3新春剧场】为特别的你送上祝福！',
 460             'timestamp': 1674273600,
 461             'upload_date': '20230121',
 462             'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
 463             'uploader': '果蝇轰',
 464             'duration': 1111.722,
 465             'uploader_id': '8469526',
 466             'view_count': int,
 467             'like_count': int,
 468             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 469             '_old_archive_ids': ['bilibili 778246196_part1'],
 470         },
 471     }, {
 472         'note': 'legacy flv/mp4 video',
 473         'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
 474         'info_dict': {
 475             'id': 'BV1ms411Q7vw_p4',
 476             'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
 477             'timestamp': 1458222815,
 478             'upload_date': '20160317',
 479             'description': '云南方言快乐生产线出品',
 480             'duration': float,
 481             'uploader': '一笑颠天',
 482             'uploader_id': '3916081',
 483             'view_count': int,
 484             'comment_count': int,
 485             'like_count': int,
 486             'tags': list,
 487             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 488             '_old_archive_ids': ['bilibili 4120229_part4'],
 489         },
 490         'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
 491         'playlist_count': 19,
 492         'playlist': [{
 493             'info_dict': {
 494                 'id': 'BV1ms411Q7vw_p4_0',
 495                 'ext': 'flv',
 496                 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
 497                 'duration': 399.102,
 498             },
 499         }],
 500     }, {
 501         'note': 'legacy mp4-only video',
 502         'url': 'https://www.bilibili.com/video/BV1nx411u79K',
 503         'info_dict': {
 504             'id': 'BV1nx411u79K',
 505             'ext': 'mp4',
 506             'title': '【练习室】201603声乐练习《No Air》with VigoVan',
 507             'timestamp': 1508893551,
 508             'upload_date': '20171025',
 509             'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
 510             'duration': 80.384,
 511             'uploader': '伯远',
 512             'uploader_id': '10584494',
 513             'comment_count': int,
 514             'view_count': int,
 515             'like_count': int,
 516             'tags': list,
 517             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 518             '_old_archive_ids': ['bilibili 15700301_part1'],
 519         },
 520     }, {
 521         'note': 'interactive/split-path video',
 522         'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
 523         'info_dict': {
 524             'id': 'BV1af4y1H7ga',
 525             'title': '【互动游戏】花了大半年时间做的自我介绍~请查收！！',
 526             'timestamp': 1630500414,
 527             'upload_date': '20210901',
 528             'description': 'md5:01113e39ab06e28042d74ac356a08786',
 529             'tags': list,
 530             'uploader': '钉宫妮妮Ninico',
 531             'duration': 1503,
 532             'uploader_id': '8881297',
 533             'comment_count': int,
 534             'view_count': int,
 535             'like_count': int,
 536             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 537             '_old_archive_ids': ['bilibili 292734508_part1'],
 538         },
 539         'playlist_count': 33,
 540         'playlist': [{
 541             'info_dict': {
 542                 'id': 'BV1af4y1H7ga_400950101',
 543                 'ext': 'mp4',
 544                 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收！！ - 听见猫猫叫~',
 545                 'timestamp': 1630500414,
 546                 'upload_date': '20210901',
 547                 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
 548                 'tags': list,
 549                 'uploader': '钉宫妮妮Ninico',
 550                 'duration': 11.605,
 551                 'uploader_id': '8881297',
 552                 'comment_count': int,
 553                 'view_count': int,
 554                 'like_count': int,
 555                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 556                 '_old_archive_ids': ['bilibili 292734508_part1'],
 557             },
 558         }],
 559     }, {
 560         'note': '301 redirect to bangumi link',
 561         'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
 562         'info_dict': {
 563             'id': '288525',
 564             'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么？',
 565             'ext': 'mp4',
 566             'series': '我和我的祖国',
 567             'series_id': '4780',
 568             'season': '幕后纪实',
 569             'season_id': '28609',
 570             'season_number': 1,
 571             'episode': '钱学森弹道和乘波体飞行器是什么？',
 572             'episode_id': '288525',
 573             'episode_number': 105,
 574             'duration': 1183.957,
 575             'timestamp': 1571648124,
 576             'upload_date': '20191021',
 577             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 578         },
 579     }, {
 580         'note': 'video has subtitles, which requires login',
 581         'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
 582         'info_dict': {
 583             'id': 'BV12N4y1M7rh',
 584             'ext': 'mp4',
 585             'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
 586             'tags': list,
 587             'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
 588             'duration': 313.557,
 589             'upload_date': '20220709',
 590             'uploader': '小夫太渴',
 591             'timestamp': 1657347907,
 592             'uploader_id': '1326814124',
 593             'comment_count': int,
 594             'view_count': int,
 595             'like_count': int,
 596             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 597             'subtitles': 'count:2',  # login required for CC subtitle
 598             '_old_archive_ids': ['bilibili 898179753_part1'],
 599         },
 600         'params': {'listsubtitles': True},
 601         'skip': 'login required for subtitle',
 602     }, {
 603         'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
 604         'info_dict': {
 605             'id': 'BV1jL41167ZG',
 606             'title': '一场大火引发的离奇死亡！古典推理经典短篇集《不可能犯罪诊断书》！',
 607             'ext': 'mp4',
 608         },
 609         'skip': 'supporter-only video',
 610     }, {
 611         'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
 612         'info_dict': {
 613             'id': 'BV1Ks411f7aQ',
 614             'title': '【BD1080P】狼与香辛料I【华盟】',
 615             'ext': 'mp4',
 616         },
 617         'skip': 'login required',
 618     }, {
 619         'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
 620         'info_dict': {
 621             'id': 'BV1GJ411x7h7',
 622             'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
 623             'ext': 'mp4',
 624         },
 625         'skip': 'geo-restricted',
 626     }, {
 627         'note': 'has - in the last path segment of the url',
 628         'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
 629         'only_matching': True,
 630     }]
 631
 632     def _real_extract(self, url):
 633         video_id = self._match_id(url)
 634         headers = self.geo_verification_headers()
 635         webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
 636         if not self._match_valid_url(urlh.url):
 637             return self.url_result(urlh.url)
 638
 639         headers['Referer'] = url
 640
 641         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
 642         is_festival = 'videoData' not in initial_state
 643         if is_festival:
 644             video_data = initial_state['videoInfo']
 645         else:
 646             play_info_obj = self._search_json(
 647                 r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
 648             if not play_info_obj:
 649                 if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
 650                     self.raise_login_required()
 651                 if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
 652                     raise ExtractorError(
 653                         'This video may be deleted or geo-restricted. '
 654                         'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
 655             play_info = traverse_obj(play_info_obj, ('data', {dict}))
 656             if not play_info:
 657                 if traverse_obj(play_info_obj, 'code') == 87007:
 658                     toast = get_element_by_class('tips-toast', webpage) or ''
 659                     msg = clean_html(
 660                         f'{get_element_by_class("belongs-to", toast) or ""}，'
 661                         + (get_element_by_class('level', toast) or ''))
 662                     raise ExtractorError(
 663                         f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
 664                 raise ExtractorError('Failed to extract play info')
 665             video_data = initial_state['videoData']
 666
 667         video_id, title = video_data['bvid'], video_data.get('title')
 668
 669         # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
 670         page_list_json = not is_festival and traverse_obj(
 671             self._download_json(
 672                 'https://api.bilibili.com/x/player/pagelist', video_id,
 673                 fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
 674                 note='Extracting videos in anthology', headers=headers),
 675             'data', expected_type=list) or []
 676         is_anthology = len(page_list_json) > 1
 677
 678         part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
 679         if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
 680             return self.playlist_from_matches(
 681                 page_list_json, video_id, title, ie=BiliBiliIE,
 682                 getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
 683
 684         if is_anthology:
 685             part_id = part_id or 1
 686             title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
 687
 688         aid = video_data.get('aid')
 689         old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
 690         cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
 691
 692         festival_info = {}
 693         if is_festival:
 694             play_info = self._download_playinfo(video_id, cid, headers=headers)
 695
 696             festival_info = traverse_obj(initial_state, {
 697                 'uploader': ('videoInfo', 'upName'),
 698                 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
 699                 'like_count': ('videoStatus', 'like', {int_or_none}),
 700                 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
 701             }, get_all=False)
 702
 703         metainfo = {
 704             **traverse_obj(initial_state, {
 705                 'uploader': ('upData', 'name'),
 706                 'uploader_id': ('upData', 'mid', {str_or_none}),
 707                 'like_count': ('videoData', 'stat', 'like', {int_or_none}),
 708                 'tags': ('tags', ..., 'tag_name'),
 709                 'thumbnail': ('videoData', 'pic', {url_or_none}),
 710             }),
 711             **festival_info,
 712             **traverse_obj(video_data, {
 713                 'description': 'desc',
 714                 'timestamp': ('pubdate', {int_or_none}),
 715                 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
 716                 'comment_count': ('stat', 'reply', {int_or_none}),
 717             }, get_all=False),
 718             'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
 719             '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
 720             'title': title,
 721             'http_headers': {'Referer': url},
 722         }
 723
 724         is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
 725         if is_interactive:
 726             return self.playlist_result(
 727                 self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
 728                 duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
 729                 __post_extractor=self.extract_comments(aid))
 730         else:
 731             formats = self.extract_formats(play_info)
 732
 733             if not traverse_obj(play_info, ('dash')):
 734                 # we only have legacy formats and need additional work
 735                 has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
 736                 for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
 737                     formats.extend(traverse_obj(
 738                         self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
 739                         lambda _, v: not has_qn(v['quality'])))
 740                 self._check_missing_formats(play_info, formats)
 741                 flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
 742                 if flv_formats and len(flv_formats) < len(formats):
 743                     # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
 744                     if not self._configuration_arg('prefer_multi_flv'):
 745                         dropped_fmts = ', '.join(
 746                             f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
 747                         formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
 748                         if dropped_fmts:
 749                             self.to_screen(
 750                                 f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
 751                                 'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
 752                     else:
 753                         formats = traverse_obj(
 754                             # XXX: Filtering by extractor-arg is for testing purposes
 755                             formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
 756                         ) or [max(flv_formats, key=lambda x: x['quality'])]
 757
 758             if traverse_obj(formats, (0, 'fragments')):
 759                 # We have flv formats, which are individual short videos with their own timestamps and metainfo
 760                 # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
 761                 return {
 762                     **metainfo,
 763                     '_type': 'multi_video',
 764                     'entries': [{
 765                         'id': f'{metainfo["id"]}_{idx}',
 766                         'title': metainfo['title'],
 767                         'http_headers': metainfo['http_headers'],
 768                         'formats': [{
 769                             **fragment,
 770                             'format_id': formats[0].get('format_id'),
 771                         }],
 772                         'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
 773                         '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
 774                     } for idx, fragment in enumerate(formats[0]['fragments'])],
 775                     'duration': float_or_none(play_info.get('timelength'), scale=1000),
 776                 }
 777             else:
 778                 return {
 779                     **metainfo,
 780                     'formats': formats,
 781                     'duration': float_or_none(play_info.get('timelength'), scale=1000),
 782                     'chapters': self._get_chapters(aid, cid),
 783                     'subtitles': self.extract_subtitles(video_id, cid),
 784                     '__post_extractor': self.extract_comments(aid),
 785                 }
 786
 787
 788 class BiliBiliBangumiIE(BilibiliBaseIE):
 789     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
 790
 791     _TESTS = [{
 792         'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
 793         'info_dict': {
 794             'id': '21495',
 795             'ext': 'mp4',
 796             'series': '悠久之翼',
 797             'series_id': '774',
 798             'season': '第二季',
 799             'season_id': '1182',
 800             'season_number': 2,
 801             'episode': 'forever／ef',
 802             'episode_id': '21495',
 803             'episode_number': 12,
 804             'title': '12 forever／ef',
 805             'duration': 1420.791,
 806             'timestamp': 1320412200,
 807             'upload_date': '20111104',
 808             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 809         },
 810     }, {
 811         'url': 'https://www.bilibili.com/bangumi/play/ep267851',
 812         'info_dict': {
 813             'id': '267851',
 814             'ext': 'mp4',
 815             'series': '鬼灭之刃',
 816             'series_id': '4358',
 817             'season': '立志篇',
 818             'season_id': '26801',
 819             'season_number': 1,
 820             'episode': '残酷',
 821             'episode_id': '267851',
 822             'episode_number': 1,
 823             'title': '1 残酷',
 824             'duration': 1425.256,
 825             'timestamp': 1554566400,
 826             'upload_date': '20190406',
 827             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 828         },
 829         'skip': 'Geo-restricted',
 830     }, {
 831         'note': 'a making-of which falls outside main section',
 832         'url': 'https://www.bilibili.com/bangumi/play/ep345120',
 833         'info_dict': {
 834             'id': '345120',
 835             'ext': 'mp4',
 836             'series': '鬼灭之刃',
 837             'series_id': '4358',
 838             'season': '立志篇',
 839             'season_id': '26801',
 840             'season_number': 1,
 841             'episode': '炭治郎篇',
 842             'episode_id': '345120',
 843             'episode_number': 27,
 844             'title': '#1 炭治郎篇',
 845             'duration': 1922.129,
 846             'timestamp': 1602853860,
 847             'upload_date': '20201016',
 848             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 849         },
 850     }]
 851
 852     def _real_extract(self, url):
 853         episode_id = self._match_id(url)
 854         headers = self.geo_verification_headers()
 855         webpage = self._download_webpage(url, episode_id, headers=headers)
 856
 857         if '您所在的地区无法观看本片' in webpage:
 858             raise GeoRestrictedError('This video is restricted')
 859         elif '正在观看预览，大会员免费看全片' in webpage:
 860             self.raise_login_required('This video is for premium members only')
 861
 862         headers['Referer'] = url
 863         play_info = self._download_json(
 864             'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
 865             'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
 866             headers=headers)
 867         premium_only = play_info.get('code') == -10403
 868         play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
 869
 870         formats = self.extract_formats(play_info)
 871         if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
 872             self.raise_login_required('This video is for premium members only')
 873
 874         bangumi_info = self._download_json(
 875             'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
 876             query={'ep_id': episode_id}, headers=headers)['result']
 877
 878         episode_number, episode_info = next((
 879             (idx, ep) for idx, ep in enumerate(traverse_obj(
 880                 bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
 881             if str_or_none(ep.get('id')) == episode_id), (1, {}))
 882
 883         season_id = bangumi_info.get('season_id')
 884         season_number, season_title = season_id and next((
 885             (idx + 1, e.get('season_title')) for idx, e in enumerate(
 886                 traverse_obj(bangumi_info, ('seasons', ...)))
 887             if e.get('season_id') == season_id
 888         ), (None, None))
 889
 890         aid = episode_info.get('aid')
 891
 892         return {
 893             'id': episode_id,
 894             'formats': formats,
 895             **traverse_obj(bangumi_info, {
 896                 'series': ('series', 'series_title', {str}),
 897                 'series_id': ('series', 'series_id', {str_or_none}),
 898                 'thumbnail': ('square_cover', {url_or_none}),
 899             }),
 900             **traverse_obj(episode_info, {
 901                 'episode': ('long_title', {str}),
 902                 'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
 903                 'timestamp': ('pub_time', {int_or_none}),
 904                 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
 905             }),
 906             'episode_id': episode_id,
 907             'season': str_or_none(season_title),
 908             'season_id': str_or_none(season_id),
 909             'season_number': season_number,
 910             'duration': float_or_none(play_info.get('timelength'), scale=1000),
 911             'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
 912             '__post_extractor': self.extract_comments(aid),
 913             'http_headers': {'Referer': url},
 914         }
 915
 916
 917 class BiliBiliBangumiMediaIE(BilibiliBaseIE):
 918     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
 919     _TESTS = [{
 920         'url': 'https://www.bilibili.com/bangumi/media/md24097891',
 921         'info_dict': {
 922             'id': '24097891',
 923             'title': 'CAROLE & TUESDAY',
 924             'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
 925         },
 926         'playlist_mincount': 25,
 927     }, {
 928         'url': 'https://www.bilibili.com/bangumi/media/md1565/',
 929         'info_dict': {
 930             'id': '1565',
 931             'title': '攻壳机动队 S.A.C. 2nd GIG',
 932             'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
 933         },
 934         'playlist_count': 26,
 935         'playlist': [{
 936             'info_dict': {
 937                 'id': '68540',
 938                 'ext': 'mp4',
 939                 'series': '攻壳机动队',
 940                 'series_id': '1077',
 941                 'season': '第二季',
 942                 'season_id': '1565',
 943                 'season_number': 2,
 944                 'episode': '再启动 REEMBODY',
 945                 'episode_id': '68540',
 946                 'episode_number': 1,
 947                 'title': '1 再启动 REEMBODY',
 948                 'duration': 1525.777,
 949                 'timestamp': 1425074413,
 950                 'upload_date': '20150227',
 951                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 952             },
 953         }],
 954     }]
 955
 956     def _real_extract(self, url):
 957         media_id = self._match_id(url)
 958         webpage = self._download_webpage(url, media_id)
 959
 960         initial_state = self._search_json(
 961             r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
 962         ss_id = initial_state['mediaInfo']['season_id']
 963
 964         return self.playlist_result(
 965             self._get_episodes_from_season(ss_id, url), media_id,
 966             **traverse_obj(initial_state, ('mediaInfo', {
 967                 'title': ('title', {str}),
 968                 'description': ('evaluate', {str}),
 969             })))
 970
 971
 972 class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
 973     _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
 974     _TESTS = [{
 975         'url': 'https://www.bilibili.com/bangumi/play/ss26801',
 976         'info_dict': {
 977             'id': '26801',
 978             'title': '鬼灭之刃',
 979             'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
 980         },
 981         'playlist_mincount': 26,
 982     }, {
 983         'url': 'https://www.bilibili.com/bangumi/play/ss2251',
 984         'info_dict': {
 985             'id': '2251',
 986             'title': '玲音',
 987             'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
 988         },
 989         'playlist_count': 13,
 990         'playlist': [{
 991             'info_dict': {
 992                 'id': '50188',
 993                 'ext': 'mp4',
 994                 'series': '玲音',
 995                 'series_id': '1526',
 996                 'season': 'TV',
 997                 'season_id': '2251',
 998                 'season_number': 1,
 999                 'episode': 'WEIRD',
1000                 'episode_id': '50188',
1001                 'episode_number': 1,
1002                 'title': '1 WEIRD',
1003                 'duration': 1436.992,
1004                 'timestamp': 1343185080,
1005                 'upload_date': '20120725',
1006                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1007             },
1008         }],
1009     }]
1010
1011     def _real_extract(self, url):
1012         ss_id = self._match_id(url)
1013         webpage = self._download_webpage(url, ss_id)
1014         metainfo = traverse_obj(
1015             self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
1016             ('itemListElement', ..., {
1017                 'title': ('name', {str}),
1018                 'description': ('description', {str}),
1019             }), get_all=False)
1020
1021         return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
1022
1023
1024 class BilibiliCheeseBaseIE(BilibiliBaseIE):
1025     def _extract_episode(self, season_info, ep_id):
1026         episode_info = traverse_obj(season_info, (
1027             'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
1028         aid, cid = episode_info['aid'], episode_info['cid']
1029
1030         if traverse_obj(episode_info, 'ep_status') == -1:
1031             raise ExtractorError('This course episode is not yet available.', expected=True)
1032         if not traverse_obj(episode_info, 'playable'):
1033             self.raise_login_required('You need to purchase the course to download this episode')
1034
1035         play_info = self._download_json(
1036             'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
1037             query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
1038             headers=self._HEADERS, note='Downloading playinfo')['data']
1039
1040         return {
1041             'id': str_or_none(ep_id),
1042             'episode_id': str_or_none(ep_id),
1043             'formats': self.extract_formats(play_info),
1044             'extractor_key': BilibiliCheeseIE.ie_key(),
1045             'extractor': BilibiliCheeseIE.IE_NAME,
1046             'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
1047             **traverse_obj(episode_info, {
1048                 'episode': ('title', {str}),
1049                 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
1050                 'alt_title': ('subtitle', {str}),
1051                 'duration': ('duration', {int_or_none}),
1052                 'episode_number': ('index', {int_or_none}),
1053                 'thumbnail': ('cover', {url_or_none}),
1054                 'timestamp': ('release_date', {int_or_none}),
1055                 'view_count': ('play', {int_or_none}),
1056             }),
1057             **traverse_obj(season_info, {
1058                 'uploader': ('up_info', 'uname', {str}),
1059                 'uploader_id': ('up_info', 'mid', {str_or_none}),
1060             }),
1061             'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
1062             '__post_extractor': self.extract_comments(aid),
1063             'http_headers': self._HEADERS,
1064         }
1065
1066     def _download_season_info(self, query_key, video_id):
1067         return self._download_json(
1068             f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
1069             headers=self._HEADERS, note='Downloading season info')['data']
1070
1071
1072 class BilibiliCheeseIE(BilibiliCheeseBaseIE):
1073     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
1074     _TESTS = [{
1075         'url': 'https://www.bilibili.com/cheese/play/ep229832',
1076         'info_dict': {
1077             'id': '229832',
1078             'ext': 'mp4',
1079             'title': '1 - 课程先导片',
1080             'alt_title': '视频课 · 3分41秒',
1081             'uploader': '马督工',
1082             'uploader_id': '316568752',
1083             'episode': '课程先导片',
1084             'episode_id': '229832',
1085             'episode_number': 1,
1086             'duration': 221,
1087             'timestamp': 1695549606,
1088             'upload_date': '20230924',
1089             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1090             'view_count': int,
1091         },
1092     }]
1093
1094     def _real_extract(self, url):
1095         ep_id = self._match_id(url)
1096         return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
1097
1098
1099 class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
1100     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
1101     _TESTS = [{
1102         'url': 'https://www.bilibili.com/cheese/play/ss5918',
1103         'info_dict': {
1104             'id': '5918',
1105             'title': '【限时五折】新闻系学不到：马督工教你做自媒体',
1106             'description': '帮普通人建立世界模型，降低人与人的沟通门槛',
1107         },
1108         'playlist': [{
1109             'info_dict': {
1110                 'id': '229832',
1111                 'ext': 'mp4',
1112                 'title': '1 - 课程先导片',
1113                 'alt_title': '视频课 · 3分41秒',
1114                 'uploader': '马督工',
1115                 'uploader_id': '316568752',
1116                 'episode': '课程先导片',
1117                 'episode_id': '229832',
1118                 'episode_number': 1,
1119                 'duration': 221,
1120                 'timestamp': 1695549606,
1121                 'upload_date': '20230924',
1122                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1123                 'view_count': int,
1124             },
1125         }],
1126         'params': {'playlist_items': '1'},
1127     }, {
1128         'url': 'https://www.bilibili.com/cheese/play/ss5918',
1129         'info_dict': {
1130             'id': '5918',
1131             'title': '【限时五折】新闻系学不到：马督工教你做自媒体',
1132             'description': '帮普通人建立世界模型，降低人与人的沟通门槛',
1133         },
1134         'playlist_mincount': 5,
1135         'skip': 'paid video in list',
1136     }]
1137
1138     def _get_cheese_entries(self, season_info):
1139         for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
1140             yield self._extract_episode(season_info, ep_id)
1141
1142     def _real_extract(self, url):
1143         season_id = self._match_id(url)
1144         season_info = self._download_season_info('season_id', season_id)
1145
1146         return self.playlist_result(
1147             self._get_cheese_entries(season_info), season_id,
1148             **traverse_obj(season_info, {
1149                 'title': ('title', {str}),
1150                 'description': ('subtitle', {str}),
1151             }))
1152
1153
1154 class BilibiliSpaceBaseIE(BilibiliBaseIE):
1155     def _extract_playlist(self, fetch_page, get_metadata, get_entries):
1156         first_page = fetch_page(0)
1157         metadata = get_metadata(first_page)
1158
1159         paged_list = InAdvancePagedList(
1160             lambda idx: get_entries(fetch_page(idx) if idx else first_page),
1161             metadata['page_count'], metadata['page_size'])
1162
1163         return metadata, paged_list
1164
1165
1166 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
1167     _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
1168     _TESTS = [{
1169         'url': 'https://space.bilibili.com/3985676/video',
1170         'info_dict': {
1171             'id': '3985676',
1172         },
1173         'playlist_mincount': 178,
1174         'skip': 'login required',
1175     }, {
1176         'url': 'https://space.bilibili.com/313580179/video',
1177         'info_dict': {
1178             'id': '313580179',
1179         },
1180         'playlist_mincount': 92,
1181         'skip': 'login required',
1182     }]
1183
1184     def _real_extract(self, url):
1185         playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
1186         if not is_video_url:
1187             self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1188                            'To download audios, add a "/audio" to the URL')
1189
1190         def fetch_page(page_idx):
1191             query = {
1192                 'keyword': '',
1193                 'mid': playlist_id,
1194                 'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
1195                 'order_avoided': 'true',
1196                 'platform': 'web',
1197                 'pn': page_idx + 1,
1198                 'ps': 30,
1199                 'tid': 0,
1200                 'web_location': 1550101,
1201             }
1202
1203             try:
1204                 response = self._download_json(
1205                     'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
1206                     query=self._sign_wbi(query, playlist_id),
1207                     note=f'Downloading space page {page_idx}', headers={'Referer': url})
1208             except ExtractorError as e:
1209                 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
1210                     raise ExtractorError(
1211                         'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
1212                 raise
1213             status_code = response['code']
1214             if status_code == -401:
1215                 raise ExtractorError(
1216                     'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
1217             elif status_code == -352 and not self.is_logged_in:
1218                 self.raise_login_required('Request is rejected, you need to login to access playlist')
1219             elif status_code != 0:
1220                 raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
1221             return response['data']
1222
1223         def get_metadata(page_data):
1224             page_size = page_data['page']['ps']
1225             entry_count = page_data['page']['count']
1226             return {
1227                 'page_count': math.ceil(entry_count / page_size),
1228                 'page_size': page_size,
1229             }
1230
1231         def get_entries(page_data):
1232             for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
1233                 yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
1234
1235         metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1236         return self.playlist_result(paged_list, playlist_id)
1237
1238
1239 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
1240     _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
1241     _TESTS = [{
1242         'url': 'https://space.bilibili.com/313580179/audio',
1243         'info_dict': {
1244             'id': '313580179',
1245         },
1246         'playlist_mincount': 1,
1247     }]
1248
1249     def _real_extract(self, url):
1250         playlist_id = self._match_id(url)
1251
1252         def fetch_page(page_idx):
1253             return self._download_json(
1254                 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
1255                 note=f'Downloading page {page_idx}',
1256                 query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
1257
1258         def get_metadata(page_data):
1259             return {
1260                 'page_count': page_data['pageCount'],
1261                 'page_size': page_data['pageSize'],
1262             }
1263
1264         def get_entries(page_data):
1265             for entry in page_data.get('data', []):
1266                 yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
1267
1268         metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1269         return self.playlist_result(paged_list, playlist_id)
1270
1271
1272 class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
1273     def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
1274         for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
1275             yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
1276
1277     def _get_uploader(self, uid, playlist_id):
1278         webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
1279         return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
1280
1281     def _extract_playlist(self, fetch_page, get_metadata, get_entries):
1282         metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
1283         metadata.pop('page_count', None)
1284         metadata.pop('page_size', None)
1285         return metadata, page_list
1286
1287
1288 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
1289     _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
1290     _TESTS = [{
1291         'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
1292         'info_dict': {
1293             'id': '2142762_57445',
1294             'title': '【完结】《底特律 变人》全结局流程解说',
1295             'description': '',
1296             'uploader': '老戴在此',
1297             'uploader_id': '2142762',
1298             'timestamp': int,
1299             'upload_date': str,
1300             'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
1301         },
1302         'playlist_mincount': 31,
1303     }]
1304
1305     def _real_extract(self, url):
1306         mid, sid = self._match_valid_url(url).group('mid', 'sid')
1307         playlist_id = f'{mid}_{sid}'
1308
1309         def fetch_page(page_idx):
1310             return self._download_json(
1311                 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
1312                 playlist_id, note=f'Downloading page {page_idx}',
1313                 query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
1314
1315         def get_metadata(page_data):
1316             page_size = page_data['page']['page_size']
1317             entry_count = page_data['page']['total']
1318             return {
1319                 'page_count': math.ceil(entry_count / page_size),
1320                 'page_size': page_size,
1321                 'uploader': self._get_uploader(mid, playlist_id),
1322                 **traverse_obj(page_data, {
1323                     'title': ('meta', 'name', {str}),
1324                     'description': ('meta', 'description', {str}),
1325                     'uploader_id': ('meta', 'mid', {str_or_none}),
1326                     'timestamp': ('meta', 'ptime', {int_or_none}),
1327                     'thumbnail': ('meta', 'cover', {url_or_none}),
1328                 }),
1329             }
1330
1331         def get_entries(page_data):
1332             return self._get_entries(page_data, 'archives')
1333
1334         metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1335         return self.playlist_result(paged_list, playlist_id, **metadata)
1336
1337
1338 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
1339     _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
1340     _TESTS = [{
1341         'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
1342         'info_dict': {
1343             'id': '1958703906_547718',
1344             'title': '直播回放',
1345             'description': '直播回放',
1346             'uploader': '靡烟miya',
1347             'uploader_id': '1958703906',
1348             'timestamp': 1637985853,
1349             'upload_date': '20211127',
1350             'modified_timestamp': int,
1351             'modified_date': str,
1352         },
1353         'playlist_mincount': 513,
1354     }]
1355
1356     def _real_extract(self, url):
1357         mid, sid = self._match_valid_url(url).group('mid', 'sid')
1358         playlist_id = f'{mid}_{sid}'
1359         playlist_meta = traverse_obj(self._download_json(
1360             f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
1361         ), {
1362             'title': ('data', 'meta', 'name', {str}),
1363             'description': ('data', 'meta', 'description', {str}),
1364             'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
1365             'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
1366             'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
1367         })
1368
1369         def fetch_page(page_idx):
1370             return self._download_json(
1371                 'https://api.bilibili.com/x/series/archives',
1372                 playlist_id, note=f'Downloading page {page_idx}',
1373                 query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
1374
1375         def get_metadata(page_data):
1376             page_size = page_data['page']['size']
1377             entry_count = page_data['page']['total']
1378             return {
1379                 'page_count': math.ceil(entry_count / page_size),
1380                 'page_size': page_size,
1381                 'uploader': self._get_uploader(mid, playlist_id),
1382                 **playlist_meta,
1383             }
1384
1385         def get_entries(page_data):
1386             return self._get_entries(page_data, 'archives')
1387
1388         metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1389         return self.playlist_result(paged_list, playlist_id, **metadata)
1390
1391
1392 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
1393     _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
1394     _TESTS = [{
1395         'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
1396         'info_dict': {
1397             'id': '1103407912',
1398             'title': '【V2】（旧）',
1399             'description': '',
1400             'uploader': '晓月春日',
1401             'uploader_id': '84912',
1402             'timestamp': 1604905176,
1403             'upload_date': '20201109',
1404             'modified_timestamp': int,
1405             'modified_date': str,
1406             'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
1407             'view_count': int,
1408             'like_count': int,
1409         },
1410         'playlist_mincount': 22,
1411     }, {
1412         'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
1413         'only_matching': True,
1414     }]
1415
1416     def _real_extract(self, url):
1417         fid = self._match_id(url)
1418
1419         list_info = self._download_json(
1420             f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
1421             fid, note='Downloading favlist metadata')
1422         if list_info['code'] == -403:
1423             self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
1424
1425         entries = self._get_entries(self._download_json(
1426             f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
1427             fid, note='Download favlist entries'), 'data')
1428
1429         return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
1430             'title': ('title', {str}),
1431             'description': ('intro', {str}),
1432             'uploader': ('upper', 'name', {str}),
1433             'uploader_id': ('upper', 'mid', {str_or_none}),
1434             'timestamp': ('ctime', {int_or_none}),
1435             'modified_timestamp': ('mtime', {int_or_none}),
1436             'thumbnail': ('cover', {url_or_none}),
1437             'view_count': ('cnt_info', 'play', {int_or_none}),
1438             'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
1439         })))
1440
1441
1442 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
1443     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
1444     _TESTS = [{
1445         'url': 'https://www.bilibili.com/watchlater/#/list',
1446         'info_dict': {
1447             'id': r're:\d+',
1448             'title': '稍后再看',
1449         },
1450         'playlist_mincount': 0,
1451         'skip': 'login required',
1452     }]
1453
1454     def _real_extract(self, url):
1455         list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
1456         watchlater_info = self._download_json(
1457             'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
1458         if watchlater_info['code'] == -101:
1459             self.raise_login_required(msg='You need to login to access your watchlater list')
1460         entries = self._get_entries(watchlater_info, ('data', 'list'))
1461         return self.playlist_result(entries, id=list_id, title='稍后再看')
1462
1463
1464 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
1465     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
1466     _TESTS = [{
1467         'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
1468         'info_dict': {
1469             'id': '5_547718',
1470             'title': '直播回放',
1471             'uploader': '靡烟miya',
1472             'uploader_id': '1958703906',
1473             'timestamp': 1637985853,
1474             'upload_date': '20211127',
1475         },
1476         'playlist_mincount': 513,
1477     }, {
1478         'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
1479         'info_dict': {
1480             'id': 'BV1DU4y1r7tz',
1481             'ext': 'mp4',
1482             'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
1483             'upload_date': '20220820',
1484             'description': '',
1485             'timestamp': 1661016330,
1486             'uploader_id': '1958703906',
1487             'uploader': '靡烟miya',
1488             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1489             'duration': 9552.903,
1490             'tags': list,
1491             'comment_count': int,
1492             'view_count': int,
1493             'like_count': int,
1494             '_old_archive_ids': ['bilibili 687146339_part1'],
1495         },
1496         'params': {'noplaylist': True},
1497     }, {
1498         'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
1499         'info_dict': {
1500             'id': '5_547718',
1501         },
1502         'playlist_mincount': 513,
1503         'skip': 'redirect url',
1504     }, {
1505         'url': 'https://www.bilibili.com/list/ml1103407912',
1506         'info_dict': {
1507             'id': '3_1103407912',
1508             'title': '【V2】（旧）',
1509             'uploader': '晓月春日',
1510             'uploader_id': '84912',
1511             'timestamp': 1604905176,
1512             'upload_date': '20201109',
1513             'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
1514         },
1515         'playlist_mincount': 22,
1516     }, {
1517         'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
1518         'info_dict': {
1519             'id': '3_1103407912',
1520         },
1521         'playlist_mincount': 22,
1522         'skip': 'redirect url',
1523     }, {
1524         'url': 'https://www.bilibili.com/list/watchlater',
1525         'info_dict': {
1526             'id': r're:2_\d+',
1527             'title': '稍后再看',
1528             'uploader': str,
1529             'uploader_id': str,
1530         },
1531         'playlist_mincount': 0,
1532         'skip': 'login required',
1533     }, {
1534         'url': 'https://www.bilibili.com/medialist/play/watchlater',
1535         'info_dict': {'id': 'watchlater'},
1536         'playlist_mincount': 0,
1537         'skip': 'redirect url & login required',
1538     }]
1539
1540     def _extract_medialist(self, query, list_id):
1541         for page_num in itertools.count(1):
1542             page_data = self._download_json(
1543                 'https://api.bilibili.com/x/v2/medialist/resource/list',
1544                 list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
1545             )['data']
1546             yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
1547             query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
1548             if not page_data.get('has_more', False):
1549                 break
1550
1551     def _real_extract(self, url):
1552         list_id = self._match_id(url)
1553
1554         bvid = traverse_obj(parse_qs(url), ('bvid', 0))
1555         if not self._yes_playlist(list_id, bvid):
1556             return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
1557
1558         webpage = self._download_webpage(url, list_id)
1559         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
1560         if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
1561             error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
1562             error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
1563             if error_code == -400 and list_id == 'watchlater':
1564                 self.raise_login_required('You need to login to access your watchlater playlist')
1565             elif error_code == -403:
1566                 self.raise_login_required('This is a private playlist. You need to login as its owner')
1567             elif error_code == 11010:
1568                 raise ExtractorError('Playlist is no longer available', expected=True)
1569             raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
1570
1571         query = {
1572             'ps': 20,
1573             'with_current': False,
1574             **traverse_obj(initial_state, {
1575                 'type': ('playlist', 'type', {int_or_none}),
1576                 'biz_id': ('playlist', 'id', {int_or_none}),
1577                 'tid': ('tid', {int_or_none}),
1578                 'sort_field': ('sortFiled', {int_or_none}),
1579                 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
1580             }),
1581         }
1582         metadata = {
1583             'id': f'{query["type"]}_{query["biz_id"]}',
1584             **traverse_obj(initial_state, ('mediaListInfo', {
1585                 'title': ('title', {str}),
1586                 'uploader': ('upper', 'name', {str}),
1587                 'uploader_id': ('upper', 'mid', {str_or_none}),
1588                 'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
1589                 'thumbnail': ('cover', {url_or_none}),
1590             })),
1591         }
1592         return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
1593
1594
1595 class BilibiliCategoryIE(InfoExtractor):
1596     IE_NAME = 'Bilibili category extractor'
1597     _MAX_RESULTS = 1000000
1598     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
1599     _TESTS = [{
1600         'url': 'https://www.bilibili.com/v/kichiku/mad',
1601         'info_dict': {
1602             'id': 'kichiku: mad',
1603             'title': 'kichiku: mad',
1604         },
1605         'playlist_mincount': 45,
1606         'params': {
1607             'playlistend': 45,
1608         },
1609     }]
1610
1611     def _fetch_page(self, api_url, num_pages, query, page_num):
1612         parsed_json = self._download_json(
1613             api_url, query, query={'Search_key': query, 'pn': page_num},
1614             note=f'Extracting results from page {page_num} of {num_pages}')
1615
1616         video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
1617         if not video_list:
1618             raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
1619
1620         for video in video_list:
1621             yield self.url_result(
1622                 'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
1623
1624     def _entries(self, category, subcategory, query):
1625         # map of categories : subcategories : RIDs
1626         rid_map = {
1627             'kichiku': {
1628                 'mad': 26,
1629                 'manual_vocaloid': 126,
1630                 'guide': 22,
1631                 'theatre': 216,
1632                 'course': 127,
1633             },
1634         }
1635
1636         if category not in rid_map:
1637             raise ExtractorError(
1638                 f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
1639         if subcategory not in rid_map[category]:
1640             raise ExtractorError(
1641                 f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
1642         rid_value = rid_map[category][subcategory]
1643
1644         api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
1645         page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
1646         page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
1647         count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
1648         if count is None or not size:
1649             raise ExtractorError('Failed to calculate either page count or size')
1650
1651         num_pages = math.ceil(count / size)
1652
1653         return OnDemandPagedList(functools.partial(
1654             self._fetch_page, api_url, num_pages, query), size)
1655
1656     def _real_extract(self, url):
1657         category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
1658         query = f'{category}: {subcategory}'
1659
1660         return self.playlist_result(self._entries(category, subcategory, query), query, query)
1661
1662
1663 class BiliBiliSearchIE(SearchInfoExtractor):
1664     IE_DESC = 'Bilibili video search'
1665     _MAX_RESULTS = 100000
1666     _SEARCH_KEY = 'bilisearch'
1667     _TESTS = [{
1668         'url': 'bilisearch3:靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
1669         'playlist_count': 3,
1670         'info_dict': {
1671             'id': '靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
1672             'title': '靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
1673         },
1674         'playlist': [{
1675             'info_dict': {
1676                 'id': 'BV1n44y1Q7sc',
1677                 'ext': 'mp4',
1678                 'title': '“出道一年，我怎么还在等你单推的女人睡觉后开播啊？”【一分钟了解靡烟miya】',
1679                 'timestamp': 1669889987,
1680                 'upload_date': '20221201',
1681                 'description': 'md5:43343c0973defff527b5a4b403b4abf9',
1682                 'tags': list,
1683                 'uploader': '靡烟miya',
1684                 'duration': 123.156,
1685                 'uploader_id': '1958703906',
1686                 'comment_count': int,
1687                 'view_count': int,
1688                 'like_count': int,
1689                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1690                 '_old_archive_ids': ['bilibili 988222410_part1'],
1691             },
1692         }],
1693     }]
1694
1695     def _search_results(self, query):
1696         if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
1697             self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
1698         for page_num in itertools.count(1):
1699             videos = self._download_json(
1700                 'https://api.bilibili.com/x/web-interface/search/type', query,
1701                 note=f'Extracting results from page {page_num}', query={
1702                     'Search_key': query,
1703                     'keyword': query,
1704                     'page': page_num,
1705                     'context': '',
1706                     'duration': 0,
1707                     'tids_2': '',
1708                     '__refresh__': 'true',
1709                     'search_type': 'video',
1710                     'tids': 0,
1711                     'highlight': 1,
1712                 })['data'].get('result')
1713             if not videos:
1714                 break
1715             for video in videos:
1716                 yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
1717
1718
1719 class BilibiliAudioBaseIE(InfoExtractor):
1720     def _call_api(self, path, sid, query=None):
1721         if not query:
1722             query = {'sid': sid}
1723         return self._download_json(
1724             'https://www.bilibili.com/audio/music-service-c/web/' + path,
1725             sid, query=query)['data']
1726
1727
1728 class BilibiliAudioIE(BilibiliAudioBaseIE):
1729     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1730     _TEST = {
1731         'url': 'https://www.bilibili.com/audio/au1003142',
1732         'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1733         'info_dict': {
1734             'id': '1003142',
1735             'ext': 'm4a',
1736             'title': '【tsukimi】YELLOW / 神山羊',
1737             'artist': 'tsukimi',
1738             'comment_count': int,
1739             'description': 'YELLOW的mp3版！',
1740             'duration': 183,
1741             'subtitles': {
1742                 'origin': [{
1743                     'ext': 'lrc',
1744                 }],
1745             },
1746             'thumbnail': r're:^https?://.+\.jpg',
1747             'timestamp': 1564836614,
1748             'upload_date': '20190803',
1749             'uploader': 'tsukimi-つきみぐー',
1750             'view_count': int,
1751         },
1752     }
1753
1754     def _real_extract(self, url):
1755         au_id = self._match_id(url)
1756
1757         play_data = self._call_api('url', au_id)
1758         formats = [{
1759             'url': play_data['cdns'][0],
1760             'filesize': int_or_none(play_data.get('size')),
1761             'vcodec': 'none',
1762         }]
1763
1764         for a_format in formats:
1765             a_format.setdefault('http_headers', {}).update({
1766                 'Referer': url,
1767             })
1768
1769         song = self._call_api('song/info', au_id)
1770         title = song['title']
1771         statistic = song.get('statistic') or {}
1772
1773         subtitles = None
1774         lyric = song.get('lyric')
1775         if lyric:
1776             subtitles = {
1777                 'origin': [{
1778                     'url': lyric,
1779                 }],
1780             }
1781
1782         return {
1783             'id': au_id,
1784             'title': title,
1785             'formats': formats,
1786             'artist': song.get('author'),
1787             'comment_count': int_or_none(statistic.get('comment')),
1788             'description': song.get('intro'),
1789             'duration': int_or_none(song.get('duration')),
1790             'subtitles': subtitles,
1791             'thumbnail': song.get('cover'),
1792             'timestamp': int_or_none(song.get('passtime')),
1793             'uploader': song.get('uname'),
1794             'view_count': int_or_none(statistic.get('play')),
1795         }
1796
1797
1798 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
1799     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1800     _TEST = {
1801         'url': 'https://www.bilibili.com/audio/am10624',
1802         'info_dict': {
1803             'id': '10624',
1804             'title': '每日新曲推荐（每日11:00更新）',
1805             'description': '每天11:00更新，为你推送最新音乐',
1806         },
1807         'playlist_count': 19,
1808     }
1809
1810     def _real_extract(self, url):
1811         am_id = self._match_id(url)
1812
1813         songs = self._call_api(
1814             'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
1815
1816         entries = []
1817         for song in songs:
1818             sid = str_or_none(song.get('id'))
1819             if not sid:
1820                 continue
1821             entries.append(self.url_result(
1822                 'https://www.bilibili.com/audio/au' + sid,
1823                 BilibiliAudioIE.ie_key(), sid))
1824
1825         if entries:
1826             album_data = self._call_api('menu/info', am_id) or {}
1827             album_title = album_data.get('title')
1828             if album_title:
1829                 for entry in entries:
1830                     entry['album'] = album_title
1831                 return self.playlist_result(
1832                     entries, am_id, album_title, album_data.get('intro'))
1833
1834         return self.playlist_result(entries, am_id)
1835
1836
1837 class BiliBiliPlayerIE(InfoExtractor):
1838     _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1839     _TEST = {
1840         'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1841         'only_matching': True,
1842     }
1843
1844     def _real_extract(self, url):
1845         video_id = self._match_id(url)
1846         return self.url_result(
1847             f'http://www.bilibili.tv/video/av{video_id}/',
1848             ie=BiliBiliIE.ie_key(), video_id=video_id)
1849
1850
1851 class BiliIntlBaseIE(InfoExtractor):
1852     _API_URL = 'https://api.bilibili.tv/intl/gateway'
1853     _NETRC_MACHINE = 'biliintl'
1854     _HEADERS = {'Referer': 'https://www.bilibili.tv/'}
1855
1856     def _call_api(self, endpoint, *args, **kwargs):
1857         json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
1858         if json.get('code'):
1859             if json['code'] in (10004004, 10004005, 10023006):
1860                 self.raise_login_required()
1861             elif json['code'] == 10004001:
1862                 self.raise_geo_restricted()
1863             else:
1864                 if json.get('message') and str(json['code']) != json['message']:
1865                     errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1866                 else:
1867                     errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
1868                 if kwargs.get('fatal'):
1869                     raise ExtractorError(errmsg)
1870                 else:
1871                     self.report_warning(errmsg)
1872         return json.get('data')
1873
1874     def json2srt(self, json):
1875         return '\n\n'.join(
1876             f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
1877             for i, line in enumerate(traverse_obj(json, (
1878                 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
1879
1880     def _get_subtitles(self, *, ep_id=None, aid=None):
1881         sub_json = self._call_api(
1882             '/web/v2/subtitle', ep_id or aid, fatal=False,
1883             note='Downloading subtitles list', errnote='Unable to download subtitles list',
1884             query=filter_dict({
1885                 'platform': 'web',
1886                 's_locale': 'en_US',
1887                 'episode_id': ep_id,
1888                 'aid': aid,
1889             })) or {}
1890         subtitles = {}
1891         fetched_urls = set()
1892         for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
1893             for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
1894                 if url in fetched_urls:
1895                     continue
1896                 fetched_urls.add(url)
1897                 sub_ext = determine_ext(url)
1898                 sub_lang = sub.get('lang_key') or 'en'
1899
1900                 if sub_ext == 'ass':
1901                     subtitles.setdefault(sub_lang, []).append({
1902                         'ext': 'ass',
1903                         'url': url,
1904                     })
1905                 elif sub_ext == 'json':
1906                     sub_data = self._download_json(
1907                         url, ep_id or aid, fatal=False,
1908                         note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
1909                         errnote='Unable to download subtitles')
1910
1911                     if sub_data:
1912                         subtitles.setdefault(sub_lang, []).append({
1913                             'ext': 'srt',
1914                             'data': self.json2srt(sub_data),
1915                         })
1916                 else:
1917                     self.report_warning('Unexpected subtitle extension', ep_id or aid)
1918
1919         return subtitles
1920
1921     def _get_formats(self, *, ep_id=None, aid=None):
1922         video_json = self._call_api(
1923             '/web/playurl', ep_id or aid, note='Downloading video formats',
1924             errnote='Unable to download video formats', query=filter_dict({
1925                 'platform': 'web',
1926                 'ep_id': ep_id,
1927                 'aid': aid,
1928             }))
1929         video_json = video_json['playurl']
1930         formats = []
1931         for vid in video_json.get('video') or []:
1932             video_res = vid.get('video_resource') or {}
1933             video_info = vid.get('stream_info') or {}
1934             if not video_res.get('url'):
1935                 continue
1936             formats.append({
1937                 'url': video_res['url'],
1938                 'ext': 'mp4',
1939                 'format_note': video_info.get('desc_words'),
1940                 'width': video_res.get('width'),
1941                 'height': video_res.get('height'),
1942                 'vbr': video_res.get('bandwidth'),
1943                 'acodec': 'none',
1944                 'vcodec': video_res.get('codecs'),
1945                 'filesize': video_res.get('size'),
1946             })
1947         for aud in video_json.get('audio_resource') or []:
1948             if not aud.get('url'):
1949                 continue
1950             formats.append({
1951                 'url': aud['url'],
1952                 'ext': 'mp4',
1953                 'abr': aud.get('bandwidth'),
1954                 'acodec': aud.get('codecs'),
1955                 'vcodec': 'none',
1956                 'filesize': aud.get('size'),
1957             })
1958
1959         return formats
1960
1961     def _parse_video_metadata(self, video_data):
1962         return {
1963             'title': video_data.get('title_display') or video_data.get('title'),
1964             'description': video_data.get('desc'),
1965             'thumbnail': video_data.get('cover'),
1966             'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
1967             'episode_number': int_or_none(self._search_regex(
1968                 r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
1969         }
1970
1971     def _perform_login(self, username, password):
1972         if not Cryptodome.RSA:
1973             raise ExtractorError('pycryptodomex not found. Please install', expected=True)
1974
1975         key_data = self._download_json(
1976             'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1977             note='Downloading login key', errnote='Unable to download login key')['data']
1978
1979         public_key = Cryptodome.RSA.importKey(key_data['key'])
1980         password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
1981         login_post = self._download_json(
1982             'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
1983             data=urlencode_postdata({
1984                 'username': username,
1985                 'password': base64.b64encode(password_hash).decode('ascii'),
1986                 'keep_me': 'true',
1987                 's_locale': 'en_US',
1988                 'isTrusted': 'true',
1989             }), note='Logging in', errnote='Unable to log in')
1990         if login_post.get('code'):
1991             if login_post.get('message'):
1992                 raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
1993             else:
1994                 raise ExtractorError('Unable to log in')
1995
1996
1997 class BiliIntlIE(BiliIntlBaseIE):
1998     _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1999     _TESTS = [{
2000         # Bstation page
2001         'url': 'https://www.bilibili.tv/en/play/34613/341736',
2002         'info_dict': {
2003             'id': '341736',
2004             'ext': 'mp4',
2005             'title': 'E2 - The First Night',
2006             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2007             'episode_number': 2,
2008             'upload_date': '20201009',
2009             'episode': 'Episode 2',
2010             'timestamp': 1602259500,
2011             'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2012             'chapters': [{
2013                 'start_time': 0,
2014                 'end_time': 76.242,
2015                 'title': '<Untitled Chapter 1>',
2016             }, {
2017                 'start_time': 76.242,
2018                 'end_time': 161.161,
2019                 'title': 'Intro',
2020             }, {
2021                 'start_time': 1325.742,
2022                 'end_time': 1403.903,
2023                 'title': 'Outro',
2024             }],
2025         },
2026     }, {
2027         # Non-Bstation page
2028         'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
2029         'info_dict': {
2030             'id': '11005006',
2031             'ext': 'mp4',
2032             'title': 'E3 - Who?',
2033             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2034             'episode_number': 3,
2035             'description': 'md5:e1a775e71a35c43f141484715470ad09',
2036             'episode': 'Episode 3',
2037             'upload_date': '20211219',
2038             'timestamp': 1639928700,
2039             'chapters': [{
2040                 'start_time': 0,
2041                 'end_time': 88.0,
2042                 'title': '<Untitled Chapter 1>',
2043             }, {
2044                 'start_time': 88.0,
2045                 'end_time': 156.0,
2046                 'title': 'Intro',
2047             }, {
2048                 'start_time': 1173.0,
2049                 'end_time': 1259.535,
2050                 'title': 'Outro',
2051             }],
2052         },
2053     }, {
2054         # Subtitle with empty content
2055         'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
2056         'info_dict': {
2057             'id': '10131790',
2058             'ext': 'mp4',
2059             'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
2060             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2061             'episode_number': 140,
2062         },
2063         'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
2064     }, {
2065         # episode comment extraction
2066         'url': 'https://www.bilibili.tv/en/play/34580/340317',
2067         'info_dict': {
2068             'id': '340317',
2069             'ext': 'mp4',
2070             'timestamp': 1604057820,
2071             'upload_date': '20201030',
2072             'episode_number': 5,
2073             'title': 'E5 - My Own Steel',
2074             'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
2075             'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
2076             'episode': 'Episode 5',
2077             'comment_count': int,
2078             'chapters': [{
2079                 'start_time': 0,
2080                 'end_time': 61.0,
2081                 'title': '<Untitled Chapter 1>',
2082             }, {
2083                 'start_time': 61.0,
2084                 'end_time': 134.0,
2085                 'title': 'Intro',
2086             }, {
2087                 'start_time': 1290.0,
2088                 'end_time': 1379.0,
2089                 'title': 'Outro',
2090             }],
2091         },
2092         'params': {
2093             'getcomments': True,
2094         },
2095     }, {
2096         # user generated content comment extraction
2097         'url': 'https://www.bilibili.tv/en/video/2045730385',
2098         'info_dict': {
2099             'id': '2045730385',
2100             'ext': 'mp4',
2101             'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
2102             'timestamp': 1667891924,
2103             'upload_date': '20221108',
2104             'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
2105             'comment_count': int,
2106             'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
2107         },
2108         'params': {
2109             'getcomments': True,
2110         },
2111     }, {
2112         # episode id without intro and outro
2113         'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
2114         'info_dict': {
2115             'id': '11246489',
2116             'ext': 'mp4',
2117             'title': 'E1 - Operation \'Strix\' <Owl>',
2118             'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2119             'timestamp': 1649516400,
2120             'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
2121             'episode': 'Episode 1',
2122             'episode_number': 1,
2123             'upload_date': '20220409',
2124         },
2125     }, {
2126         'url': 'https://www.biliintl.com/en/play/34613/341736',
2127         'only_matching': True,
2128     }, {
2129         # User-generated content (as opposed to a series licensed from a studio)
2130         'url': 'https://bilibili.tv/en/video/2019955076',
2131         'only_matching': True,
2132     }, {
2133         # No language in URL
2134         'url': 'https://www.bilibili.tv/video/2019955076',
2135         'only_matching': True,
2136     }, {
2137         # Uppercase language in URL
2138         'url': 'https://www.bilibili.tv/EN/video/2019955076',
2139         'only_matching': True,
2140     }]
2141
2142     @staticmethod
2143     def _make_url(video_id, series_id=None):
2144         if series_id:
2145             return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
2146         return f'https://www.bilibili.tv/en/video/{video_id}'
2147
2148     def _extract_video_metadata(self, url, video_id, season_id):
2149         url, smuggled_data = unsmuggle_url(url, {})
2150         if smuggled_data.get('title'):
2151             return smuggled_data
2152
2153         webpage = self._download_webpage(url, video_id)
2154         # Bstation layout
2155         initial_data = (
2156             self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
2157             or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
2158         video_data = traverse_obj(
2159             initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
2160
2161         if season_id and not video_data:
2162             # Non-Bstation layout, read through episode list
2163             season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
2164             video_data = traverse_obj(season_json, (
2165                 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
2166             ), expected_type=dict, get_all=False)
2167
2168         # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
2169         return merge_dicts(
2170             self._parse_video_metadata(video_data), {
2171                 'title': get_element_by_class(
2172                     'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
2173                 'description': get_element_by_class(
2174                     'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
2175             }, self._search_json_ld(webpage, video_id, default={}))
2176
2177     def _get_comments_reply(self, root_id, next_id=0, display_id=None):
2178         comment_api_raw_data = self._download_json(
2179             'https://api.bilibili.tv/reply/web/detail', display_id,
2180             note=f'Downloading reply comment of {root_id} - {next_id}',
2181             query={
2182                 'platform': 'web',
2183                 'ps': 20,  # comment's reply per page (default: 3)
2184                 'root': root_id,
2185                 'next': next_id,
2186             })
2187
2188         for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2189             yield {
2190                 'author': traverse_obj(replies, ('member', 'name')),
2191                 'author_id': traverse_obj(replies, ('member', 'mid')),
2192                 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2193                 'text': traverse_obj(replies, ('content', 'message')),
2194                 'id': replies.get('rpid'),
2195                 'like_count': int_or_none(replies.get('like_count')),
2196                 'parent': replies.get('parent'),
2197                 'timestamp': unified_timestamp(replies.get('ctime_text')),
2198             }
2199
2200         if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2201             yield from self._get_comments_reply(
2202                 root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
2203
2204     def _get_comments(self, video_id, ep_id):
2205         for i in itertools.count(0):
2206             comment_api_raw_data = self._download_json(
2207                 'https://api.bilibili.tv/reply/web/root', video_id,
2208                 note=f'Downloading comment page {i + 1}',
2209                 query={
2210                     'platform': 'web',
2211                     'pn': i,  # page number
2212                     'ps': 20,  # comment per page (default: 20)
2213                     'oid': video_id,
2214                     'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
2215                     'sort_type': 1,  # 1: best, 2: recent
2216                 })
2217
2218             for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2219                 yield {
2220                     'author': traverse_obj(replies, ('member', 'name')),
2221                     'author_id': traverse_obj(replies, ('member', 'mid')),
2222                     'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2223                     'text': traverse_obj(replies, ('content', 'message')),
2224                     'id': replies.get('rpid'),
2225                     'like_count': int_or_none(replies.get('like_count')),
2226                     'timestamp': unified_timestamp(replies.get('ctime_text')),
2227                     'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
2228                 }
2229                 if replies.get('count'):
2230                     yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
2231
2232             if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2233                 break
2234
2235     def _real_extract(self, url):
2236         season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
2237         video_id = ep_id or aid
2238         chapters = None
2239
2240         if ep_id:
2241             intro_ending_json = self._call_api(
2242                 f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2243                 video_id, fatal=False) or {}
2244             if intro_ending_json.get('skip'):
2245                 # FIXME: start time and end time seems a bit off a few second even it corrext based on ogv.*.js
2246                 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
2247                 chapters = [{
2248                     'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
2249                     'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
2250                     'title': 'Intro',
2251                 }, {
2252                     'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
2253                     'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
2254                     'title': 'Outro',
2255                 }]
2256
2257         return {
2258             'id': video_id,
2259             **self._extract_video_metadata(url, video_id, season_id),
2260             'formats': self._get_formats(ep_id=ep_id, aid=aid),
2261             'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
2262             'chapters': chapters,
2263             '__post_extractor': self.extract_comments(video_id, ep_id),
2264             'http_headers': self._HEADERS,
2265         }
2266
2267
2268 class BiliIntlSeriesIE(BiliIntlBaseIE):
2269     IE_NAME = 'biliIntl:series'
2270     _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
2271     _TESTS = [{
2272         'url': 'https://www.bilibili.tv/en/play/34613',
2273         'playlist_mincount': 15,
2274         'info_dict': {
2275             'id': '34613',
2276             'title': 'TONIKAWA: Over the Moon For You',
2277             'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2278             'categories': ['Slice of life', 'Comedy', 'Romance'],
2279             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2280             'view_count': int,
2281         },
2282         'params': {
2283             'skip_download': True,
2284         },
2285     }, {
2286         'url': 'https://www.bilibili.tv/en/media/1048837',
2287         'info_dict': {
2288             'id': '1048837',
2289             'title': 'SPY×FAMILY',
2290             'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2291             'categories': ['Adventure', 'Action', 'Comedy'],
2292             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
2293             'view_count': int,
2294         },
2295         'playlist_mincount': 25,
2296     }, {
2297         'url': 'https://www.biliintl.com/en/play/34613',
2298         'only_matching': True,
2299     }, {
2300         'url': 'https://www.biliintl.com/EN/play/34613',
2301         'only_matching': True,
2302     }]
2303
2304     def _entries(self, series_id):
2305         series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
2306         for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
2307             episode_id = str(episode['episode_id'])
2308             yield self.url_result(smuggle_url(
2309                 BiliIntlIE._make_url(episode_id, series_id),
2310                 self._parse_video_metadata(episode),
2311             ), BiliIntlIE, episode_id)
2312
2313     def _real_extract(self, url):
2314         series_id = self._match_id(url)
2315         series_info = self._call_api(
2316             f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
2317         return self.playlist_result(
2318             self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
2319             categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
2320             thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
2321
2322
2323 class BiliLiveIE(InfoExtractor):
2324     _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2325
2326     _TESTS = [{
2327         'url': 'https://live.bilibili.com/196',
2328         'info_dict': {
2329             'id': '33989',
2330             'description': '周六杂谈回，其他时候随机游戏。 | \n录播：@下播型泛式录播组。 | \n直播通知群（全员禁言）：666906670，902092584，59971⑧481 （功能一样，别多加）',
2331             'ext': 'flv',
2332             'title': '太空狼人杀联动，不被爆杀就算赢',
2333             'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
2334             'timestamp': 1650802769,
2335         },
2336         'skip': 'not live',
2337     }, {
2338         'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2339         'only_matching': True,
2340     }, {
2341         'url': 'https://live.bilibili.com/blanc/196',
2342         'only_matching': True,
2343     }]
2344
2345     _FORMATS = {
2346         80: {'format_id': 'low', 'format_note': '流畅'},
2347         150: {'format_id': 'high_res', 'format_note': '高清'},
2348         250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
2349         400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
2350         10000: {'format_id': 'source', 'format_note': '原画'},
2351         20000: {'format_id': '4K', 'format_note': '4K'},
2352         30000: {'format_id': 'dolby', 'format_note': '杜比'},
2353     }
2354
2355     _quality = staticmethod(qualities(list(_FORMATS)))
2356
2357     def _call_api(self, path, room_id, query):
2358         api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
2359         if api_result.get('code') != 0:
2360             raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
2361         return api_result.get('data') or {}
2362
2363     def _parse_formats(self, qn, fmt):
2364         for codec in fmt.get('codec') or []:
2365             if codec.get('current_qn') != qn:
2366                 continue
2367             for url_info in codec['url_info']:
2368                 yield {
2369                     'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2370                     'ext': fmt.get('format_name'),
2371                     'vcodec': codec.get('codec_name'),
2372                     'quality': self._quality(qn),
2373                     **self._FORMATS[qn],
2374                 }
2375
2376     def _real_extract(self, url):
2377         room_id = self._match_id(url)
2378         room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
2379         if room_data.get('live_status') == 0:
2380             raise ExtractorError('Streamer is not live', expected=True)
2381
2382         formats = []
2383         for qn in self._FORMATS:
2384             stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
2385                 'room_id': room_id,
2386                 'qn': qn,
2387                 'codec': '0,1',
2388                 'format': '0,2',
2389                 'mask': '0',
2390                 'no_playurl': '0',
2391                 'platform': 'web',
2392                 'protocol': '0,1',
2393             })
2394             for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2395                 formats.extend(self._parse_formats(qn, fmt))
2396
2397         return {
2398             'id': room_id,
2399             'title': room_data.get('title'),
2400             'description': room_data.get('description'),
2401             'thumbnail': room_data.get('user_cover'),
2402             'timestamp': stream_data.get('live_time'),
2403             'formats': formats,
2404             'is_live': True,
2405             'http_headers': {
2406                 'Referer': url,
2407             },
2408         }