[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
blob2db951a6084d78abcebc932177555243b0bf8ad2
1 import base64
2 import functools
3 import hashlib
4 import itertools
5 import json
6 import math
7 import re
8 import time
9 import urllib.parse
10 import uuid
12 from .common import InfoExtractor, SearchInfoExtractor
13 from ..dependencies import Cryptodome
14 from ..networking.exceptions import HTTPError
15 from ..utils import (
16 ExtractorError,
17 GeoRestrictedError,
18 InAdvancePagedList,
19 OnDemandPagedList,
20 bool_or_none,
21 determine_ext,
22 filter_dict,
23 float_or_none,
24 format_field,
25 get_element_by_class,
26 int_or_none,
27 join_nonempty,
28 make_archive_id,
29 merge_dicts,
30 mimetype2ext,
31 parse_count,
32 parse_qs,
33 parse_resolution,
34 qualities,
35 smuggle_url,
36 srt_subtitles_timecode,
37 str_or_none,
38 traverse_obj,
39 unified_timestamp,
40 unsmuggle_url,
41 url_or_none,
42 urlencode_postdata,
43 variadic,
class BilibiliBaseIE(InfoExtractor):
    """Shared helpers for Bilibili extractors: format extraction, WBI request
    signing, subtitles, chapters, comments, and interactive-video traversal."""

    _HEADERS = {'Referer': 'https://www.bilibili.com/'}
    # Captures the numeric format id embedded in a DASH segment URL, e.g. "...-30280.m4s?..."
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
    _WBI_KEY_CACHE_TIMEOUT = 30  # exact expire timeout is unclear, use 30s for one session
    # Class-level cache shared by all instances; populated by _get_wbi_key as {'key': str, 'ts': float}
    _wbi_key_cache = {}
53 @property
54 def is_logged_in(self):
55 return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
57 def _check_missing_formats(self, play_info, formats):
58 parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
59 missing_formats = join_nonempty(*[
60 traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
61 for fmt in traverse_obj(play_info, (
62 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
63 if missing_formats:
64 self.to_screen(
65 f'Format(s) {missing_formats} are missing; you have to '
66 f'become a premium member to download them. {self._login_hint()}')
    def extract_formats(self, play_info):
        """Build yt-dlp format dicts from a playurl API 'data' object.

        Handles DASH audio (incl. Dolby and FLAC), DASH video, and the legacy
        fragmented 'durl' delivery used by old flv/mp4 videos.
        """
        # quality id -> human-readable name taken from 'support_formats'
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        # Regular + Dolby audio tracks; lossless FLAC is nested under a separate key
        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            # if there are separate audio tracks, video streams carry no audio
            'acodec': 'none' if audios else None,
            # quality ids 126/125 are mapped to Dolby Vision / HDR10
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            # prefer the id embedded in the segment URL, falling back to the quality id
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

        if formats:
            self._check_missing_formats(play_info, formats)

        # Legacy (non-DASH) delivery: 'durl' lists sequential flv/mp4 fragments
        fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
            'url': ('url', {url_or_none}),
            'duration': ('length', {float_or_none(scale=1000)}),
            'filesize': ('size', {int_or_none}),
        }))
        if fragments:
            formats.append({
                'url': fragments[0]['url'],
                'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
                # multiple fragments are downloaded and joined DASH-segment style
                **({
                    'fragments': fragments,
                    'protocol': 'http_dash_segments',
                } if len(fragments) > 1 else {}),
                **traverse_obj(play_info, {
                    'quality': ('quality', {int_or_none}),
                    'format_id': ('quality', {str_or_none}),
                    'format_note': ('quality', {lambda x: format_names.get(x)}),
                    'duration': ('timelength', {float_or_none(scale=1000)}),
                }),
                **parse_resolution(format_names.get(play_info.get('quality'))),
            })
        return formats
    def _get_wbi_key(self, video_id):
        """Return the 32-char WBI signing key, caching it for _WBI_KEY_CACHE_TIMEOUT seconds."""
        if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
            return self._wbi_key_cache['key']

        session_data = self._download_json(
            'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')

        # Concatenate the basenames (sans extension) of the two wbi image URLs
        lookup = ''.join(traverse_obj(session_data, (
            'data', 'wbi_img', ('img_url', 'sub_url'),
            {lambda x: x.rpartition('/')[2].partition('.')[0]})))

        # from getMixinKey() in the vendor js
        mixin_key_enc_tab = [
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
            33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
            61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
            36, 20, 34, 44, 52,
        ]

        self._wbi_key_cache.update({
            # permute the lookup string by the table and keep the first 32 chars
            'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
            'ts': time.time(),
        })
        return self._wbi_key_cache['key']
157 def _sign_wbi(self, params, video_id):
158 params['wts'] = round(time.time())
159 params = {
160 k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
161 for k, v in sorted(params.items())
163 query = urllib.parse.urlencode(params)
164 params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
165 return params
167 def _download_playinfo(self, bvid, cid, headers=None, query=None):
168 params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})}
169 if self.is_logged_in:
170 params.pop('try_look', None)
171 if qn := params.get('qn'):
172 note = f'Downloading video format {qn} for cid {cid}'
173 else:
174 note = f'Downloading video formats for cid {cid}'
176 return self._download_json(
177 'https://api.bilibili.com/x/player/wbi/playurl', bvid,
178 query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']
180 def json2srt(self, json_data):
181 srt_data = ''
182 for idx, line in enumerate(json_data.get('body') or []):
183 srt_data += (f'{idx + 1}\n'
184 f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
185 f'{line["content"]}\n\n')
186 return srt_data
    def _get_subtitles(self, video_id, cid, aid=None):
        """Return a subtitles dict: the danmaku XML track plus any CC subtitles (converted to SRT)."""
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }],
        }

        video_info = self._download_json(
            'https://api.bilibili.com/x/player/wbi/v2', video_id,
            query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
            note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
        if traverse_obj(video_info, ('data', 'need_login_subtitle')):
            self.report_warning(
                f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
        for s in traverse_obj(video_info, (
                'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
            })
        return subtitles
211 def _get_chapters(self, aid, cid):
212 chapters = aid and cid and self._download_json(
213 'https://api.bilibili.com/x/player/wbi/v2', aid, query={'aid': aid, 'cid': cid},
214 note='Extracting chapters', fatal=False, headers=self._HEADERS)
215 return traverse_obj(chapters, ('data', 'view_points', ..., {
216 'title': 'content',
217 'start_time': 'from',
218 'end_time': 'to',
219 })) or None
221 def _get_comments(self, aid):
222 for idx in itertools.count(1):
223 replies = traverse_obj(
224 self._download_json(
225 f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
226 aid, note=f'Extracting comments from page {idx}', fatal=False),
227 ('data', 'replies'))
228 if not replies:
229 return
230 for children in map(self._get_all_children, replies):
231 yield from children
233 def _get_all_children(self, reply):
234 yield {
235 'author': traverse_obj(reply, ('member', 'uname')),
236 'author_id': traverse_obj(reply, ('member', 'mid')),
237 'id': reply.get('rpid'),
238 'text': traverse_obj(reply, ('content', 'message')),
239 'timestamp': reply.get('ctime'),
240 'parent': reply.get('parent') or 'root',
242 for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
243 yield from children
245 def _get_episodes_from_season(self, ss_id, url):
246 season_info = self._download_json(
247 'https://api.bilibili.com/pgc/web/season/section', ss_id,
248 note='Downloading season info', query={'season_id': ss_id},
249 headers={'Referer': url, **self.geo_verification_headers()})
251 for entry in traverse_obj(season_info, (
252 'result', 'main_section', 'episodes',
253 lambda _, v: url_or_none(v['share_url']) and v['id'])):
254 yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
    def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
        """Recursively walk an interactive video's decision graph starting at *edge_id*.

        Mutates *edges* (edge_id -> {'title', 'cid', 'choices'}) and returns
        *cid_edges*, a mapping of cid -> {edge_id: edge info} collecting every
        reachable video section.
        """
        cid_edges = cid_edges or {}
        division_data = self._download_json(
            'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
            query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
            note=f'Extracting divisions from edge {edge_id}')
        # Fill in this edge's own title/cid from the story list entry matching it
        edges.setdefault(edge_id, {}).update(
            traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
                'title': ('title', {str}),
                'cid': ('cid', {int_or_none}),
            }), get_all=False))

        # Attach the outgoing choices (next edges) offered at this point
        edges[edge_id].update(traverse_obj(division_data, ('data', {
            'title': ('title', {str}),
            'choices': ('edges', 'questions', ..., 'choices', ..., {
                'edge_id': ('id', {int_or_none}),
                'cid': ('cid', {int_or_none}),
                'text': ('option', {str}),
            }),
        })))
        # use dict to combine edges that use the same video section (same cid)
        cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
        for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
            # only recurse into edges not seen yet, so cycles in the graph terminate
            if choice['edge_id'] not in edges:
                edges[choice['edge_id']] = {'cid': choice['cid']}
                self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
        return cid_edges
    def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
        """Yield one entry per distinct video section (cid) of an interactive video."""
        graph_version = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/wbi/v2', video_id,
                'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
            ('data', 'interaction', 'graph_version', {int_or_none}))
        # Start traversal from edge 1 (the entry point) with the root cid
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for cid, edges in cid_edges.items():
            play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
            yield {
                **metainfo,
                'id': f'{video_id}_{cid}',
                'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
                'formats': self.extract_formats(play_info),
                # embed the edge graph as JSON so the branching structure isn't lost
                'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'subtitles': self.extract_subtitles(video_id, cid),
            }
304 class BiliBiliIE(BilibiliBaseIE):
305 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
307 _TESTS = [{
308 'url': 'https://www.bilibili.com/video/BV13x41117TL',
309 'info_dict': {
310 'id': 'BV13x41117TL',
311 'title': '阿滴英文|英文歌分享#6 "Closer',
312 'ext': 'mp4',
313 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
314 'uploader_id': '65880958',
315 'uploader': '阿滴英文',
316 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
317 'duration': 554.117,
318 'tags': list,
319 'comment_count': int,
320 'upload_date': '20170301',
321 'timestamp': 1488353834,
322 'like_count': int,
323 'view_count': int,
324 '_old_archive_ids': ['bilibili 8903802_part1'],
326 }, {
327 'note': 'old av URL version',
328 'url': 'http://www.bilibili.com/video/av1074402/',
329 'info_dict': {
330 'id': 'BV11x411K7CN',
331 'ext': 'mp4',
332 'title': '【金坷垃】金泡沫',
333 'uploader': '菊子桑',
334 'uploader_id': '156160',
335 'duration': 308.36,
336 'upload_date': '20140420',
337 'timestamp': 1397983878,
338 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
339 'like_count': int,
340 'comment_count': int,
341 'view_count': int,
342 'tags': list,
343 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
344 '_old_archive_ids': ['bilibili 1074402_part1'],
346 'params': {'skip_download': True},
347 }, {
348 'note': 'Anthology',
349 'url': 'https://www.bilibili.com/video/BV1bK411W797',
350 'info_dict': {
351 'id': 'BV1bK411W797',
352 'title': '物语中的人物是如何吐槽自己的OP的',
354 'playlist_count': 18,
355 'playlist': [{
356 'info_dict': {
357 'id': 'BV1bK411W797_p1',
358 'ext': 'mp4',
359 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
360 'tags': 'count:10',
361 'timestamp': 1589601697,
362 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
363 'uploader': '打牌还是打桩',
364 'uploader_id': '150259984',
365 'like_count': int,
366 'comment_count': int,
367 'upload_date': '20200516',
368 'view_count': int,
369 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
370 'duration': 90.314,
371 '_old_archive_ids': ['bilibili 498159642_part1'],
374 }, {
375 'note': 'Specific page of Anthology',
376 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
377 'info_dict': {
378 'id': 'BV1bK411W797_p1',
379 'ext': 'mp4',
380 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
381 'tags': 'count:10',
382 'timestamp': 1589601697,
383 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
384 'uploader': '打牌还是打桩',
385 'uploader_id': '150259984',
386 'like_count': int,
387 'comment_count': int,
388 'upload_date': '20200516',
389 'view_count': int,
390 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
391 'duration': 90.314,
392 '_old_archive_ids': ['bilibili 498159642_part1'],
394 }, {
395 'url': 'https://www.bilibili.com/video/av8903802/',
396 'info_dict': {
397 'id': 'BV13x41117TL',
398 'ext': 'mp4',
399 'title': '阿滴英文|英文歌分享#6 "Closer',
400 'upload_date': '20170301',
401 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
402 'timestamp': 1488353834,
403 'uploader_id': '65880958',
404 'uploader': '阿滴英文',
405 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
406 'duration': 554.117,
407 'tags': list,
408 'comment_count': int,
409 'view_count': int,
410 'like_count': int,
411 '_old_archive_ids': ['bilibili 8903802_part1'],
413 'params': {
414 'skip_download': True,
416 }, {
417 'note': 'video has chapter',
418 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
419 'info_dict': {
420 'id': 'BV1vL411G7N7',
421 'ext': 'mp4',
422 'title': '如何为你的B站视频添加进度条分段',
423 'timestamp': 1634554558,
424 'upload_date': '20211018',
425 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
426 'tags': list,
427 'uploader': '爱喝咖啡的当麻',
428 'duration': 669.482,
429 'uploader_id': '1680903',
430 'chapters': 'count:6',
431 'comment_count': int,
432 'view_count': int,
433 'like_count': int,
434 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
435 '_old_archive_ids': ['bilibili 463665680_part1'],
437 'params': {'skip_download': True},
438 }, {
439 'note': 'video redirects to festival page',
440 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
441 'info_dict': {
442 'id': 'BV1wP4y1P72h',
443 'ext': 'mp4',
444 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
445 'timestamp': 1643947497,
446 'upload_date': '20220204',
447 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
448 'uploader': '叨叨冯聊音乐',
449 'duration': 246.719,
450 'uploader_id': '528182630',
451 'view_count': int,
452 'like_count': int,
453 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
454 '_old_archive_ids': ['bilibili 893839363_part1'],
456 }, {
457 'note': 'newer festival video',
458 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
459 'info_dict': {
460 'id': 'BV1ay4y1d77f',
461 'ext': 'mp4',
462 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
463 'timestamp': 1674273600,
464 'upload_date': '20230121',
465 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
466 'uploader': '果蝇轰',
467 'duration': 1111.722,
468 'uploader_id': '8469526',
469 'view_count': int,
470 'like_count': int,
471 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
472 '_old_archive_ids': ['bilibili 778246196_part1'],
474 }, {
475 'note': 'legacy flv/mp4 video',
476 'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
477 'info_dict': {
478 'id': 'BV1ms411Q7vw_p4',
479 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
480 'timestamp': 1458222815,
481 'upload_date': '20160317',
482 'description': '云南方言快乐生产线出品',
483 'duration': float,
484 'uploader': '一笑颠天',
485 'uploader_id': '3916081',
486 'view_count': int,
487 'comment_count': int,
488 'like_count': int,
489 'tags': list,
490 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
491 '_old_archive_ids': ['bilibili 4120229_part4'],
493 'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
494 'playlist_count': 19,
495 'playlist': [{
496 'info_dict': {
497 'id': 'BV1ms411Q7vw_p4_0',
498 'ext': 'flv',
499 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
500 'duration': 399.102,
503 }, {
504 'note': 'legacy mp4-only video',
505 'url': 'https://www.bilibili.com/video/BV1nx411u79K',
506 'info_dict': {
507 'id': 'BV1nx411u79K',
508 'ext': 'mp4',
509 'title': '【练习室】201603声乐练习《No Air》with VigoVan',
510 'timestamp': 1508893551,
511 'upload_date': '20171025',
512 'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
513 'duration': 80.384,
514 'uploader': '伯远',
515 'uploader_id': '10584494',
516 'comment_count': int,
517 'view_count': int,
518 'like_count': int,
519 'tags': list,
520 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
521 '_old_archive_ids': ['bilibili 15700301_part1'],
523 }, {
524 'note': 'interactive/split-path video',
525 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
526 'info_dict': {
527 'id': 'BV1af4y1H7ga',
528 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
529 'timestamp': 1630500414,
530 'upload_date': '20210901',
531 'description': 'md5:01113e39ab06e28042d74ac356a08786',
532 'tags': list,
533 'uploader': '钉宫妮妮Ninico',
534 'duration': 1503,
535 'uploader_id': '8881297',
536 'comment_count': int,
537 'view_count': int,
538 'like_count': int,
539 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
540 '_old_archive_ids': ['bilibili 292734508_part1'],
542 'playlist_count': 33,
543 'playlist': [{
544 'info_dict': {
545 'id': 'BV1af4y1H7ga_400950101',
546 'ext': 'mp4',
547 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
548 'timestamp': 1630500414,
549 'upload_date': '20210901',
550 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
551 'tags': list,
552 'uploader': '钉宫妮妮Ninico',
553 'duration': 11.605,
554 'uploader_id': '8881297',
555 'comment_count': int,
556 'view_count': int,
557 'like_count': int,
558 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
559 '_old_archive_ids': ['bilibili 292734508_part1'],
562 }, {
563 'note': '301 redirect to bangumi link',
564 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
565 'info_dict': {
566 'id': '288525',
567 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
568 'ext': 'mp4',
569 'series': '我和我的祖国',
570 'series_id': '4780',
571 'season': '幕后纪实',
572 'season_id': '28609',
573 'season_number': 1,
574 'episode': '钱学森弹道和乘波体飞行器是什么?',
575 'episode_id': '288525',
576 'episode_number': 105,
577 'duration': 1183.957,
578 'timestamp': 1571648124,
579 'upload_date': '20191021',
580 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
582 }, {
583 'note': 'video has subtitles, which requires login',
584 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
585 'info_dict': {
586 'id': 'BV12N4y1M7rh',
587 'ext': 'mp4',
588 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
589 'tags': list,
590 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
591 'duration': 313.557,
592 'upload_date': '20220709',
593 'uploader': '小夫太渴',
594 'timestamp': 1657347907,
595 'uploader_id': '1326814124',
596 'comment_count': int,
597 'view_count': int,
598 'like_count': int,
599 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
600 'subtitles': 'count:2', # login required for CC subtitle
601 '_old_archive_ids': ['bilibili 898179753_part1'],
603 'params': {'listsubtitles': True},
604 'skip': 'login required for subtitle',
605 }, {
606 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
607 'info_dict': {
608 'id': 'BV1jL41167ZG',
609 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
610 'ext': 'mp4',
612 'skip': 'supporter-only video',
613 }, {
614 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
615 'info_dict': {
616 'id': 'BV1Ks411f7aQ',
617 'title': '【BD1080P】狼与香辛料I【华盟】',
618 'ext': 'mp4',
620 'skip': 'login required',
621 }, {
622 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
623 'info_dict': {
624 'id': 'BV1GJ411x7h7',
625 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
626 'ext': 'mp4',
628 'skip': 'geo-restricted',
629 }, {
630 'note': 'has - in the last path segment of the url',
631 'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
632 'only_matching': True,
    def _real_extract(self, url):
        video_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
        # Redirects (e.g. to a bangumi page) are delegated to the matching extractor
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        headers['Referer'] = url

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
            self.raise_login_required()
        if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
            raise ExtractorError(
                'This video may be deleted or geo-restricted. '
                'You might want to try a VPN or a proxy server (with --proxy)', expected=True)

        # Festival pages embed video info under a different key
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            video_data = initial_state['videoData']

        # Normalize to the canonical BV id (the URL may have used an av id)
        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = (not is_festival and traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology', headers=headers),
            'data', expected_type=list)) or []
        is_anthology = len(page_list_json) > 1

        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        # No specific part requested: offer the whole anthology as a playlist
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            part_id = part_id or 1
            title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        # Festival pages store uploader/stats in different locations
        festival_info = {}
        if is_festival:
            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        # Interactive ("stein gate") videos branch into multiple sections
        is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
        if is_interactive:
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
                duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                __post_extractor=self.extract_comments(aid))

        # Prefer the play info embedded in the page when logged in; otherwise query the API
        play_info = None
        if self.is_logged_in:
            play_info = traverse_obj(
                self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None),
                ('data', {dict}))
        if not play_info:
            play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
        formats = self.extract_formats(play_info)

        # Supporter-only ("upower") videos: fail or warn depending on what we got
        if video_data.get('is_upower_exclusive'):
            high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {}
            msg = f'{join_nonempty("title", "sub_title", from_dict=high_level, delim=",")}. {self._login_hint()}'
            if not formats:
                raise ExtractorError(f'This is a supporter-only video: {msg}', expected=True)
            if '试看' in traverse_obj(play_info, ('accept_description', ..., {str})):
                self.report_warning(
                    f'This is a supporter-only video, only the preview will be extracted: {msg}',
                    video_id=video_id)

        if not traverse_obj(play_info, 'dash'):
            # we only have legacy formats and need additional work
            has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
            for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
                formats.extend(traverse_obj(
                    self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, query={'qn': qn})),
                    lambda _, v: not has_qn(v['quality'])))
            self._check_missing_formats(play_info, formats)
            flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
            if flv_formats and len(flv_formats) < len(formats):
                # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
                if not self._configuration_arg('prefer_multi_flv'):
                    dropped_fmts = ', '.join(
                        f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
                    formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
                    if dropped_fmts:
                        self.to_screen(
                            f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
                            'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
                else:
                    formats = traverse_obj(
                        # XXX: Filtering by extractor-arg is for testing purposes
                        formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
                    ) or [max(flv_formats, key=lambda x: x['quality'])]

        if traverse_obj(formats, (0, 'fragments')):
            # We have flv formats, which are individual short videos with their own timestamps and metainfo
            # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
            return {
                **metainfo,
                '_type': 'multi_video',
                'entries': [{
                    'id': f'{metainfo["id"]}_{idx}',
                    'title': metainfo['title'],
                    'http_headers': metainfo['http_headers'],
                    'formats': [{
                        **fragment,
                        'format_id': formats[0].get('format_id'),
                    }],
                    # attach subtitles/comments only once, on the first fragment
                    'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
                    '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
                } for idx, fragment in enumerate(formats[0]['fragments'])],
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
            }

        return {
            **metainfo,
            'formats': formats,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'chapters': self._get_chapters(aid, cid),
            'subtitles': self.extract_subtitles(video_id, cid),
            '__post_extractor': self.extract_comments(aid),
        }
795 class BiliBiliBangumiIE(BilibiliBaseIE):
796 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
798 _TESTS = [{
799 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
800 'info_dict': {
801 'id': '21495',
802 'ext': 'mp4',
803 'series': '悠久之翼',
804 'series_id': '774',
805 'season': '第二季',
806 'season_id': '1182',
807 'season_number': 2,
808 'episode': 'forever/ef',
809 'episode_id': '21495',
810 'episode_number': 12,
811 'title': '12 forever/ef',
812 'duration': 1420.791,
813 'timestamp': 1320412200,
814 'upload_date': '20111104',
815 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
817 }, {
818 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
819 'info_dict': {
820 'id': '267851',
821 'ext': 'mp4',
822 'series': '鬼灭之刃',
823 'series_id': '4358',
824 'season': '立志篇',
825 'season_id': '26801',
826 'season_number': 1,
827 'episode': '残酷',
828 'episode_id': '267851',
829 'episode_number': 1,
830 'title': '1 残酷',
831 'duration': 1425.256,
832 'timestamp': 1554566400,
833 'upload_date': '20190406',
834 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
836 'skip': 'Geo-restricted',
837 }, {
838 'note': 'a making-of which falls outside main section',
839 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
840 'info_dict': {
841 'id': '345120',
842 'ext': 'mp4',
843 'series': '鬼灭之刃',
844 'series_id': '4358',
845 'season': '立志篇',
846 'season_id': '26801',
847 'season_number': 1,
848 'episode': '炭治郎篇',
849 'episode_id': '345120',
850 'episode_number': 27,
851 'title': '#1 炭治郎篇',
852 'duration': 1922.129,
853 'timestamp': 1602853860,
854 'upload_date': '20201016',
855 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
    def _real_extract(self, url):
        episode_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage = self._download_webpage(url, episode_id, headers=headers)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        headers['Referer'] = url

        # Prefer play info embedded in the page (SSR data); fall back to the playurl API
        play_info = (
            self._search_json(
                r'playurlSSRData\s*=', webpage, 'embedded page info', episode_id,
                end_pattern='\n', default=None)
            or self._download_json(
                'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
                'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
                headers=headers))

        # code -10403 marks premium-only content
        premium_only = play_info.get('code') == -10403
        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
            self.raise_login_required('This video is for premium members only')

        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        # Locate this episode (and its 1-based index) across main and extra sections
        episode_number, episode_info = next((
            (idx, ep) for idx, ep in enumerate(traverse_obj(
                bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
            if str_or_none(ep.get('id')) == episode_id), (1, {}))

        season_id = bangumi_info.get('season_id')
        # NOTE(review): if season_id is falsy, `season_id and next(...)` yields the falsy
        # value and the tuple unpack would raise TypeError — presumably the API always
        # returns a season_id here; confirm before relying on this path
        season_number, season_title = season_id and next((
            (idx + 1, e.get('season_title')) for idx, e in enumerate(
                traverse_obj(bangumi_info, ('seasons', ...)))
            if e.get('season_id') == season_id
        ), (None, None))

        aid = episode_info.get('aid')

        return {
            'id': episode_id,
            'formats': formats,
            **traverse_obj(bangumi_info, {
                'series': ('series', 'series_title', {str}),
                'series_id': ('series', 'series_id', {str_or_none}),
                'thumbnail': ('square_cover', {url_or_none}),
            }),
            **traverse_obj(episode_info, {
                'episode': ('long_title', {str}),
                # fall back to the positional index when the title is not a number
                'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
                'timestamp': ('pub_time', {int_or_none}),
                'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
            }),
            'episode_id': episode_id,
            'season': str_or_none(season_title),
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': {'Referer': url},
        }
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
            'title': 'CAROLE & TUESDAY',
            'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.bilibili.com/bangumi/media/md1565/',
        'info_dict': {
            'id': '1565',
            'title': '攻壳机动队 S.A.C. 2nd GIG',
            'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
        },
        'playlist_count': 26,
        'playlist': [{
            'info_dict': {
                'id': '68540',
                'ext': 'mp4',
                'series': '攻壳机动队',
                'series_id': '1077',
                'season': '第二季',
                'season_id': '1565',
                'season_number': 2,
                'episode': '再启动 REEMBODY',
                'episode_id': '68540',
                'episode_number': 1,
                'title': '1 再启动 REEMBODY',
                'duration': 1525.777,
                'timestamp': 1425074413,
                'upload_date': '20150227',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }]

    def _real_extract(self, url):
        """Resolve a media (md) page to its season and return the episode playlist."""
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)
        state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
        # The md page embeds the real season id; episodes are fetched per season
        season_id = state['mediaInfo']['season_id']
        playlist_meta = traverse_obj(state, ('mediaInfo', {
            'title': ('title', {str}),
            'description': ('evaluate', {str}),
        }))
        return self.playlist_result(
            self._get_episodes_from_season(season_id, url), media_id, **playlist_meta)
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801',
            'title': '鬼灭之刃',
            'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
        },
        'playlist_mincount': 26,
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ss2251',
        'info_dict': {
            'id': '2251',
            'title': '玲音',
            'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
        },
        'playlist_count': 13,
        'playlist': [{
            'info_dict': {
                'id': '50188',
                'ext': 'mp4',
                'series': '玲音',
                'series_id': '1526',
                'season': 'TV',
                'season_id': '2251',
                'season_number': 1,
                'episode': 'WEIRD',
                'episode_id': '50188',
                'episode_number': 1,
                'title': '1 WEIRD',
                'duration': 1436.992,
                'timestamp': 1343185080,
                'upload_date': '20120725',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }]

    def _real_extract(self, url):
        """Return the episode playlist for a season (ss) page."""
        ss_id = self._match_id(url)
        webpage = self._download_webpage(url, ss_id)
        # Title/description come from the page's ld+json breadcrumb structure
        ld_json = self._search_json(
            r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id)
        metainfo = traverse_obj(ld_json, ('itemListElement', ..., {
            'title': ('name', {str}),
            'description': ('description', {str}),
        }), get_all=False)
        entries = self._get_episodes_from_season(ss_id, url)
        return self.playlist_result(entries, ss_id, **metainfo)
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Shared logic for Bilibili 'cheese' (paid course) extractors."""

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for one course episode out of the season payload."""
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        aid, cid = episode_info['aid'], episode_info['cid']

        # ep_status == -1 marks not-yet-released episodes
        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Set explicitly so season-playlist entries are attributed to the episode IE
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                # Display title is "<index> - <title>" when both are present
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Fetch the course season payload; query_key is 'ep_id' or 'season_id'."""
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract a single paid-course episode by its ep id."""
        ep_id = self._match_id(url)
        season_info = self._download_season_info('ep_id', ep_id)
        return self._extract_episode(season_info, ep_id)
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            },
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        """Yield only the episodes the current account is allowed to view."""
        viewable = traverse_obj(
            season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
        for episode_id in viewable:
            yield self._extract_episode(season_info, episode_id)

    def _real_extract(self, url):
        """Extract a whole course season as a playlist."""
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)
        playlist_meta = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })
        return self.playlist_result(
            self._get_cheese_entries(season_info), season_id, **playlist_meta)
class BilibiliSpaceBaseIE(BilibiliBaseIE):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Fetch page 0 eagerly for metadata, then page lazily.

        Returns (metadata, paged_list); metadata must contain
        'page_count' and 'page_size'.
        """
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def resolve_page(page_idx):
            # Reuse the already-downloaded first page for index 0
            return get_entries(fetch_page(page_idx) if page_idx else initial_page)

        return metadata, InAdvancePagedList(
            resolve_page, metadata['page_count'], metadata['page_size'])
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Extract all videos uploaded by a user (space.bilibili.com/<id>/video)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
        'skip': 'login required',
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        def fetch_page(page_idx):
            """Download one page of the space's video list (WBI-signed query)."""
            query = {
                'keyword': '',
                'mid': playlist_id,
                # Sort order can be overridden via ?order= in the input URL
                'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
            }

            try:
                response = self._download_json(
                    'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
                    query=self._sign_wbi(query, playlist_id),
                    note=f'Downloading space page {page_idx}', headers={'Referer': url})
            except ExtractorError as e:
                # HTTP 412 is Bilibili's anti-bot block
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
            status_code = response['code']
            if status_code == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            elif status_code == -352 and not self.is_logged_in:
                # -352 without login usually means an anonymous-access rejection
                self.raise_login_required('Request is rejected, you need to login to access playlist')
            elif status_code != 0:
                raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
            return response['data']

        def get_metadata(page_data):
            """Compute page_count/page_size from the first page's paging info."""
            page_size = page_data['page']['ps']
            entry_count = page_data['page']['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Extract all audio uploads of a user space as a playlist."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # Audio listing endpoint; pn is 1-based
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(data):
            # This endpoint reports paging directly, no math needed
            return {'page_count': data['pageCount'], 'page_size': data['pageSize']}

        def get_entries(data):
            for entry in data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, entries = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(entries, playlist_id)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield url_results for every BV id found under bvid_keys in page_data."""
        path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for bvid in traverse_obj(page_data, path):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Best-effort scrape of the uploader name from the space page <title>."""
        space_page = self._download_webpage(
            f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(
            r'(?s)<title\b[^>]*>([^<]+)的个人空间-', space_page, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Same as the base class, but paging keys are stripped from metadata."""
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        for paging_key in ('page_count', 'page_size'):
            metadata.pop(paging_key, None)
        return metadata, page_list
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Extract a user's video collection (channel/collectiondetail?sid=...)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        # Composite id: uploader mid + collection sid
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            """Paging info plus collection metadata from the first page."""
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                }),
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Extract a user's video series (channel/seriesdetail?sid=...)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        # Composite id: uploader mid + series sid
        playlist_id = f'{mid}_{sid}'
        # Series metadata comes from a separate endpoint; best-effort (fatal=False)
        playlist_meta = traverse_obj(self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
        ), {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']

        def get_metadata(page_data):
            """Paging info plus the pre-fetched series metadata."""
            page_size = page_data['page']['size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta,
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Extract a favorites ('favlist') folder as a playlist."""

    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        fid = self._match_id(url)

        # First request is only used for metadata and the private-list check
        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The /ids endpoint returns all entries at once (no paging needed)
        entries = self._get_entries(self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries'), 'data')

        return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        })))
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {
            'id': r're:\d+',
            'title': '稍后再看',
        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Extract the logged-in user's watch-later list."""
        # Use the DedeUserID cookie value as playlist id when available
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = user_cookie.value if user_cookie is not None else 'watchlater'
        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')
        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')),
            id=list_id, title='稍后再看')
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Extract medialist/list playlists (series, favlists, watchlater) by id."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
        'info_dict': {
            'id': 'BV1DU4y1r7tz',
            'ext': 'mp4',
            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
            'upload_date': '20220820',
            'description': '',
            'timestamp': 1661016330,
            'uploader_id': '1958703906',
            'uploader': '靡烟miya',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 9552.903,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            '_old_archive_ids': ['bilibili 687146339_part1'],
        },
        'params': {'noplaylist': True},
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {
            'id': r're:2_\d+',
            'title': '稍后再看',
            'uploader': str,
            'uploader_id': str,
        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'redirect url & login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Page through the medialist resource API, cursoring on the last oid."""
        for page_num in itertools.count(1):
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            # Cursor-style paging: next page starts after the last returned id
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break

    def _real_extract(self, url):
        list_id = self._match_id(url)

        # ?bvid= pins a single video; honor --no-playlist in that case
        bvid = traverse_obj(parse_qs(url), ('bvid', 0))
        if not self._yes_playlist(list_id, bvid):
            return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)

        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
        if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        query = {
            'ps': 20,
            'with_current': False,
            **traverse_obj(initial_state, {
                'type': ('playlist', 'type', {int_or_none}),
                'biz_id': ('playlist', 'id', {int_or_none}),
                'tid': ('tid', {int_or_none}),
                # 'sortFiled' is the site's own (misspelled) key name
                'sort_field': ('sortFiled', {int_or_none}),
                # API expects the literal strings 'true'/'false'
                'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
            }),
        }
        metadata = {
            # Playlist id is '<type>_<biz_id>' (e.g. 3_* favlist, 5_* series)
            'id': f'{query["type"]}_{query["biz_id"]}',
            **traverse_obj(initial_state, ('mediaListInfo', {
                'title': ('title', {str}),
                'uploader': ('upper', 'name', {str}),
                'uploader_id': ('upper', 'mid', {str_or_none}),
                'timestamp': ('ctime', {int_or_none}, filter),
                'thumbnail': ('cover', {url_or_none}),
            })),
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
class BilibiliCategoryIE(InfoExtractor):
    """Extract all videos of a category/subcategory listing page."""

    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    # NOTE: redundant '\/' escape removed; matching behavior is unchanged
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad',
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45,
        },
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield url_results for a single page of the category listing."""
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note=f'Extracting results from page {page_num} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {page_num}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return a lazily-paged list of all entries in the given subcategory."""
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127,
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # Guard against a missing 'data.page' object so we raise the intended
        # ExtractorError below instead of an AttributeError on None
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        # URL path is /v/<category>/<subcategory>
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
class BiliBiliSearchIE(SearchInfoExtractor):
    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    _TESTS = [{
        'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        'playlist_count': 3,
        'info_dict': {
            'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
            'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        },
        'playlist': [{
            'info_dict': {
                'id': 'BV1n44y1Q7sc',
                'ext': 'mp4',
                'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
                'timestamp': 1669889987,
                'upload_date': '20221201',
                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
                'tags': list,
                'uploader': '靡烟miya',
                'duration': 123.156,
                'uploader_id': '1958703906',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 988222410_part1'],
            },
        }],
    }]

    def _search_results(self, query):
        # The search API refuses requests without a buvid3 cookie; fabricate one
        if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
        for page_num in itertools.count(1):
            videos = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query={
                    'Search_key': query,
                    'keyword': query,
                    'page': page_num,
                    'context': '',
                    'duration': 0,
                    'tids_2': '',
                    '__refresh__': 'true',
                    'search_type': 'video',
                    'tids': 0,
                    'highlight': 1,
                })['data'].get('result')
            # An empty result list marks the end of the search results
            if not videos:
                break
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
class BilibiliAudioBaseIE(InfoExtractor):
    def _call_api(self, path, sid, query=None):
        """Call the audio music-service web API and return its 'data' payload."""
        response = self._download_json(
            'https://www.bilibili.com/audio/music-service-c/web/' + path,
            sid, query=query or {'sid': sid})
        return response['data']
class BilibiliAudioIE(BilibiliAudioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        au_id = self._match_id(url)

        # The 'url' endpoint returns direct CDN links for the audio file
        play_data = self._call_api('url', au_id)
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
        }]

        # CDN requires a Referer header on media requests
        for a_format in formats:
            a_format.setdefault('http_headers', {}).update({
                'Referer': url,
            })

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        subtitles = None
        # Lyrics are exposed as an LRC file and mapped to an 'origin' subtitle track
        lyric = song.get('lyric')
        if lyric:
            subtitles = {
                'origin': [{
                    'url': lyric,
                }],
            }

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Extract an audio album (am) as a playlist of its songs."""
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        entries = []
        for song in songs:
            song_id = str_or_none(song.get('id'))
            if song_id:  # skip entries without a usable id
                entries.append(self.url_result(
                    'https://www.bilibili.com/audio/au' + song_id,
                    BilibiliAudioIE.ie_key(), song_id))

        if not entries:
            return self.playlist_result(entries, am_id)

        # Album metadata is only fetched when there is something to tag
        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if album_title:
            for entry in entries:
                entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))
class BiliBiliPlayerIE(InfoExtractor):
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Redirect an embedded-player URL to the canonical av-number page."""
        aid = self._match_id(url)
        canonical_url = f'http://www.bilibili.tv/video/av{aid}/'
        return self.url_result(canonical_url, ie=BiliBiliIE.ie_key(), video_id=aid)
class BiliIntlBaseIE(InfoExtractor):
    """Shared helpers for the international site (bilibili.tv / biliintl.com)."""
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'
    _HEADERS = {'Referer': 'https://www.bilibili.tv/'}

    def _call_api(self, endpoint, *args, **kwargs):
        """Call the intl gateway API and return its 'data' payload.

        Login/geo-restriction API codes raise the corresponding errors;
        other non-zero codes raise or warn depending on kwargs['fatal'].
        """
        # Renamed from `json` to avoid shadowing the module-level json import
        response = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if response.get('code'):
            if response['code'] in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif response['code'] == 10004001:
                self.raise_geo_restricted()
            else:
                if response.get('message') and str(response['code']) != response['message']:
                    errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {response["message"]}'
                else:
                    errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                else:
                    self.report_warning(errmsg)
        return response.get('data')

    def json2srt(self, json):
        """Convert bilibili's JSON subtitle format to SRT text.

        Cues with empty content or missing timestamps are dropped.
        """
        return '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(traverse_obj(json, (
                'body', lambda _, l: l['content'] and l['from'] and l['to']))))

    def _get_subtitles(self, *, ep_id=None, aid=None):
        """Download and normalize subtitles for an episode or UGC video."""
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        # The same URL can appear under several keys; download each only once
        fetched_urls = set()
        for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
            for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
                if url in fetched_urls:
                    continue
                fetched_urls.add(url)
                sub_ext = determine_ext(url)
                sub_lang = sub.get('lang_key') or 'en'

                if sub_ext == 'ass':
                    subtitles.setdefault(sub_lang, []).append({
                        'ext': 'ass',
                        'url': url,
                    })
                elif sub_ext == 'json':
                    # JSON subtitles are converted to SRT locally
                    sub_data = self._download_json(
                        url, ep_id or aid, fatal=False,
                        note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
                        errnote='Unable to download subtitles')

                    if sub_data:
                        subtitles.setdefault(sub_lang, []).append({
                            'ext': 'srt',
                            'data': self.json2srt(sub_data),
                        })
                else:
                    self.report_warning('Unexpected subtitle extension', ep_id or aid)

        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Extract separate video-only and audio-only formats from /web/playurl."""
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Map API video data to info-dict fields; episode number comes from the title."""
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'description': video_data.get('desc'),
            'thumbnail': video_data.get('cover'),
            'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in with an RSA-encrypted password (requires pycryptodomex)."""
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        # The server-provided hash is prepended to the password before encryption
        public_key = Cryptodome.RSA.importKey(key_data['key'])
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
            data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true',
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to log in')
class BiliIntlIE(BiliIntlBaseIE):
    # Single episodes (play/<season>/<ep>) and user-generated videos (video/<aid>)
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro',
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro',
            }],
        },
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro',
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro',
            }],
        },
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro',
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro',
            }],
        },
        'params': {
            'getcomments': True,
        },
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
            'comment_count': int,
            'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
        },
        'params': {
            'getcomments': True,
        },
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Build the canonical bilibili.tv URL for an episode or UGC video."""
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Collect metadata from smuggled data, the webpage, or the episode list API."""
        url, smuggled_data = unsmuggle_url(url, {})
        # The series extractor may smuggle already-known metadata into the URL
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
            ), expected_type=dict, get_all=False)

        # XXX: webpage metadata may not be accurate; it is only used as a
        # fallback to avoid crashing when video_data was not found
        return merge_dicts(
            self._parse_video_metadata(video_data), {
                'title': get_element_by_class(
                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                'description': get_element_by_class(
                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
            }, self._search_json_ld(webpage, video_id, default={}))

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Recursively yield all replies to the comment with id `root_id`."""
        comment_api_raw_data = self._download_json(
            'https://api.bilibili.tv/reply/web/detail', display_id,
            note=f'Downloading reply comment of {root_id} - {next_id}',
            query={
                'platform': 'web',
                'ps': 20,  # comment's reply per page (default: 3)
                'root': root_id,
                'next': next_id,
            })

        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
            yield {
                'author': traverse_obj(replies, ('member', 'name')),
                'author_id': traverse_obj(replies, ('member', 'mid')),
                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                'text': traverse_obj(replies, ('content', 'message')),
                'id': replies.get('rpid'),
                'like_count': int_or_none(replies.get('like_count')),
                'parent': replies.get('parent'),
                'timestamp': unified_timestamp(replies.get('ctime_text')),
            }

        # Recurse with the next cursor until the API reports the end
        if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
            yield from self._get_comments_reply(
                root_id, comment_api_raw_data['data']['cursor']['next'], display_id)

    def _get_comments(self, video_id, ep_id):
        """Yield top-level comments (and their reply threads) page by page."""
        for i in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {i + 1}',
                query={
                    'platform': 'web',
                    'pn': i,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'timestamp': unified_timestamp(replies.get('ctime_text')),
                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
                }
                if replies.get('count'):
                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)

            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                break

    def _real_extract(self, url):
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            # Intro/outro skip markers are only available for licensed episodes
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start and end times seem to be off by a few seconds
                # even though this matches what ogv.*.js does
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                chapters = [{
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro',
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro',
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id),
            'http_headers': self._HEADERS,
        }
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Playlist extractor for whole seasons on bilibili.tv / biliintl.com."""
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        # Walk every section of the season; smuggle the metadata we already
        # have so BiliIntlIE can skip a webpage download per episode
        episodes_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(episodes_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            ep_id = str(episode['episode_id'])
            smuggled = smuggle_url(
                BiliIntlIE._make_url(ep_id, series_id),
                self._parse_video_metadata(episode),
            )
            yield self.url_result(smuggled, BiliIntlIE, ep_id)

    def _real_extract(self, url):
        series_id = self._match_id(url)
        season = self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
        return self.playlist_result(
            self._entries(series_id), series_id, season.get('title'), season.get('description'),
            categories=traverse_obj(season, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(season.get('horizontal_cover')), view_count=parse_count(season.get('view')))
class BiliLiveIE(InfoExtractor):
    # Live rooms on live.bilibili.com; /blanc/ is the ad-free room layout
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
            'ext': 'flv',
            'title': '太空狼人杀联动,不被爆杀就算赢',
            'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
            'timestamp': 1650802769,
        },
        'skip': 'not live',
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True,
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True,
    }]
    # qn (quality number) → yt-dlp format metadata, ordered worst to best;
    # _quality maps a qn onto a sortable preference
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }
    _quality = staticmethod(qualities(list(_FORMATS)))
2370 def _call_api(self, path, room_id, query):
2371 api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
2372 if api_result.get('code') != 0:
2373 raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
2374 return api_result.get('data') or {}
2376 def _parse_formats(self, qn, fmt):
2377 for codec in fmt.get('codec') or []:
2378 if codec.get('current_qn') != qn:
2379 continue
2380 for url_info in codec['url_info']:
2381 yield {
2382 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2383 'ext': fmt.get('format_name'),
2384 'vcodec': codec.get('codec_name'),
2385 'quality': self._quality(qn),
2386 **self._FORMATS[qn],
2389 def _real_extract(self, url):
2390 room_id = self._match_id(url)
2391 room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
2392 if room_data.get('live_status') == 0:
2393 raise ExtractorError('Streamer is not live', expected=True)
2395 formats = []
2396 for qn in self._FORMATS:
2397 stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
2398 'room_id': room_id,
2399 'qn': qn,
2400 'codec': '0,1',
2401 'format': '0,2',
2402 'mask': '0',
2403 'no_playurl': '0',
2404 'platform': 'web',
2405 'protocol': '0,1',
2407 for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2408 formats.extend(self._parse_formats(qn, fmt))
2410 return {
2411 'id': room_id,
2412 'title': room_data.get('title'),
2413 'description': room_data.get('description'),
2414 'thumbnail': room_data.get('user_cover'),
2415 'timestamp': stream_data.get('live_time'),
2416 'formats': formats,
2417 'is_live': True,
2418 'http_headers': {
2419 'Referer': url,