yt_dlp/extractor/bilibili.py

   1 import base64
   2 import functools
   3 import hashlib
   4 import itertools
   5 import json
   6 import math
   7 import re
   8 import time
   9 import urllib.parse
  10 import uuid
  11
  12 from .common import InfoExtractor, SearchInfoExtractor
  13 from ..dependencies import Cryptodome
  14 from ..networking.exceptions import HTTPError
  15 from ..utils import (
  16     ExtractorError,
  17     GeoRestrictedError,
  18     InAdvancePagedList,
  19     OnDemandPagedList,
  20     bool_or_none,
  21     clean_html,
  22     determine_ext,
  23     filter_dict,
  24     float_or_none,
  25     format_field,
  26     get_element_by_class,
  27     int_or_none,
  28     join_nonempty,
  29     make_archive_id,
  30     merge_dicts,
  31     mimetype2ext,
  32     parse_count,
  33     parse_qs,
  34     parse_resolution,
  35     qualities,
  36     smuggle_url,
  37     srt_subtitles_timecode,
  38     str_or_none,
  39     traverse_obj,
  40     unified_timestamp,
  41     unsmuggle_url,
  42     url_or_none,
  43     urlencode_postdata,
  44     variadic,
  45 )
  46
  47
  48 class BilibiliBaseIE(InfoExtractor):
  49     _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
  50     _WBI_KEY_CACHE_TIMEOUT = 30  # exact expire timeout is unclear, use 30s for one session
  51     _wbi_key_cache = {}
  52
  53     @property
  54     def is_logged_in(self):
  55         return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
  56
  57     def _check_missing_formats(self, play_info, formats):
  58         parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
  59         missing_formats = join_nonempty(*[
  60             traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
  61             for fmt in traverse_obj(play_info, (
  62                 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
  63         if missing_formats:
  64             self.to_screen(
  65                 f'Format(s) {missing_formats} are missing; you have to login or '
  66                 f'become a premium member to download them. {self._login_hint()}')
  67
  68     def extract_formats(self, play_info):
  69         format_names = {
  70             r['quality']: traverse_obj(r, 'new_description', 'display_desc')
  71             for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
  72         }
  73
  74         audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
  75         flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
  76         if flac_audio:
  77             audios.append(flac_audio)
  78         formats = [{
  79             'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
  80             'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
  81             'acodec': traverse_obj(audio, ('codecs', {str.lower})),
  82             'vcodec': 'none',
  83             'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
  84             'filesize': int_or_none(audio.get('size')),
  85             'format_id': str_or_none(audio.get('id')),
  86         } for audio in audios]
  87
  88         formats.extend({
  89             'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
  90             'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
  91             'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
  92             'width': int_or_none(video.get('width')),
  93             'height': int_or_none(video.get('height')),
  94             'vcodec': video.get('codecs'),
  95             'acodec': 'none' if audios else None,
  96             'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
  97             'tbr': float_or_none(video.get('bandwidth'), scale=1000),
  98             'filesize': int_or_none(video.get('size')),
  99             'quality': int_or_none(video.get('id')),
 100             'format_id': traverse_obj(
 101                 video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
 102                 ('id', {str_or_none}), get_all=False),
 103             'format': format_names.get(video.get('id')),
 104         } for video in traverse_obj(play_info, ('dash', 'video', ...)))
 105
 106         if formats:
 107             self._check_missing_formats(play_info, formats)
 108
 109         fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
 110             'url': ('url', {url_or_none}),
 111             'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
 112             'filesize': ('size', {int_or_none}),
 113         }))
 114         if fragments:
 115             formats.append({
 116                 'url': fragments[0]['url'],
 117                 'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
 118                 **({
 119                     'fragments': fragments,
 120                     'protocol': 'http_dash_segments',
 121                 } if len(fragments) > 1 else {}),
 122                 **traverse_obj(play_info, {
 123                     'quality': ('quality', {int_or_none}),
 124                     'format_id': ('quality', {str_or_none}),
 125                     'format_note': ('quality', {lambda x: format_names.get(x)}),
 126                     'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
 127                 }),
 128                 **parse_resolution(format_names.get(play_info.get('quality'))),
 129             })
 130         return formats
 131
 132     def _get_wbi_key(self, video_id):
 133         if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
 134             return self._wbi_key_cache['key']
 135
 136         session_data = self._download_json(
 137             'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
 138
 139         lookup = ''.join(traverse_obj(session_data, (
 140             'data', 'wbi_img', ('img_url', 'sub_url'),
 141             {lambda x: x.rpartition('/')[2].partition('.')[0]})))
 142
 143         # from getMixinKey() in the vendor js
 144         mixin_key_enc_tab = [
 145             46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
 146             33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
 147             61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
 148             36, 20, 34, 44, 52,
 149         ]
 150
 151         self._wbi_key_cache.update({
 152             'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
 153             'ts': time.time(),
 154         })
 155         return self._wbi_key_cache['key']
 156
 157     def _sign_wbi(self, params, video_id):
 158         params['wts'] = round(time.time())
 159         params = {
 160             k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
 161             for k, v in sorted(params.items())
 162         }
 163         query = urllib.parse.urlencode(params)
 164         params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
 165         return params
 166
 167     def _download_playinfo(self, bvid, cid, headers=None, qn=None):
 168         params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
 169         if qn:
 170             params['qn'] = qn
 171         return self._download_json(
 172             'https://api.bilibili.com/x/player/wbi/playurl', bvid,
 173             query=self._sign_wbi(params, bvid), headers=headers,
 174             note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
 175
 176     def json2srt(self, json_data):
 177         srt_data = ''
 178         for idx, line in enumerate(json_data.get('body') or []):
 179             srt_data += (f'{idx + 1}\n'
 180                          f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
 181                          f'{line["content"]}\n\n')
 182         return srt_data
 183
 184     def _get_subtitles(self, video_id, cid, aid=None):
 185         subtitles = {
 186             'danmaku': [{
 187                 'ext': 'xml',
 188                 'url': f'https://comment.bilibili.com/{cid}.xml',
 189             }],
 190         }
 191
 192         video_info = self._download_json(
 193             'https://api.bilibili.com/x/player/v2', video_id,
 194             query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
 195             note=f'Extracting subtitle info {cid}')
 196         if traverse_obj(video_info, ('data', 'need_login_subtitle')):
 197             self.report_warning(
 198                 f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
 199         for s in traverse_obj(video_info, (
 200                 'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
 201             subtitles.setdefault(s['lan'], []).append({
 202                 'ext': 'srt',
 203                 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
 204             })
 205         return subtitles
 206
 207     def _get_chapters(self, aid, cid):
 208         chapters = aid and cid and self._download_json(
 209             'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
 210             note='Extracting chapters', fatal=False)
 211         return traverse_obj(chapters, ('data', 'view_points', ..., {
 212             'title': 'content',
 213             'start_time': 'from',
 214             'end_time': 'to',
 215         })) or None
 216
 217     def _get_comments(self, aid):
 218         for idx in itertools.count(1):
 219             replies = traverse_obj(
 220                 self._download_json(
 221                     f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
 222                     aid, note=f'Extracting comments from page {idx}', fatal=False),
 223                 ('data', 'replies'))
 224             if not replies:
 225                 return
 226             for children in map(self._get_all_children, replies):
 227                 yield from children
 228
 229     def _get_all_children(self, reply):
 230         yield {
 231             'author': traverse_obj(reply, ('member', 'uname')),
 232             'author_id': traverse_obj(reply, ('member', 'mid')),
 233             'id': reply.get('rpid'),
 234             'text': traverse_obj(reply, ('content', 'message')),
 235             'timestamp': reply.get('ctime'),
 236             'parent': reply.get('parent') or 'root',
 237         }
 238         for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
 239             yield from children
 240
 241     def _get_episodes_from_season(self, ss_id, url):
 242         season_info = self._download_json(
 243             'https://api.bilibili.com/pgc/web/season/section', ss_id,
 244             note='Downloading season info', query={'season_id': ss_id},
 245             headers={'Referer': url, **self.geo_verification_headers()})
 246
 247         for entry in traverse_obj(season_info, (
 248                 'result', 'main_section', 'episodes',
 249                 lambda _, v: url_or_none(v['share_url']) and v['id'])):
 250             yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
 251
 252     def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
 253         cid_edges = cid_edges or {}
 254         division_data = self._download_json(
 255             'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
 256             query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
 257             note=f'Extracting divisions from edge {edge_id}')
 258         edges.setdefault(edge_id, {}).update(
 259             traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
 260                 'title': ('title', {str}),
 261                 'cid': ('cid', {int_or_none}),
 262             }), get_all=False))
 263
 264         edges[edge_id].update(traverse_obj(division_data, ('data', {
 265             'title': ('title', {str}),
 266             'choices': ('edges', 'questions', ..., 'choices', ..., {
 267                 'edge_id': ('id', {int_or_none}),
 268                 'cid': ('cid', {int_or_none}),
 269                 'text': ('option', {str}),
 270             }),
 271         })))
 272         # use dict to combine edges that use the same video section (same cid)
 273         cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
 274         for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
 275             if choice['edge_id'] not in edges:
 276                 edges[choice['edge_id']] = {'cid': choice['cid']}
 277                 self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
 278         return cid_edges
 279
 280     def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
 281         graph_version = traverse_obj(
 282             self._download_json(
 283                 'https://api.bilibili.com/x/player/wbi/v2', video_id,
 284                 'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
 285             ('data', 'interaction', 'graph_version', {int_or_none}))
 286         cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
 287         for cid, edges in cid_edges.items():
 288             play_info = self._download_playinfo(video_id, cid, headers=headers)
 289             yield {
 290                 **metainfo,
 291                 'id': f'{video_id}_{cid}',
 292                 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
 293                 'formats': self.extract_formats(play_info),
 294                 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
 295                 'duration': float_or_none(play_info.get('timelength'), scale=1000),
 296                 'subtitles': self.extract_subtitles(video_id, cid),
 297             }
 298
 299
 300 class BiliBiliIE(BilibiliBaseIE):
 301     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
 302
 303     _TESTS = [{
 304         'url': 'https://www.bilibili.com/video/BV13x41117TL',
 305         'info_dict': {
 306             'id': 'BV13x41117TL',
 307             'title': '阿滴英文｜英文歌分享#6 "Closer',
 308             'ext': 'mp4',
 309             'description': '滴妹今天唱Closer給你聽! 有史以来，被推最多次也是最久的歌曲，其实歌词跟我原本想像差蛮多的，不过还是好听！ 微博@阿滴英文',
 310             'uploader_id': '65880958',
 311             'uploader': '阿滴英文',
 312             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 313             'duration': 554.117,
 314             'tags': list,
 315             'comment_count': int,
 316             'upload_date': '20170301',
 317             'timestamp': 1488353834,
 318             'like_count': int,
 319             'view_count': int,
 320             '_old_archive_ids': ['bilibili 8903802_part1'],
 321         },
 322     }, {
 323         'note': 'old av URL version',
 324         'url': 'http://www.bilibili.com/video/av1074402/',
 325         'info_dict': {
 326             'id': 'BV11x411K7CN',
 327             'ext': 'mp4',
 328             'title': '【金坷垃】金泡沫',
 329             'uploader': '菊子桑',
 330             'uploader_id': '156160',
 331             'duration': 308.36,
 332             'upload_date': '20140420',
 333             'timestamp': 1397983878,
 334             'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
 335             'like_count': int,
 336             'comment_count': int,
 337             'view_count': int,
 338             'tags': list,
 339             'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
 340             '_old_archive_ids': ['bilibili 1074402_part1'],
 341         },
 342         'params': {'skip_download': True},
 343     }, {
 344         'note': 'Anthology',
 345         'url': 'https://www.bilibili.com/video/BV1bK411W797',
 346         'info_dict': {
 347             'id': 'BV1bK411W797',
 348             'title': '物语中的人物是如何吐槽自己的OP的',
 349         },
 350         'playlist_count': 18,
 351         'playlist': [{
 352             'info_dict': {
 353                 'id': 'BV1bK411W797_p1',
 354                 'ext': 'mp4',
 355                 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
 356                 'tags': 'count:10',
 357                 'timestamp': 1589601697,
 358                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 359                 'uploader': '打牌还是打桩',
 360                 'uploader_id': '150259984',
 361                 'like_count': int,
 362                 'comment_count': int,
 363                 'upload_date': '20200516',
 364                 'view_count': int,
 365                 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
 366                 'duration': 90.314,
 367                 '_old_archive_ids': ['bilibili 498159642_part1'],
 368             },
 369         }],
 370     }, {
 371         'note': 'Specific page of Anthology',
 372         'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
 373         'info_dict': {
 374             'id': 'BV1bK411W797_p1',
 375             'ext': 'mp4',
 376             'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
 377             'tags': 'count:10',
 378             'timestamp': 1589601697,
 379             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 380             'uploader': '打牌还是打桩',
 381             'uploader_id': '150259984',
 382             'like_count': int,
 383             'comment_count': int,
 384             'upload_date': '20200516',
 385             'view_count': int,
 386             'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
 387             'duration': 90.314,
 388             '_old_archive_ids': ['bilibili 498159642_part1'],
 389         },
 390     }, {
 391         'url': 'https://www.bilibili.com/video/av8903802/',
 392         'info_dict': {
 393             'id': 'BV13x41117TL',
 394             'ext': 'mp4',
 395             'title': '阿滴英文｜英文歌分享#6 "Closer',
 396             'upload_date': '20170301',
 397             'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
 398             'timestamp': 1488353834,
 399             'uploader_id': '65880958',
 400             'uploader': '阿滴英文',
 401             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 402             'duration': 554.117,
 403             'tags': list,
 404             'comment_count': int,
 405             'view_count': int,
 406             'like_count': int,
 407             '_old_archive_ids': ['bilibili 8903802_part1'],
 408         },
 409         'params': {
 410             'skip_download': True,
 411         },
 412     }, {
 413         'note': 'video has chapter',
 414         'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
 415         'info_dict': {
 416             'id': 'BV1vL411G7N7',
 417             'ext': 'mp4',
 418             'title': '如何为你的B站视频添加进度条分段',
 419             'timestamp': 1634554558,
 420             'upload_date': '20211018',
 421             'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
 422             'tags': list,
 423             'uploader': '爱喝咖啡的当麻',
 424             'duration': 669.482,
 425             'uploader_id': '1680903',
 426             'chapters': 'count:6',
 427             'comment_count': int,
 428             'view_count': int,
 429             'like_count': int,
 430             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 431             '_old_archive_ids': ['bilibili 463665680_part1'],
 432         },
 433         'params': {'skip_download': True},
 434     }, {
 435         'note': 'video redirects to festival page',
 436         'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
 437         'info_dict': {
 438             'id': 'BV1wP4y1P72h',
 439             'ext': 'mp4',
 440             'title': '牛虎年相交之际，一首传统民族打击乐《牛斗虎》祝大家新春快乐，虎年大吉！【bilibili音乐虎闹新春】',
 441             'timestamp': 1643947497,
 442             'upload_date': '20220204',
 443             'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
 444             'uploader': '叨叨冯聊音乐',
 445             'duration': 246.719,
 446             'uploader_id': '528182630',
 447             'view_count': int,
 448             'like_count': int,
 449             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 450             '_old_archive_ids': ['bilibili 893839363_part1'],
 451         },
 452     }, {
 453         'note': 'newer festival video',
 454         'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
 455         'info_dict': {
 456             'id': 'BV1ay4y1d77f',
 457             'ext': 'mp4',
 458             'title': '【崩坏3新春剧场】为特别的你送上祝福！',
 459             'timestamp': 1674273600,
 460             'upload_date': '20230121',
 461             'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
 462             'uploader': '果蝇轰',
 463             'duration': 1111.722,
 464             'uploader_id': '8469526',
 465             'view_count': int,
 466             'like_count': int,
 467             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 468             '_old_archive_ids': ['bilibili 778246196_part1'],
 469         },
 470     }, {
 471         'note': 'legacy flv/mp4 video',
 472         'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
 473         'info_dict': {
 474             'id': 'BV1ms411Q7vw_p4',
 475             'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
 476             'timestamp': 1458222815,
 477             'upload_date': '20160317',
 478             'description': '云南方言快乐生产线出品',
 479             'duration': float,
 480             'uploader': '一笑颠天',
 481             'uploader_id': '3916081',
 482             'view_count': int,
 483             'comment_count': int,
 484             'like_count': int,
 485             'tags': list,
 486             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 487             '_old_archive_ids': ['bilibili 4120229_part4'],
 488         },
 489         'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
 490         'playlist_count': 19,
 491         'playlist': [{
 492             'info_dict': {
 493                 'id': 'BV1ms411Q7vw_p4_0',
 494                 'ext': 'flv',
 495                 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
 496                 'duration': 399.102,
 497             },
 498         }],
 499     }, {
 500         'note': 'legacy mp4-only video',
 501         'url': 'https://www.bilibili.com/video/BV1nx411u79K',
 502         'info_dict': {
 503             'id': 'BV1nx411u79K',
 504             'ext': 'mp4',
 505             'title': '【练习室】201603声乐练习《No Air》with VigoVan',
 506             'timestamp': 1508893551,
 507             'upload_date': '20171025',
 508             'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
 509             'duration': 80.384,
 510             'uploader': '伯远',
 511             'uploader_id': '10584494',
 512             'comment_count': int,
 513             'view_count': int,
 514             'like_count': int,
 515             'tags': list,
 516             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 517             '_old_archive_ids': ['bilibili 15700301_part1'],
 518         },
 519     }, {
 520         'note': 'interactive/split-path video',
 521         'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
 522         'info_dict': {
 523             'id': 'BV1af4y1H7ga',
 524             'title': '【互动游戏】花了大半年时间做的自我介绍~请查收！！',
 525             'timestamp': 1630500414,
 526             'upload_date': '20210901',
 527             'description': 'md5:01113e39ab06e28042d74ac356a08786',
 528             'tags': list,
 529             'uploader': '钉宫妮妮Ninico',
 530             'duration': 1503,
 531             'uploader_id': '8881297',
 532             'comment_count': int,
 533             'view_count': int,
 534             'like_count': int,
 535             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 536             '_old_archive_ids': ['bilibili 292734508_part1'],
 537         },
 538         'playlist_count': 33,
 539         'playlist': [{
 540             'info_dict': {
 541                 'id': 'BV1af4y1H7ga_400950101',
 542                 'ext': 'mp4',
 543                 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收！！ - 听见猫猫叫~',
 544                 'timestamp': 1630500414,
 545                 'upload_date': '20210901',
 546                 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
 547                 'tags': list,
 548                 'uploader': '钉宫妮妮Ninico',
 549                 'duration': 11.605,
 550                 'uploader_id': '8881297',
 551                 'comment_count': int,
 552                 'view_count': int,
 553                 'like_count': int,
 554                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 555                 '_old_archive_ids': ['bilibili 292734508_part1'],
 556             },
 557         }],
 558     }, {
 559         'note': '301 redirect to bangumi link',
 560         'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
 561         'info_dict': {
 562             'id': '288525',
 563             'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么？',
 564             'ext': 'mp4',
 565             'series': '我和我的祖国',
 566             'series_id': '4780',
 567             'season': '幕后纪实',
 568             'season_id': '28609',
 569             'season_number': 1,
 570             'episode': '钱学森弹道和乘波体飞行器是什么？',
 571             'episode_id': '288525',
 572             'episode_number': 105,
 573             'duration': 1183.957,
 574             'timestamp': 1571648124,
 575             'upload_date': '20191021',
 576             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 577         },
 578     }, {
 579         'note': 'video has subtitles, which requires login',
 580         'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
 581         'info_dict': {
 582             'id': 'BV12N4y1M7rh',
 583             'ext': 'mp4',
 584             'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
 585             'tags': list,
 586             'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
 587             'duration': 313.557,
 588             'upload_date': '20220709',
 589             'uploader': '小夫太渴',
 590             'timestamp': 1657347907,
 591             'uploader_id': '1326814124',
 592             'comment_count': int,
 593             'view_count': int,
 594             'like_count': int,
 595             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 596             'subtitles': 'count:2',  # login required for CC subtitle
 597             '_old_archive_ids': ['bilibili 898179753_part1'],
 598         },
 599         'params': {'listsubtitles': True},
 600         'skip': 'login required for subtitle',
 601     }, {
 602         'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
 603         'info_dict': {
 604             'id': 'BV1jL41167ZG',
 605             'title': '一场大火引发的离奇死亡！古典推理经典短篇集《不可能犯罪诊断书》！',
 606             'ext': 'mp4',
 607         },
 608         'skip': 'supporter-only video',
 609     }, {
 610         'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
 611         'info_dict': {
 612             'id': 'BV1Ks411f7aQ',
 613             'title': '【BD1080P】狼与香辛料I【华盟】',
 614             'ext': 'mp4',
 615         },
 616         'skip': 'login required',
 617     }, {
 618         'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
 619         'info_dict': {
 620             'id': 'BV1GJ411x7h7',
 621             'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
 622             'ext': 'mp4',
 623         },
 624         'skip': 'geo-restricted',
 625     }, {
 626         'note': 'has - in the last path segment of the url',
 627         'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
 628         'only_matching': True,
 629     }]
 630
 631     def _real_extract(self, url):
 632         video_id = self._match_id(url)
 633         headers = self.geo_verification_headers()
 634         webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
 635         if not self._match_valid_url(urlh.url):
 636             return self.url_result(urlh.url)
 637
 638         headers['Referer'] = url
 639
 640         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
 641         is_festival = 'videoData' not in initial_state
 642         if is_festival:
 643             video_data = initial_state['videoInfo']
 644         else:
 645             play_info_obj = self._search_json(
 646                 r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
 647             if not play_info_obj:
 648                 if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
 649                     self.raise_login_required()
 650                 if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
 651                     raise ExtractorError(
 652                         'This video may be deleted or geo-restricted. '
 653                         'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
 654             play_info = traverse_obj(play_info_obj, ('data', {dict}))
 655             if not play_info:
 656                 if traverse_obj(play_info_obj, 'code') == 87007:
 657                     toast = get_element_by_class('tips-toast', webpage) or ''
 658                     msg = clean_html(
 659                         f'{get_element_by_class("belongs-to", toast) or ""}，'
 660                         + (get_element_by_class('level', toast) or ''))
 661                     raise ExtractorError(
 662                         f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
 663                 raise ExtractorError('Failed to extract play info')
 664             video_data = initial_state['videoData']
 665
 666         video_id, title = video_data['bvid'], video_data.get('title')
 667
 668         # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
 669         page_list_json = not is_festival and traverse_obj(
 670             self._download_json(
 671                 'https://api.bilibili.com/x/player/pagelist', video_id,
 672                 fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
 673                 note='Extracting videos in anthology', headers=headers),
 674             'data', expected_type=list) or []
 675         is_anthology = len(page_list_json) > 1
 676
 677         part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
 678         if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
 679             return self.playlist_from_matches(
 680                 page_list_json, video_id, title, ie=BiliBiliIE,
 681                 getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
 682
 683         if is_anthology:
 684             part_id = part_id or 1
 685             title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
 686
 687         aid = video_data.get('aid')
 688         old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
 689         cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
 690
 691         festival_info = {}
 692         if is_festival:
 693             play_info = self._download_playinfo(video_id, cid, headers=headers)
 694
 695             festival_info = traverse_obj(initial_state, {
 696                 'uploader': ('videoInfo', 'upName'),
 697                 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
 698                 'like_count': ('videoStatus', 'like', {int_or_none}),
 699                 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
 700             }, get_all=False)
 701
 702         metainfo = {
 703             **traverse_obj(initial_state, {
 704                 'uploader': ('upData', 'name'),
 705                 'uploader_id': ('upData', 'mid', {str_or_none}),
 706                 'like_count': ('videoData', 'stat', 'like', {int_or_none}),
 707                 'tags': ('tags', ..., 'tag_name'),
 708                 'thumbnail': ('videoData', 'pic', {url_or_none}),
 709             }),
 710             **festival_info,
 711             **traverse_obj(video_data, {
 712                 'description': 'desc',
 713                 'timestamp': ('pubdate', {int_or_none}),
 714                 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
 715                 'comment_count': ('stat', 'reply', {int_or_none}),
 716             }, get_all=False),
 717             'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
 718             '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
 719             'title': title,
 720             'http_headers': {'Referer': url},
 721         }
 722
 723         is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
 724         if is_interactive:
 725             return self.playlist_result(
 726                 self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
 727                 duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
 728                 __post_extractor=self.extract_comments(aid))
 729         else:
 730             formats = self.extract_formats(play_info)
 731
 732             if not traverse_obj(play_info, ('dash')):
 733                 # we only have legacy formats and need additional work
 734                 has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
 735                 for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
 736                     formats.extend(traverse_obj(
 737                         self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
 738                         lambda _, v: not has_qn(v['quality'])))
 739                 self._check_missing_formats(play_info, formats)
 740                 flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
 741                 if flv_formats and len(flv_formats) < len(formats):
 742                     # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
 743                     if not self._configuration_arg('prefer_multi_flv'):
 744                         dropped_fmts = ', '.join(
 745                             f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
 746                         formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
 747                         if dropped_fmts:
 748                             self.to_screen(
 749                                 f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
 750                                 'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
 751                     else:
 752                         formats = traverse_obj(
 753                             # XXX: Filtering by extractor-arg is for testing purposes
 754                             formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
 755                         ) or [max(flv_formats, key=lambda x: x['quality'])]
 756
 757             if traverse_obj(formats, (0, 'fragments')):
 758                 # We have flv formats, which are individual short videos with their own timestamps and metainfo
 759                 # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
 760                 return {
 761                     **metainfo,
 762                     '_type': 'multi_video',
 763                     'entries': [{
 764                         'id': f'{metainfo["id"]}_{idx}',
 765                         'title': metainfo['title'],
 766                         'http_headers': metainfo['http_headers'],
 767                         'formats': [{
 768                             **fragment,
 769                             'format_id': formats[0].get('format_id'),
 770                         }],
 771                         'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
 772                         '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
 773                     } for idx, fragment in enumerate(formats[0]['fragments'])],
 774                     'duration': float_or_none(play_info.get('timelength'), scale=1000),
 775                 }
 776             else:
 777                 return {
 778                     **metainfo,
 779                     'formats': formats,
 780                     'duration': float_or_none(play_info.get('timelength'), scale=1000),
 781                     'chapters': self._get_chapters(aid, cid),
 782                     'subtitles': self.extract_subtitles(video_id, cid),
 783                     '__post_extractor': self.extract_comments(aid),
 784                 }
 785
 786
 787 class BiliBiliBangumiIE(BilibiliBaseIE):
 788     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
 789
 790     _TESTS = [{
 791         'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
 792         'info_dict': {
 793             'id': '21495',
 794             'ext': 'mp4',
 795             'series': '悠久之翼',
 796             'series_id': '774',
 797             'season': '第二季',
 798             'season_id': '1182',
 799             'season_number': 2,
 800             'episode': 'forever／ef',
 801             'episode_id': '21495',
 802             'episode_number': 12,
 803             'title': '12 forever／ef',
 804             'duration': 1420.791,
 805             'timestamp': 1320412200,
 806             'upload_date': '20111104',
 807             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 808         },
 809     }, {
 810         'url': 'https://www.bilibili.com/bangumi/play/ep267851',
 811         'info_dict': {
 812             'id': '267851',
 813             'ext': 'mp4',
 814             'series': '鬼灭之刃',
 815             'series_id': '4358',
 816             'season': '立志篇',
 817             'season_id': '26801',
 818             'season_number': 1,
 819             'episode': '残酷',
 820             'episode_id': '267851',
 821             'episode_number': 1,
 822             'title': '1 残酷',
 823             'duration': 1425.256,
 824             'timestamp': 1554566400,
 825             'upload_date': '20190406',
 826             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 827         },
 828         'skip': 'Geo-restricted',
 829     }, {
 830         'note': 'a making-of which falls outside main section',
 831         'url': 'https://www.bilibili.com/bangumi/play/ep345120',
 832         'info_dict': {
 833             'id': '345120',
 834             'ext': 'mp4',
 835             'series': '鬼灭之刃',
 836             'series_id': '4358',
 837             'season': '立志篇',
 838             'season_id': '26801',
 839             'season_number': 1,
 840             'episode': '炭治郎篇',
 841             'episode_id': '345120',
 842             'episode_number': 27,
 843             'title': '#1 炭治郎篇',
 844             'duration': 1922.129,
 845             'timestamp': 1602853860,
 846             'upload_date': '20201016',
 847             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 848         },
 849     }]
 850
 851     def _real_extract(self, url):
 852         episode_id = self._match_id(url)
 853         headers = self.geo_verification_headers()
 854         webpage = self._download_webpage(url, episode_id, headers=headers)
 855
 856         if '您所在的地区无法观看本片' in webpage:
 857             raise GeoRestrictedError('This video is restricted')
 858         elif '正在观看预览，大会员免费看全片' in webpage:
 859             self.raise_login_required('This video is for premium members only')
 860
 861         headers['Referer'] = url
 862         play_info = self._download_json(
 863             'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
 864             'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
 865             headers=headers)
 866         premium_only = play_info.get('code') == -10403
 867         play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
 868
 869         formats = self.extract_formats(play_info)
 870         if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
 871             self.raise_login_required('This video is for premium members only')
 872
 873         bangumi_info = self._download_json(
 874             'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
 875             query={'ep_id': episode_id}, headers=headers)['result']
 876
 877         episode_number, episode_info = next((
 878             (idx, ep) for idx, ep in enumerate(traverse_obj(
 879                 bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
 880             if str_or_none(ep.get('id')) == episode_id), (1, {}))
 881
 882         season_id = bangumi_info.get('season_id')
 883         season_number, season_title = season_id and next((
 884             (idx + 1, e.get('season_title')) for idx, e in enumerate(
 885                 traverse_obj(bangumi_info, ('seasons', ...)))
 886             if e.get('season_id') == season_id
 887         ), (None, None))
 888
 889         aid = episode_info.get('aid')
 890
 891         return {
 892             'id': episode_id,
 893             'formats': formats,
 894             **traverse_obj(bangumi_info, {
 895                 'series': ('series', 'series_title', {str}),
 896                 'series_id': ('series', 'series_id', {str_or_none}),
 897                 'thumbnail': ('square_cover', {url_or_none}),
 898             }),
 899             **traverse_obj(episode_info, {
 900                 'episode': ('long_title', {str}),
 901                 'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
 902                 'timestamp': ('pub_time', {int_or_none}),
 903                 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
 904             }),
 905             'episode_id': episode_id,
 906             'season': str_or_none(season_title),
 907             'season_id': str_or_none(season_id),
 908             'season_number': season_number,
 909             'duration': float_or_none(play_info.get('timelength'), scale=1000),
 910             'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
 911             '__post_extractor': self.extract_comments(aid),
 912             'http_headers': {'Referer': url},
 913         }
 914
 915
 916 class BiliBiliBangumiMediaIE(BilibiliBaseIE):
 917     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
 918     _TESTS = [{
 919         'url': 'https://www.bilibili.com/bangumi/media/md24097891',
 920         'info_dict': {
 921             'id': '24097891',
 922             'title': 'CAROLE & TUESDAY',
 923             'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
 924         },
 925         'playlist_mincount': 25,
 926     }, {
 927         'url': 'https://www.bilibili.com/bangumi/media/md1565/',
 928         'info_dict': {
 929             'id': '1565',
 930             'title': '攻壳机动队 S.A.C. 2nd GIG',
 931             'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
 932         },
 933         'playlist_count': 26,
 934         'playlist': [{
 935             'info_dict': {
 936                 'id': '68540',
 937                 'ext': 'mp4',
 938                 'series': '攻壳机动队',
 939                 'series_id': '1077',
 940                 'season': '第二季',
 941                 'season_id': '1565',
 942                 'season_number': 2,
 943                 'episode': '再启动 REEMBODY',
 944                 'episode_id': '68540',
 945                 'episode_number': 1,
 946                 'title': '1 再启动 REEMBODY',
 947                 'duration': 1525.777,
 948                 'timestamp': 1425074413,
 949                 'upload_date': '20150227',
 950                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 951             },
 952         }],
 953     }]
 954
 955     def _real_extract(self, url):
 956         media_id = self._match_id(url)
 957         webpage = self._download_webpage(url, media_id)
 958
 959         initial_state = self._search_json(
 960             r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
 961         ss_id = initial_state['mediaInfo']['season_id']
 962
 963         return self.playlist_result(
 964             self._get_episodes_from_season(ss_id, url), media_id,
 965             **traverse_obj(initial_state, ('mediaInfo', {
 966                 'title': ('title', {str}),
 967                 'description': ('evaluate', {str}),
 968             })))
 969
 970
 971 class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
 972     _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
 973     _TESTS = [{
 974         'url': 'https://www.bilibili.com/bangumi/play/ss26801',
 975         'info_dict': {
 976             'id': '26801',
 977             'title': '鬼灭之刃',
 978             'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
 979         },
 980         'playlist_mincount': 26,
 981     }, {
 982         'url': 'https://www.bilibili.com/bangumi/play/ss2251',
 983         'info_dict': {
 984             'id': '2251',
 985             'title': '玲音',
 986             'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
 987         },
 988         'playlist_count': 13,
 989         'playlist': [{
 990             'info_dict': {
 991                 'id': '50188',
 992                 'ext': 'mp4',
 993                 'series': '玲音',
 994                 'series_id': '1526',
 995                 'season': 'TV',
 996                 'season_id': '2251',
 997                 'season_number': 1,
 998                 'episode': 'WEIRD',
 999                 'episode_id': '50188',
1000                 'episode_number': 1,
1001                 'title': '1 WEIRD',
1002                 'duration': 1436.992,
1003                 'timestamp': 1343185080,
1004                 'upload_date': '20120725',
1005                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1006             },
1007         }],
1008     }]
1009
1010     def _real_extract(self, url):
1011         ss_id = self._match_id(url)
1012         webpage = self._download_webpage(url, ss_id)
1013         metainfo = traverse_obj(
1014             self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
1015             ('itemListElement', ..., {
1016                 'title': ('name', {str}),
1017                 'description': ('description', {str}),
1018             }), get_all=False)
1019
1020         return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
1021
1022
1023 class BilibiliCheeseBaseIE(BilibiliBaseIE):
1024     _HEADERS = {'Referer': 'https://www.bilibili.com/'}
1025
1026     def _extract_episode(self, season_info, ep_id):
1027         episode_info = traverse_obj(season_info, (
1028             'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
1029         aid, cid = episode_info['aid'], episode_info['cid']
1030
1031         if traverse_obj(episode_info, 'ep_status') == -1:
1032             raise ExtractorError('This course episode is not yet available.', expected=True)
1033         if not traverse_obj(episode_info, 'playable'):
1034             self.raise_login_required('You need to purchase the course to download this episode')
1035
1036         play_info = self._download_json(
1037             'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
1038             query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
1039             headers=self._HEADERS, note='Downloading playinfo')['data']
1040
1041         return {
1042             'id': str_or_none(ep_id),
1043             'episode_id': str_or_none(ep_id),
1044             'formats': self.extract_formats(play_info),
1045             'extractor_key': BilibiliCheeseIE.ie_key(),
1046             'extractor': BilibiliCheeseIE.IE_NAME,
1047             'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
1048             **traverse_obj(episode_info, {
1049                 'episode': ('title', {str}),
1050                 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
1051                 'alt_title': ('subtitle', {str}),
1052                 'duration': ('duration', {int_or_none}),
1053                 'episode_number': ('index', {int_or_none}),
1054                 'thumbnail': ('cover', {url_or_none}),
1055                 'timestamp': ('release_date', {int_or_none}),
1056                 'view_count': ('play', {int_or_none}),
1057             }),
1058             **traverse_obj(season_info, {
1059                 'uploader': ('up_info', 'uname', {str}),
1060                 'uploader_id': ('up_info', 'mid', {str_or_none}),
1061             }),
1062             'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
1063             '__post_extractor': self.extract_comments(aid),
1064             'http_headers': self._HEADERS,
1065         }
1066
1067     def _download_season_info(self, query_key, video_id):
1068         return self._download_json(
1069             f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
1070             headers=self._HEADERS, note='Downloading season info')['data']
1071
1072
1073 class BilibiliCheeseIE(BilibiliCheeseBaseIE):
1074     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
1075     _TESTS = [{
1076         'url': 'https://www.bilibili.com/cheese/play/ep229832',
1077         'info_dict': {
1078             'id': '229832',
1079             'ext': 'mp4',
1080             'title': '1 - 课程先导片',
1081             'alt_title': '视频课 · 3分41秒',
1082             'uploader': '马督工',
1083             'uploader_id': '316568752',
1084             'episode': '课程先导片',
1085             'episode_id': '229832',
1086             'episode_number': 1,
1087             'duration': 221,
1088             'timestamp': 1695549606,
1089             'upload_date': '20230924',
1090             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1091             'view_count': int,
1092         },
1093     }]
1094
1095     def _real_extract(self, url):
1096         ep_id = self._match_id(url)
1097         return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
1098
1099
1100 class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
1101     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
1102     _TESTS = [{
1103         'url': 'https://www.bilibili.com/cheese/play/ss5918',
1104         'info_dict': {
1105             'id': '5918',
1106             'title': '【限时五折】新闻系学不到：马督工教你做自媒体',
1107             'description': '帮普通人建立世界模型，降低人与人的沟通门槛',
1108         },
1109         'playlist': [{
1110             'info_dict': {
1111                 'id': '229832',
1112                 'ext': 'mp4',
1113                 'title': '1 - 课程先导片',
1114                 'alt_title': '视频课 · 3分41秒',
1115                 'uploader': '马督工',
1116                 'uploader_id': '316568752',
1117                 'episode': '课程先导片',
1118                 'episode_id': '229832',
1119                 'episode_number': 1,
1120                 'duration': 221,
1121                 'timestamp': 1695549606,
1122                 'upload_date': '20230924',
1123                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1124                 'view_count': int,
1125             },
1126         }],
1127         'params': {'playlist_items': '1'},
1128     }, {
1129         'url': 'https://www.bilibili.com/cheese/play/ss5918',
1130         'info_dict': {
1131             'id': '5918',
1132             'title': '【限时五折】新闻系学不到：马督工教你做自媒体',
1133             'description': '帮普通人建立世界模型，降低人与人的沟通门槛',
1134         },
1135         'playlist_mincount': 5,
1136         'skip': 'paid video in list',
1137     }]
1138
1139     def _get_cheese_entries(self, season_info):
1140         for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
1141             yield self._extract_episode(season_info, ep_id)
1142
1143     def _real_extract(self, url):
1144         season_id = self._match_id(url)
1145         season_info = self._download_season_info('season_id', season_id)
1146
1147         return self.playlist_result(
1148             self._get_cheese_entries(season_info), season_id,
1149             **traverse_obj(season_info, {
1150                 'title': ('title', {str}),
1151                 'description': ('subtitle', {str}),
1152             }))
1153
1154
1155 class BilibiliSpaceBaseIE(BilibiliBaseIE):
1156     def _extract_playlist(self, fetch_page, get_metadata, get_entries):
1157         first_page = fetch_page(0)
1158         metadata = get_metadata(first_page)
1159
1160         paged_list = InAdvancePagedList(
1161             lambda idx: get_entries(fetch_page(idx) if idx else first_page),
1162             metadata['page_count'], metadata['page_size'])
1163
1164         return metadata, paged_list
1165
1166
1167 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
1168     _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
1169     _TESTS = [{
1170         'url': 'https://space.bilibili.com/3985676/video',
1171         'info_dict': {
1172             'id': '3985676',
1173         },
1174         'playlist_mincount': 178,
1175         'skip': 'login required',
1176     }, {
1177         'url': 'https://space.bilibili.com/313580179/video',
1178         'info_dict': {
1179             'id': '313580179',
1180         },
1181         'playlist_mincount': 92,
1182         'skip': 'login required',
1183     }]
1184
1185     def _real_extract(self, url):
1186         playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
1187         if not is_video_url:
1188             self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1189                            'To download audios, add a "/audio" to the URL')
1190
1191         def fetch_page(page_idx):
1192             query = {
1193                 'keyword': '',
1194                 'mid': playlist_id,
1195                 'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
1196                 'order_avoided': 'true',
1197                 'platform': 'web',
1198                 'pn': page_idx + 1,
1199                 'ps': 30,
1200                 'tid': 0,
1201                 'web_location': 1550101,
1202             }
1203
1204             try:
1205                 response = self._download_json(
1206                     'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
1207                     query=self._sign_wbi(query, playlist_id),
1208                     note=f'Downloading space page {page_idx}', headers={'Referer': url})
1209             except ExtractorError as e:
1210                 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
1211                     raise ExtractorError(
1212                         'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
1213                 raise
1214             status_code = response['code']
1215             if status_code == -401:
1216                 raise ExtractorError(
1217                     'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
1218             elif status_code == -352 and not self.is_logged_in:
1219                 self.raise_login_required('Request is rejected, you need to login to access playlist')
1220             elif status_code != 0:
1221                 raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
1222             return response['data']
1223
1224         def get_metadata(page_data):
1225             page_size = page_data['page']['ps']
1226             entry_count = page_data['page']['count']
1227             return {
1228                 'page_count': math.ceil(entry_count / page_size),
1229                 'page_size': page_size,
1230             }
1231
1232         def get_entries(page_data):
1233             for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
1234                 yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
1235
1236         metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1237         return self.playlist_result(paged_list, playlist_id)
1238
1239
1240 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
1241     _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
1242     _TESTS = [{
1243         'url': 'https://space.bilibili.com/313580179/audio',
1244         'info_dict': {
1245             'id': '313580179',
1246         },
1247         'playlist_mincount': 1,
1248     }]
1249
1250     def _real_extract(self, url):
1251         playlist_id = self._match_id(url)
1252
1253         def fetch_page(page_idx):
1254             return self._download_json(
1255                 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
1256                 note=f'Downloading page {page_idx}',
1257                 query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
1258
1259         def get_metadata(page_data):
1260             return {
1261                 'page_count': page_data['pageCount'],
1262                 'page_size': page_data['pageSize'],
1263             }
1264
1265         def get_entries(page_data):
1266             for entry in page_data.get('data', []):
1267                 yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
1268
1269         metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1270         return self.playlist_result(paged_list, playlist_id)
1271
1272
1273 class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
1274     def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
1275         for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
1276             yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
1277
1278     def _get_uploader(self, uid, playlist_id):
1279         webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
1280         return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
1281
1282     def _extract_playlist(self, fetch_page, get_metadata, get_entries):
1283         metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
1284         metadata.pop('page_count', None)
1285         metadata.pop('page_size', None)
1286         return metadata, page_list
1287
1288
1289 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
1290     _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
1291     _TESTS = [{
1292         'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
1293         'info_dict': {
1294             'id': '2142762_57445',
1295             'title': '【完结】《底特律 变人》全结局流程解说',
1296             'description': '',
1297             'uploader': '老戴在此',
1298             'uploader_id': '2142762',
1299             'timestamp': int,
1300             'upload_date': str,
1301             'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
1302         },
1303         'playlist_mincount': 31,
1304     }]
1305
1306     def _real_extract(self, url):
1307         mid, sid = self._match_valid_url(url).group('mid', 'sid')
1308         playlist_id = f'{mid}_{sid}'
1309
1310         def fetch_page(page_idx):
1311             return self._download_json(
1312                 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
1313                 playlist_id, note=f'Downloading page {page_idx}',
1314                 query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
1315
1316         def get_metadata(page_data):
1317             page_size = page_data['page']['page_size']
1318             entry_count = page_data['page']['total']
1319             return {
1320                 'page_count': math.ceil(entry_count / page_size),
1321                 'page_size': page_size,
1322                 'uploader': self._get_uploader(mid, playlist_id),
1323                 **traverse_obj(page_data, {
1324                     'title': ('meta', 'name', {str}),
1325                     'description': ('meta', 'description', {str}),
1326                     'uploader_id': ('meta', 'mid', {str_or_none}),
1327                     'timestamp': ('meta', 'ptime', {int_or_none}),
1328                     'thumbnail': ('meta', 'cover', {url_or_none}),
1329                 }),
1330             }
1331
1332         def get_entries(page_data):
1333             return self._get_entries(page_data, 'archives')
1334
1335         metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1336         return self.playlist_result(paged_list, playlist_id, **metadata)
1337
1338
1339 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
1340     _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
1341     _TESTS = [{
1342         'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
1343         'info_dict': {
1344             'id': '1958703906_547718',
1345             'title': '直播回放',
1346             'description': '直播回放',
1347             'uploader': '靡烟miya',
1348             'uploader_id': '1958703906',
1349             'timestamp': 1637985853,
1350             'upload_date': '20211127',
1351             'modified_timestamp': int,
1352             'modified_date': str,
1353         },
1354         'playlist_mincount': 513,
1355     }]
1356
1357     def _real_extract(self, url):
1358         mid, sid = self._match_valid_url(url).group('mid', 'sid')
1359         playlist_id = f'{mid}_{sid}'
1360         playlist_meta = traverse_obj(self._download_json(
1361             f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
1362         ), {
1363             'title': ('data', 'meta', 'name', {str}),
1364             'description': ('data', 'meta', 'description', {str}),
1365             'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
1366             'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
1367             'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
1368         })
1369
1370         def fetch_page(page_idx):
1371             return self._download_json(
1372                 'https://api.bilibili.com/x/series/archives',
1373                 playlist_id, note=f'Downloading page {page_idx}',
1374                 query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
1375
1376         def get_metadata(page_data):
1377             page_size = page_data['page']['size']
1378             entry_count = page_data['page']['total']
1379             return {
1380                 'page_count': math.ceil(entry_count / page_size),
1381                 'page_size': page_size,
1382                 'uploader': self._get_uploader(mid, playlist_id),
1383                 **playlist_meta,
1384             }
1385
1386         def get_entries(page_data):
1387             return self._get_entries(page_data, 'archives')
1388
1389         metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1390         return self.playlist_result(paged_list, playlist_id, **metadata)
1391
1392
1393 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
1394     _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
1395     _TESTS = [{
1396         'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
1397         'info_dict': {
1398             'id': '1103407912',
1399             'title': '【V2】（旧）',
1400             'description': '',
1401             'uploader': '晓月春日',
1402             'uploader_id': '84912',
1403             'timestamp': 1604905176,
1404             'upload_date': '20201109',
1405             'modified_timestamp': int,
1406             'modified_date': str,
1407             'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
1408             'view_count': int,
1409             'like_count': int,
1410         },
1411         'playlist_mincount': 22,
1412     }, {
1413         'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
1414         'only_matching': True,
1415     }]
1416
1417     def _real_extract(self, url):
1418         fid = self._match_id(url)
1419
1420         list_info = self._download_json(
1421             f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
1422             fid, note='Downloading favlist metadata')
1423         if list_info['code'] == -403:
1424             self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
1425
1426         entries = self._get_entries(self._download_json(
1427             f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
1428             fid, note='Download favlist entries'), 'data')
1429
1430         return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
1431             'title': ('title', {str}),
1432             'description': ('intro', {str}),
1433             'uploader': ('upper', 'name', {str}),
1434             'uploader_id': ('upper', 'mid', {str_or_none}),
1435             'timestamp': ('ctime', {int_or_none}),
1436             'modified_timestamp': ('mtime', {int_or_none}),
1437             'thumbnail': ('cover', {url_or_none}),
1438             'view_count': ('cnt_info', 'play', {int_or_none}),
1439             'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
1440         })))
1441
1442
1443 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
1444     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
1445     _TESTS = [{
1446         'url': 'https://www.bilibili.com/watchlater/#/list',
1447         'info_dict': {
1448             'id': r're:\d+',
1449             'title': '稍后再看',
1450         },
1451         'playlist_mincount': 0,
1452         'skip': 'login required',
1453     }]
1454
1455     def _real_extract(self, url):
1456         list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
1457         watchlater_info = self._download_json(
1458             'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
1459         if watchlater_info['code'] == -101:
1460             self.raise_login_required(msg='You need to login to access your watchlater list')
1461         entries = self._get_entries(watchlater_info, ('data', 'list'))
1462         return self.playlist_result(entries, id=list_id, title='稍后再看')
1463
1464
1465 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
1466     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
1467     _TESTS = [{
1468         'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
1469         'info_dict': {
1470             'id': '5_547718',
1471             'title': '直播回放',
1472             'uploader': '靡烟miya',
1473             'uploader_id': '1958703906',
1474             'timestamp': 1637985853,
1475             'upload_date': '20211127',
1476         },
1477         'playlist_mincount': 513,
1478     }, {
1479         'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
1480         'info_dict': {
1481             'id': 'BV1DU4y1r7tz',
1482             'ext': 'mp4',
1483             'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
1484             'upload_date': '20220820',
1485             'description': '',
1486             'timestamp': 1661016330,
1487             'uploader_id': '1958703906',
1488             'uploader': '靡烟miya',
1489             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1490             'duration': 9552.903,
1491             'tags': list,
1492             'comment_count': int,
1493             'view_count': int,
1494             'like_count': int,
1495             '_old_archive_ids': ['bilibili 687146339_part1'],
1496         },
1497         'params': {'noplaylist': True},
1498     }, {
1499         'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
1500         'info_dict': {
1501             'id': '5_547718',
1502         },
1503         'playlist_mincount': 513,
1504         'skip': 'redirect url',
1505     }, {
1506         'url': 'https://www.bilibili.com/list/ml1103407912',
1507         'info_dict': {
1508             'id': '3_1103407912',
1509             'title': '【V2】（旧）',
1510             'uploader': '晓月春日',
1511             'uploader_id': '84912',
1512             'timestamp': 1604905176,
1513             'upload_date': '20201109',
1514             'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
1515         },
1516         'playlist_mincount': 22,
1517     }, {
1518         'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
1519         'info_dict': {
1520             'id': '3_1103407912',
1521         },
1522         'playlist_mincount': 22,
1523         'skip': 'redirect url',
1524     }, {
1525         'url': 'https://www.bilibili.com/list/watchlater',
1526         'info_dict': {
1527             'id': r're:2_\d+',
1528             'title': '稍后再看',
1529             'uploader': str,
1530             'uploader_id': str,
1531         },
1532         'playlist_mincount': 0,
1533         'skip': 'login required',
1534     }, {
1535         'url': 'https://www.bilibili.com/medialist/play/watchlater',
1536         'info_dict': {'id': 'watchlater'},
1537         'playlist_mincount': 0,
1538         'skip': 'redirect url & login required',
1539     }]
1540
1541     def _extract_medialist(self, query, list_id):
1542         for page_num in itertools.count(1):
1543             page_data = self._download_json(
1544                 'https://api.bilibili.com/x/v2/medialist/resource/list',
1545                 list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
1546             )['data']
1547             yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
1548             query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
1549             if not page_data.get('has_more', False):
1550                 break
1551
1552     def _real_extract(self, url):
1553         list_id = self._match_id(url)
1554
1555         bvid = traverse_obj(parse_qs(url), ('bvid', 0))
1556         if not self._yes_playlist(list_id, bvid):
1557             return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
1558
1559         webpage = self._download_webpage(url, list_id)
1560         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
1561         if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
1562             error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
1563             error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
1564             if error_code == -400 and list_id == 'watchlater':
1565                 self.raise_login_required('You need to login to access your watchlater playlist')
1566             elif error_code == -403:
1567                 self.raise_login_required('This is a private playlist. You need to login as its owner')
1568             elif error_code == 11010:
1569                 raise ExtractorError('Playlist is no longer available', expected=True)
1570             raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
1571
1572         query = {
1573             'ps': 20,
1574             'with_current': False,
1575             **traverse_obj(initial_state, {
1576                 'type': ('playlist', 'type', {int_or_none}),
1577                 'biz_id': ('playlist', 'id', {int_or_none}),
1578                 'tid': ('tid', {int_or_none}),
1579                 'sort_field': ('sortFiled', {int_or_none}),
1580                 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
1581             }),
1582         }
1583         metadata = {
1584             'id': f'{query["type"]}_{query["biz_id"]}',
1585             **traverse_obj(initial_state, ('mediaListInfo', {
1586                 'title': ('title', {str}),
1587                 'uploader': ('upper', 'name', {str}),
1588                 'uploader_id': ('upper', 'mid', {str_or_none}),
1589                 'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
1590                 'thumbnail': ('cover', {url_or_none}),
1591             })),
1592         }
1593         return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
1594
1595
1596 class BilibiliCategoryIE(InfoExtractor):
1597     IE_NAME = 'Bilibili category extractor'
1598     _MAX_RESULTS = 1000000
1599     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
1600     _TESTS = [{
1601         'url': 'https://www.bilibili.com/v/kichiku/mad',
1602         'info_dict': {
1603             'id': 'kichiku: mad',
1604             'title': 'kichiku: mad',
1605         },
1606         'playlist_mincount': 45,
1607         'params': {
1608             'playlistend': 45,
1609         },
1610     }]
1611
1612     def _fetch_page(self, api_url, num_pages, query, page_num):
1613         parsed_json = self._download_json(
1614             api_url, query, query={'Search_key': query, 'pn': page_num},
1615             note=f'Extracting results from page {page_num} of {num_pages}')
1616
1617         video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
1618         if not video_list:
1619             raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
1620
1621         for video in video_list:
1622             yield self.url_result(
1623                 'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
1624
1625     def _entries(self, category, subcategory, query):
1626         # map of categories : subcategories : RIDs
1627         rid_map = {
1628             'kichiku': {
1629                 'mad': 26,
1630                 'manual_vocaloid': 126,
1631                 'guide': 22,
1632                 'theatre': 216,
1633                 'course': 127,
1634             },
1635         }
1636
1637         if category not in rid_map:
1638             raise ExtractorError(
1639                 f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
1640         if subcategory not in rid_map[category]:
1641             raise ExtractorError(
1642                 f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
1643         rid_value = rid_map[category][subcategory]
1644
1645         api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
1646         page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
1647         page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
1648         count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
1649         if count is None or not size:
1650             raise ExtractorError('Failed to calculate either page count or size')
1651
1652         num_pages = math.ceil(count / size)
1653
1654         return OnDemandPagedList(functools.partial(
1655             self._fetch_page, api_url, num_pages, query), size)
1656
1657     def _real_extract(self, url):
1658         category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
1659         query = f'{category}: {subcategory}'
1660
1661         return self.playlist_result(self._entries(category, subcategory, query), query, query)
1662
1663
1664 class BiliBiliSearchIE(SearchInfoExtractor):
1665     IE_DESC = 'Bilibili video search'
1666     _MAX_RESULTS = 100000
1667     _SEARCH_KEY = 'bilisearch'
1668     _TESTS = [{
1669         'url': 'bilisearch3:靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
1670         'playlist_count': 3,
1671         'info_dict': {
1672             'id': '靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
1673             'title': '靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
1674         },
1675         'playlist': [{
1676             'info_dict': {
1677                 'id': 'BV1n44y1Q7sc',
1678                 'ext': 'mp4',
1679                 'title': '“出道一年，我怎么还在等你单推的女人睡觉后开播啊？”【一分钟了解靡烟miya】',
1680                 'timestamp': 1669889987,
1681                 'upload_date': '20221201',
1682                 'description': 'md5:43343c0973defff527b5a4b403b4abf9',
1683                 'tags': list,
1684                 'uploader': '靡烟miya',
1685                 'duration': 123.156,
1686                 'uploader_id': '1958703906',
1687                 'comment_count': int,
1688                 'view_count': int,
1689                 'like_count': int,
1690                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1691                 '_old_archive_ids': ['bilibili 988222410_part1'],
1692             },
1693         }],
1694     }]
1695
1696     def _search_results(self, query):
1697         if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
1698             self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
1699         for page_num in itertools.count(1):
1700             videos = self._download_json(
1701                 'https://api.bilibili.com/x/web-interface/search/type', query,
1702                 note=f'Extracting results from page {page_num}', query={
1703                     'Search_key': query,
1704                     'keyword': query,
1705                     'page': page_num,
1706                     'context': '',
1707                     'duration': 0,
1708                     'tids_2': '',
1709                     '__refresh__': 'true',
1710                     'search_type': 'video',
1711                     'tids': 0,
1712                     'highlight': 1,
1713                 })['data'].get('result')
1714             if not videos:
1715                 break
1716             for video in videos:
1717                 yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
1718
1719
1720 class BilibiliAudioBaseIE(InfoExtractor):
1721     def _call_api(self, path, sid, query=None):
1722         if not query:
1723             query = {'sid': sid}
1724         return self._download_json(
1725             'https://www.bilibili.com/audio/music-service-c/web/' + path,
1726             sid, query=query)['data']
1727
1728
1729 class BilibiliAudioIE(BilibiliAudioBaseIE):
1730     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1731     _TEST = {
1732         'url': 'https://www.bilibili.com/audio/au1003142',
1733         'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1734         'info_dict': {
1735             'id': '1003142',
1736             'ext': 'm4a',
1737             'title': '【tsukimi】YELLOW / 神山羊',
1738             'artist': 'tsukimi',
1739             'comment_count': int,
1740             'description': 'YELLOW的mp3版！',
1741             'duration': 183,
1742             'subtitles': {
1743                 'origin': [{
1744                     'ext': 'lrc',
1745                 }],
1746             },
1747             'thumbnail': r're:^https?://.+\.jpg',
1748             'timestamp': 1564836614,
1749             'upload_date': '20190803',
1750             'uploader': 'tsukimi-つきみぐー',
1751             'view_count': int,
1752         },
1753     }
1754
1755     def _real_extract(self, url):
1756         au_id = self._match_id(url)
1757
1758         play_data = self._call_api('url', au_id)
1759         formats = [{
1760             'url': play_data['cdns'][0],
1761             'filesize': int_or_none(play_data.get('size')),
1762             'vcodec': 'none',
1763         }]
1764
1765         for a_format in formats:
1766             a_format.setdefault('http_headers', {}).update({
1767                 'Referer': url,
1768             })
1769
1770         song = self._call_api('song/info', au_id)
1771         title = song['title']
1772         statistic = song.get('statistic') or {}
1773
1774         subtitles = None
1775         lyric = song.get('lyric')
1776         if lyric:
1777             subtitles = {
1778                 'origin': [{
1779                     'url': lyric,
1780                 }],
1781             }
1782
1783         return {
1784             'id': au_id,
1785             'title': title,
1786             'formats': formats,
1787             'artist': song.get('author'),
1788             'comment_count': int_or_none(statistic.get('comment')),
1789             'description': song.get('intro'),
1790             'duration': int_or_none(song.get('duration')),
1791             'subtitles': subtitles,
1792             'thumbnail': song.get('cover'),
1793             'timestamp': int_or_none(song.get('passtime')),
1794             'uploader': song.get('uname'),
1795             'view_count': int_or_none(statistic.get('play')),
1796         }
1797
1798
1799 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
1800     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1801     _TEST = {
1802         'url': 'https://www.bilibili.com/audio/am10624',
1803         'info_dict': {
1804             'id': '10624',
1805             'title': '每日新曲推荐（每日11:00更新）',
1806             'description': '每天11:00更新，为你推送最新音乐',
1807         },
1808         'playlist_count': 19,
1809     }
1810
1811     def _real_extract(self, url):
1812         am_id = self._match_id(url)
1813
1814         songs = self._call_api(
1815             'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
1816
1817         entries = []
1818         for song in songs:
1819             sid = str_or_none(song.get('id'))
1820             if not sid:
1821                 continue
1822             entries.append(self.url_result(
1823                 'https://www.bilibili.com/audio/au' + sid,
1824                 BilibiliAudioIE.ie_key(), sid))
1825
1826         if entries:
1827             album_data = self._call_api('menu/info', am_id) or {}
1828             album_title = album_data.get('title')
1829             if album_title:
1830                 for entry in entries:
1831                     entry['album'] = album_title
1832                 return self.playlist_result(
1833                     entries, am_id, album_title, album_data.get('intro'))
1834
1835         return self.playlist_result(entries, am_id)
1836
1837
1838 class BiliBiliPlayerIE(InfoExtractor):
1839     _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1840     _TEST = {
1841         'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1842         'only_matching': True,
1843     }
1844
1845     def _real_extract(self, url):
1846         video_id = self._match_id(url)
1847         return self.url_result(
1848             f'http://www.bilibili.tv/video/av{video_id}/',
1849             ie=BiliBiliIE.ie_key(), video_id=video_id)
1850
1851
1852 class BiliIntlBaseIE(InfoExtractor):
1853     _API_URL = 'https://api.bilibili.tv/intl/gateway'
1854     _NETRC_MACHINE = 'biliintl'
1855     _HEADERS = {'Referer': 'https://www.bilibili.com/'}
1856
1857     def _call_api(self, endpoint, *args, **kwargs):
1858         json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
1859         if json.get('code'):
1860             if json['code'] in (10004004, 10004005, 10023006):
1861                 self.raise_login_required()
1862             elif json['code'] == 10004001:
1863                 self.raise_geo_restricted()
1864             else:
1865                 if json.get('message') and str(json['code']) != json['message']:
1866                     errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1867                 else:
1868                     errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
1869                 if kwargs.get('fatal'):
1870                     raise ExtractorError(errmsg)
1871                 else:
1872                     self.report_warning(errmsg)
1873         return json.get('data')
1874
1875     def json2srt(self, json):
1876         return '\n\n'.join(
1877             f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
1878             for i, line in enumerate(traverse_obj(json, (
1879                 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
1880
1881     def _get_subtitles(self, *, ep_id=None, aid=None):
1882         sub_json = self._call_api(
1883             '/web/v2/subtitle', ep_id or aid, fatal=False,
1884             note='Downloading subtitles list', errnote='Unable to download subtitles list',
1885             query=filter_dict({
1886                 'platform': 'web',
1887                 's_locale': 'en_US',
1888                 'episode_id': ep_id,
1889                 'aid': aid,
1890             })) or {}
1891         subtitles = {}
1892         fetched_urls = set()
1893         for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
1894             for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
1895                 if url in fetched_urls:
1896                     continue
1897                 fetched_urls.add(url)
1898                 sub_ext = determine_ext(url)
1899                 sub_lang = sub.get('lang_key') or 'en'
1900
1901                 if sub_ext == 'ass':
1902                     subtitles.setdefault(sub_lang, []).append({
1903                         'ext': 'ass',
1904                         'url': url,
1905                     })
1906                 elif sub_ext == 'json':
1907                     sub_data = self._download_json(
1908                         url, ep_id or aid, fatal=False,
1909                         note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
1910                         errnote='Unable to download subtitles')
1911
1912                     if sub_data:
1913                         subtitles.setdefault(sub_lang, []).append({
1914                             'ext': 'srt',
1915                             'data': self.json2srt(sub_data),
1916                         })
1917                 else:
1918                     self.report_warning('Unexpected subtitle extension', ep_id or aid)
1919
1920         return subtitles
1921
1922     def _get_formats(self, *, ep_id=None, aid=None):
1923         video_json = self._call_api(
1924             '/web/playurl', ep_id or aid, note='Downloading video formats',
1925             errnote='Unable to download video formats', query=filter_dict({
1926                 'platform': 'web',
1927                 'ep_id': ep_id,
1928                 'aid': aid,
1929             }))
1930         video_json = video_json['playurl']
1931         formats = []
1932         for vid in video_json.get('video') or []:
1933             video_res = vid.get('video_resource') or {}
1934             video_info = vid.get('stream_info') or {}
1935             if not video_res.get('url'):
1936                 continue
1937             formats.append({
1938                 'url': video_res['url'],
1939                 'ext': 'mp4',
1940                 'format_note': video_info.get('desc_words'),
1941                 'width': video_res.get('width'),
1942                 'height': video_res.get('height'),
1943                 'vbr': video_res.get('bandwidth'),
1944                 'acodec': 'none',
1945                 'vcodec': video_res.get('codecs'),
1946                 'filesize': video_res.get('size'),
1947             })
1948         for aud in video_json.get('audio_resource') or []:
1949             if not aud.get('url'):
1950                 continue
1951             formats.append({
1952                 'url': aud['url'],
1953                 'ext': 'mp4',
1954                 'abr': aud.get('bandwidth'),
1955                 'acodec': aud.get('codecs'),
1956                 'vcodec': 'none',
1957                 'filesize': aud.get('size'),
1958             })
1959
1960         return formats
1961
1962     def _parse_video_metadata(self, video_data):
1963         return {
1964             'title': video_data.get('title_display') or video_data.get('title'),
1965             'description': video_data.get('desc'),
1966             'thumbnail': video_data.get('cover'),
1967             'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
1968             'episode_number': int_or_none(self._search_regex(
1969                 r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
1970         }
1971
1972     def _perform_login(self, username, password):
1973         if not Cryptodome.RSA:
1974             raise ExtractorError('pycryptodomex not found. Please install', expected=True)
1975
1976         key_data = self._download_json(
1977             'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1978             note='Downloading login key', errnote='Unable to download login key')['data']
1979
1980         public_key = Cryptodome.RSA.importKey(key_data['key'])
1981         password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
1982         login_post = self._download_json(
1983             'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
1984             data=urlencode_postdata({
1985                 'username': username,
1986                 'password': base64.b64encode(password_hash).decode('ascii'),
1987                 'keep_me': 'true',
1988                 's_locale': 'en_US',
1989                 'isTrusted': 'true',
1990             }), note='Logging in', errnote='Unable to log in')
1991         if login_post.get('code'):
1992             if login_post.get('message'):
1993                 raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
1994             else:
1995                 raise ExtractorError('Unable to log in')
1996
1997
1998 class BiliIntlIE(BiliIntlBaseIE):
1999     _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
2000     _TESTS = [{
2001         # Bstation page
2002         'url': 'https://www.bilibili.tv/en/play/34613/341736',
2003         'info_dict': {
2004             'id': '341736',
2005             'ext': 'mp4',
2006             'title': 'E2 - The First Night',
2007             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2008             'episode_number': 2,
2009             'upload_date': '20201009',
2010             'episode': 'Episode 2',
2011             'timestamp': 1602259500,
2012             'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2013             'chapters': [{
2014                 'start_time': 0,
2015                 'end_time': 76.242,
2016                 'title': '<Untitled Chapter 1>',
2017             }, {
2018                 'start_time': 76.242,
2019                 'end_time': 161.161,
2020                 'title': 'Intro',
2021             }, {
2022                 'start_time': 1325.742,
2023                 'end_time': 1403.903,
2024                 'title': 'Outro',
2025             }],
2026         },
2027     }, {
2028         # Non-Bstation page
2029         'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
2030         'info_dict': {
2031             'id': '11005006',
2032             'ext': 'mp4',
2033             'title': 'E3 - Who?',
2034             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2035             'episode_number': 3,
2036             'description': 'md5:e1a775e71a35c43f141484715470ad09',
2037             'episode': 'Episode 3',
2038             'upload_date': '20211219',
2039             'timestamp': 1639928700,
2040             'chapters': [{
2041                 'start_time': 0,
2042                 'end_time': 88.0,
2043                 'title': '<Untitled Chapter 1>',
2044             }, {
2045                 'start_time': 88.0,
2046                 'end_time': 156.0,
2047                 'title': 'Intro',
2048             }, {
2049                 'start_time': 1173.0,
2050                 'end_time': 1259.535,
2051                 'title': 'Outro',
2052             }],
2053         },
2054     }, {
2055         # Subtitle with empty content
2056         'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
2057         'info_dict': {
2058             'id': '10131790',
2059             'ext': 'mp4',
2060             'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
2061             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2062             'episode_number': 140,
2063         },
2064         'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
2065     }, {
2066         # episode comment extraction
2067         'url': 'https://www.bilibili.tv/en/play/34580/340317',
2068         'info_dict': {
2069             'id': '340317',
2070             'ext': 'mp4',
2071             'timestamp': 1604057820,
2072             'upload_date': '20201030',
2073             'episode_number': 5,
2074             'title': 'E5 - My Own Steel',
2075             'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
2076             'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
2077             'episode': 'Episode 5',
2078             'comment_count': int,
2079             'chapters': [{
2080                 'start_time': 0,
2081                 'end_time': 61.0,
2082                 'title': '<Untitled Chapter 1>',
2083             }, {
2084                 'start_time': 61.0,
2085                 'end_time': 134.0,
2086                 'title': 'Intro',
2087             }, {
2088                 'start_time': 1290.0,
2089                 'end_time': 1379.0,
2090                 'title': 'Outro',
2091             }],
2092         },
2093         'params': {
2094             'getcomments': True,
2095         },
2096     }, {
2097         # user generated content comment extraction
2098         'url': 'https://www.bilibili.tv/en/video/2045730385',
2099         'info_dict': {
2100             'id': '2045730385',
2101             'ext': 'mp4',
2102             'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
2103             'timestamp': 1667891924,
2104             'upload_date': '20221108',
2105             'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
2106             'comment_count': int,
2107             'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
2108         },
2109         'params': {
2110             'getcomments': True,
2111         },
2112     }, {
2113         # episode id without intro and outro
2114         'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
2115         'info_dict': {
2116             'id': '11246489',
2117             'ext': 'mp4',
2118             'title': 'E1 - Operation \'Strix\' <Owl>',
2119             'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2120             'timestamp': 1649516400,
2121             'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
2122             'episode': 'Episode 1',
2123             'episode_number': 1,
2124             'upload_date': '20220409',
2125         },
2126     }, {
2127         'url': 'https://www.biliintl.com/en/play/34613/341736',
2128         'only_matching': True,
2129     }, {
2130         # User-generated content (as opposed to a series licensed from a studio)
2131         'url': 'https://bilibili.tv/en/video/2019955076',
2132         'only_matching': True,
2133     }, {
2134         # No language in URL
2135         'url': 'https://www.bilibili.tv/video/2019955076',
2136         'only_matching': True,
2137     }, {
2138         # Uppercase language in URL
2139         'url': 'https://www.bilibili.tv/EN/video/2019955076',
2140         'only_matching': True,
2141     }]
2142
2143     @staticmethod
2144     def _make_url(video_id, series_id=None):
2145         if series_id:
2146             return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
2147         return f'https://www.bilibili.tv/en/video/{video_id}'
2148
2149     def _extract_video_metadata(self, url, video_id, season_id):
2150         url, smuggled_data = unsmuggle_url(url, {})
2151         if smuggled_data.get('title'):
2152             return smuggled_data
2153
2154         webpage = self._download_webpage(url, video_id)
2155         # Bstation layout
2156         initial_data = (
2157             self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
2158             or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
2159         video_data = traverse_obj(
2160             initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
2161
2162         if season_id and not video_data:
2163             # Non-Bstation layout, read through episode list
2164             season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
2165             video_data = traverse_obj(season_json, (
2166                 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
2167             ), expected_type=dict, get_all=False)
2168
2169         # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
2170         return merge_dicts(
2171             self._parse_video_metadata(video_data), {
2172                 'title': get_element_by_class(
2173                     'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
2174                 'description': get_element_by_class(
2175                     'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
2176             }, self._search_json_ld(webpage, video_id, default={}))
2177
2178     def _get_comments_reply(self, root_id, next_id=0, display_id=None):
2179         comment_api_raw_data = self._download_json(
2180             'https://api.bilibili.tv/reply/web/detail', display_id,
2181             note=f'Downloading reply comment of {root_id} - {next_id}',
2182             query={
2183                 'platform': 'web',
2184                 'ps': 20,  # comment's reply per page (default: 3)
2185                 'root': root_id,
2186                 'next': next_id,
2187             })
2188
2189         for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2190             yield {
2191                 'author': traverse_obj(replies, ('member', 'name')),
2192                 'author_id': traverse_obj(replies, ('member', 'mid')),
2193                 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2194                 'text': traverse_obj(replies, ('content', 'message')),
2195                 'id': replies.get('rpid'),
2196                 'like_count': int_or_none(replies.get('like_count')),
2197                 'parent': replies.get('parent'),
2198                 'timestamp': unified_timestamp(replies.get('ctime_text')),
2199             }
2200
2201         if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2202             yield from self._get_comments_reply(
2203                 root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
2204
2205     def _get_comments(self, video_id, ep_id):
2206         for i in itertools.count(0):
2207             comment_api_raw_data = self._download_json(
2208                 'https://api.bilibili.tv/reply/web/root', video_id,
2209                 note=f'Downloading comment page {i + 1}',
2210                 query={
2211                     'platform': 'web',
2212                     'pn': i,  # page number
2213                     'ps': 20,  # comment per page (default: 20)
2214                     'oid': video_id,
2215                     'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
2216                     'sort_type': 1,  # 1: best, 2: recent
2217                 })
2218
2219             for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2220                 yield {
2221                     'author': traverse_obj(replies, ('member', 'name')),
2222                     'author_id': traverse_obj(replies, ('member', 'mid')),
2223                     'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2224                     'text': traverse_obj(replies, ('content', 'message')),
2225                     'id': replies.get('rpid'),
2226                     'like_count': int_or_none(replies.get('like_count')),
2227                     'timestamp': unified_timestamp(replies.get('ctime_text')),
2228                     'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
2229                 }
2230                 if replies.get('count'):
2231                     yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
2232
2233             if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2234                 break
2235
2236     def _real_extract(self, url):
2237         season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
2238         video_id = ep_id or aid
2239         chapters = None
2240
2241         if ep_id:
2242             intro_ending_json = self._call_api(
2243                 f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2244                 video_id, fatal=False) or {}
2245             if intro_ending_json.get('skip'):
2246                 # FIXME: start time and end time seems a bit off a few second even it corrext based on ogv.*.js
2247                 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
2248                 chapters = [{
2249                     'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
2250                     'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
2251                     'title': 'Intro',
2252                 }, {
2253                     'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
2254                     'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
2255                     'title': 'Outro',
2256                 }]
2257
2258         return {
2259             'id': video_id,
2260             **self._extract_video_metadata(url, video_id, season_id),
2261             'formats': self._get_formats(ep_id=ep_id, aid=aid),
2262             'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
2263             'chapters': chapters,
2264             '__post_extractor': self.extract_comments(video_id, ep_id),
2265             'http_headers': self._HEADERS,
2266         }
2267
2268
2269 class BiliIntlSeriesIE(BiliIntlBaseIE):
2270     IE_NAME = 'biliIntl:series'
2271     _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
2272     _TESTS = [{
2273         'url': 'https://www.bilibili.tv/en/play/34613',
2274         'playlist_mincount': 15,
2275         'info_dict': {
2276             'id': '34613',
2277             'title': 'TONIKAWA: Over the Moon For You',
2278             'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2279             'categories': ['Slice of life', 'Comedy', 'Romance'],
2280             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2281             'view_count': int,
2282         },
2283         'params': {
2284             'skip_download': True,
2285         },
2286     }, {
2287         'url': 'https://www.bilibili.tv/en/media/1048837',
2288         'info_dict': {
2289             'id': '1048837',
2290             'title': 'SPY×FAMILY',
2291             'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2292             'categories': ['Adventure', 'Action', 'Comedy'],
2293             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
2294             'view_count': int,
2295         },
2296         'playlist_mincount': 25,
2297     }, {
2298         'url': 'https://www.biliintl.com/en/play/34613',
2299         'only_matching': True,
2300     }, {
2301         'url': 'https://www.biliintl.com/EN/play/34613',
2302         'only_matching': True,
2303     }]
2304
2305     def _entries(self, series_id):
2306         series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
2307         for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
2308             episode_id = str(episode['episode_id'])
2309             yield self.url_result(smuggle_url(
2310                 BiliIntlIE._make_url(episode_id, series_id),
2311                 self._parse_video_metadata(episode),
2312             ), BiliIntlIE, episode_id)
2313
2314     def _real_extract(self, url):
2315         series_id = self._match_id(url)
2316         series_info = self._call_api(
2317             f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
2318         return self.playlist_result(
2319             self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
2320             categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
2321             thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
2322
2323
2324 class BiliLiveIE(InfoExtractor):
2325     _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2326
2327     _TESTS = [{
2328         'url': 'https://live.bilibili.com/196',
2329         'info_dict': {
2330             'id': '33989',
2331             'description': '周六杂谈回，其他时候随机游戏。 | \n录播：@下播型泛式录播组。 | \n直播通知群（全员禁言）：666906670，902092584，59971⑧481 （功能一样，别多加）',
2332             'ext': 'flv',
2333             'title': '太空狼人杀联动，不被爆杀就算赢',
2334             'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
2335             'timestamp': 1650802769,
2336         },
2337         'skip': 'not live',
2338     }, {
2339         'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2340         'only_matching': True,
2341     }, {
2342         'url': 'https://live.bilibili.com/blanc/196',
2343         'only_matching': True,
2344     }]
2345
2346     _FORMATS = {
2347         80: {'format_id': 'low', 'format_note': '流畅'},
2348         150: {'format_id': 'high_res', 'format_note': '高清'},
2349         250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
2350         400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
2351         10000: {'format_id': 'source', 'format_note': '原画'},
2352         20000: {'format_id': '4K', 'format_note': '4K'},
2353         30000: {'format_id': 'dolby', 'format_note': '杜比'},
2354     }
2355
2356     _quality = staticmethod(qualities(list(_FORMATS)))
2357
2358     def _call_api(self, path, room_id, query):
2359         api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
2360         if api_result.get('code') != 0:
2361             raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
2362         return api_result.get('data') or {}
2363
2364     def _parse_formats(self, qn, fmt):
2365         for codec in fmt.get('codec') or []:
2366             if codec.get('current_qn') != qn:
2367                 continue
2368             for url_info in codec['url_info']:
2369                 yield {
2370                     'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2371                     'ext': fmt.get('format_name'),
2372                     'vcodec': codec.get('codec_name'),
2373                     'quality': self._quality(qn),
2374                     **self._FORMATS[qn],
2375                 }
2376
2377     def _real_extract(self, url):
2378         room_id = self._match_id(url)
2379         room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
2380         if room_data.get('live_status') == 0:
2381             raise ExtractorError('Streamer is not live', expected=True)
2382
2383         formats = []
2384         for qn in self._FORMATS:
2385             stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
2386                 'room_id': room_id,
2387                 'qn': qn,
2388                 'codec': '0,1',
2389                 'format': '0,2',
2390                 'mask': '0',
2391                 'no_playurl': '0',
2392                 'platform': 'web',
2393                 'protocol': '0,1',
2394             })
2395             for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2396                 formats.extend(self._parse_formats(qn, fmt))
2397
2398         return {
2399             'id': room_id,
2400             'title': room_data.get('title'),
2401             'description': room_data.get('description'),
2402             'thumbnail': room_data.get('user_cover'),
2403             'timestamp': stream_data.get('live_time'),
2404             'formats': formats,
2405             'is_live': True,
2406             'http_headers': {
2407                 'Referer': url,
2408             },
2409         }