[ie/wistia] Support password-protected videos (#11100)
[yt-dlp3.git] / yt_dlp / extractor / bilibili.py
blob62f68fbc6d0bd15e1a170163f80ba09de0a4017f
1 import base64
2 import functools
3 import hashlib
4 import itertools
5 import json
6 import math
7 import re
8 import time
9 import urllib.parse
10 import uuid
12 from .common import InfoExtractor, SearchInfoExtractor
13 from ..dependencies import Cryptodome
14 from ..networking.exceptions import HTTPError
15 from ..utils import (
16 ExtractorError,
17 GeoRestrictedError,
18 InAdvancePagedList,
19 OnDemandPagedList,
20 bool_or_none,
21 clean_html,
22 determine_ext,
23 filter_dict,
24 float_or_none,
25 format_field,
26 get_element_by_class,
27 int_or_none,
28 join_nonempty,
29 make_archive_id,
30 merge_dicts,
31 mimetype2ext,
32 parse_count,
33 parse_qs,
34 parse_resolution,
35 qualities,
36 smuggle_url,
37 srt_subtitles_timecode,
38 str_or_none,
39 traverse_obj,
40 unified_timestamp,
41 unsmuggle_url,
42 url_or_none,
43 urlencode_postdata,
44 variadic,
48 class BilibiliBaseIE(InfoExtractor):
49 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
50 _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
51 _WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
52 _wbi_key_cache = {}
@property
def is_logged_in(self):
    """Whether a truthy `SESSDATA` cookie is present for https://api.bilibili.com."""
    api_cookies = self._get_cookies('https://api.bilibili.com')
    session_cookie = api_cookies.get('SESSDATA')
    return bool(session_cookie)
def _check_missing_formats(self, play_info, formats):
    """Print a notice naming the advertised qualities that `formats` lacks.

    An entry of `play_info['support_formats']` counts as missing when its
    `quality` is not among the `quality` values found in `formats`. Each missing
    entry is named by its `new_description`, `display_desc` or `quality`,
    whichever is available first. Nothing is printed when none are missing.
    """
    available = set(traverse_obj(formats, (..., 'quality')))
    absent = traverse_obj(play_info, (
        'support_formats', lambda _, entry: entry['quality'] not in available))
    labels = [
        traverse_obj(entry, 'new_description', 'display_desc', 'quality')
        for entry in absent]
    missing_formats = join_nonempty(*labels, delim=', ')
    if not missing_formats:
        return
    self.to_screen(
        f'Format(s) {missing_formats} are missing; you have to login or '
        f'become a premium member to download them. {self._login_hint()}')
def extract_formats(self, play_info):
    """Build the list of format dicts described by a playurl `play_info` dict.

    The result holds, in this order: one audio-only format per entry of
    `dash.audio`, `dash.dolby.audio` and (if present) `dash.flac.audio`; one
    format per entry of `dash.video`; and, when `durl` contains entries with a
    valid URL, a single format for that legacy download (carrying `fragments`
    when there is more than one part).
    """
    # quality id -> human readable name, taken from the advertised formats
    quality_labels = {}
    for entry in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality'])):
        quality_labels[entry['quality']] = traverse_obj(entry, 'new_description', 'display_desc')

    audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
    flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
    if flac_audio:
        audios.append(flac_audio)

    formats = []
    for audio in audios:
        formats.append({
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        })

    # Video streams are marked as having no audio only when separate audio streams exist
    video_acodec = 'none' if audios else None
    for video in traverse_obj(play_info, ('dash', 'video', ...)):
        numeric_id = int_or_none(video.get('id'))
        formats.append({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            'acodec': video_acodec,
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(numeric_id),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': numeric_id,
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': quality_labels.get(video.get('id')),
        })

    if formats:
        self._check_missing_formats(play_info, formats)

    fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
        'url': ('url', {url_or_none}),
        'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
        'filesize': ('size', {int_or_none}),
    }))
    if fragments:
        # Fragment information is only attached when there are several parts
        multi_part_fields = {
            'fragments': fragments,
            'protocol': 'http_dash_segments',
        } if len(fragments) > 1 else {}
        formats.append({
            'url': fragments[0]['url'],
            'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
            **multi_part_fields,
            **traverse_obj(play_info, {
                'quality': ('quality', {int_or_none}),
                'format_id': ('quality', {str_or_none}),
                'format_note': ('quality', {lambda qn: quality_labels.get(qn)}),
                'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
            }),
            **parse_resolution(quality_labels.get(play_info.get('quality'))),
        })
    return formats
def _get_wbi_key(self, video_id):
    """Return the WBI mixin key, reusing the cached one while it is still fresh.

    A cached key is returned while less than `_WBI_KEY_CACHE_TIMEOUT` seconds
    have passed since it was stored. Otherwise the `nav` endpoint is queried and
    a new key is derived, stored in `_wbi_key_cache` and returned.
    """
    cache = self._wbi_key_cache
    if time.time() < cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
        return cache['key']

    nav_data = self._download_json(
        'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')

    # Join the file names (text after the last '/', up to the first '.') of both image URLs
    lookup = ''.join(traverse_obj(nav_data, (
        'data', 'wbi_img', ('img_url', 'sub_url'),
        {lambda x: x.rpartition('/')[2].partition('.')[0]})))

    # from getMixinKey() in the vendor js
    mixin_key_enc_tab = (
        46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
        33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
        61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
        36, 20, 34, 44, 52,
    )

    # Pick the characters in table order and keep the first 32
    mixin_key = ''.join(map(lookup.__getitem__, mixin_key_enc_tab))[:32]
    cache.update(key=mixin_key, ts=time.time())
    return cache['key']
def _sign_wbi(self, params, video_id):
    """Return a WBI-signed copy of the query parameters.

    The returned dict contains every entry of `params` plus a `wts` timestamp,
    with keys in sorted order and values converted to strings from which the
    characters ``!'()*`` are removed, followed by `w_rid`: the MD5 hex digest
    of the urlencoded query concatenated with the WBI key.

    The dict passed in as `params` is left unmodified.
    """
    # Work on a copy so that the caller's dict does not silently gain a `wts` entry
    stamped = {**params, 'wts': round(time.time())}
    strip_table = str.maketrans('', '', "!'()*")
    signed = {
        key: str(value).translate(strip_table)
        for key, value in sorted(stamped.items())
    }
    query = urllib.parse.urlencode(signed)
    signed['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
    return signed
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
    """Download the WBI-signed `playurl` JSON and return its `data` member.

    `bvid`, `cid` and `fnval=4048` are always sent; `qn` is added to the query
    only when it is truthy. `headers` is passed through to the request.
    """
    quality_param = {'qn': qn} if qn else {}
    params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **quality_param}
    response = self._download_json(
        'https://api.bilibili.com/x/player/wbi/playurl', bvid,
        query=self._sign_wbi(params, bvid), headers=headers,
        note=f'Downloading video formats for cid {cid} {qn or ""}')
    return response['data']
def json2srt(self, json_data):
    """Convert a JSON subtitle document into SRT text.

    Each item of `json_data['body']` becomes one SRT cue: a 1-based counter,
    a `from --> to` timing line built with `srt_subtitles_timecode`, and the
    item's `content`. A missing or empty `body` yields an empty string.
    """
    # Build the text in one pass instead of growing a string with `+=`
    return ''.join(
        f'{number}\n'
        f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
        f'{line["content"]}\n\n'
        for number, line in enumerate(json_data.get('body') or [], 1))
def _get_subtitles(self, video_id, cid, aid=None):
    """Return the subtitles dict for `cid`.

    The result always has a `danmaku` entry pointing at the XML comment file of
    `cid`. Every subtitle track listed by the player API that has both
    `subtitle_url` and `lan` is downloaded, converted with `json2srt` and added
    under its `lan` key. The API is queried by `aid` when it is truthy,
    otherwise by `video_id` as `bvid`.
    """
    danmaku_track = {
        'ext': 'xml',
        'url': f'https://comment.bilibili.com/{cid}.xml',
    }
    subtitles = {'danmaku': [danmaku_track]}

    query = {'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid}
    video_info = self._download_json(
        'https://api.bilibili.com/x/player/v2', video_id, query=query,
        note=f'Extracting subtitle info {cid}', headers=self._HEADERS)

    if traverse_obj(video_info, ('data', 'need_login_subtitle')):
        self.report_warning(
            f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)

    tracks = traverse_obj(video_info, (
        'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
    for track in tracks:
        srt_text = self.json2srt(self._download_json(track['subtitle_url'], video_id))
        subtitles.setdefault(track['lan'], []).append({
            'ext': 'srt',
            'data': srt_text,
        })
    return subtitles
def _get_chapters(self, aid, cid):
    """Return chapter dicts built from the player API's `view_points`, or None.

    The request is only made when both `aid` and `cid` are truthy and it is
    non-fatal. Each chapter maps `content`, `from` and `to` to `title`,
    `start_time` and `end_time`.
    """
    player_info = None
    if aid and cid:
        player_info = self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False, headers=self._HEADERS)

    chapters = traverse_obj(player_info, ('data', 'view_points', ..., {
        'title': 'content',
        'start_time': 'from',
        'end_time': 'to',
    }))
    return chapters or None
def _get_comments(self, aid):
    """Yield comment dicts for `aid`, requesting reply pages from 1 upwards.

    Iteration stops at the first page whose `data.replies` is empty or missing
    (page downloads are non-fatal). Every reply is expanded together with its
    nested replies via `_get_all_children`.
    """
    page = 1
    while True:
        response = self._download_json(
            f'https://api.bilibili.com/x/v2/reply?pn={page}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
            aid, note=f'Extracting comments from page {page}', fatal=False)
        replies = traverse_obj(response, ('data', 'replies'))
        if not replies:
            break
        for reply in replies:
            yield from self._get_all_children(reply)
        page += 1
def _get_all_children(self, reply):
    """Yield the comment dict for `reply`, then recursively those of its `replies`."""
    comment = {
        'author': traverse_obj(reply, ('member', 'uname')),
        'author_id': traverse_obj(reply, ('member', 'mid')),
        'id': reply.get('rpid'),
        'text': traverse_obj(reply, ('content', 'message')),
        'timestamp': reply.get('ctime'),
        # A falsy `parent` marks a top-level comment
        'parent': reply.get('parent') or 'root',
    }
    yield comment
    for nested_reply in traverse_obj(reply, ('replies', ...)):
        yield from self._get_all_children(nested_reply)
def _get_episodes_from_season(self, ss_id, url):
    """Yield a `url_result` for every main-section episode of season `ss_id`.

    Only episodes with a valid `share_url` and a truthy `id` are yielded; `url`
    is sent as the Referer of the season request.
    """
    request_headers = {'Referer': url, **self.geo_verification_headers()}
    season_info = self._download_json(
        'https://api.bilibili.com/pgc/web/season/section', ss_id,
        note='Downloading season info', query={'season_id': ss_id},
        headers=request_headers)

    episodes = traverse_obj(season_info, (
        'result', 'main_section', 'episodes',
        lambda _, v: url_or_none(v['share_url']) and v['id']))
    for episode in episodes:
        yield self.url_result(
            episode['share_url'], BiliBiliBangumiIE, str_or_none(episode.get('id')))
def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
    """Walk the edge graph of an interactive video starting at `edge_id`.

    Downloads the edge info for `edge_id`, stores its title, cid and choices in
    `edges[edge_id]`, and recurses into every choice whose edge is not in
    `edges` yet. `edges` is updated in place and must already hold a `cid` for
    `edge_id` unless the API response supplies one.

    Returns `cid_edges`, a dict mapping each cid to the `{edge_id: edge}` dict
    of the edges that use it. When `cid_edges` is passed in, that same dict is
    filled and returned.
    """
    # `is None` rather than `or {}` so that a caller-supplied empty dict is kept
    if cid_edges is None:
        cid_edges = {}
    division_data = self._download_json(
        'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
        query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
        note=f'Extracting divisions from edge {edge_id}')

    edge = edges.setdefault(edge_id, {})
    # traverse_obj yields None when nothing matches; dict.update(None) would raise TypeError
    edge.update(
        traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
            'title': ('title', {str}),
            'cid': ('cid', {int_or_none}),
        }), get_all=False) or {})

    edge.update(traverse_obj(division_data, ('data', {
        'title': ('title', {str}),
        'choices': ('edges', 'questions', ..., 'choices', ..., {
            'edge_id': ('id', {int_or_none}),
            'cid': ('cid', {int_or_none}),
            'text': ('option', {str}),
        }),
    })) or {})
    # use dict to combine edges that use the same video section (same cid)
    cid_edges.setdefault(edge['cid'], {})[edge_id] = edge
    for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
        if choice['edge_id'] not in edges:
            edges[choice['edge_id']] = {'cid': choice['cid']}
            self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
    return cid_edges
def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
    """Yield one entry per distinct video section (cid) of an interactive video.

    The graph version is read from the player API, the edge graph is collected
    with `_get_divisions` starting at edge 1, and for every cid found the play
    info is downloaded. Each entry extends `metainfo`; its description is the
    JSON dump of the edges sharing that cid followed by the original one.
    """
    player_info = self._download_json(
        'https://api.bilibili.com/x/player/wbi/v2', video_id,
        'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers)
    graph_version = traverse_obj(
        player_info, ('data', 'interaction', 'graph_version', {int_or_none}))

    cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
    for section_cid, section_edges in cid_edges.items():
        play_info = self._download_playinfo(video_id, section_cid, headers=headers)
        # The section is titled after the first edge that uses it
        first_edge = next(iter(section_edges.values()))
        yield {
            **metainfo,
            'id': f'{video_id}_{section_cid}',
            'title': f'{metainfo.get("title")} - {first_edge.get("title")}',
            'formats': self.extract_formats(play_info),
            'description': f'{json.dumps(section_edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(video_id, section_cid),
        }
301 class BiliBiliIE(BilibiliBaseIE):
302 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
304 _TESTS = [{
305 'url': 'https://www.bilibili.com/video/BV13x41117TL',
306 'info_dict': {
307 'id': 'BV13x41117TL',
308 'title': '阿滴英文|英文歌分享#6 "Closer',
309 'ext': 'mp4',
310 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
311 'uploader_id': '65880958',
312 'uploader': '阿滴英文',
313 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
314 'duration': 554.117,
315 'tags': list,
316 'comment_count': int,
317 'upload_date': '20170301',
318 'timestamp': 1488353834,
319 'like_count': int,
320 'view_count': int,
321 '_old_archive_ids': ['bilibili 8903802_part1'],
323 }, {
324 'note': 'old av URL version',
325 'url': 'http://www.bilibili.com/video/av1074402/',
326 'info_dict': {
327 'id': 'BV11x411K7CN',
328 'ext': 'mp4',
329 'title': '【金坷垃】金泡沫',
330 'uploader': '菊子桑',
331 'uploader_id': '156160',
332 'duration': 308.36,
333 'upload_date': '20140420',
334 'timestamp': 1397983878,
335 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
336 'like_count': int,
337 'comment_count': int,
338 'view_count': int,
339 'tags': list,
340 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
341 '_old_archive_ids': ['bilibili 1074402_part1'],
343 'params': {'skip_download': True},
344 }, {
345 'note': 'Anthology',
346 'url': 'https://www.bilibili.com/video/BV1bK411W797',
347 'info_dict': {
348 'id': 'BV1bK411W797',
349 'title': '物语中的人物是如何吐槽自己的OP的',
351 'playlist_count': 18,
352 'playlist': [{
353 'info_dict': {
354 'id': 'BV1bK411W797_p1',
355 'ext': 'mp4',
356 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
357 'tags': 'count:10',
358 'timestamp': 1589601697,
359 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
360 'uploader': '打牌还是打桩',
361 'uploader_id': '150259984',
362 'like_count': int,
363 'comment_count': int,
364 'upload_date': '20200516',
365 'view_count': int,
366 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
367 'duration': 90.314,
368 '_old_archive_ids': ['bilibili 498159642_part1'],
371 }, {
372 'note': 'Specific page of Anthology',
373 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
374 'info_dict': {
375 'id': 'BV1bK411W797_p1',
376 'ext': 'mp4',
377 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
378 'tags': 'count:10',
379 'timestamp': 1589601697,
380 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
381 'uploader': '打牌还是打桩',
382 'uploader_id': '150259984',
383 'like_count': int,
384 'comment_count': int,
385 'upload_date': '20200516',
386 'view_count': int,
387 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
388 'duration': 90.314,
389 '_old_archive_ids': ['bilibili 498159642_part1'],
391 }, {
392 'url': 'https://www.bilibili.com/video/av8903802/',
393 'info_dict': {
394 'id': 'BV13x41117TL',
395 'ext': 'mp4',
396 'title': '阿滴英文|英文歌分享#6 "Closer',
397 'upload_date': '20170301',
398 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
399 'timestamp': 1488353834,
400 'uploader_id': '65880958',
401 'uploader': '阿滴英文',
402 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
403 'duration': 554.117,
404 'tags': list,
405 'comment_count': int,
406 'view_count': int,
407 'like_count': int,
408 '_old_archive_ids': ['bilibili 8903802_part1'],
410 'params': {
411 'skip_download': True,
413 }, {
414 'note': 'video has chapter',
415 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
416 'info_dict': {
417 'id': 'BV1vL411G7N7',
418 'ext': 'mp4',
419 'title': '如何为你的B站视频添加进度条分段',
420 'timestamp': 1634554558,
421 'upload_date': '20211018',
422 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
423 'tags': list,
424 'uploader': '爱喝咖啡的当麻',
425 'duration': 669.482,
426 'uploader_id': '1680903',
427 'chapters': 'count:6',
428 'comment_count': int,
429 'view_count': int,
430 'like_count': int,
431 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
432 '_old_archive_ids': ['bilibili 463665680_part1'],
434 'params': {'skip_download': True},
435 }, {
436 'note': 'video redirects to festival page',
437 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
438 'info_dict': {
439 'id': 'BV1wP4y1P72h',
440 'ext': 'mp4',
441 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
442 'timestamp': 1643947497,
443 'upload_date': '20220204',
444 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
445 'uploader': '叨叨冯聊音乐',
446 'duration': 246.719,
447 'uploader_id': '528182630',
448 'view_count': int,
449 'like_count': int,
450 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
451 '_old_archive_ids': ['bilibili 893839363_part1'],
453 }, {
454 'note': 'newer festival video',
455 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
456 'info_dict': {
457 'id': 'BV1ay4y1d77f',
458 'ext': 'mp4',
459 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
460 'timestamp': 1674273600,
461 'upload_date': '20230121',
462 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
463 'uploader': '果蝇轰',
464 'duration': 1111.722,
465 'uploader_id': '8469526',
466 'view_count': int,
467 'like_count': int,
468 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
469 '_old_archive_ids': ['bilibili 778246196_part1'],
471 }, {
472 'note': 'legacy flv/mp4 video',
473 'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
474 'info_dict': {
475 'id': 'BV1ms411Q7vw_p4',
476 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
477 'timestamp': 1458222815,
478 'upload_date': '20160317',
479 'description': '云南方言快乐生产线出品',
480 'duration': float,
481 'uploader': '一笑颠天',
482 'uploader_id': '3916081',
483 'view_count': int,
484 'comment_count': int,
485 'like_count': int,
486 'tags': list,
487 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
488 '_old_archive_ids': ['bilibili 4120229_part4'],
490 'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
491 'playlist_count': 19,
492 'playlist': [{
493 'info_dict': {
494 'id': 'BV1ms411Q7vw_p4_0',
495 'ext': 'flv',
496 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
497 'duration': 399.102,
500 }, {
501 'note': 'legacy mp4-only video',
502 'url': 'https://www.bilibili.com/video/BV1nx411u79K',
503 'info_dict': {
504 'id': 'BV1nx411u79K',
505 'ext': 'mp4',
506 'title': '【练习室】201603声乐练习《No Air》with VigoVan',
507 'timestamp': 1508893551,
508 'upload_date': '20171025',
509 'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
510 'duration': 80.384,
511 'uploader': '伯远',
512 'uploader_id': '10584494',
513 'comment_count': int,
514 'view_count': int,
515 'like_count': int,
516 'tags': list,
517 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
518 '_old_archive_ids': ['bilibili 15700301_part1'],
520 }, {
521 'note': 'interactive/split-path video',
522 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
523 'info_dict': {
524 'id': 'BV1af4y1H7ga',
525 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
526 'timestamp': 1630500414,
527 'upload_date': '20210901',
528 'description': 'md5:01113e39ab06e28042d74ac356a08786',
529 'tags': list,
530 'uploader': '钉宫妮妮Ninico',
531 'duration': 1503,
532 'uploader_id': '8881297',
533 'comment_count': int,
534 'view_count': int,
535 'like_count': int,
536 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
537 '_old_archive_ids': ['bilibili 292734508_part1'],
539 'playlist_count': 33,
540 'playlist': [{
541 'info_dict': {
542 'id': 'BV1af4y1H7ga_400950101',
543 'ext': 'mp4',
544 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
545 'timestamp': 1630500414,
546 'upload_date': '20210901',
547 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
548 'tags': list,
549 'uploader': '钉宫妮妮Ninico',
550 'duration': 11.605,
551 'uploader_id': '8881297',
552 'comment_count': int,
553 'view_count': int,
554 'like_count': int,
555 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
556 '_old_archive_ids': ['bilibili 292734508_part1'],
559 }, {
560 'note': '301 redirect to bangumi link',
561 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
562 'info_dict': {
563 'id': '288525',
564 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
565 'ext': 'mp4',
566 'series': '我和我的祖国',
567 'series_id': '4780',
568 'season': '幕后纪实',
569 'season_id': '28609',
570 'season_number': 1,
571 'episode': '钱学森弹道和乘波体飞行器是什么?',
572 'episode_id': '288525',
573 'episode_number': 105,
574 'duration': 1183.957,
575 'timestamp': 1571648124,
576 'upload_date': '20191021',
577 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
579 }, {
580 'note': 'video has subtitles, which requires login',
581 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
582 'info_dict': {
583 'id': 'BV12N4y1M7rh',
584 'ext': 'mp4',
585 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
586 'tags': list,
587 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
588 'duration': 313.557,
589 'upload_date': '20220709',
590 'uploader': '小夫太渴',
591 'timestamp': 1657347907,
592 'uploader_id': '1326814124',
593 'comment_count': int,
594 'view_count': int,
595 'like_count': int,
596 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
597 'subtitles': 'count:2', # login required for CC subtitle
598 '_old_archive_ids': ['bilibili 898179753_part1'],
600 'params': {'listsubtitles': True},
601 'skip': 'login required for subtitle',
602 }, {
603 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
604 'info_dict': {
605 'id': 'BV1jL41167ZG',
606 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
607 'ext': 'mp4',
609 'skip': 'supporter-only video',
610 }, {
611 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
612 'info_dict': {
613 'id': 'BV1Ks411f7aQ',
614 'title': '【BD1080P】狼与香辛料I【华盟】',
615 'ext': 'mp4',
617 'skip': 'login required',
618 }, {
619 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
620 'info_dict': {
621 'id': 'BV1GJ411x7h7',
622 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
623 'ext': 'mp4',
625 'skip': 'geo-restricted',
626 }, {
627 'note': 'has - in the last path segment of the url',
628 'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
629 'only_matching': True,
def _real_extract(self, url):
    """Extract a regular or festival video page.

    Depending on the page this returns:
    - a `url_result` for the final URL when the request was redirected to a URL
      that does not match `_VALID_URL`;
    - a playlist of per-part URLs for an anthology when no `p` query parameter
      is given and `_yes_playlist` agrees;
    - a playlist of interactive entries when `rights.is_stein_gate` is set;
    - a `multi_video` result when the first remaining format has `fragments`;
    - otherwise a single info dict with formats, chapters and subtitles.

    Raises ExtractorError when no play info can be found on a regular page.
    """
    video_id = self._match_id(url)
    headers = self.geo_verification_headers()
    webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
    # The page may have redirected somewhere this extractor does not handle
    if not self._match_valid_url(urlh.url):
        return self.url_result(urlh.url)

    headers['Referer'] = url

    initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
    # Pages without `videoData` are treated as festival pages, which use `videoInfo` instead
    is_festival = 'videoData' not in initial_state
    if is_festival:
        video_data = initial_state['videoInfo']
    else:
        play_info_obj = self._search_json(
            r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
        if not play_info_obj:
            if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
                self.raise_login_required()
            if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
                raise ExtractorError(
                    'This video may be deleted or geo-restricted. '
                    'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
        play_info = traverse_obj(play_info_obj, ('data', {dict}))
        if not play_info:
            if traverse_obj(play_info_obj, 'code') == 87007:
                # Build the error detail from the `tips-toast` element of the page
                toast = get_element_by_class('tips-toast', webpage) or ''
                msg = clean_html(
                    f'{get_element_by_class("belongs-to", toast) or ""},'
                    + (get_element_by_class('level', toast) or ''))
                raise ExtractorError(
                    f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
            raise ExtractorError('Failed to extract play info')
        video_data = initial_state['videoData']

    # From here on `video_id` is the `bvid` reported by the page, not the URL match
    video_id, title = video_data['bvid'], video_data.get('title')

    # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
    page_list_json = not is_festival and traverse_obj(
        self._download_json(
            'https://api.bilibili.com/x/player/pagelist', video_id,
            fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
            note='Extracting videos in anthology', headers=headers),
        'data', expected_type=list) or []
    is_anthology = len(page_list_json) > 1

    # Last `p` query parameter of the URL, if any
    part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
    if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
        return self.playlist_from_matches(
            page_list_json, video_id, title, ie=BiliBiliIE,
            getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

    if is_anthology:
        part_id = part_id or 1
        title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

    aid = video_data.get('aid')
    old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
    # With a part number the cid comes from the matching `pages` entry
    cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

    festival_info = {}
    if is_festival:
        # Festival pages were not searched for embedded play info above, so download it
        play_info = self._download_playinfo(video_id, cid, headers=headers)

        festival_info = traverse_obj(initial_state, {
            'uploader': ('videoInfo', 'upName'),
            'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
            'like_count': ('videoStatus', 'like', {int_or_none}),
            'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
        }, get_all=False)

    # Later entries override earlier ones: festival values replace the generic ones
    metainfo = {
        **traverse_obj(initial_state, {
            'uploader': ('upData', 'name'),
            'uploader_id': ('upData', 'mid', {str_or_none}),
            'like_count': ('videoData', 'stat', 'like', {int_or_none}),
            'tags': ('tags', ..., 'tag_name'),
            'thumbnail': ('videoData', 'pic', {url_or_none}),
        }),
        **festival_info,
        **traverse_obj(video_data, {
            'description': 'desc',
            'timestamp': ('pubdate', {int_or_none}),
            'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
            'comment_count': ('stat', 'reply', {int_or_none}),
        }, get_all=False),
        'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
        '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
        'title': title,
        'http_headers': {'Referer': url},
    }

    is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
    if is_interactive:
        return self.playlist_result(
            self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
            duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
            __post_extractor=self.extract_comments(aid))
    else:
        formats = self.extract_formats(play_info)

        if not traverse_obj(play_info, ('dash')):
            # we only have legacy formats and need additional work
            # NOTE: `has_qn` reads `formats` at call time, so it sees formats added in the loop below
            has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
            # Request every accepted quality that has not been extracted yet
            for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
                formats.extend(traverse_obj(
                    self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
                    lambda _, v: not has_qn(v['quality'])))
            self._check_missing_formats(play_info, formats)
            flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
            if flv_formats and len(flv_formats) < len(formats):
                # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
                if not self._configuration_arg('prefer_multi_flv'):
                    dropped_fmts = ', '.join(
                        f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
                    formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
                    if dropped_fmts:
                        self.to_screen(
                            f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
                            'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
                else:
                    # Keep the format whose quality equals the argument value;
                    # if none matches, fall back to the highest quality flv format
                    formats = traverse_obj(
                        # XXX: Filtering by extractor-arg is for testing purposes
                        formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
                    ) or [max(flv_formats, key=lambda x: x['quality'])]

        if traverse_obj(formats, (0, 'fragments')):
            # We have flv formats, which are individual short videos with their own timestamps and metainfo
            # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
            return {
                **metainfo,
                '_type': 'multi_video',
                # One entry per fragment; subtitles and comments are attached to the first entry only
                'entries': [{
                    'id': f'{metainfo["id"]}_{idx}',
                    'title': metainfo['title'],
                    'http_headers': metainfo['http_headers'],
                    'formats': [{
                        **fragment,
                        'format_id': formats[0].get('format_id'),
                    }],
                    'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
                    '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
                } for idx, fragment in enumerate(formats[0]['fragments'])],
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
            }
        else:
            return {
                **metainfo,
                'formats': formats,
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'chapters': self._get_chapters(aid, cid),
                'subtitles': self.extract_subtitles(video_id, cid),
                '__post_extractor': self.extract_comments(aid),
            }
788 class BiliBiliBangumiIE(BilibiliBaseIE):
789 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
791 _TESTS = [{
792 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
793 'info_dict': {
794 'id': '21495',
795 'ext': 'mp4',
796 'series': '悠久之翼',
797 'series_id': '774',
798 'season': '第二季',
799 'season_id': '1182',
800 'season_number': 2,
801 'episode': 'forever/ef',
802 'episode_id': '21495',
803 'episode_number': 12,
804 'title': '12 forever/ef',
805 'duration': 1420.791,
806 'timestamp': 1320412200,
807 'upload_date': '20111104',
808 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
810 }, {
811 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
812 'info_dict': {
813 'id': '267851',
814 'ext': 'mp4',
815 'series': '鬼灭之刃',
816 'series_id': '4358',
817 'season': '立志篇',
818 'season_id': '26801',
819 'season_number': 1,
820 'episode': '残酷',
821 'episode_id': '267851',
822 'episode_number': 1,
823 'title': '1 残酷',
824 'duration': 1425.256,
825 'timestamp': 1554566400,
826 'upload_date': '20190406',
827 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
829 'skip': 'Geo-restricted',
830 }, {
831 'note': 'a making-of which falls outside main section',
832 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
833 'info_dict': {
834 'id': '345120',
835 'ext': 'mp4',
836 'series': '鬼灭之刃',
837 'series_id': '4358',
838 'season': '立志篇',
839 'season_id': '26801',
840 'season_number': 1,
841 'episode': '炭治郎篇',
842 'episode_id': '345120',
843 'episode_number': 27,
844 'title': '#1 炭治郎篇',
845 'duration': 1922.129,
846 'timestamp': 1602853860,
847 'upload_date': '20201016',
848 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
def _real_extract(self, url):
    """Extract a single bangumi episode (`/bangumi/play/ep<id>`).

    Raises GeoRestrictedError when the page carries the region-block notice, and
    requests login when the page or the API marks the episode as premium-only
    and no formats could be extracted.

    Returns the info dict of the episode, with series, season and episode
    metadata taken from the season API response.
    """
    episode_id = self._match_id(url)
    headers = self.geo_verification_headers()
    webpage = self._download_webpage(url, episode_id, headers=headers)

    if '您所在的地区无法观看本片' in webpage:
        raise GeoRestrictedError('This video is restricted')
    elif '正在观看预览,大会员免费看全片' in webpage:
        self.raise_login_required('This video is for premium members only')

    headers['Referer'] = url
    play_info = self._download_json(
        'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
        'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
        headers=headers)
    premium_only = play_info.get('code') == -10403
    play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

    formats = self.extract_formats(play_info)
    if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
        self.raise_login_required('This video is for premium members only')

    bangumi_info = self._download_json(
        'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
        query={'ep_id': episode_id}, headers=headers)['result']

    # 1-based position of this episode among the main and section episodes;
    # falls back to (1, {}) when the episode is not listed
    episode_number, episode_info = next((
        (idx, ep) for idx, ep in enumerate(traverse_obj(
            bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
        if str_or_none(ep.get('id')) == episode_id), (1, {}))

    season_id = bangumi_info.get('season_id')
    # Without a season id there is nothing to look up; unpacking the result of
    # `season_id and next(...)` would raise TypeError in that case
    season_number, season_title = None, None
    if season_id:
        season_number, season_title = next((
            (idx + 1, e.get('season_title')) for idx, e in enumerate(
                traverse_obj(bangumi_info, ('seasons', ...)))
            if e.get('season_id') == season_id
        ), (None, None))

    aid = episode_info.get('aid')

    return {
        'id': episode_id,
        'formats': formats,
        # traverse_obj yields None when no field matches, which cannot be unpacked
        **(traverse_obj(bangumi_info, {
            'series': ('series', 'series_title', {str}),
            'series_id': ('series', 'series_id', {str_or_none}),
            'thumbnail': ('square_cover', {url_or_none}),
        }) or {}),
        **traverse_obj(episode_info, {
            'episode': ('long_title', {str}),
            # Use the list position when the episode title is not a number
            'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
            'timestamp': ('pub_time', {int_or_none}),
            'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
        }),
        'episode_id': episode_id,
        'season': str_or_none(season_title),
        'season_id': str_or_none(season_id),
        'season_number': season_number,
        'duration': float_or_none(play_info.get('timelength'), scale=1000),
        'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
        '__post_extractor': self.extract_comments(aid),
        'http_headers': {'Referer': url},
    }
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    """Playlist extractor for ``bilibili.com/bangumi/media/md<id>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
            'title': 'CAROLE & TUESDAY',
            'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.bilibili.com/bangumi/media/md1565/',
        'info_dict': {
            'id': '1565',
            'title': '攻壳机动队 S.A.C. 2nd GIG',
            'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
        },
        'playlist_count': 26,
        'playlist': [{
            'info_dict': {
                'id': '68540',
                'ext': 'mp4',
                'series': '攻壳机动队',
                'series_id': '1077',
                'season': '第二季',
                'season_id': '1565',
                'season_number': 2,
                'episode': '再启动 REEMBODY',
                'episode_id': '68540',
                'episode_number': 1,
                'title': '1 再启动 REEMBODY',
                'duration': 1525.777,
                'timestamp': 1425074413,
                'upload_date': '20150227',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }]

    def _real_extract(self, url):
        """Build a playlist from the season referenced by the media page."""
        media_id = self._match_id(url)
        page = self._download_webpage(url, media_id)

        # The season id is read from the JSON assigned to window.__INITIAL_STATE__
        state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', page, 'initial_state', media_id)
        season_id = state['mediaInfo']['season_id']

        entries = self._get_episodes_from_season(season_id, url)
        playlist_meta = traverse_obj(state, ('mediaInfo', {
            'title': ('title', {str}),
            'description': ('evaluate', {str}),
        }))
        return self.playlist_result(entries, media_id, **playlist_meta)
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    """Playlist extractor for ``bilibili.com/bangumi/play/ss<id>`` season pages."""

    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801',
            'title': '鬼灭之刃',
            'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
        },
        'playlist_mincount': 26,
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ss2251',
        'info_dict': {
            'id': '2251',
            'title': '玲音',
            'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
        },
        'playlist_count': 13,
        'playlist': [{
            'info_dict': {
                'id': '50188',
                'ext': 'mp4',
                'series': '玲音',
                'series_id': '1526',
                'season': 'TV',
                'season_id': '2251',
                'season_number': 1,
                'episode': 'WEIRD',
                'episode_id': '50188',
                'episode_number': 1,
                'title': '1 WEIRD',
                'duration': 1436.992,
                'timestamp': 1343185080,
                'upload_date': '20120725',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }]

    def _real_extract(self, url):
        """Build a playlist for the season, titled from the page's JSON-LD block."""
        ss_id = self._match_id(url)
        page = self._download_webpage(url, ss_id)

        # Title/description come from the first usable itemListElement entry
        ld_json = self._search_json(
            r'<script[^>]+type="application/ld\+json"[^>]*>', page, 'info', ss_id)
        metainfo = traverse_obj(ld_json, ('itemListElement', ..., {
            'title': ('name', {str}),
            'description': ('description', {str}),
        }), get_all=False)

        entries = self._get_episodes_from_season(ss_id, url)
        return self.playlist_result(entries, ss_id, **metainfo)
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Shared helpers for the ``bilibili.com/cheese`` (course) extractors."""

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for one course episode.

        season_info: the 'data' object returned by _download_season_info().
        ep_id: episode id; compared against each episode's integer 'id'.

        Raises ExtractorError if the episode is not listed in season_info or
        is not yet available; requires login/purchase if it is not playable.
        """
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        if not episode_info:
            # Without this guard the subscripts below fail with an opaque TypeError
            raise ExtractorError(f'Unable to find episode {ep_id} in season info')
        aid, cid = episode_info['aid'], episode_info['cid']

        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Entries are always attributed to the single-episode extractor
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Fetch the season JSON and return its 'data' member.

        query_key names the query parameter (callers pass 'ep_id' or
        'season_id') whose value is video_id.
        """
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    """Extractor for a single course episode (``/cheese/play/ep<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Look up the season containing the episode, then extract that episode."""
        ep_id = self._match_id(url)
        season_info = self._download_season_info('ep_id', ep_id)
        return self._extract_episode(season_info, ep_id)
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    """Playlist extractor for a whole course (``/cheese/play/ss<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            },
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        """Lazily yield info dicts for episodes flagged 'episode_can_view'."""
        viewable_ids = traverse_obj(
            season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
        for viewable_id in viewable_ids:
            yield self._extract_episode(season_info, viewable_id)

    def _real_extract(self, url):
        """Return the course as a playlist of its viewable episodes."""
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)

        entries = self._get_cheese_entries(season_info)
        playlist_meta = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })
        return self.playlist_result(entries, season_id, **playlist_meta)
class BilibiliSpaceBaseIE(BilibiliBaseIE):
    """Base for paginated ``space.bilibili.com`` playlists."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Return ``(metadata, paged_list)`` for a paginated listing.

        fetch_page(idx) downloads the page with 0-based index idx,
        get_metadata(page) must return a dict containing 'page_count' and
        'page_size', and get_entries(page) produces that page's entries.
        """
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        def entries_for(idx):
            # Page 0 was already downloaded for the metadata; reuse it
            page = fetch_page(idx) if idx else first_page
            return get_entries(page)

        paged_list = InAdvancePagedList(
            entries_for, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Playlist extractor for the videos of a ``space.bilibili.com/<id>`` user."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
        'skip': 'login required',
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Page through the space's video search API and return a playlist."""
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        # Sort order may be supplied in the URL's query string; default is by publish date
        sort_order = traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate'

        def fetch_page(page_idx):
            params = {
                'keyword': '',
                'mid': playlist_id,
                'order': sort_order,
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
            }

            try:
                response = self._download_json(
                    'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
                    query=self._sign_wbi(params, playlist_id),
                    note=f'Downloading space page {page_idx}', headers={'Referer': url})
            except ExtractorError as err:
                blocked = isinstance(err.cause, HTTPError) and err.cause.status == 412
                if not blocked:
                    raise
                raise ExtractorError(
                    'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)

            # The API reports failures through the 'code' field of the JSON body
            code = response['code']
            if code == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            elif code == -352 and not self.is_logged_in:
                self.raise_login_required('Request is rejected, you need to login to access playlist')
            elif code != 0:
                raise ExtractorError(f'Request failed ({code}): {response.get("message") or "Unknown error"}')
            return response['data']

        def get_metadata(page_data):
            paging = page_data['page']
            page_size = paging['ps']
            entry_count = paging['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            for video in traverse_obj(page_data, ('list', 'vlist')) or []:
                yield self.url_result(f'https://www.bilibili.com/video/{video["bvid"]}', BiliBiliIE, video['bvid'])

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Playlist extractor for ``space.bilibili.com/<id>/audio``."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Page through the uploader's songs and return them as a playlist."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            params = {'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'}
            response = self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}', query=params)
            return response['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            for song in page_data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{song["id"]}', BilibiliAudioIE, song['id'])

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Base for list-style playlists whose items are identified by a BV id."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield a url_result for every string id found in page_data.

        bvid_keys is a single key or a tuple of keys leading to the item
        list; ending_key is the per-item field that holds the id.
        """
        id_path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for video_id in traverse_obj(page_data, id_path):
            yield self.url_result(f'https://www.bilibili.com/video/{video_id}', BiliBiliIE, video_id)

    def _get_uploader(self, uid, playlist_id):
        """Read the uploader name from the <title> of the user's space page (non-fatal)."""
        space_page = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', space_page, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Same as the parent method, but drops the paging keys from the metadata."""
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        # Callers here splat the metadata into playlist_result()
        for paging_key in ('page_count', 'page_size'):
            metadata.pop(paging_key, None)
        return metadata, page_list
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for ``/channel/collectiondetail?sid=<sid>`` collections."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        """Return the collection as a paged playlist with id ``<mid>_<sid>``."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            params = {'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
            response = self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}', query=params)
            return response['data']

        def get_metadata(page_data):
            paging = page_data['page']
            page_size = paging['page_size']
            entry_count = paging['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                }),
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        playlist_meta, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **playlist_meta)
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for ``/channel/seriesdetail?sid=<sid>`` series."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        """Return the series as a paged playlist with id ``<mid>_<sid>``."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        # Series-level metadata comes from a separate, non-fatal request
        series_json = self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
        )
        playlist_meta = traverse_obj(series_json, {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            params = {'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
            response = self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}', query=params)
            return response['data']

        def get_metadata(page_data):
            paging = page_data['page']
            page_size = paging['size']
            entry_count = paging['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta,
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for favorites lists (``favlist?fid=`` / ``medialist/detail/ml``)."""

    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the favorites list as a playlist; requires login when the API answers -403."""
        fid = self._match_id(url)

        # First request: list-level metadata (and the private-list check)
        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # Second request: the ids of the list's resources
        ids_json = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(ids_json, 'data')

        playlist_meta = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **playlist_meta)
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for the logged-in user's ``/watchlater`` list."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {
            'id': r're:\d+',
            'title': '稍后再看',
        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Return the watch-later list; requires login when the API answers -101."""
        # Playlist id is the DedeUserID cookie value, or 'watchlater' if that cookie is absent
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = getattr(user_cookie, 'value', 'watchlater')

        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')

        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')), id=list_id, title='稍后再看')
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for ``bilibili.com/list/<id>`` and ``/medialist/play/<id>``."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
        'info_dict': {
            'id': 'BV1DU4y1r7tz',
            'ext': 'mp4',
            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
            'upload_date': '20220820',
            'description': '',
            'timestamp': 1661016330,
            'uploader_id': '1958703906',
            'uploader': '靡烟miya',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 9552.903,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            '_old_archive_ids': ['bilibili 687146339_part1'],
        },
        'params': {'noplaylist': True},
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {
            'id': r're:2_\d+',
            'title': '稍后再看',
            'uploader': str,
            'uploader_id': str,
        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'redirect url & login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Yield entries page by page until the API stops reporting 'has_more'.

        Note: ``query`` is updated in place; its 'oid' is set to the id of
        the last item of each downloaded page before the next request.
        """
        page_num = 0
        while True:
            page_num += 1
            response = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
            )
            page_data = response['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                return

    def _real_extract(self, url):
        """Return the playlist, or a single video when only ``bvid`` should be downloaded."""
        list_id = self._match_id(url)

        bvid = traverse_obj(parse_qs(url), ('bvid', 0))
        if not self._yes_playlist(list_id, bvid):
            return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)

        page = self._download_webpage(url, list_id)
        state = self._search_json(r'window\.__INITIAL_STATE__\s*=', page, 'initial state', list_id)

        # Anything other than error code 200 in the embedded state is treated as a failure
        if traverse_obj(state, ('error', 'code', {int_or_none})) != 200:
            error_code = traverse_obj(state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            if error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            if error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        state_params = traverse_obj(state, {
            'type': ('playlist', 'type', {int_or_none}),
            'biz_id': ('playlist', 'id', {int_or_none}),
            'tid': ('tid', {int_or_none}),
            'sort_field': ('sortFiled', {int_or_none}),
            'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
        })
        query = {
            'ps': 20,
            'with_current': False,
            **state_params,
        }

        list_meta = traverse_obj(state, ('mediaListInfo', {
            'title': ('title', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
            'thumbnail': ('cover', {url_or_none}),
        }))
        metadata = {
            'id': f'{query["type"]}_{query["biz_id"]}',
            **list_meta,
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
class BilibiliCategoryIE(InfoExtractor):
    """Playlist extractor for ``bilibili.com/v/<category>/<subcategory>`` listings."""

    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad',
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45,
        },
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield url_results for one listing page.

        Raises ExtractorError when the response has no ('data', 'archives') list.
        """
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note=f'Extracting results from page {page_num} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {page_num}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return an OnDemandPagedList over the videos of a category/subcategory.

        Raises ExtractorError for unsupported (sub)categories or when the
        first response carries no usable page count/size.
        """
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127,
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # traverse_obj gives None when the path is missing; fall back to {} so the
        # explicit ExtractorError below is raised instead of an AttributeError
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        # num_pages is only used for the progress note in _fetch_page
        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        """Return the playlist; its id and title are both '<category>: <subcategory>'."""
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor registered under the ``bilisearch`` key."""

    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    _TESTS = [{
        'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        'playlist_count': 3,
        'info_dict': {
            'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
            'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        },
        'playlist': [{
            'info_dict': {
                'id': 'BV1n44y1Q7sc',
                'ext': 'mp4',
                'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
                'timestamp': 1669889987,
                'upload_date': '20221201',
                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
                'tags': list,
                'uploader': '靡烟miya',
                'duration': 123.156,
                'uploader_id': '1958703906',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 988222410_part1'],
            },
        }],
    }]

    def _search_results(self, query):
        """Yield url_results page by page until a page has no results."""
        # Set a random buvid3 cookie if the cookie jar does not already have one
        api_cookies = self._get_cookies('https://api.bilibili.com')
        if not api_cookies.get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')

        for page_num in itertools.count(1):
            params = {
                'Search_key': query,
                'keyword': query,
                'page': page_num,
                'context': '',
                'duration': 0,
                'tids_2': '',
                '__refresh__': 'true',
                'search_type': 'video',
                'tids': 0,
                'highlight': 1,
            }
            response = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query=params)
            videos = response['data'].get('result')
            if not videos:
                return
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
class BilibiliAudioBaseIE(InfoExtractor):
    """Base for the ``bilibili.com/audio`` extractors."""

    def _call_api(self, path, sid, query=None):
        """Call the audio web API at ``path`` and return the response's 'data'.

        When no (or an empty) query is given, ``{'sid': sid}`` is sent.
        """
        params = query if query else {'sid': sid}
        api_url = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        return self._download_json(api_url, sid, query=params)['data']
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extractor for a single song (``bilibili.com/audio/au<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the song: one audio-only format plus song metadata."""
        au_id = self._match_id(url)

        # Only the first CDN URL is used; the format carries the page URL as Referer
        play_data = self._call_api('url', au_id)
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        title = song['title']
        stats = song.get('statistic') or {}

        # The lyric URL, when present, is exposed as the 'origin' subtitle track
        lyric_url = song.get('lyric')
        subtitles = {'origin': [{'url': lyric_url}]} if lyric_url else None

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(stats.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(stats.get('play')),
        }
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Playlist extractor for a song menu (``bilibili.com/audio/am<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Return the menu's songs as a playlist, titled from 'menu/info' when available."""
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Songs without a usable id are skipped
        song_ids = (str_or_none(song.get('id')) for song in songs)
        entries = [
            self.url_result(
                'https://www.bilibili.com/audio/au' + sid,
                BilibiliAudioIE.ie_key(), sid)
            for sid in song_ids if sid
        ]
        if not entries:
            return self.playlist_result(entries, am_id)

        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if not album_title:
            return self.playlist_result(entries, am_id)

        for entry in entries:
            entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))
class BiliBiliPlayerIE(InfoExtractor):
    """Redirects ``player.bilibili.com/player.html?aid=<id>`` embeds to BiliBiliIE."""

    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Hand the matched aid over to BiliBiliIE as an ``av<id>`` video URL."""
        video_id = self._match_id(url)
        target_url = f'http://www.bilibili.tv/video/av{video_id}/'
        return self.url_result(target_url, ie=BiliBiliIE.ie_key(), video_id=video_id)
class BiliIntlBaseIE(InfoExtractor):
    # Shared helpers for the bilibili.tv (international) extractors:
    # API access, subtitle/format extraction, metadata parsing and login.
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'
    _HEADERS = {'Referer': 'https://www.bilibili.tv/'}

    def _call_api(self, endpoint, *args, **kwargs):
        """Download ``_API_URL + endpoint`` and return the response's ``data`` member.

        Extra positional/keyword arguments are forwarded to ``_download_json``.
        A non-zero ``code`` in the response is mapped as follows:
        10004004/10004005/10023006 -> login required, 10004001 -> geo
        restricted, anything else -> ExtractorError if ``fatal`` was passed
        as a truthy keyword, otherwise only a warning.

        Returns None when the download did not produce a JSON object.
        """
        response = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        # With fatal=False a failed download does not yield a dict; bail out
        # instead of crashing on ``.get`` so callers' ``or {}`` fallbacks work.
        if not isinstance(response, dict):
            return None

        code = response.get('code')
        if code:
            if code in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif code == 10004001:
                self.raise_geo_restricted()
            else:
                errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                message = response.get('message')
                # Some responses just repeat the numeric code as the message;
                # that adds nothing, so it is left out.
                if message and str(code) != message:
                    errmsg = f'{errmsg}: {self.IE_NAME} said: {message}'
                # NOTE: ``fatal`` is treated as False unless passed explicitly.
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                else:
                    self.report_warning(errmsg)
        return response.get('data')

    def json2srt(self, json):
        """Convert a subtitle JSON object (cues under ``body``) to SRT text.

        Cues with empty content, a missing start or a missing/zero end are
        skipped. A start time of exactly 0 is valid and kept.
        """
        cues = traverse_obj(json, (
            'body', lambda _, cue: (
                cue['content'] and (cue['from'] or cue['from'] == 0) and cue['to'])))
        return '\n\n'.join(
            f'{idx}\n{srt_subtitles_timecode(cue["from"])} --> {srt_subtitles_timecode(cue["to"])}\n{cue["content"]}'
            for idx, cue in enumerate(cues, 1))

    def _get_subtitles(self, *, ep_id=None, aid=None):
        """Return a subtitles dict (language -> list of tracks) for an episode or video."""
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        # The same URL can appear under several keys; fetch each only once.
        fetched_urls = set()
        for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
            for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
                if url in fetched_urls:
                    continue
                fetched_urls.add(url)
                sub_ext = determine_ext(url)
                sub_lang = sub.get('lang_key') or 'en'

                if sub_ext == 'ass':
                    subtitles.setdefault(sub_lang, []).append({
                        'ext': 'ass',
                        'url': url,
                    })
                elif sub_ext == 'json':
                    # JSON subtitles are downloaded and converted to SRT here.
                    sub_data = self._download_json(
                        url, ep_id or aid, fatal=False,
                        note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
                        errnote='Unable to download subtitles')

                    if sub_data:
                        subtitles.setdefault(sub_lang, []).append({
                            'ext': 'srt',
                            'data': self.json2srt(sub_data),
                        })
                else:
                    self.report_warning('Unexpected subtitle extension', ep_id or aid)

        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Return the list of video-only and audio-only formats from the playurl API."""
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Map an episode/video data dict to info-dict metadata fields."""
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'description': video_data.get('desc'),
            'thumbnail': video_data.get('cover'),
            'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
            # Episode titles look like "E2 - The First Night" or just "E2".
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in with username/password; the password is RSA-encrypted with the server's key.

        Raises ExtractorError if pycryptodomex is unavailable or the login
        response carries a non-zero ``code``.
        """
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = Cryptodome.RSA.importKey(key_data['key'])
        # The server-provided hash is prepended to the password before encryption.
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
            data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true',
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to log in')
class BiliIntlIE(BiliIntlBaseIE):
    # Extracts a single bilibili.tv / biliintl.com video: either a series
    # episode (/play/<season_id>/<ep_id>) or user-generated content
    # (/video/<aid>).
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro',
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro',
            }],
        },
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro',
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro',
            }],
        },
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro',
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro',
            }],
        },
        'params': {
            'getcomments': True,
        },
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
            'comment_count': int,
            'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
        },
        'params': {
            'getcomments': True,
        },
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Build the canonical page URL: an episode URL if series_id is given, else a video URL."""
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Return metadata for the video, preferring smuggled data over the webpage."""
        url, smuggled_data = unsmuggle_url(url, {})
        # Metadata smuggled into the URL already carries a title, so no
        # webpage download is needed.
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            # Fall back to an empty dict when the episode is not listed, so
            # that _parse_video_metadata() below does not fail on None.
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
            ), expected_type=dict, get_all=False) or {}

        # XXX: webpage metadata may not be accurate; it is only used so that
        # extraction does not crash when video_data is not found
        return merge_dicts(
            self._parse_video_metadata(video_data), {
                'title': get_element_by_class(
                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                'description': get_element_by_class(
                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
            }, self._search_json_ld(webpage, video_id, default={}))

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Yield all replies to the root comment ``root_id``, following the paging cursor.

        Paging is done iteratively and stops when the cursor is missing,
        flags ``is_end``, or does not provide a new ``next`` value.
        """
        while True:
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/detail', display_id,
                note=f'Downloading reply comment of {root_id} - {next_id}',
                query={
                    'platform': 'web',
                    'ps': 20,  # comment's reply per page (default: 3)
                    'root': root_id,
                    'next': next_id,
                })

            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'parent': replies.get('parent'),
                    'timestamp': unified_timestamp(replies.get('ctime_text')),
                }

            cursor = traverse_obj(comment_api_raw_data, ('data', 'cursor', {dict}))
            # A response without a cursor (e.g. an error payload) ends paging
            # instead of raising KeyError.
            if cursor is None or cursor.get('is_end'):
                return
            following_id = cursor.get('next')
            # Without a new position we would request the same page forever.
            if following_id is None or following_id == next_id:
                return
            next_id = following_id

    def _get_comments(self, video_id, ep_id):
        """Yield root comments, each followed by its replies, page by page."""
        for i in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {i + 1}',
                query={
                    'platform': 'web',
                    'pn': i,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'timestamp': unified_timestamp(replies.get('ctime_text')),
                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
                }
                if replies.get('count'):
                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)

            cursor = traverse_obj(comment_api_raw_data, ('data', 'cursor', {dict}))
            # Stop on the last page, and also when the response carries no
            # cursor at all; otherwise such a response would loop forever.
            if cursor is None or cursor.get('is_end'):
                break

    def _real_extract(self, url):
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            # Only episodes expose intro/outro ("skip") markers.
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start time and end time seem to be off by a few seconds even though they are correct based on ogv.*.js
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                chapters = [{
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro',
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro',
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id),
            'http_headers': self._HEADERS,
        }
class BiliIntlSeriesIE(BiliIntlBaseIE):
    # Extracts a whole bilibili.tv / biliintl.com series (/play/<id> or
    # /media/<id>) as a playlist of BiliIntlIE episodes.
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield one url_result per episode of the series.

        The episode metadata is smuggled into each URL so that BiliIntlIE
        can reuse it.
        """
        series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            episode_id = str(episode['episode_id'])
            yield self.url_result(smuggle_url(
                BiliIntlIE._make_url(episode_id, series_id),
                self._parse_video_metadata(episode),
            ), BiliIntlIE, episode_id)

    def _real_extract(self, url):
        series_id = self._match_id(url)
        # _call_api() returns None when the response has no 'data' (for
        # example after an API error that was only reported as a warning),
        # so the result must not be dereferenced directly.
        series_info = traverse_obj(self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id), ('season', {dict})) or {}
        return self.playlist_result(
            self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
            categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
2323 class BiliLiveIE(InfoExtractor):
2324 _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2326 _TESTS = [{
2327 'url': 'https://live.bilibili.com/196',
2328 'info_dict': {
2329 'id': '33989',
2330 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
2331 'ext': 'flv',
2332 'title': '太空狼人杀联动,不被爆杀就算赢',
2333 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
2334 'timestamp': 1650802769,
2336 'skip': 'not live',
2337 }, {
2338 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2339 'only_matching': True,
2340 }, {
2341 'url': 'https://live.bilibili.com/blanc/196',
2342 'only_matching': True,
2345 _FORMATS = {
2346 80: {'format_id': 'low', 'format_note': '流畅'},
2347 150: {'format_id': 'high_res', 'format_note': '高清'},
2348 250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
2349 400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
2350 10000: {'format_id': 'source', 'format_note': '原画'},
2351 20000: {'format_id': '4K', 'format_note': '4K'},
2352 30000: {'format_id': 'dolby', 'format_note': '杜比'},
2355 _quality = staticmethod(qualities(list(_FORMATS)))
def _call_api(self, path, room_id, query):
    """Query ``https://api.live.bilibili.com/<path>`` and return its ``data`` member.

    Raises ExtractorError (with the API's message when present) if the
    response ``code`` is not 0. A missing or empty ``data`` yields ``{}``.
    """
    response = self._download_json(
        f'https://api.live.bilibili.com/{path}', room_id, query=query)
    if response.get('code') == 0:
        return response.get('data') or {}
    raise ExtractorError(response.get('message') or 'Unable to download JSON metadata')
def _parse_formats(self, qn, fmt):
    """Yield one format dict per URL of every codec in ``fmt`` whose ``current_qn`` is ``qn``.

    Codecs whose ``current_qn`` differs from the requested ``qn`` are skipped.
    """
    matching_codecs = (
        candidate for candidate in fmt.get('codec') or []
        if candidate.get('current_qn') == qn)
    for codec in matching_codecs:
        for location in codec['url_info']:
            # The full stream URL is host + base path + extra query string.
            stream_url = f'{location["host"]}{codec["base_url"]}{location["extra"]}'
            yield {
                'url': stream_url,
                'ext': fmt.get('format_name'),
                'vcodec': codec.get('codec_name'),
                'quality': self._quality(qn),
                **self._FORMATS[qn],
            }
2376 def _real_extract(self, url):
2377 room_id = self._match_id(url)
2378 room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
2379 if room_data.get('live_status') == 0:
2380 raise ExtractorError('Streamer is not live', expected=True)
2382 formats = []
2383 for qn in self._FORMATS:
2384 stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
2385 'room_id': room_id,
2386 'qn': qn,
2387 'codec': '0,1',
2388 'format': '0,2',
2389 'mask': '0',
2390 'no_playurl': '0',
2391 'platform': 'web',
2392 'protocol': '0,1',
2394 for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2395 formats.extend(self._parse_formats(qn, fmt))
2397 return {
2398 'id': room_id,
2399 'title': room_data.get('title'),
2400 'description': room_data.get('description'),
2401 'thumbnail': room_data.get('user_cover'),
2402 'timestamp': stream_data.get('live_time'),
2403 'formats': formats,
2404 'is_live': True,
2405 'http_headers': {
2406 'Referer': url,