[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
blob2db951a6084d78abcebc932177555243b0bf8ad2
1 import base64
2 import functools
3 import hashlib
4 import itertools
5 import json
6 import math
7 import re
8 import time
9 import urllib.parse
10 import uuid
12 from .common import InfoExtractor, SearchInfoExtractor
13 from ..dependencies import Cryptodome
14 from ..networking.exceptions import HTTPError
15 from ..utils import (
16 ExtractorError,
17 GeoRestrictedError,
18 InAdvancePagedList,
19 OnDemandPagedList,
20 bool_or_none,
21 determine_ext,
22 filter_dict,
23 float_or_none,
24 format_field,
25 get_element_by_class,
26 int_or_none,
27 join_nonempty,
28 make_archive_id,
29 merge_dicts,
30 mimetype2ext,
31 parse_count,
32 parse_qs,
33 parse_resolution,
34 qualities,
35 smuggle_url,
36 srt_subtitles_timecode,
37 str_or_none,
38 traverse_obj,
39 unified_timestamp,
40 unsmuggle_url,
41 url_or_none,
42 urlencode_postdata,
43 variadic,
class BilibiliBaseIE(InfoExtractor):
    """Shared helpers for Bilibili extractors: format extraction, WBI request
    signing, subtitles, chapters, comments, and interactive-video traversal."""

    _HEADERS = {'Referer': 'https://www.bilibili.com/'}
    # Captures the numeric format id embedded in a DASH segment URL, e.g. "...-30280.m4s?..."
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
    _WBI_KEY_CACHE_TIMEOUT = 30  # exact expire timeout is unclear, use 30s for one session
    # Class-level cache shared by all instances; populated by _get_wbi_key as {'key': str, 'ts': float}
    _wbi_key_cache = {}
53 @property
54 def is_logged_in(self):
55 return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
57 def _check_missing_formats(self, play_info, formats):
58 parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
59 missing_formats = join_nonempty(*[
60 traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
61 for fmt in traverse_obj(play_info, (
62 'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
63 if missing_formats:
64 self.to_screen(
65 f'Format(s) {missing_formats} are missing; you have to '
66 f'become a premium member to download them. {self._login_hint()}')
    def extract_formats(self, play_info):
        """Build yt-dlp format dicts from a playurl API 'data' object.

        Handles DASH audio (incl. Dolby and FLAC), DASH video, and the legacy
        fragmented 'durl' delivery used by old flv/mp4 videos.
        """
        # quality id -> human-readable name taken from 'support_formats'
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        # Regular + Dolby audio tracks; lossless FLAC is nested under a separate key
        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            # if there are separate audio tracks, video streams carry no audio
            'acodec': 'none' if audios else None,
            # quality ids 126/125 are mapped to Dolby Vision / HDR10
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            # prefer the id embedded in the segment URL, falling back to the quality id
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

        if formats:
            self._check_missing_formats(play_info, formats)

        # Legacy (non-DASH) delivery: 'durl' lists sequential flv/mp4 fragments
        fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
            'url': ('url', {url_or_none}),
            'duration': ('length', {float_or_none(scale=1000)}),
            'filesize': ('size', {int_or_none}),
        }))
        if fragments:
            formats.append({
                'url': fragments[0]['url'],
                'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
                # multiple fragments are downloaded and joined DASH-segment style
                **({
                    'fragments': fragments,
                    'protocol': 'http_dash_segments',
                } if len(fragments) > 1 else {}),
                **traverse_obj(play_info, {
                    'quality': ('quality', {int_or_none}),
                    'format_id': ('quality', {str_or_none}),
                    'format_note': ('quality', {lambda x: format_names.get(x)}),
                    'duration': ('timelength', {float_or_none(scale=1000)}),
                }),
                **parse_resolution(format_names.get(play_info.get('quality'))),
            })
        return formats
    def _get_wbi_key(self, video_id):
        """Return the 32-char WBI signing key, caching it for _WBI_KEY_CACHE_TIMEOUT seconds."""
        if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
            return self._wbi_key_cache['key']

        session_data = self._download_json(
            'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')

        # Concatenate the basenames (sans extension) of the two wbi image URLs
        lookup = ''.join(traverse_obj(session_data, (
            'data', 'wbi_img', ('img_url', 'sub_url'),
            {lambda x: x.rpartition('/')[2].partition('.')[0]})))

        # from getMixinKey() in the vendor js
        mixin_key_enc_tab = [
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
            33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
            61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
            36, 20, 34, 44, 52,
        ]

        self._wbi_key_cache.update({
            # permute the lookup string by the table and keep the first 32 chars
            'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
            'ts': time.time(),
        })
        return self._wbi_key_cache['key']
157 def _sign_wbi(self, params, video_id):
158 params['wts'] = round(time.time())
159 params = {
160 k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
161 for k, v in sorted(params.items())
163 query = urllib.parse.urlencode(params)
164 params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
165 return params
167 def _download_playinfo(self, bvid, cid, headers=None, query=None):
168 params = {'bvid': bvid, 'cid': cid, 'fnval': 4048, **(query or {})}
169 if self.is_logged_in:
170 params.pop('try_look', None)
171 if qn := params.get('qn'):
172 note = f'Downloading video format {qn} for cid {cid}'
173 else:
174 note = f'Downloading video formats for cid {cid}'
176 return self._download_json(
177 'https://api.bilibili.com/x/player/wbi/playurl', bvid,
178 query=self._sign_wbi(params, bvid), headers=headers, note=note)['data']
180 def json2srt(self, json_data):
181 srt_data = ''
182 for idx, line in enumerate(json_data.get('body') or []):
183 srt_data += (f'{idx + 1}\n'
184 f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
185 f'{line["content"]}\n\n')
186 return srt_data
    def _get_subtitles(self, video_id, cid, aid=None):
        """Return a subtitles dict: the danmaku XML track plus any CC subtitles (converted to SRT)."""
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }],
        }

        video_info = self._download_json(
            'https://api.bilibili.com/x/player/wbi/v2', video_id,
            query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
            note=f'Extracting subtitle info {cid}', headers=self._HEADERS)
        if traverse_obj(video_info, ('data', 'need_login_subtitle')):
            self.report_warning(
                f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
        for s in traverse_obj(video_info, (
                'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
            })
        return subtitles
211 def _get_chapters(self, aid, cid):
212 chapters = aid and cid and self._download_json(
213 'https://api.bilibili.com/x/player/wbi/v2', aid, query={'aid': aid, 'cid': cid},
214 note='Extracting chapters', fatal=False, headers=self._HEADERS)
215 return traverse_obj(chapters, ('data', 'view_points', ..., {
216 'title': 'content',
217 'start_time': 'from',
218 'end_time': 'to',
219 })) or None
221 def _get_comments(self, aid):
222 for idx in itertools.count(1):
223 replies = traverse_obj(
224 self._download_json(
225 f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
226 aid, note=f'Extracting comments from page {idx}', fatal=False),
227 ('data', 'replies'))
228 if not replies:
229 return
230 for children in map(self._get_all_children, replies):
231 yield from children
233 def _get_all_children(self, reply):
234 yield {
235 'author': traverse_obj(reply, ('member', 'uname')),
236 'author_id': traverse_obj(reply, ('member', 'mid')),
237 'id': reply.get('rpid'),
238 'text': traverse_obj(reply, ('content', 'message')),
239 'timestamp': reply.get('ctime'),
240 'parent': reply.get('parent') or 'root',
242 for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
243 yield from children
245 def _get_episodes_from_season(self, ss_id, url):
246 season_info = self._download_json(
247 'https://api.bilibili.com/pgc/web/season/section', ss_id,
248 note='Downloading season info', query={'season_id': ss_id},
249 headers={'Referer': url, **self.geo_verification_headers()})
251 for entry in traverse_obj(season_info, (
252 'result', 'main_section', 'episodes',
253 lambda _, v: url_or_none(v['share_url']) and v['id'])):
254 yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
    def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
        """Recursively walk an interactive video's decision graph starting at *edge_id*.

        Mutates *edges* (edge_id -> {'title', 'cid', 'choices'}) and returns
        *cid_edges*, a mapping of cid -> {edge_id: edge info} collecting every
        reachable video section.
        """
        cid_edges = cid_edges or {}
        division_data = self._download_json(
            'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
            query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
            note=f'Extracting divisions from edge {edge_id}')
        # Fill in this edge's own title/cid from the story list entry matching it
        edges.setdefault(edge_id, {}).update(
            traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
                'title': ('title', {str}),
                'cid': ('cid', {int_or_none}),
            }), get_all=False))

        # Attach the outgoing choices (next edges) offered at this point
        edges[edge_id].update(traverse_obj(division_data, ('data', {
            'title': ('title', {str}),
            'choices': ('edges', 'questions', ..., 'choices', ..., {
                'edge_id': ('id', {int_or_none}),
                'cid': ('cid', {int_or_none}),
                'text': ('option', {str}),
            }),
        })))
        # use dict to combine edges that use the same video section (same cid)
        cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
        for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
            # only recurse into edges not seen yet, so cycles in the graph terminate
            if choice['edge_id'] not in edges:
                edges[choice['edge_id']] = {'cid': choice['cid']}
                self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
        return cid_edges
    def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
        """Yield one entry per distinct video section (cid) of an interactive video."""
        graph_version = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/wbi/v2', video_id,
                'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
            ('data', 'interaction', 'graph_version', {int_or_none}))
        # Start traversal from edge 1 (the entry point) with the root cid
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for cid, edges in cid_edges.items():
            play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
            yield {
                **metainfo,
                'id': f'{video_id}_{cid}',
                'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
                'formats': self.extract_formats(play_info),
                # embed the edge graph as JSON so the branching structure isn't lost
                'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'subtitles': self.extract_subtitles(video_id, cid),
            }
304 class BiliBiliIE(BilibiliBaseIE):
305 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
307 _TESTS = [{
308 'url': 'https://www.bilibili.com/video/BV13x41117TL',
309 'info_dict': {
310 'id': 'BV13x41117TL',
311 'title': '阿滴英文|英文歌分享#6 "Closer',
312 'ext': 'mp4',
313 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
314 'uploader_id': '65880958',
315 'uploader': '阿滴英文',
316 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
317 'duration': 554.117,
318 'tags': list,
319 'comment_count': int,
320 'upload_date': '20170301',
321 'timestamp': 1488353834,
322 'like_count': int,
323 'view_count': int,
324 '_old_archive_ids': ['bilibili 8903802_part1'],
326 }, {
327 'note': 'old av URL version',
328 'url': 'http://www.bilibili.com/video/av1074402/',
329 'info_dict': {
330 'id': 'BV11x411K7CN',
331 'ext': 'mp4',
332 'title': '【金坷垃】金泡沫',
333 'uploader': '菊子桑',
334 'uploader_id': '156160',
335 'duration': 308.36,
336 'upload_date': '20140420',
337 'timestamp': 1397983878,
338 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
339 'like_count': int,
340 'comment_count': int,
341 'view_count': int,
342 'tags': list,
343 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
344 '_old_archive_ids': ['bilibili 1074402_part1'],
346 'params': {'skip_download': True},
347 }, {
348 'note': 'Anthology',
349 'url': 'https://www.bilibili.com/video/BV1bK411W797',
350 'info_dict': {
351 'id': 'BV1bK411W797',
352 'title': '物语中的人物是如何吐槽自己的OP的',
354 'playlist_count': 18,
355 'playlist': [{
356 'info_dict': {
357 'id': 'BV1bK411W797_p1',
358 'ext': 'mp4',
359 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
360 'tags': 'count:10',
361 'timestamp': 1589601697,
362 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
363 'uploader': '打牌还是打桩',
364 'uploader_id': '150259984',
365 'like_count': int,
366 'comment_count': int,
367 'upload_date': '20200516',
368 'view_count': int,
369 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
370 'duration': 90.314,
371 '_old_archive_ids': ['bilibili 498159642_part1'],
374 }, {
375 'note': 'Specific page of Anthology',
376 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
377 'info_dict': {
378 'id': 'BV1bK411W797_p1',
379 'ext': 'mp4',
380 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
381 'tags': 'count:10',
382 'timestamp': 1589601697,
383 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
384 'uploader': '打牌还是打桩',
385 'uploader_id': '150259984',
386 'like_count': int,
387 'comment_count': int,
388 'upload_date': '20200516',
389 'view_count': int,
390 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
391 'duration': 90.314,
392 '_old_archive_ids': ['bilibili 498159642_part1'],
394 }, {
395 'url': 'https://www.bilibili.com/video/av8903802/',
396 'info_dict': {
397 'id': 'BV13x41117TL',
398 'ext': 'mp4',
399 'title': '阿滴英文|英文歌分享#6 "Closer',
400 'upload_date': '20170301',
401 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
402 'timestamp': 1488353834,
403 'uploader_id': '65880958',
404 'uploader': '阿滴英文',
405 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
406 'duration': 554.117,
407 'tags': list,
408 'comment_count': int,
409 'view_count': int,
410 'like_count': int,
411 '_old_archive_ids': ['bilibili 8903802_part1'],
413 'params': {
414 'skip_download': True,
416 }, {
417 'note': 'video has chapter',
418 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
419 'info_dict': {
420 'id': 'BV1vL411G7N7',
421 'ext': 'mp4',
422 'title': '如何为你的B站视频添加进度条分段',
423 'timestamp': 1634554558,
424 'upload_date': '20211018',
425 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
426 'tags': list,
427 'uploader': '爱喝咖啡的当麻',
428 'duration': 669.482,
429 'uploader_id': '1680903',
430 'chapters': 'count:6',
431 'comment_count': int,
432 'view_count': int,
433 'like_count': int,
434 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
435 '_old_archive_ids': ['bilibili 463665680_part1'],
437 'params': {'skip_download': True},
438 }, {
439 'note': 'video redirects to festival page',
440 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
441 'info_dict': {
442 'id': 'BV1wP4y1P72h',
443 'ext': 'mp4',
444 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
445 'timestamp': 1643947497,
446 'upload_date': '20220204',
447 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
448 'uploader': '叨叨冯聊音乐',
449 'duration': 246.719,
450 'uploader_id': '528182630',
451 'view_count': int,
452 'like_count': int,
453 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
454 '_old_archive_ids': ['bilibili 893839363_part1'],
456 }, {
457 'note': 'newer festival video',
458 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
459 'info_dict': {
460 'id': 'BV1ay4y1d77f',
461 'ext': 'mp4',
462 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
463 'timestamp': 1674273600,
464 'upload_date': '20230121',
465 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
466 'uploader': '果蝇轰',
467 'duration': 1111.722,
468 'uploader_id': '8469526',
469 'view_count': int,
470 'like_count': int,
471 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
472 '_old_archive_ids': ['bilibili 778246196_part1'],
474 }, {
475 'note': 'legacy flv/mp4 video',
476 'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
477 'info_dict': {
478 'id': 'BV1ms411Q7vw_p4',
479 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
480 'timestamp': 1458222815,
481 'upload_date': '20160317',
482 'description': '云南方言快乐生产线出品',
483 'duration': float,
484 'uploader': '一笑颠天',
485 'uploader_id': '3916081',
486 'view_count': int,
487 'comment_count': int,
488 'like_count': int,
489 'tags': list,
490 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
491 '_old_archive_ids': ['bilibili 4120229_part4'],
493 'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
494 'playlist_count': 19,
495 'playlist': [{
496 'info_dict': {
497 'id': 'BV1ms411Q7vw_p4_0',
498 'ext': 'flv',
499 'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
500 'duration': 399.102,
503 }, {
504 'note': 'legacy mp4-only video',
505 'url': 'https://www.bilibili.com/video/BV1nx411u79K',
506 'info_dict': {
507 'id': 'BV1nx411u79K',
508 'ext': 'mp4',
509 'title': '【练习室】201603声乐练习《No Air》with VigoVan',
510 'timestamp': 1508893551,
511 'upload_date': '20171025',
512 'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
513 'duration': 80.384,
514 'uploader': '伯远',
515 'uploader_id': '10584494',
516 'comment_count': int,
517 'view_count': int,
518 'like_count': int,
519 'tags': list,
520 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
521 '_old_archive_ids': ['bilibili 15700301_part1'],
523 }, {
524 'note': 'interactive/split-path video',
525 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
526 'info_dict': {
527 'id': 'BV1af4y1H7ga',
528 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
529 'timestamp': 1630500414,
530 'upload_date': '20210901',
531 'description': 'md5:01113e39ab06e28042d74ac356a08786',
532 'tags': list,
533 'uploader': '钉宫妮妮Ninico',
534 'duration': 1503,
535 'uploader_id': '8881297',
536 'comment_count': int,
537 'view_count': int,
538 'like_count': int,
539 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
540 '_old_archive_ids': ['bilibili 292734508_part1'],
542 'playlist_count': 33,
543 'playlist': [{
544 'info_dict': {
545 'id': 'BV1af4y1H7ga_400950101',
546 'ext': 'mp4',
547 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
548 'timestamp': 1630500414,
549 'upload_date': '20210901',
550 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
551 'tags': list,
552 'uploader': '钉宫妮妮Ninico',
553 'duration': 11.605,
554 'uploader_id': '8881297',
555 'comment_count': int,
556 'view_count': int,
557 'like_count': int,
558 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
559 '_old_archive_ids': ['bilibili 292734508_part1'],
562 }, {
563 'note': '301 redirect to bangumi link',
564 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
565 'info_dict': {
566 'id': '288525',
567 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
568 'ext': 'mp4',
569 'series': '我和我的祖国',
570 'series_id': '4780',
571 'season': '幕后纪实',
572 'season_id': '28609',
573 'season_number': 1,
574 'episode': '钱学森弹道和乘波体飞行器是什么?',
575 'episode_id': '288525',
576 'episode_number': 105,
577 'duration': 1183.957,
578 'timestamp': 1571648124,
579 'upload_date': '20191021',
580 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
582 }, {
583 'note': 'video has subtitles, which requires login',
584 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
585 'info_dict': {
586 'id': 'BV12N4y1M7rh',
587 'ext': 'mp4',
588 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
589 'tags': list,
590 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
591 'duration': 313.557,
592 'upload_date': '20220709',
593 'uploader': '小夫太渴',
594 'timestamp': 1657347907,
595 'uploader_id': '1326814124',
596 'comment_count': int,
597 'view_count': int,
598 'like_count': int,
599 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
600 'subtitles': 'count:2', # login required for CC subtitle
601 '_old_archive_ids': ['bilibili 898179753_part1'],
603 'params': {'listsubtitles': True},
604 'skip': 'login required for subtitle',
605 }, {
606 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
607 'info_dict': {
608 'id': 'BV1jL41167ZG',
609 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
610 'ext': 'mp4',
612 'skip': 'supporter-only video',
613 }, {
614 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
615 'info_dict': {
616 'id': 'BV1Ks411f7aQ',
617 'title': '【BD1080P】狼与香辛料I【华盟】',
618 'ext': 'mp4',
620 'skip': 'login required',
621 }, {
622 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
623 'info_dict': {
624 'id': 'BV1GJ411x7h7',
625 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
626 'ext': 'mp4',
628 'skip': 'geo-restricted',
629 }, {
630 'note': 'has - in the last path segment of the url',
631 'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
632 'only_matching': True,
    def _real_extract(self, url):
        video_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
        # Redirects (e.g. to a bangumi page) are delegated to the matching extractor
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        headers['Referer'] = url

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
            self.raise_login_required()
        if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
            raise ExtractorError(
                'This video may be deleted or geo-restricted. '
                'You might want to try a VPN or a proxy server (with --proxy)', expected=True)

        # Festival pages embed video info under a different key
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            video_data = initial_state['videoData']

        # Normalize to the canonical BV id (the URL may have used an av id)
        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = (not is_festival and traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology', headers=headers),
            'data', expected_type=list)) or []
        is_anthology = len(page_list_json) > 1

        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        # No specific part requested: offer the whole anthology as a playlist
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            part_id = part_id or 1
            title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        # Festival pages store uploader/stats in different locations
        festival_info = {}
        if is_festival:
            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        # Interactive ("stein gate") videos branch into multiple sections
        is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
        if is_interactive:
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
                duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                __post_extractor=self.extract_comments(aid))

        # Prefer the play info embedded in the page when logged in; otherwise query the API
        play_info = None
        if self.is_logged_in:
            play_info = traverse_obj(
                self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id, default=None),
                ('data', {dict}))
        if not play_info:
            play_info = self._download_playinfo(video_id, cid, headers=headers, query={'try_look': 1})
        formats = self.extract_formats(play_info)

        # Supporter-only ("upower") videos: fail or warn depending on what we got
        if video_data.get('is_upower_exclusive'):
            high_level = traverse_obj(initial_state, ('elecFullInfo', 'show_info', 'high_level', {dict})) or {}
            msg = f'{join_nonempty("title", "sub_title", from_dict=high_level, delim=",")}. {self._login_hint()}'
            if not formats:
                raise ExtractorError(f'This is a supporter-only video: {msg}', expected=True)
            if '试看' in traverse_obj(play_info, ('accept_description', ..., {str})):
                self.report_warning(
                    f'This is a supporter-only video, only the preview will be extracted: {msg}',
                    video_id=video_id)

        if not traverse_obj(play_info, 'dash'):
            # we only have legacy formats and need additional work
            has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
            for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
                formats.extend(traverse_obj(
                    self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, query={'qn': qn})),
                    lambda _, v: not has_qn(v['quality'])))
            self._check_missing_formats(play_info, formats)
            flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
            if flv_formats and len(flv_formats) < len(formats):
                # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
                if not self._configuration_arg('prefer_multi_flv'):
                    dropped_fmts = ', '.join(
                        f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
                    formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
                    if dropped_fmts:
                        self.to_screen(
                            f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
                            'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
                else:
                    formats = traverse_obj(
                        # XXX: Filtering by extractor-arg is for testing purposes
                        formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
                    ) or [max(flv_formats, key=lambda x: x['quality'])]

        if traverse_obj(formats, (0, 'fragments')):
            # We have flv formats, which are individual short videos with their own timestamps and metainfo
            # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
            return {
                **metainfo,
                '_type': 'multi_video',
                'entries': [{
                    'id': f'{metainfo["id"]}_{idx}',
                    'title': metainfo['title'],
                    'http_headers': metainfo['http_headers'],
                    'formats': [{
                        **fragment,
                        'format_id': formats[0].get('format_id'),
                    }],
                    # attach subtitles/comments only once, on the first fragment
                    'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
                    '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
                } for idx, fragment in enumerate(formats[0]['fragments'])],
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
            }

        return {
            **metainfo,
            'formats': formats,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'chapters': self._get_chapters(aid, cid),
            'subtitles': self.extract_subtitles(video_id, cid),
            '__post_extractor': self.extract_comments(aid),
        }
795 class BiliBiliBangumiIE(BilibiliBaseIE):
796 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
798 _TESTS = [{
799 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
800 'info_dict': {
801 'id': '21495',
802 'ext': 'mp4',
803 'series': '悠久之翼',
804 'series_id': '774',
805 'season': '第二季',
806 'season_id': '1182',
807 'season_number': 2,
808 'episode': 'forever/ef',
809 'episode_id': '21495',
810 'episode_number': 12,
811 'title': '12 forever/ef',
812 'duration': 1420.791,
813 'timestamp': 1320412200,
814 'upload_date': '20111104',
815 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
817 }, {
818 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
819 'info_dict': {
820 'id': '267851',
821 'ext': 'mp4',
822 'series': '鬼灭之刃',
823 'series_id': '4358',
824 'season': '立志篇',
825 'season_id': '26801',
826 'season_number': 1,
827 'episode': '残酷',
828 'episode_id': '267851',
829 'episode_number': 1,
830 'title': '1 残酷',
831 'duration': 1425.256,
832 'timestamp': 1554566400,
833 'upload_date': '20190406',
834 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
836 'skip': 'Geo-restricted',
837 }, {
838 'note': 'a making-of which falls outside main section',
839 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
840 'info_dict': {
841 'id': '345120',
842 'ext': 'mp4',
843 'series': '鬼灭之刃',
844 'series_id': '4358',
845 'season': '立志篇',
846 'season_id': '26801',
847 'season_number': 1,
848 'episode': '炭治郎篇',
849 'episode_id': '345120',
850 'episode_number': 27,
851 'title': '#1 炭治郎篇',
852 'duration': 1922.129,
853 'timestamp': 1602853860,
854 'upload_date': '20201016',
855 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
    def _real_extract(self, url):
        episode_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage = self._download_webpage(url, episode_id, headers=headers)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        headers['Referer'] = url

        # Prefer play info embedded in the page (SSR data); fall back to the playurl API
        play_info = (
            self._search_json(
                r'playurlSSRData\s*=', webpage, 'embedded page info', episode_id,
                end_pattern='\n', default=None)
            or self._download_json(
                'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
                'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
                headers=headers))

        # code -10403 marks premium-only content
        premium_only = play_info.get('code') == -10403
        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
            self.raise_login_required('This video is for premium members only')

        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        # Locate this episode (and its 1-based index) across main and extra sections
        episode_number, episode_info = next((
            (idx, ep) for idx, ep in enumerate(traverse_obj(
                bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
            if str_or_none(ep.get('id')) == episode_id), (1, {}))

        season_id = bangumi_info.get('season_id')
        # NOTE(review): if season_id is falsy, `season_id and next(...)` yields the falsy
        # value and the tuple unpack would raise TypeError — presumably the API always
        # returns a season_id here; confirm before relying on this path
        season_number, season_title = season_id and next((
            (idx + 1, e.get('season_title')) for idx, e in enumerate(
                traverse_obj(bangumi_info, ('seasons', ...)))
            if e.get('season_id') == season_id
        ), (None, None))

        aid = episode_info.get('aid')

        return {
            'id': episode_id,
            'formats': formats,
            **traverse_obj(bangumi_info, {
                'series': ('series', 'series_title', {str}),
                'series_id': ('series', 'series_id', {str_or_none}),
                'thumbnail': ('square_cover', {url_or_none}),
            }),
            **traverse_obj(episode_info, {
                'episode': ('long_title', {str}),
                # fall back to the positional index when the title is not a number
                'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
                'timestamp': ('pub_time', {int_or_none}),
                'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
            }),
            'episode_id': episode_id,
            'season': str_or_none(season_title),
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': {'Referer': url},
        }
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
            'title': 'CAROLE & TUESDAY',
            'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.bilibili.com/bangumi/media/md1565/',
        'info_dict': {
            'id': '1565',
            'title': '攻壳机动队 S.A.C. 2nd GIG',
            'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
        },
        'playlist_count': 26,
        'playlist': [{
            'info_dict': {
                'id': '68540',
                'ext': 'mp4',
                'series': '攻壳机动队',
                'series_id': '1077',
                'season': '第二季',
                'season_id': '1565',
                'season_number': 2,
                'episode': '再启动 REEMBODY',
                'episode_id': '68540',
                'episode_number': 1,
                'title': '1 再启动 REEMBODY',
                'duration': 1525.777,
                'timestamp': 1425074413,
                'upload_date': '20150227',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }]

    def _real_extract(self, url):
        """Resolve a media (md) page to its season and return the episode playlist."""
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)
        state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
        # The md page embeds the real season id; episodes are fetched per season
        season_id = state['mediaInfo']['season_id']
        playlist_meta = traverse_obj(state, ('mediaInfo', {
            'title': ('title', {str}),
            'description': ('evaluate', {str}),
        }))
        return self.playlist_result(
            self._get_episodes_from_season(season_id, url), media_id, **playlist_meta)
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801',
            'title': '鬼灭之刃',
            'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
        },
        'playlist_mincount': 26,
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ss2251',
        'info_dict': {
            'id': '2251',
            'title': '玲音',
            'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
        },
        'playlist_count': 13,
        'playlist': [{
            'info_dict': {
                'id': '50188',
                'ext': 'mp4',
                'series': '玲音',
                'series_id': '1526',
                'season': 'TV',
                'season_id': '2251',
                'season_number': 1,
                'episode': 'WEIRD',
                'episode_id': '50188',
                'episode_number': 1,
                'title': '1 WEIRD',
                'duration': 1436.992,
                'timestamp': 1343185080,
                'upload_date': '20120725',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }]

    def _real_extract(self, url):
        """Return the episode playlist for a season (ss) page."""
        ss_id = self._match_id(url)
        webpage = self._download_webpage(url, ss_id)
        # Title/description come from the page's ld+json breadcrumb structure
        ld_json = self._search_json(
            r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id)
        metainfo = traverse_obj(ld_json, ('itemListElement', ..., {
            'title': ('name', {str}),
            'description': ('description', {str}),
        }), get_all=False)
        entries = self._get_episodes_from_season(ss_id, url)
        return self.playlist_result(entries, ss_id, **metainfo)
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Shared logic for Bilibili 'cheese' (paid course) extractors."""

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for one course episode out of the season payload."""
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        aid, cid = episode_info['aid'], episode_info['cid']

        # ep_status == -1 marks not-yet-released episodes
        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Set explicitly so season-playlist entries are attributed to the episode IE
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                # Display title is "<index> - <title>" when both are present
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Fetch the course season payload; query_key is 'ep_id' or 'season_id'."""
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract a single paid-course episode by its ep id."""
        ep_id = self._match_id(url)
        season_info = self._download_season_info('ep_id', ep_id)
        return self._extract_episode(season_info, ep_id)
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            },
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        """Yield only the episodes the current account is allowed to view."""
        viewable = traverse_obj(
            season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
        for episode_id in viewable:
            yield self._extract_episode(season_info, episode_id)

    def _real_extract(self, url):
        """Extract a whole course season as a playlist."""
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)
        playlist_meta = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })
        return self.playlist_result(
            self._get_cheese_entries(season_info), season_id, **playlist_meta)
class BilibiliSpaceBaseIE(BilibiliBaseIE):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Fetch page 0 eagerly for metadata, then page lazily.

        Returns (metadata, paged_list); metadata must contain
        'page_count' and 'page_size'.
        """
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def resolve_page(page_idx):
            # Reuse the already-downloaded first page for index 0
            return get_entries(fetch_page(page_idx) if page_idx else initial_page)

        return metadata, InAdvancePagedList(
            resolve_page, metadata['page_count'], metadata['page_size'])
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Extract all videos uploaded by a user (space.bilibili.com/<id>/video)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
        'skip': 'login required',
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        def fetch_page(page_idx):
            """Download one page of the space's video list (WBI-signed query)."""
            query = {
                'keyword': '',
                'mid': playlist_id,
                # Sort order can be overridden via ?order= in the input URL
                'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
            }

            try:
                response = self._download_json(
                    'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
                    query=self._sign_wbi(query, playlist_id),
                    note=f'Downloading space page {page_idx}', headers={'Referer': url})
            except ExtractorError as e:
                # HTTP 412 is Bilibili's anti-bot block
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
            status_code = response['code']
            if status_code == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            elif status_code == -352 and not self.is_logged_in:
                # -352 without login usually means an anonymous-access rejection
                self.raise_login_required('Request is rejected, you need to login to access playlist')
            elif status_code != 0:
                raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
            return response['data']

        def get_metadata(page_data):
            """Compute page_count/page_size from the first page's paging info."""
            page_size = page_data['page']['ps']
            entry_count = page_data['page']['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Extract all audio uploads of a user space as a playlist."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # Audio listing endpoint; pn is 1-based
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(data):
            # This endpoint reports paging directly, no math needed
            return {'page_count': data['pageCount'], 'page_size': data['pageSize']}

        def get_entries(data):
            for entry in data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, entries = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(entries, playlist_id)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield url_results for every BV id found under bvid_keys in page_data."""
        path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for bvid in traverse_obj(page_data, path):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Best-effort scrape of the uploader name from the space page <title>."""
        space_page = self._download_webpage(
            f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(
            r'(?s)<title\b[^>]*>([^<]+)的个人空间-', space_page, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Same as the base class, but paging keys are stripped from metadata."""
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        for paging_key in ('page_count', 'page_size'):
            metadata.pop(paging_key, None)
        return metadata, page_list
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Extract a user's video collection (channel/collectiondetail?sid=...)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        # Composite id: uploader mid + collection sid
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            """Paging info plus collection metadata from the first page."""
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                }),
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Extract a user's video series (channel/seriesdetail?sid=...)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        # Composite id: uploader mid + series sid
        playlist_id = f'{mid}_{sid}'
        # Series metadata comes from a separate endpoint; best-effort (fatal=False)
        playlist_meta = traverse_obj(self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
        ), {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']

        def get_metadata(page_data):
            """Paging info plus the pre-fetched series metadata."""
            page_size = page_data['page']['size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta,
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Extract a favorites ('favlist') folder as a playlist."""

    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        fid = self._match_id(url)

        # First request is only used for metadata and the private-list check
        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The /ids endpoint returns all entries at once (no paging needed)
        entries = self._get_entries(self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries'), 'data')

        return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        })))
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {
            'id': r're:\d+',
            'title': '稍后再看',
        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Extract the logged-in user's watch-later list."""
        # Use the DedeUserID cookie value as playlist id when available
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = user_cookie.value if user_cookie is not None else 'watchlater'
        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')
        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')),
            id=list_id, title='稍后再看')
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Extract medialist/list playlists (series, favlists, watchlater) by id."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
        'info_dict': {
            'id': 'BV1DU4y1r7tz',
            'ext': 'mp4',
            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
            'upload_date': '20220820',
            'description': '',
            'timestamp': 1661016330,
            'uploader_id': '1958703906',
            'uploader': '靡烟miya',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 9552.903,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            '_old_archive_ids': ['bilibili 687146339_part1'],
        },
        'params': {'noplaylist': True},
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {
            'id': r're:2_\d+',
            'title': '稍后再看',
            'uploader': str,
            'uploader_id': str,
        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'redirect url & login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Page through the medialist resource API, cursoring on the last oid."""
        for page_num in itertools.count(1):
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            # Cursor-style paging: next page starts after the last returned id
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break

    def _real_extract(self, url):
        list_id = self._match_id(url)

        # ?bvid= pins a single video; honor --no-playlist in that case
        bvid = traverse_obj(parse_qs(url), ('bvid', 0))
        if not self._yes_playlist(list_id, bvid):
            return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)

        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
        if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        query = {
            'ps': 20,
            'with_current': False,
            **traverse_obj(initial_state, {
                'type': ('playlist', 'type', {int_or_none}),
                'biz_id': ('playlist', 'id', {int_or_none}),
                'tid': ('tid', {int_or_none}),
                # 'sortFiled' is the site's own (misspelled) key name
                'sort_field': ('sortFiled', {int_or_none}),
                # API expects the literal strings 'true'/'false'
                'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
            }),
        }
        metadata = {
            # Playlist id is '<type>_<biz_id>' (e.g. 3_* favlist, 5_* series)
            'id': f'{query["type"]}_{query["biz_id"]}',
            **traverse_obj(initial_state, ('mediaListInfo', {
                'title': ('title', {str}),
                'uploader': ('upper', 'name', {str}),
                'uploader_id': ('upper', 'mid', {str_or_none}),
                'timestamp': ('ctime', {int_or_none}, filter),
                'thumbnail': ('cover', {url_or_none}),
            })),
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
class BilibiliCategoryIE(InfoExtractor):
    """Extract all videos of a category/subcategory listing page."""

    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    # NOTE: redundant '\/' escape removed; matching behavior is unchanged
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad',
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45,
        },
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield url_results for a single page of the category listing."""
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note=f'Extracting results from page {page_num} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {page_num}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return a lazily-paged list of all entries in the given subcategory."""
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127,
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # Guard against a missing 'data.page' object so we raise the intended
        # ExtractorError below instead of an AttributeError on None
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        # URL path is /v/<category>/<subcategory>
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
class BiliBiliSearchIE(SearchInfoExtractor):
    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    _TESTS = [{
        'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        'playlist_count': 3,
        'info_dict': {
            'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
            'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        },
        'playlist': [{
            'info_dict': {
                'id': 'BV1n44y1Q7sc',
                'ext': 'mp4',
                'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
                'timestamp': 1669889987,
                'upload_date': '20221201',
                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
                'tags': list,
                'uploader': '靡烟miya',
                'duration': 123.156,
                'uploader_id': '1958703906',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 988222410_part1'],
            },
        }],
    }]

    def _search_results(self, query):
        # The search API refuses requests without a buvid3 cookie; fabricate one
        if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
        for page_num in itertools.count(1):
            videos = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query={
                    'Search_key': query,
                    'keyword': query,
                    'page': page_num,
                    'context': '',
                    'duration': 0,
                    'tids_2': '',
                    '__refresh__': 'true',
                    'search_type': 'video',
                    'tids': 0,
                    'highlight': 1,
                })['data'].get('result')
            # An empty result list marks the end of the search results
            if not videos:
                break
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
class BilibiliAudioBaseIE(InfoExtractor):
    def _call_api(self, path, sid, query=None):
        """Call the audio music-service web API and return its 'data' payload."""
        response = self._download_json(
            'https://www.bilibili.com/audio/music-service-c/web/' + path,
            sid, query=query or {'sid': sid})
        return response['data']
class BilibiliAudioIE(BilibiliAudioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        au_id = self._match_id(url)

        # The 'url' endpoint returns direct CDN links for the audio file
        play_data = self._call_api('url', au_id)
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
        }]

        # CDN requires a Referer header on media requests
        for a_format in formats:
            a_format.setdefault('http_headers', {}).update({
                'Referer': url,
            })

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        subtitles = None
        # Lyrics are exposed as an LRC file and mapped to an 'origin' subtitle track
        lyric = song.get('lyric')
        if lyric:
            subtitles = {
                'origin': [{
                    'url': lyric,
                }],
            }

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Extract an audio album (am) as a playlist of its songs."""
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        entries = []
        for song in songs:
            song_id = str_or_none(song.get('id'))
            if song_id:  # skip entries without a usable id
                entries.append(self.url_result(
                    'https://www.bilibili.com/audio/au' + song_id,
                    BilibiliAudioIE.ie_key(), song_id))

        if not entries:
            return self.playlist_result(entries, am_id)

        # Album metadata is only fetched when there is something to tag
        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if album_title:
            for entry in entries:
                entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))
class BiliBiliPlayerIE(InfoExtractor):
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Redirect an embedded-player URL to the canonical av-number page."""
        aid = self._match_id(url)
        canonical_url = f'http://www.bilibili.tv/video/av{aid}/'
        return self.url_result(canonical_url, ie=BiliBiliIE.ie_key(), video_id=aid)
class BiliIntlBaseIE(InfoExtractor):
    """Shared helpers for the international site (bilibili.tv / biliintl.com)."""
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'
    _HEADERS = {'Referer': 'https://www.bilibili.tv/'}

    def _call_api(self, endpoint, *args, **kwargs):
        """Call the intl gateway API and return its 'data' payload.

        Login/geo-restriction API codes raise the corresponding errors;
        other non-zero codes raise or warn depending on kwargs['fatal'].
        """
        # Renamed from `json` to avoid shadowing the module-level json import
        response = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if response.get('code'):
            if response['code'] in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif response['code'] == 10004001:
                self.raise_geo_restricted()
            else:
                if response.get('message') and str(response['code']) != response['message']:
                    errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {response["message"]}'
                else:
                    errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                else:
                    self.report_warning(errmsg)
        return response.get('data')

    def json2srt(self, json):
        """Convert bilibili's JSON subtitle format to SRT text.

        Cues with empty content or missing timestamps are dropped.
        """
        return '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(traverse_obj(json, (
                'body', lambda _, l: l['content'] and l['from'] and l['to']))))

    def _get_subtitles(self, *, ep_id=None, aid=None):
        """Download and normalize subtitles for an episode or UGC video."""
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        # The same URL can appear under several keys; download each only once
        fetched_urls = set()
        for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
            for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
                if url in fetched_urls:
                    continue
                fetched_urls.add(url)
                sub_ext = determine_ext(url)
                sub_lang = sub.get('lang_key') or 'en'

                if sub_ext == 'ass':
                    subtitles.setdefault(sub_lang, []).append({
                        'ext': 'ass',
                        'url': url,
                    })
                elif sub_ext == 'json':
                    # JSON subtitles are converted to SRT locally
                    sub_data = self._download_json(
                        url, ep_id or aid, fatal=False,
                        note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
                        errnote='Unable to download subtitles')

                    if sub_data:
                        subtitles.setdefault(sub_lang, []).append({
                            'ext': 'srt',
                            'data': self.json2srt(sub_data),
                        })
                else:
                    self.report_warning('Unexpected subtitle extension', ep_id or aid)

        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Extract separate video-only and audio-only formats from /web/playurl."""
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Map API video data to info-dict fields; episode number comes from the title."""
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'description': video_data.get('desc'),
            'thumbnail': video_data.get('cover'),
            'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in with an RSA-encrypted password (requires pycryptodomex)."""
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        # The server-provided hash is prepended to the password before encryption
        public_key = Cryptodome.RSA.importKey(key_data['key'])
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
            data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true',
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to log in')
class BiliIntlIE(BiliIntlBaseIE):
    # Single episodes (play/<season>/<ep>) and user-generated videos (video/<aid>)
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro',
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro',
            }],
        },
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro',
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro',
            }],
        },
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro',
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro',
            }],
        },
        'params': {
            'getcomments': True,
        },
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
            'comment_count': int,
            'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
        },
        'params': {
            'getcomments': True,
        },
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Build the canonical bilibili.tv URL for an episode or UGC video."""
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Collect metadata from smuggled data, the webpage, or the episode list API."""
        url, smuggled_data = unsmuggle_url(url, {})
        # The series extractor may smuggle already-known metadata into the URL
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
            ), expected_type=dict, get_all=False)

        # XXX: webpage metadata may not be accurate; it is only used as a
        # fallback to avoid crashing when video_data was not found
        return merge_dicts(
            self._parse_video_metadata(video_data), {
                'title': get_element_by_class(
                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                'description': get_element_by_class(
                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
            }, self._search_json_ld(webpage, video_id, default={}))

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Recursively yield all replies to the comment with id `root_id`."""
        comment_api_raw_data = self._download_json(
            'https://api.bilibili.tv/reply/web/detail', display_id,
            note=f'Downloading reply comment of {root_id} - {next_id}',
            query={
                'platform': 'web',
                'ps': 20,  # comment's reply per page (default: 3)
                'root': root_id,
                'next': next_id,
            })

        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
            yield {
                'author': traverse_obj(replies, ('member', 'name')),
                'author_id': traverse_obj(replies, ('member', 'mid')),
                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                'text': traverse_obj(replies, ('content', 'message')),
                'id': replies.get('rpid'),
                'like_count': int_or_none(replies.get('like_count')),
                'parent': replies.get('parent'),
                'timestamp': unified_timestamp(replies.get('ctime_text')),
            }

        # Recurse with the next cursor until the API reports the end
        if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
            yield from self._get_comments_reply(
                root_id, comment_api_raw_data['data']['cursor']['next'], display_id)

    def _get_comments(self, video_id, ep_id):
        """Yield top-level comments (and their reply threads) page by page."""
        for i in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {i + 1}',
                query={
                    'platform': 'web',
                    'pn': i,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'timestamp': unified_timestamp(replies.get('ctime_text')),
                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
                }
                if replies.get('count'):
                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)

            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                break

    def _real_extract(self, url):
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            # Intro/outro skip markers are only available for licensed episodes
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start and end times seem to be off by a few seconds
                # even though this matches what ogv.*.js does
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                chapters = [{
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro',
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro',
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id),
            'http_headers': self._HEADERS,
        }
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Playlist extractor for whole seasons on bilibili.tv / biliintl.com."""
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        # Walk every section of the season; smuggle the metadata we already
        # have so BiliIntlIE can skip a webpage download per episode
        episodes_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(episodes_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            ep_id = str(episode['episode_id'])
            smuggled = smuggle_url(
                BiliIntlIE._make_url(ep_id, series_id),
                self._parse_video_metadata(episode),
            )
            yield self.url_result(smuggled, BiliIntlIE, ep_id)

    def _real_extract(self, url):
        series_id = self._match_id(url)
        season = self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
        return self.playlist_result(
            self._entries(series_id), series_id, season.get('title'), season.get('description'),
            categories=traverse_obj(season, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(season.get('horizontal_cover')), view_count=parse_count(season.get('view')))
class BiliLiveIE(InfoExtractor):
    # Live rooms on live.bilibili.com; /blanc/ is the ad-free room layout
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
            'ext': 'flv',
            'title': '太空狼人杀联动,不被爆杀就算赢',
            'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
            'timestamp': 1650802769,
        },
        'skip': 'not live',
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True,
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True,
    }]
    # qn (quality number) → yt-dlp format metadata, ordered worst to best;
    # _quality maps a qn onto a sortable preference
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }
    _quality = staticmethod(qualities(list(_FORMATS)))
2370 def _call_api(self, path, room_id, query):
2371 api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
2372 if api_result.get('code') != 0:
2373 raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
2374 return api_result.get('data') or {}
2376 def _parse_formats(self, qn, fmt):
2377 for codec in fmt.get('codec') or []:
2378 if codec.get('current_qn') != qn:
2379 continue
2380 for url_info in codec['url_info']:
2381 yield {
2382 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2383 'ext': fmt.get('format_name'),
2384 'vcodec': codec.get('codec_name'),
2385 'quality': self._quality(qn),
2386 **self._FORMATS[qn],
2389 def _real_extract(self, url):
2390 room_id = self._match_id(url)
2391 room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
2392 if room_data.get('live_status') == 0:
2393 raise ExtractorError('Streamer is not live', expected=True)
2395 formats = []
2396 for qn in self._FORMATS:
2397 stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
2398 'room_id': room_id,
2399 'qn': qn,
2400 'codec': '0,1',
2401 'format': '0,2',
2402 'mask': '0',
2403 'no_playurl': '0',
2404 'platform': 'web',
2405 'protocol': '0,1',
2407 for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2408 formats.extend(self._parse_formats(qn, fmt))
2410 return {
2411 'id': room_id,
2412 'title': room_data.get('title'),
2413 'description': room_data.get('description'),
2414 'thumbnail': room_data.get('user_cover'),
2415 'timestamp': stream_data.get('live_time'),
2416 'formats': formats,
2417 'is_live': True,
2418 'http_headers': {
2419 'Referer': url,