# [ie/bilibili] Fix festival URL support (#10740)
# Source: yt_dlp/extractor/bilibili.py
# blob 3163df8ab7122045f2570c46ff83b94d2ab42b14
import base64
import functools
import hashlib
import itertools
import json
import math
import re
import time
import urllib.parse
import uuid

from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    GeoRestrictedError,
    InAdvancePagedList,
    OnDemandPagedList,
    bool_or_none,
    clean_html,
    determine_ext,
    filter_dict,
    float_or_none,
    format_field,
    get_element_by_class,
    int_or_none,
    join_nonempty,
    make_archive_id,
    merge_dicts,
    mimetype2ext,
    parse_count,
    parse_qs,
    parse_resolution,
    qualities,
    smuggle_url,
    srt_subtitles_timecode,
    str_or_none,
    traverse_obj,
    unified_timestamp,
    unsmuggle_url,
    url_or_none,
    urlencode_postdata,
    variadic,
)
class BilibiliBaseIE(InfoExtractor):
    """Shared helpers for all Bilibili extractors: format extraction,
    WBI request signing, subtitles, chapters, comments and interactive
    (stein-gate) video traversal."""

    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
    _WBI_KEY_CACHE_TIMEOUT = 30  # exact expire timeout is unclear, use 30s for one session
    _wbi_key_cache = {}

    @property
    def is_logged_in(self):
        # A SESSDATA cookie for api.bilibili.com indicates a logged-in session
        return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))

    def _check_missing_formats(self, play_info, formats):
        """Warn about qualities advertised in play_info that were not extracted
        (usually login/premium-gated)."""
        parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
        missing_formats = join_nonempty(*[
            traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
            for fmt in traverse_obj(play_info, (
                'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
        if missing_formats:
            self.to_screen(
                f'Format(s) {missing_formats} are missing; you have to login or '
                f'become a premium member to download them. {self._login_hint()}')

    def extract_formats(self, play_info):
        """Build the yt-dlp format list from a playurl API response.

        Handles DASH audio (incl. dolby/flac), DASH video, and the legacy
        `durl` flv/mp4 fragment formats."""
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            'acodec': 'none' if audios else None,
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

        if formats:
            self._check_missing_formats(play_info, formats)

        # Legacy (non-DASH) responses carry one quality as a list of fragments
        fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
            'url': ('url', {url_or_none}),
            'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
            'filesize': ('size', {int_or_none}),
        }))
        if fragments:
            formats.append({
                'url': fragments[0]['url'],
                'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
                **({
                    'fragments': fragments,
                    'protocol': 'http_dash_segments',
                } if len(fragments) > 1 else {}),
                **traverse_obj(play_info, {
                    'quality': ('quality', {int_or_none}),
                    'format_id': ('quality', {str_or_none}),
                    'format_note': ('quality', {lambda x: format_names.get(x)}),
                    'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
                }),
                **parse_resolution(format_names.get(play_info.get('quality'))),
            })
        return formats

    def _get_wbi_key(self, video_id):
        """Fetch (and cache) the mixin key used to sign WBI API requests."""
        if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
            return self._wbi_key_cache['key']

        session_data = self._download_json(
            'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')

        lookup = ''.join(traverse_obj(session_data, (
            'data', 'wbi_img', ('img_url', 'sub_url'),
            {lambda x: x.rpartition('/')[2].partition('.')[0]})))

        # from getMixinKey() in the vendor js
        mixin_key_enc_tab = [
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
            33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
            61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
            36, 20, 34, 44, 52,
        ]

        self._wbi_key_cache.update({
            'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
            'ts': time.time(),
        })
        return self._wbi_key_cache['key']

    def _sign_wbi(self, params, video_id):
        """Sign query params with the WBI scheme: strip `!'()*`, sort keys,
        and append an md5-based `w_rid` token."""
        params['wts'] = round(time.time())
        params = {
            k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
            for k, v in sorted(params.items())
        }
        query = urllib.parse.urlencode(params)
        params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
        return params

    def _download_playinfo(self, bvid, cid, headers=None, qn=None):
        """Download the WBI-signed playurl data for one video part (cid)."""
        params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
        if qn:
            params['qn'] = qn
        return self._download_json(
            'https://api.bilibili.com/x/player/wbi/playurl', bvid,
            query=self._sign_wbi(params, bvid), headers=headers,
            note=f'Downloading video formats for cid {cid} {qn or ""}')['data']

    def json2srt(self, json_data):
        """Convert Bilibili's JSON subtitle format to SRT text."""
        srt_data = ''
        for idx, line in enumerate(json_data.get('body') or []):
            srt_data += (f'{idx + 1}\n'
                         f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
                         f'{line["content"]}\n\n')
        return srt_data

    def _get_subtitles(self, video_id, cid, aid=None):
        """Return danmaku XML plus any closed-caption subtitle tracks."""
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }],
        }

        video_info = self._download_json(
            'https://api.bilibili.com/x/player/v2', video_id,
            query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
            note=f'Extracting subtitle info {cid}')
        if traverse_obj(video_info, ('data', 'need_login_subtitle')):
            self.report_warning(
                f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
        for s in traverse_obj(video_info, (
                'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
            })
        return subtitles

    def _get_chapters(self, aid, cid):
        """Return chapter list from the player view_points, or None."""
        chapters = aid and cid and self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)
        return traverse_obj(chapters, ('data', 'view_points', ..., {
            'title': 'content',
            'start_time': 'from',
            'end_time': 'to',
        })) or None

    def _get_comments(self, aid):
        """Yield all comments (including nested replies), paginating until empty."""
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    aid, note=f'Extracting comments from page {idx}', fatal=False),
                ('data', 'replies'))
            if not replies:
                return
            for children in map(self._get_all_children, replies):
                yield from children

    def _get_all_children(self, reply):
        """Depth-first yield of a comment and all of its nested replies."""
        yield {
            'author': traverse_obj(reply, ('member', 'uname')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'id': reply.get('rpid'),
            'text': traverse_obj(reply, ('content', 'message')),
            'timestamp': reply.get('ctime'),
            'parent': reply.get('parent') or 'root',
        }
        for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
            yield from children

    def _get_episodes_from_season(self, ss_id, url):
        """Yield url_results for every episode in a season's main section."""
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))

    def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
        """Recursively walk an interactive video's choice graph, collecting
        edges grouped by the cid (video section) they play."""
        cid_edges = cid_edges or {}
        division_data = self._download_json(
            'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
            query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
            note=f'Extracting divisions from edge {edge_id}')
        edges.setdefault(edge_id, {}).update(
            traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
                'title': ('title', {str}),
                'cid': ('cid', {int_or_none}),
            }), get_all=False))

        edges[edge_id].update(traverse_obj(division_data, ('data', {
            'title': ('title', {str}),
            'choices': ('edges', 'questions', ..., 'choices', ..., {
                'edge_id': ('id', {int_or_none}),
                'cid': ('cid', {int_or_none}),
                'text': ('option', {str}),
            }),
        })))
        # use dict to combine edges that use the same video section (same cid)
        cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
        for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
            if choice['edge_id'] not in edges:
                edges[choice['edge_id']] = {'cid': choice['cid']}
                self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
        return cid_edges

    def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
        """Yield one entry per distinct video section of an interactive video."""
        graph_version = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/wbi/v2', video_id,
                'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
            ('data', 'interaction', 'graph_version', {int_or_none}))
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for cid, edges in cid_edges.items():
            play_info = self._download_playinfo(video_id, cid, headers=headers)
            yield {
                **metainfo,
                'id': f'{video_id}_{cid}',
                'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
                'formats': self.extract_formats(play_info),
                'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'subtitles': self.extract_subtitles(video_id, cid),
            }
class BiliBiliIE(BilibiliBaseIE):
    # Matches normal /video/ URLs as well as festival pages that carry the
    # BV id in the `bvid` query parameter
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/[^/?#]+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
        'info_dict': {
            'id': 'BV13x41117TL',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'ext': 'mp4',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'upload_date': '20170301',
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
            '_old_archive_ids': ['bilibili 8903802_part1'],
        },
    }, {
        'note': 'old av URL version',
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
            'id': 'BV11x411K7CN',
            'ext': 'mp4',
            'title': '【金坷垃】金泡沫',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'like_count': int,
            'comment_count': int,
            'view_count': int,
            'tags': list,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
            '_old_archive_ids': ['bilibili 1074402_part1'],
        },
        'params': {'skip_download': True},
    }, {
        'note': 'Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的',
        },
        'playlist_count': 18,
        'playlist': [{
            'info_dict': {
                'id': 'BV1bK411W797_p1',
                'ext': 'mp4',
                'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
                'tags': 'count:10',
                'timestamp': 1589601697,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'uploader': '打牌还是打桩',
                'uploader_id': '150259984',
                'like_count': int,
                'comment_count': int,
                'upload_date': '20200516',
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
                '_old_archive_ids': ['bilibili 498159642_part1'],
            },
        }],
    }, {
        'note': 'Specific page of Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
        'info_dict': {
            'id': 'BV1bK411W797_p1',
            'ext': 'mp4',
            'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
            'tags': 'count:10',
            'timestamp': 1589601697,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'uploader': '打牌还是打桩',
            'uploader_id': '150259984',
            'like_count': int,
            'comment_count': int,
            'upload_date': '20200516',
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
            '_old_archive_ids': ['bilibili 498159642_part1'],
        },
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': 'BV13x41117TL',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
            'timestamp': 1488353834,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            '_old_archive_ids': ['bilibili 8903802_part1'],
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'note': 'video has chapter',
        'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
        'info_dict': {
            'id': 'BV1vL411G7N7',
            'ext': 'mp4',
            'title': '如何为你的B站视频添加进度条分段',
            'timestamp': 1634554558,
            'upload_date': '20211018',
            'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
            'tags': list,
            'uploader': '爱喝咖啡的当麻',
            'duration': 669.482,
            'uploader_id': '1680903',
            'chapters': 'count:6',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 463665680_part1'],
        },
        'params': {'skip_download': True},
    }, {
        'note': 'video redirects to festival page',
        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
        'info_dict': {
            'id': 'BV1wP4y1P72h',
            'ext': 'mp4',
            'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
            'timestamp': 1643947497,
            'upload_date': '20220204',
            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
            'uploader': '叨叨冯聊音乐',
            'duration': 246.719,
            'uploader_id': '528182630',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 893839363_part1'],
        },
    }, {
        'note': 'newer festival video',
        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
        'info_dict': {
            'id': 'BV1ay4y1d77f',
            'ext': 'mp4',
            'title': '【崩坏3新春剧场】为特别的你送上祝福!',
            'timestamp': 1674273600,
            'upload_date': '20230121',
            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
            'uploader': '果蝇轰',
            'duration': 1111.722,
            'uploader_id': '8469526',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 778246196_part1'],
        },
    }, {
        'note': 'legacy flv/mp4 video',
        'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
        'info_dict': {
            'id': 'BV1ms411Q7vw_p4',
            'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
            'timestamp': 1458222815,
            'upload_date': '20160317',
            'description': '云南方言快乐生产线出品',
            'duration': float,
            'uploader': '一笑颠天',
            'uploader_id': '3916081',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
            'tags': list,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 4120229_part4'],
        },
        'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
        'playlist_count': 19,
        'playlist': [{
            'info_dict': {
                'id': 'BV1ms411Q7vw_p4_0',
                'ext': 'flv',
                'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
                'duration': 399.102,
            },
        }],
    }, {
        'note': 'legacy mp4-only video',
        'url': 'https://www.bilibili.com/video/BV1nx411u79K',
        'info_dict': {
            'id': 'BV1nx411u79K',
            'ext': 'mp4',
            'title': '【练习室】201603声乐练习《No Air》with VigoVan',
            'timestamp': 1508893551,
            'upload_date': '20171025',
            'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
            'duration': 80.384,
            'uploader': '伯远',
            'uploader_id': '10584494',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'tags': list,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 15700301_part1'],
        },
    }, {
        'note': 'interactive/split-path video',
        'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
        'info_dict': {
            'id': 'BV1af4y1H7ga',
            'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
            'timestamp': 1630500414,
            'upload_date': '20210901',
            'description': 'md5:01113e39ab06e28042d74ac356a08786',
            'tags': list,
            'uploader': '钉宫妮妮Ninico',
            'duration': 1503,
            'uploader_id': '8881297',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 292734508_part1'],
        },
        'playlist_count': 33,
        'playlist': [{
            'info_dict': {
                'id': 'BV1af4y1H7ga_400950101',
                'ext': 'mp4',
                'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
                'timestamp': 1630500414,
                'upload_date': '20210901',
                'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
                'tags': list,
                'uploader': '钉宫妮妮Ninico',
                'duration': 11.605,
                'uploader_id': '8881297',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 292734508_part1'],
            },
        }],
    }, {
        'note': '301 redirect to bangumi link',
        'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
        'info_dict': {
            'id': '288525',
            'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
            'ext': 'mp4',
            'series': '我和我的祖国',
            'series_id': '4780',
            'season': '幕后纪实',
            'season_id': '28609',
            'season_number': 1,
            'episode': '钱学森弹道和乘波体飞行器是什么?',
            'episode_id': '288525',
            'episode_number': 105,
            'duration': 1183.957,
            'timestamp': 1571648124,
            'upload_date': '20191021',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'note': 'video has subtitles, which requires login',
        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
        'info_dict': {
            'id': 'BV12N4y1M7rh',
            'ext': 'mp4',
            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
            'tags': list,
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
            'uploader': '小夫太渴',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'subtitles': 'count:2',  # login required for CC subtitle
            '_old_archive_ids': ['bilibili 898179753_part1'],
        },
        'params': {'listsubtitles': True},
        'skip': 'login required for subtitle',
    }, {
        'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
        'info_dict': {
            'id': 'BV1jL41167ZG',
            'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
            'ext': 'mp4',
        },
        'skip': 'supporter-only video',
    }, {
        'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
        'info_dict': {
            'id': 'BV1Ks411f7aQ',
            'title': '【BD1080P】狼与香辛料I【华盟】',
            'ext': 'mp4',
        },
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
        'info_dict': {
            'id': 'BV1GJ411x7h7',
            'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
            'ext': 'mp4',
        },
        'skip': 'geo-restricted',
    }, {
        'note': 'has - in the last path segment of the url',
        'url': 'https://www.bilibili.com/festival/bh3-7th?bvid=BV1tr4y1f7p2&',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
        # The server may redirect to a bangumi or other page; hand it off
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        headers['Referer'] = url

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info_obj = self._search_json(
                r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
            if not play_info_obj:
                if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
                    self.raise_login_required()
                if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
                    raise ExtractorError(
                        'This video may be deleted or geo-restricted. '
                        'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
            play_info = traverse_obj(play_info_obj, ('data', {dict}))
            if not play_info:
                if traverse_obj(play_info_obj, 'code') == 87007:
                    toast = get_element_by_class('tips-toast', webpage) or ''
                    msg = clean_html(
                        f'{get_element_by_class("belongs-to", toast) or ""},'
                        + (get_element_by_class('level', toast) or ''))
                    raise ExtractorError(
                        f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
                raise ExtractorError('Failed to extract play info')
            video_data = initial_state['videoData']

        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = not is_festival and traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology', headers=headers),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            part_id = part_id or 1
            title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        festival_info = {}
        if is_festival:
            # Festival pages embed no __playinfo__; fetch formats via the API
            play_info = self._download_playinfo(video_id, cid, headers=headers)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
        if is_interactive:
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
                duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                __post_extractor=self.extract_comments(aid))
        else:
            formats = self.extract_formats(play_info)

            if not traverse_obj(play_info, ('dash')):
                # we only have legacy formats and need additional work
                has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
                for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
                    formats.extend(traverse_obj(
                        self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
                        lambda _, v: not has_qn(v['quality'])))
                self._check_missing_formats(play_info, formats)
                flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
                if flv_formats and len(flv_formats) < len(formats):
                    # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
                    if not self._configuration_arg('prefer_multi_flv'):
                        dropped_fmts = ', '.join(
                            f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
                        formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
                        if dropped_fmts:
                            self.to_screen(
                                f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
                                'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
                    else:
                        formats = traverse_obj(
                            # XXX: Filtering by extractor-arg is for testing purposes
                            formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
                        ) or [max(flv_formats, key=lambda x: x['quality'])]

            if traverse_obj(formats, (0, 'fragments')):
                # We have flv formats, which are individual short videos with their own timestamps and metainfo
                # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
                return {
                    **metainfo,
                    '_type': 'multi_video',
                    'entries': [{
                        'id': f'{metainfo["id"]}_{idx}',
                        'title': metainfo['title'],
                        'http_headers': metainfo['http_headers'],
                        'formats': [{
                            **fragment,
                            'format_id': formats[0].get('format_id'),
                        }],
                        'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
                        '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
                    } for idx, fragment in enumerate(formats[0]['fragments'])],
                    'duration': float_or_none(play_info.get('timelength'), scale=1000),
                }
            else:
                return {
                    **metainfo,
                    'formats': formats,
                    'duration': float_or_none(play_info.get('timelength'), scale=1000),
                    'chapters': self._get_chapters(aid, cid),
                    'subtitles': self.extract_subtitles(video_id, cid),
                    '__post_extractor': self.extract_comments(aid),
                }
class BiliBiliBangumiIE(BilibiliBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
        'info_dict': {
            'id': '21495',
            'ext': 'mp4',
            'series': '悠久之翼',
            'series_id': '774',
            'season': '第二季',
            'season_id': '1182',
            'season_number': 2,
            'episode': 'forever/ef',
            'episode_id': '21495',
            'episode_number': 12,
            'title': '12 forever/ef',
            'duration': 1420.791,
            'timestamp': 1320412200,
            'upload_date': '20111104',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
        'info_dict': {
            'id': '267851',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '残酷',
            'episode_id': '267851',
            'episode_number': 1,
            'title': '1 残酷',
            'duration': 1425.256,
            'timestamp': 1554566400,
            'upload_date': '20190406',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'skip': 'Geo-restricted',
    }, {
        'note': 'a making-of which falls outside main section',
        'url': 'https://www.bilibili.com/bangumi/play/ep345120',
        'info_dict': {
            'id': '345120',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '炭治郎篇',
            'episode_id': '345120',
            'episode_number': 27,
            'title': '#1 炭治郎篇',
            'duration': 1922.129,
            'timestamp': 1602853860,
            'upload_date': '20201016',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }]

    def _real_extract(self, url):
        episode_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage = self._download_webpage(url, episode_id, headers=headers)

        # Detect restriction banners embedded in the page markup
        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        headers['Referer'] = url
        play_info = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
            headers=headers)
        premium_only = play_info.get('code') == -10403
        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
            self.raise_login_required('This video is for premium members only')

        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        # Locate this episode within the season (main episodes + extra sections)
        episode_number, episode_info = next((
            (idx, ep) for idx, ep in enumerate(traverse_obj(
                bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
            if str_or_none(ep.get('id')) == episode_id), (1, {}))

        season_id = bangumi_info.get('season_id')
        season_number, season_title = season_id and next((
            (idx + 1, e.get('season_title')) for idx, e in enumerate(
                traverse_obj(bangumi_info, ('seasons', ...)))
            if e.get('season_id') == season_id
        ), (None, None))

        aid = episode_info.get('aid')

        return {
            'id': episode_id,
            'formats': formats,
            **traverse_obj(bangumi_info, {
                'series': ('series', 'series_title', {str}),
                'series_id': ('series', 'series_id', {str_or_none}),
                'thumbnail': ('square_cover', {url_or_none}),
            }),
            **traverse_obj(episode_info, {
                'episode': ('long_title', {str}),
                'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
                'timestamp': ('pub_time', {int_or_none}),
                'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
            }),
            'episode_id': episode_id,
            'season': str_or_none(season_title),
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': {'Referer': url},
        }
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    """Extract all episodes of a bangumi from its media (md) overview page."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
            'title': 'CAROLE & TUESDAY',
            'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.bilibili.com/bangumi/media/md1565/',
        'info_dict': {
            'id': '1565',
            'title': '攻壳机动队 S.A.C. 2nd GIG',
            'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
        },
        'playlist_count': 26,
        'playlist': [{
            'info_dict': {
                'id': '68540',
                'ext': 'mp4',
                'series': '攻壳机动队',
                'series_id': '1077',
                'season': '第二季',
                'season_id': '1565',
                'season_number': 2,
                'episode': '再启动 REEMBODY',
                'episode_id': '68540',
                'episode_number': 1,
                'title': '1 再启动 REEMBODY',
                'duration': 1525.777,
                'timestamp': 1425074413,
                'upload_date': '20150227',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }]

    def _real_extract(self, url):
        media_id = self._match_id(url)
        page = self._download_webpage(url, media_id)
        # The media page embeds its season id and basic metadata in the SPA state
        state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', page, 'initial_state', media_id)

        metainfo = traverse_obj(state, ('mediaInfo', {
            'title': ('title', {str}),
            'description': ('evaluate', {str}),
        }))
        return self.playlist_result(
            self._get_episodes_from_season(state['mediaInfo']['season_id'], url),
            media_id, **metainfo)
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    """Extract all episodes of a bangumi season from its ss playback page."""

    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801',
            'title': '鬼灭之刃',
            'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
        },
        'playlist_mincount': 26,
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ss2251',
        'info_dict': {
            'id': '2251',
            'title': '玲音',
            'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
        },
        'playlist_count': 13,
        'playlist': [{
            'info_dict': {
                'id': '50188',
                'ext': 'mp4',
                'series': '玲音',
                'series_id': '1526',
                'season': 'TV',
                'season_id': '2251',
                'season_number': 1,
                'episode': 'WEIRD',
                'episode_id': '50188',
                'episode_number': 1,
                'title': '1 WEIRD',
                'duration': 1436.992,
                'timestamp': 1343185080,
                'upload_date': '20120725',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }]

    def _real_extract(self, url):
        season_id = self._match_id(url)
        page = self._download_webpage(url, season_id)
        # Title/description come from the first ld+json itemListElement entry
        ld_json = self._search_json(
            r'<script[^>]+type="application/ld\+json"[^>]*>', page, 'info', season_id)
        metainfo = traverse_obj(ld_json, ('itemListElement', ..., {
            'title': ('name', {str}),
            'description': ('description', {str}),
        }), get_all=False)

        return self.playlist_result(
            self._get_episodes_from_season(season_id, url), season_id, **metainfo)
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Shared logic for Bilibili paid-course ("cheese") extractors."""

    # All cheese API calls require a bilibili.com Referer
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for one course episode.

        season_info -- the 'data' object returned by the pugv season API
        ep_id -- the episode id (string)
        """
        # Locate this episode's entry in the season's episode list
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        aid, cid = episode_info['aid'], episode_info['cid']

        # ep_status == -1 appears to mark episodes that are not yet published
        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Results are always attributed to the single-episode extractor,
            # even when reached via the season playlist extractor
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                # "<index> - <title>"; falls back to None when both are missing
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Fetch season metadata; query_key is either 'ep_id' or 'season_id'."""
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    """Extract a single paid-course episode."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        episode_id = self._match_id(url)
        # The season endpoint also accepts an episode id and returns the whole season
        season_info = self._download_season_info('ep_id', episode_id)
        return self._extract_episode(season_info, episode_id)
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    """Extract every viewable episode of a paid course season."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            },
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        # Only episodes flagged as viewable for the current session are yielded
        viewable_ids = traverse_obj(
            season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
        for episode_id in viewable_ids:
            yield self._extract_episode(season_info, episode_id)

    def _real_extract(self, url):
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)

        metadata = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })
        return self.playlist_result(
            self._get_cheese_entries(season_info), season_id, **metadata)
class BilibiliSpaceBaseIE(BilibiliBaseIE):
    """Base for user-space playlist extractors built on page-based APIs."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Return (metadata, lazy paged list) for a paginated space endpoint.

        fetch_page(idx) downloads one page; get_metadata(page) must supply
        'page_count' and 'page_size'; get_entries(page) yields the results.
        """
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def entries_for(page_idx):
            # Reuse the already-downloaded first page instead of refetching it
            return get_entries(initial_page if page_idx == 0 else fetch_page(page_idx))

        return metadata, InAdvancePagedList(
            entries_for, metadata['page_count'], metadata['page_size'])
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Extract all videos uploaded by a user (space.bilibili.com/<id>/video)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
        'skip': 'login required',
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        def fetch_page(page_idx):
            # The search endpoint requires WBI-signed query parameters
            query = {
                'keyword': '',
                'mid': playlist_id,
                # Sort order may be overridden via ?order= on the input URL
                'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,  # API pages are 1-indexed
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
            }

            try:
                response = self._download_json(
                    'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
                    query=self._sign_wbi(query, playlist_id),
                    note=f'Downloading space page {page_idx}', headers={'Referer': url})
            except ExtractorError as e:
                # 412 is the server-side anti-bot rejection
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
            # Non-zero API codes signal rejection even with an HTTP 200 response
            status_code = response['code']
            if status_code == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            elif status_code == -352 and not self.is_logged_in:
                self.raise_login_required('Request is rejected, you need to login to access playlist')
            elif status_code != 0:
                raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['ps']
            entry_count = page_data['page']['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Extract all audio tracks uploaded by a user (space .../audio)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # API pages are 1-indexed; ours are 0-based
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page):
            return {
                'page_count': page['pageCount'],
                'page_size': page['pageSize'],
            }

        def get_entries(page):
            for song in page.get('data', []):
                yield self.url_result(
                    f'https://www.bilibili.com/audio/au{song["id"]}', BilibiliAudioIE, song['id'])

        metadata, entries = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(entries, playlist_id)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Shared helpers for list-style space playlists (collections, series, favorites)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        # bvid_keys may be a single key or a path of keys leading to the entry list
        path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for bvid in traverse_obj(page_data, path):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        # Best-effort: scrape the uploader name from the space page <title>
        space_page = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', space_page, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        # Pagination internals must not leak into the returned playlist metadata
        for internal_key in ('page_count', 'page_size'):
            metadata.pop(internal_key, None)
        return metadata, page_list
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Extract a user's collection ("合集") playlist from their space."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        # Composite id: "<user id>_<collection id>"
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                }),
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Extract a user's series ("列表") playlist from their space."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        # Composite id: "<user id>_<series id>"
        playlist_id = f'{mid}_{sid}'
        # Series metadata comes from a separate endpoint; best-effort (fatal=False)
        playlist_meta = traverse_obj(self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
        ), {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta,
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Extract a (public, or owned) favorites list."""

    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        # -403 means the list is private and we are not its owner
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # A second endpoint returns every entry id in one response
        ids_response = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')

        metadata = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(self._get_entries(ids_response, 'data'), fid, **metadata)
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Extract the logged-in user's "watch later" list."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {
            'id': r're:\d+',
            'title': '稍后再看',
        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        # The numeric user id from the login cookie doubles as the playlist id;
        # anonymous sessions fall back to a generic id
        uid_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = uid_cookie.value if uid_cookie else 'watchlater'
        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        # -101 means the session is not logged in
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')
        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')), id=list_id, title='稍后再看')
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Extract /list/ and /medialist/play/ playlists (series, favlists, watchlater)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
        'info_dict': {
            'id': 'BV1DU4y1r7tz',
            'ext': 'mp4',
            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
            'upload_date': '20220820',
            'description': '',
            'timestamp': 1661016330,
            'uploader_id': '1958703906',
            'uploader': '靡烟miya',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 9552.903,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            '_old_archive_ids': ['bilibili 687146339_part1'],
        },
        'params': {'noplaylist': True},
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {
            'id': r're:2_\d+',
            'title': '稍后再看',
            'uploader': str,
            'uploader_id': str,
        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'redirect url & login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Yield entries page by page; mutates query['oid'] as the paging cursor."""
        for page_num in itertools.count(1):
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            # The id of the last entry seen becomes the cursor for the next page
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break

    def _real_extract(self, url):
        list_id = self._match_id(url)

        bvid = traverse_obj(parse_qs(url), ('bvid', 0))
        # A bvid in the query points at a single video within the list;
        # honor --no-playlist by extracting just that video
        if not self._yes_playlist(list_id, bvid):
            return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)

        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
        # The SPA state carries an HTTP-like status; anything but 200 is an error
        if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        query = {
            'ps': 20,
            'with_current': False,
            **traverse_obj(initial_state, {
                'type': ('playlist', 'type', {int_or_none}),
                'biz_id': ('playlist', 'id', {int_or_none}),
                'tid': ('tid', {int_or_none}),
                # NB: 'sortFiled' is the site's own (misspelled) key name
                'sort_field': ('sortFiled', {int_or_none}),
                # bool -> 'true'/'false' string as expected by the API
                'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
            }),
        }
        metadata = {
            # Composite id: "<playlist type>_<business id>"
            'id': f'{query["type"]}_{query["biz_id"]}',
            **traverse_obj(initial_state, ('mediaListInfo', {
                'title': ('title', {str}),
                'uploader': ('upper', 'name', {str}),
                'uploader_id': ('upper', 'mid', {str_or_none}),
                # A ctime of 0 is meaningless; normalize it to None
                'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
                'thumbnail': ('cover', {url_or_none}),
            })),
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
class BilibiliCategoryIE(InfoExtractor):
    """Extract all videos of a supported bilibili.com/v/<category>/<subcategory> listing."""

    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad',
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45,
        },
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield url_results for a single page of the category listing."""
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note=f'Extracting results from page {page_num} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {page_num}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return a lazily-paged list of all entries for the given category."""
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127,
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # Guard against a missing/invalid 'data.page' object so we raise the
        # intended ExtractorError below instead of an AttributeError
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        # URL path is /v/<category>/<subcategory>
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor for the 'bilisearch' prefix."""

    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    _TESTS = [{
        'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        'playlist_count': 3,
        'info_dict': {
            'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
            'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        },
        'playlist': [{
            'info_dict': {
                'id': 'BV1n44y1Q7sc',
                'ext': 'mp4',
                'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
                'timestamp': 1669889987,
                'upload_date': '20221201',
                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
                'tags': list,
                'uploader': '靡烟miya',
                'duration': 123.156,
                'uploader_id': '1958703906',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 988222410_part1'],
            },
        }],
    }]

    def _search_results(self, query):
        # The search API rejects requests without a buvid3 device cookie;
        # fabricate one when the session does not have it yet
        if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
        for page_num in itertools.count(1):
            videos = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query={
                    'Search_key': query,
                    'keyword': query,
                    'page': page_num,
                    'context': '',
                    'duration': 0,
                    'tids_2': '',
                    '__refresh__': 'true',
                    'search_type': 'video',
                    'tids': 0,
                    'highlight': 1,
                })['data'].get('result')
            # An empty result page marks the end of pagination
            if not videos:
                break
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
class BilibiliAudioBaseIE(InfoExtractor):
    """Shared API helper for the Bilibili audio-zone extractors."""

    def _call_api(self, path, sid, query=None):
        # An absent/empty query defaults to looking up the given song/album id
        return self._download_json(
            'https://www.bilibili.com/audio/music-service-c/web/' + path,
            sid, query=query or {'sid': sid})['data']
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extract a single audio track (bilibili.com/audio/au<id>)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        # The CDN requires the page URL as Referer
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        stats = song.get('statistic') or {}

        lyric = song.get('lyric')
        subtitles = {'origin': [{'url': lyric}]} if lyric else None

        return {
            'id': au_id,
            'title': song['title'],
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(stats.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(stats.get('play')),
        }
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Extract an audio album/menu (bilibili.com/audio/am<id>) as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Entries without a usable id are silently skipped
        entries = [
            self.url_result(
                'https://www.bilibili.com/audio/au' + sid,
                BilibiliAudioIE.ie_key(), sid)
            for sid in (str_or_none(song.get('id')) for song in songs)
            if sid]

        if entries:
            album_data = self._call_api('menu/info', am_id) or {}
            album_title = album_data.get('title')
            if album_title:
                # Tag every entry with the album title before returning
                for entry in entries:
                    entry['album'] = album_title
                return self.playlist_result(
                    entries, am_id, album_title, album_data.get('intro'))

        return self.playlist_result(entries, am_id)
class BiliBiliPlayerIE(InfoExtractor):
    """Redirect embedded player URLs to the main BiliBili extractor."""

    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        aid = self._match_id(url)
        # Hand off to BiliBiliIE via the canonical av-number URL
        return self.url_result(
            f'http://www.bilibili.tv/video/av{aid}/',
            ie=BiliBiliIE.ie_key(), video_id=aid)
class BiliIntlBaseIE(InfoExtractor):
    """Shared plumbing for bilibili.tv (international/Bstation) extractors:
    gateway API access, subtitle and format extraction, and password login."""

    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _call_api(self, endpoint, *args, **kwargs):
        """Call the intl gateway API and return the response's 'data' payload.

        Known error codes are mapped to login/geo-restriction errors; any
        other non-zero code raises or warns depending on kwargs['fatal'].
        """
        # NOTE: this local name shadows the stdlib `json` module within the method
        json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if json.get('code'):
            if json['code'] in (10004004, 10004005, 10023006):
                # codes returned for login-gated content
                self.raise_login_required()
            elif json['code'] == 10004001:
                self.raise_geo_restricted()
            else:
                # Only append the server message when it adds information
                # beyond the numeric code itself
                if json.get('message') and str(json['code']) != json['message']:
                    errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
                else:
                    errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                else:
                    self.report_warning(errmsg)
        return json.get('data')

    def json2srt(self, json):
        """Convert the API's JSON subtitle body to SRT text.

        Cues with a falsy 'content', 'from' or 'to' field are skipped.
        """
        return '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(traverse_obj(json, (
                'body', lambda _, l: l['content'] and l['from'] and l['to']))))

    def _get_subtitles(self, *, ep_id=None, aid=None):
        """Download the subtitle list for an episode (ep_id) or UGC video (aid).

        'ass' subtitles are linked as-is; 'json' bodies are converted to SRT.
        Each subtitle URL is fetched at most once.
        """
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        fetched_urls = set()  # dedupe: the same URL may appear under several keys
        for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
            for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
                if url in fetched_urls:
                    continue
                fetched_urls.add(url)
                sub_ext = determine_ext(url)
                sub_lang = sub.get('lang_key') or 'en'

                if sub_ext == 'ass':
                    subtitles.setdefault(sub_lang, []).append({
                        'ext': 'ass',
                        'url': url,
                    })
                elif sub_ext == 'json':
                    sub_data = self._download_json(
                        url, ep_id or aid, fatal=False,
                        note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
                        errnote='Unable to download subtitles')

                    if sub_data:
                        subtitles.setdefault(sub_lang, []).append({
                            'ext': 'srt',
                            'data': self.json2srt(sub_data),
                        })
                else:
                    self.report_warning('Unexpected subtitle extension', ep_id or aid)

        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Return format dicts (video-only then audio-only streams) from /web/playurl."""
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Map an API episode/video object to info-dict fields.

        episode_number is parsed from an 'E<n> - ...' prefix of the display title.
        """
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'description': video_data.get('desc'),
            'thumbnail': video_data.get('cover'),
            'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in to bilibili.tv by RSA-encrypting key_hash+password with the
        server-provided public key. Requires pycryptodomex."""
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = Cryptodome.RSA.importKey(key_data['key'])
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
            data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true',
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to log in')
class BiliIntlIE(BiliIntlBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro',
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro',
            }],
        },
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro',
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro',
            }],
        },
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>',
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro',
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro',
            }],
        },
        'params': {
            'getcomments': True,
        },
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
            'comment_count': int,
            'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
        },
        'params': {
            'getcomments': True,
        },
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Build a canonical bilibili.tv URL: play/<series>/<ep> or video/<aid>."""
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Collect metadata, preferring smuggled data from the series extractor,
        then the page's initial-state JSON, then the season episode list."""
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
            ), expected_type=dict, get_all=False)

        # XXX: webpage metadata may not be accurate; it is only used to avoid
        # crashing when video_data was not found
        return merge_dicts(
            self._parse_video_metadata(video_data), {
                'title': get_element_by_class(
                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                'description': get_element_by_class(
                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
            }, self._search_json_ld(webpage, video_id, default={}))

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Recursively yield replies to comment `root_id`, following the
        API's 'next' cursor until 'is_end' is reported."""
        comment_api_raw_data = self._download_json(
            'https://api.bilibili.tv/reply/web/detail', display_id,
            note=f'Downloading reply comment of {root_id} - {next_id}',
            query={
                'platform': 'web',
                'ps': 20,  # comment's reply per page (default: 3)
                'root': root_id,
                'next': next_id,
            })

        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
            yield {
                'author': traverse_obj(replies, ('member', 'name')),
                'author_id': traverse_obj(replies, ('member', 'mid')),
                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                'text': traverse_obj(replies, ('content', 'message')),
                'id': replies.get('rpid'),
                'like_count': int_or_none(replies.get('like_count')),
                'parent': replies.get('parent'),
                'timestamp': unified_timestamp(replies.get('ctime_text')),
            }

        if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
            yield from self._get_comments_reply(
                root_id, comment_api_raw_data['data']['cursor']['next'], display_id)

    def _get_comments(self, video_id, ep_id):
        """Yield top-level comments page by page (plus their replies)."""
        for i in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {i + 1}',
                query={
                    'platform': 'web',
                    'pn': i,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'timestamp': unified_timestamp(replies.get('ctime_text')),
                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
                }
                if replies.get('count'):
                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)

            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                break

    def _real_extract(self, url):
        """Extract a single episode (play/<season>/<ep>) or UGC video (video/<aid>)."""
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start time and end time seems a bit off a few seconds even if correct based on ogv.*.js
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                chapters = [{
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro',
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro',
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id),
            'http_headers': self._HEADERS,
        }
class BiliIntlSeriesIE(BiliIntlBaseIE):
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        # Walk every section of the season's episode list; smuggle the
        # metadata we already have so BiliIntlIE can avoid a page download.
        episodes_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for ep in traverse_obj(episodes_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            ep_id = str(ep['episode_id'])
            smuggled = smuggle_url(
                BiliIntlIE._make_url(ep_id, series_id),
                self._parse_video_metadata(ep),
            )
            yield self.url_result(smuggled, BiliIntlIE, ep_id)

    def _real_extract(self, url):
        """Build a playlist of all episodes of a season, with season metadata."""
        series_id = self._match_id(url)
        season = self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
        return self.playlist_result(
            self._entries(series_id), series_id, season.get('title'), season.get('description'),
            categories=traverse_obj(season, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(season.get('horizontal_cover')), view_count=parse_count(season.get('view')))
2324 class BiliLiveIE(InfoExtractor):
2325 _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2327 _TESTS = [{
2328 'url': 'https://live.bilibili.com/196',
2329 'info_dict': {
2330 'id': '33989',
2331 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
2332 'ext': 'flv',
2333 'title': '太空狼人杀联动,不被爆杀就算赢',
2334 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
2335 'timestamp': 1650802769,
2337 'skip': 'not live',
2338 }, {
2339 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2340 'only_matching': True,
2341 }, {
2342 'url': 'https://live.bilibili.com/blanc/196',
2343 'only_matching': True,
2346 _FORMATS = {
2347 80: {'format_id': 'low', 'format_note': '流畅'},
2348 150: {'format_id': 'high_res', 'format_note': '高清'},
2349 250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
2350 400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
2351 10000: {'format_id': 'source', 'format_note': '原画'},
2352 20000: {'format_id': '4K', 'format_note': '4K'},
2353 30000: {'format_id': 'dolby', 'format_note': '杜比'},
2356 _quality = staticmethod(qualities(list(_FORMATS)))
2358 def _call_api(self, path, room_id, query):
2359 api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
2360 if api_result.get('code') != 0:
2361 raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
2362 return api_result.get('data') or {}
2364 def _parse_formats(self, qn, fmt):
2365 for codec in fmt.get('codec') or []:
2366 if codec.get('current_qn') != qn:
2367 continue
2368 for url_info in codec['url_info']:
2369 yield {
2370 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2371 'ext': fmt.get('format_name'),
2372 'vcodec': codec.get('codec_name'),
2373 'quality': self._quality(qn),
2374 **self._FORMATS[qn],
2377 def _real_extract(self, url):
2378 room_id = self._match_id(url)
2379 room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
2380 if room_data.get('live_status') == 0:
2381 raise ExtractorError('Streamer is not live', expected=True)
2383 formats = []
2384 for qn in self._FORMATS:
2385 stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
2386 'room_id': room_id,
2387 'qn': qn,
2388 'codec': '0,1',
2389 'format': '0,2',
2390 'mask': '0',
2391 'no_playurl': '0',
2392 'platform': 'web',
2393 'protocol': '0,1',
2395 for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2396 formats.extend(self._parse_formats(qn, fmt))
2398 return {
2399 'id': room_id,
2400 'title': room_data.get('title'),
2401 'description': room_data.get('description'),
2402 'thumbnail': room_data.get('user_cover'),
2403 'timestamp': stream_data.get('live_time'),
2404 'formats': formats,
2405 'is_live': True,
2406 'http_headers': {
2407 'Referer': url,