22 from .common
import InfoExtractor
, SearchInfoExtractor
23 from .openload
import PhantomJSwrapper
24 from ..jsinterp
import JSInterpreter
25 from ..networking
.exceptions
import HTTPError
, TransportError
, network_exceptions
# Reserved key names (double-underscore prefixed).
# NOTE(review): presumably used to tag streaming data with the originating
# client and its PO token -- confirm against the usages elsewhere in the file.
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
STREAMING_DATA_PO_TOKEN = '__yt_dlp_po_token'
75 # any clients starting with _ cannot be explicitly requested by the user
78 'INNERTUBE_CONTEXT': {
81 'clientVersion': '2.20240726.00.00',
84 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
85 'REQUIRE_PO_TOKEN': True,
87 # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
89 'INNERTUBE_CONTEXT': {
92 'clientVersion': '2.20240726.00.00',
93 'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
96 'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
97 'REQUIRE_PO_TOKEN': True,
100 'INNERTUBE_CONTEXT': {
102 'clientName': 'WEB_EMBEDDED_PLAYER',
103 'clientVersion': '1.20240723.01.00',
106 'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
109 'INNERTUBE_HOST': 'music.youtube.com',
110 'INNERTUBE_CONTEXT': {
112 'clientName': 'WEB_REMIX',
113 'clientVersion': '1.20240724.00.00',
116 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
118 # This client now requires sign-in for every video
120 'INNERTUBE_CONTEXT': {
122 'clientName': 'WEB_CREATOR',
123 'clientVersion': '1.20240723.03.00',
126 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
127 'REQUIRE_AUTH': True,
130 'INNERTUBE_CONTEXT': {
132 'clientName': 'ANDROID',
133 'clientVersion': '19.44.38',
134 'androidSdkVersion': 30,
135 'userAgent': 'com.google.android.youtube/19.44.38 (Linux; U; Android 11) gzip',
140 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
141 'REQUIRE_JS_PLAYER': False,
142 'REQUIRE_PO_TOKEN': True,
144 # This client now requires sign-in for every video
146 'INNERTUBE_CONTEXT': {
148 'clientName': 'ANDROID_MUSIC',
149 'clientVersion': '7.27.52',
150 'androidSdkVersion': 30,
151 'userAgent': 'com.google.android.apps.youtube.music/7.27.52 (Linux; U; Android 11) gzip',
156 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
157 'REQUIRE_JS_PLAYER': False,
158 'REQUIRE_PO_TOKEN': True,
159 'REQUIRE_AUTH': True,
161 # This client now requires sign-in for every video
163 'INNERTUBE_CONTEXT': {
165 'clientName': 'ANDROID_CREATOR',
166 'clientVersion': '24.45.100',
167 'androidSdkVersion': 30,
168 'userAgent': 'com.google.android.apps.youtube.creator/24.45.100 (Linux; U; Android 11) gzip',
173 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
174 'REQUIRE_JS_PLAYER': False,
175 'REQUIRE_PO_TOKEN': True,
176 'REQUIRE_AUTH': True,
178 # YouTube Kids videos aren't returned on this client for some reason
180 'INNERTUBE_CONTEXT': {
182 'clientName': 'ANDROID_VR',
183 'clientVersion': '1.60.19',
184 'deviceMake': 'Oculus',
185 'deviceModel': 'Quest 3',
186 'androidSdkVersion': 32,
187 'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.60.19 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
192 'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
193 'REQUIRE_JS_PLAYER': False,
195 # iOS clients have HLS live streams. Setting device model to get 60fps formats.
196 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
198 'INNERTUBE_CONTEXT': {
201 'clientVersion': '19.45.4',
202 'deviceMake': 'Apple',
203 'deviceModel': 'iPhone16,2',
204 'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
206 'osVersion': '18.1.0.22B83',
209 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
210 'REQUIRE_JS_PLAYER': False,
212 # This client now requires sign-in for every video
214 'INNERTUBE_CONTEXT': {
216 'clientName': 'IOS_MUSIC',
217 'clientVersion': '7.27.0',
218 'deviceMake': 'Apple',
219 'deviceModel': 'iPhone16,2',
220 'userAgent': 'com.google.ios.youtubemusic/7.27.0 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
222 'osVersion': '18.1.0.22B83',
225 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
226 'REQUIRE_JS_PLAYER': False,
227 'REQUIRE_AUTH': True,
229 # This client now requires sign-in for every video
231 'INNERTUBE_CONTEXT': {
233 'clientName': 'IOS_CREATOR',
234 'clientVersion': '24.45.100',
235 'deviceMake': 'Apple',
236 'deviceModel': 'iPhone16,2',
237 'userAgent': 'com.google.ios.ytcreator/24.45.100 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
239 'osVersion': '18.1.0.22B83',
242 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
243 'REQUIRE_JS_PLAYER': False,
244 'REQUIRE_AUTH': True,
246 # mweb has 'ultralow' formats
247 # See: https://github.com/yt-dlp/yt-dlp/pull/557
249 'INNERTUBE_CONTEXT': {
251 'clientName': 'MWEB',
252 'clientVersion': '2.20240726.01.00',
255 'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
258 'INNERTUBE_CONTEXT': {
260 'clientName': 'TVHTML5',
261 'clientVersion': '7.20240724.13.00',
264 'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
266 # This client now requires sign-in for every video
267 # It was previously an age-gate workaround for videos that were `playable_in_embed`
268 # It may still be useful if signed into an EU account that is not age-verified
270 'INNERTUBE_CONTEXT': {
272 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
273 'clientVersion': '2.0',
276 'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
277 'REQUIRE_AUTH': True,
279 # This client now requires sign-in for every video
280 # It may be able to receive pre-merged video+audio 720p/1080p streams
282 'INNERTUBE_CONTEXT': {
284 'clientName': 'MEDIA_CONNECT_FRONTEND',
285 'clientVersion': '0.1',
288 'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
289 'REQUIRE_JS_PLAYER': False,
290 'REQUIRE_AUTH': True,
295 def _split_innertube_client(client_name
):
296 variant
, *base
= client_name
.rsplit('.', 1)
298 return variant
, base
[0], variant
299 base
, *variant
= client_name
.split('_', 1)
300 return client_name
, base
, variant
[0] if variant
else None
def short_client_name(client_name):
    """Build an upper-cased abbreviation of an innertube client name.

    The first element returned by `_split_innertube_client` is split on '_'.
    The first word is truncated to 4 characters and each remaining word
    contributes its first character. The two pieces are combined with
    `join_nonempty` and upper-cased.
    """
    full_name = _split_innertube_client(client_name)[0]
    head, *tail = full_name.split('_')
    initials = ''.join(word[0] for word in tail)
    return join_nonempty(head[:4], initials).upper()
308 def build_innertube_clients():
310 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
312 BASE_CLIENTS
= ('ios', 'web', 'tv', 'mweb', 'android')
313 priority
= qualities(BASE_CLIENTS
[::-1])
315 for client
, ytcfg
in tuple(INNERTUBE_CLIENTS
.items()):
316 ytcfg
.setdefault('INNERTUBE_HOST', 'www.youtube.com')
317 ytcfg
.setdefault('REQUIRE_JS_PLAYER', True)
318 ytcfg
.setdefault('REQUIRE_PO_TOKEN', False)
319 ytcfg
.setdefault('REQUIRE_AUTH', False)
320 ytcfg
.setdefault('PLAYER_PARAMS', None)
321 ytcfg
['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
323 _
, base_client
, variant
= _split_innertube_client(client
)
324 ytcfg
['priority'] = 10 * priority(base_client
)
326 if variant
== 'embedded':
327 ytcfg
['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
328 ytcfg
['priority'] -= 2
330 ytcfg
['priority'] -= 3
333 build_innertube_clients()
class BadgeType(enum.Enum):
    """Enumeration of badge kinds.

    Member values are assigned by `enum.auto()` in declaration order, so only
    member identity should be relied upon.
    """

    # Availability-related badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Other badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
346 class YoutubeBaseInfoExtractor(InfoExtractor
):
347 """Provide base functions for Youtube extractors"""
350 r
'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
351 r
'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
352 r
'browse|oembed|get_video_info|iframe_api|s/player|source|'
353 r
'storefront|oops|index|account|t/terms|about|upload|signin|logout')
355 _PLAYLIST_ID_RE
= r
'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
357 # _NETRC_MACHINE = 'youtube'
359 # If True it will raise an error if no login info is provided
360 _LOGIN_REQUIRED
= False
363 # invidious-redirect websites
364 r
'(?:www\.)?redirect\.invidious\.io',
365 r
'(?:(?:www|dev)\.)?invidio\.us',
366 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
367 r
'(?:www\.)?invidious\.pussthecat\.org',
368 r
'(?:www\.)?invidious\.zee\.li',
369 r
'(?:www\.)?invidious\.ethibox\.fr',
370 r
'(?:www\.)?iv\.ggtyler\.dev',
371 r
'(?:www\.)?inv\.vern\.i2p',
372 r
'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
373 r
'(?:www\.)?inv\.riverside\.rocks',
374 r
'(?:www\.)?invidious\.silur\.me',
375 r
'(?:www\.)?inv\.bp\.projectsegfau\.lt',
376 r
'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
377 r
'(?:www\.)?invidious\.slipfox\.xyz',
378 r
'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
379 r
'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
380 r
'(?:www\.)?invidious\.tiekoetter\.com',
381 r
'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
382 r
'(?:www\.)?invidious\.nerdvpn\.de',
383 r
'(?:www\.)?invidious\.weblibre\.org',
384 r
'(?:www\.)?inv\.odyssey346\.dev',
385 r
'(?:www\.)?invidious\.dhusch\.de',
386 r
'(?:www\.)?iv\.melmac\.space',
387 r
'(?:www\.)?watch\.thekitty\.zone',
388 r
'(?:www\.)?invidious\.privacydev\.net',
389 r
'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
390 r
'(?:www\.)?invidious\.drivet\.xyz',
391 r
'(?:www\.)?vid\.priv\.au',
392 r
'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
393 r
'(?:www\.)?inv\.vern\.cc',
394 r
'(?:www\.)?invidious\.esmailelbob\.xyz',
395 r
'(?:www\.)?invidious\.sethforprivacy\.com',
396 r
'(?:www\.)?yt\.oelrichsgarcia\.de',
397 r
'(?:www\.)?yt\.artemislena\.eu',
398 r
'(?:www\.)?invidious\.flokinet\.to',
399 r
'(?:www\.)?invidious\.baczek\.me',
400 r
'(?:www\.)?y\.com\.sb',
401 r
'(?:www\.)?invidious\.epicsite\.xyz',
402 r
'(?:www\.)?invidious\.lidarshield\.cloud',
403 r
'(?:www\.)?yt\.funami\.tech',
404 r
'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
405 r
'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
406 r
'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
407 # youtube-dl invidious instances list
408 r
'(?:(?:www|no)\.)?invidiou\.sh',
409 r
'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
410 r
'(?:www\.)?invidious\.kabi\.tk',
411 r
'(?:www\.)?invidious\.mastodon\.host',
412 r
'(?:www\.)?invidious\.zapashcanon\.fr',
413 r
'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
414 r
'(?:www\.)?invidious\.tinfoil-hat\.net',
415 r
'(?:www\.)?invidious\.himiko\.cloud',
416 r
'(?:www\.)?invidious\.reallyancient\.tech',
417 r
'(?:www\.)?invidious\.tube',
418 r
'(?:www\.)?invidiou\.site',
419 r
'(?:www\.)?invidious\.site',
420 r
'(?:www\.)?invidious\.xyz',
421 r
'(?:www\.)?invidious\.nixnet\.xyz',
422 r
'(?:www\.)?invidious\.048596\.xyz',
423 r
'(?:www\.)?invidious\.drycat\.fr',
424 r
'(?:www\.)?inv\.skyn3t\.in',
425 r
'(?:www\.)?tube\.poal\.co',
426 r
'(?:www\.)?tube\.connect\.cafe',
427 r
'(?:www\.)?vid\.wxzm\.sx',
428 r
'(?:www\.)?vid\.mint\.lgbt',
429 r
'(?:www\.)?vid\.puffyan\.us',
430 r
'(?:www\.)?yewtu\.be',
431 r
'(?:www\.)?yt\.elukerio\.org',
432 r
'(?:www\.)?yt\.lelux\.fi',
433 r
'(?:www\.)?invidious\.ggc-project\.de',
434 r
'(?:www\.)?yt\.maisputain\.ovh',
435 r
'(?:www\.)?ytprivate\.com',
436 r
'(?:www\.)?invidious\.13ad\.de',
437 r
'(?:www\.)?invidious\.toot\.koeln',
438 r
'(?:www\.)?invidious\.fdn\.fr',
439 r
'(?:www\.)?watch\.nettohikari\.com',
440 r
'(?:www\.)?invidious\.namazso\.eu',
441 r
'(?:www\.)?invidious\.silkky\.cloud',
442 r
'(?:www\.)?invidious\.exonip\.de',
443 r
'(?:www\.)?invidious\.riverside\.rocks',
444 r
'(?:www\.)?invidious\.blamefran\.net',
445 r
'(?:www\.)?invidious\.moomoo\.de',
446 r
'(?:www\.)?ytb\.trom\.tf',
447 r
'(?:www\.)?yt\.cyberhost\.uk',
448 r
'(?:www\.)?kgg2m7yk5aybusll\.onion',
449 r
'(?:www\.)?qklhadlycap4cnod\.onion',
450 r
'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
451 r
'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
452 r
'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
453 r
'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
454 r
'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
455 r
'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
456 r
'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
457 r
'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
458 r
'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
459 r
'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
460 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
461 r
'(?:www\.)?piped\.kavin\.rocks',
462 r
'(?:www\.)?piped\.tokhmi\.xyz',
463 r
'(?:www\.)?piped\.syncpundit\.io',
464 r
'(?:www\.)?piped\.mha\.fi',
465 r
'(?:www\.)?watch\.whatever\.social',
466 r
'(?:www\.)?piped\.garudalinux\.org',
467 r
'(?:www\.)?piped\.rivo\.lol',
468 r
'(?:www\.)?piped-libre\.kavin\.rocks',
469 r
'(?:www\.)?yt\.jae\.fi',
470 r
'(?:www\.)?piped\.mint\.lgbt',
472 r
'(?:www\.)?piped\.esmailelbob\.xyz',
473 r
'(?:www\.)?piped\.projectsegfau\.lt',
474 r
'(?:www\.)?piped\.privacydev\.net',
475 r
'(?:www\.)?piped\.palveluntarjoaja\.eu',
476 r
'(?:www\.)?piped\.smnz\.de',
477 r
'(?:www\.)?piped\.adminforge\.de',
478 r
'(?:www\.)?watch\.whatevertinfoil\.de',
479 r
'(?:www\.)?piped\.qdi\.fi',
480 r
'(?:(?:www|cf)\.)?piped\.video',
481 r
'(?:www\.)?piped\.aeong\.one',
482 r
'(?:www\.)?piped\.moomoo\.me',
483 r
'(?:www\.)?piped\.chauvet\.pro',
484 r
'(?:www\.)?watch\.leptons\.xyz',
485 r
'(?:www\.)?pd\.vern\.cc',
486 r
'(?:www\.)?piped\.hostux\.net',
487 r
'(?:www\.)?piped\.lunar\.icu',
488 # Hyperpipe instances from https://hyperpipe.codeberg.page/
489 r
'(?:www\.)?hyperpipe\.surge\.sh',
490 r
'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
491 r
'(?:www\.)?listen\.whatever\.social',
492 r
'(?:www\.)?music\.adminforge\.de',
495 # extracted from account/account_menu ep
496 # XXX: These are the supported YouTube UI and API languages,
497 # which is slightly different from languages supported for translation in YouTube studio
498 _SUPPORTED_LANG_CODES
= [
499 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
500 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
501 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
502 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
503 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
504 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
507 _IGNORED_WARNINGS
= {
508 'Unavailable videos will be hidden during playback',
509 'Unavailable videos are hidden',
512 _YT_HANDLE_RE
= r
'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
513 _YT_CHANNEL_UCID_RE
= r
'UC[\w-]{22}'
515 _NETRC_MACHINE
= 'youtube'
def ucid_or_none(self, ucid):
    """Validate `ucid` against `_YT_CHANNEL_UCID_RE`, anchored at both ends.

    Delegates to `_search_regex` with `default=None`, so a non-matching value
    presumably yields None instead of raising.
    """
    anchored_pattern = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(anchored_pattern, ucid, 'UC-id', default=None)
def handle_or_none(self, handle):
    """Validate `handle` against `_YT_HANDLE_RE`, anchored at both ends.

    Delegates to `_search_regex` with `default=None`, so a non-matching value
    presumably yields None instead of raising.
    """
    anchored_pattern = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(anchored_pattern, handle, '@-handle', default=None)
def handle_from_url(self, url):
    """Extract a leading `@handle` path component from `url`.

    The pattern accepts either a bare path ("/@handle...") or one prefixed
    with an http(s) youtube.com origin (optionally "www."). Uses
    `_search_regex` with `default=None` for the no-match case.
    """
    pattern = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(pattern, url, 'channel handle', default=None)
def ucid_from_url(self, url):
    """Extract a leading channel UC-id path component from `url`.

    The pattern accepts either a bare path ("/UC...") or one prefixed with an
    http(s) youtube.com origin (optionally "www."). Uses `_search_regex` with
    `default=None` for the no-match case.
    """
    pattern = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(pattern, url, 'channel id', default=None)
531 @functools.cached_property
532 def _preferred_lang(self
):
534 Returns a language code supported by YouTube for the user preferred language.
535 Returns None if no preferred language set.
537 preferred_lang
= self
._configuration
_arg
('lang', ie_key
='Youtube', casesense
=True, default
=[''])[0]
538 if not preferred_lang
:
540 if preferred_lang
not in self
._SUPPORTED
_LANG
_CODES
:
541 raise ExtractorError(
542 f
'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
544 elif preferred_lang
!= 'en':
546 f
'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
547 return preferred_lang
549 def _initialize_consent(self
):
550 cookies
= self
._get
_cookies
('https://www.youtube.com/')
551 if cookies
.get('__Secure-3PSID'):
553 socs
= cookies
.get('SOCS')
554 if socs
and not socs
.value
.startswith('CAA'): # not consented
556 self
._set
_cookie
('.youtube.com', 'SOCS', 'CAI', secure
=True) # accept all (required for mixes)
558 def _initialize_pref(self
):
559 cookies
= self
._get
_cookies
('https://www.youtube.com/')
560 pref_cookie
= cookies
.get('PREF')
564 pref
= dict(urllib
.parse
.parse_qsl(pref_cookie
.value
))
566 self
.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
567 pref
.update({'hl': self
._preferred
_lang
or 'en', 'tz': 'UTC'})
568 self
._set
_cookie
('.youtube.com', name
='PREF', value
=urllib
.parse
.urlencode(pref
))
570 def _real_initialize(self
):
571 self
._initialize
_pref
()
572 self
._initialize
_consent
()
573 self
._check
_login
_required
()
575 def _perform_login(self
, username
, password
):
576 auth_type
, _
, user
= (username
or '').partition('+')
578 if auth_type
!= 'oauth':
579 raise ExtractorError(self
._youtube
_login
_hint
, expected
=True)
581 self
._initialize
_oauth
(user
, password
)
584 OAuth 2.0 Device Authorization Grant flow, used by the YouTube TV client (youtube.com/tv).
586 For more information regarding OAuth 2.0 and the Device Authorization Grant flow in general, see:
587 - https://developers.google.com/identity/protocols/oauth2/limited-input-device
588 - https://accounts.google.com/.well-known/openid-configuration
589 - https://www.rfc-editor.org/rfc/rfc8628
590 - https://www.rfc-editor.org/rfc/rfc6749
592 Note: The official client appears to use a proxied version of the oauth2 endpoints on youtube.com/o/oauth2,
593 which applies some modifications to the response (such as returning errors as 200 OK).
594 Since the client works with the standard API, we will use that as it is well-documented.
597 _OAUTH_PROFILE
= None
598 _OAUTH_ACCESS_TOKEN_CACHE
= {}
599 _OAUTH_DISPLAY_ID
= 'oauth'
601 # YouTube TV (TVHTML5) client. You can find these at youtube.com/tv
602 _OAUTH_CLIENT_ID
= '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com'
603 _OAUTH_CLIENT_SECRET
= 'SboVhoG9s0rNafixCSGGKXAT'
604 _OAUTH_SCOPE
= 'http://gdata.youtube.com https://www.googleapis.com/auth/youtube-paid-content'
606 # From https://accounts.google.com/.well-known/openid-configuration
607 # Technically, these should be fetched dynamically and not hard-coded.
608 # However, as these endpoints rarely change, we can risk saving an extra request for every invocation.
609 _OAUTH_DEVICE_AUTHORIZATION_ENDPOINT
= 'https://oauth2.googleapis.com/device/code'
610 _OAUTH_TOKEN_ENDPOINT
= 'https://oauth2.googleapis.com/token'
613 def _oauth_cache_key(self
):
614 return f
'oauth_refresh_token_{self._OAUTH_PROFILE}'
616 def _read_oauth_error_response(self
, response
):
618 self
._webpage
_read
_content
(response
, self
._OAUTH
_TOKEN
_ENDPOINT
, self
._OAUTH
_DISPLAY
_ID
, fatal
=False),
619 ({json
.loads
}, 'error', {str}
))
621 def _set_oauth_info(self
, token_response
):
622 YoutubeBaseInfoExtractor
._OAUTH
_ACCESS
_TOKEN
_CACHE
.setdefault(self
._OAUTH
_PROFILE
, {}).update({
623 'access_token': token_response
['access_token'],
624 'token_type': token_response
['token_type'],
625 'expiry': time_seconds(
626 seconds
=traverse_obj(token_response
, ('expires_in', {float_or_none}
), default
=300) - 10),
628 refresh_token
= traverse_obj(token_response
, ('refresh_token', {str}
))
630 self
.cache
.store(self
._NETRC
_MACHINE
, self
._oauth
_cache
_key
, refresh_token
)
631 YoutubeBaseInfoExtractor
._OAUTH
_ACCESS
_TOKEN
_CACHE
[self
._OAUTH
_PROFILE
]['refresh_token'] = refresh_token
633 def _initialize_oauth(self
, user
, refresh_token
):
634 self
._OAUTH
_PROFILE
= user
or 'default'
636 if self
._OAUTH
_PROFILE
in YoutubeBaseInfoExtractor
._OAUTH
_ACCESS
_TOKEN
_CACHE
:
637 self
.write_debug(f
'{self._OAUTH_DISPLAY_ID}: Using cached access token for profile "{self._OAUTH_PROFILE}"')
640 YoutubeBaseInfoExtractor
._OAUTH
_ACCESS
_TOKEN
_CACHE
[self
._OAUTH
_PROFILE
] = {}
643 msg
= f
'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token'
644 if self
.get_param('cachedir') is not False:
645 msg
+= ' and caching token to disk; you should supply an empty password next time'
647 self
.cache
.store(self
._NETRC
_MACHINE
, self
._oauth
_cache
_key
, refresh_token
)
649 refresh_token
= self
.cache
.load(self
._NETRC
_MACHINE
, self
._oauth
_cache
_key
)
652 YoutubeBaseInfoExtractor
._OAUTH
_ACCESS
_TOKEN
_CACHE
[self
._OAUTH
_PROFILE
]['refresh_token'] = refresh_token
654 token_response
= self
._refresh
_token
(refresh_token
)
655 except ExtractorError
as e
:
656 error_msg
= str(e
.orig_msg
).replace('Failed to refresh access token: ', '')
657 self
.report_warning(f
'{self._OAUTH_DISPLAY_ID}: Failed to refresh access token: {error_msg}')
658 token_response
= self
._oauth
_authorize
660 token_response
= self
._oauth
_authorize
662 self
._set
_oauth
_info
(token_response
)
663 self
.write_debug(f
'{self._OAUTH_DISPLAY_ID}: Logged in using profile "{self._OAUTH_PROFILE}"')
665 def _refresh_token(self
, refresh_token
):
667 token_response
= self
._download
_json
(
668 self
._OAUTH
_TOKEN
_ENDPOINT
,
669 video_id
=self
._OAUTH
_DISPLAY
_ID
,
670 note
='Refreshing access token',
672 'client_id': self
._OAUTH
_CLIENT
_ID
,
673 'client_secret': self
._OAUTH
_CLIENT
_SECRET
,
674 'refresh_token': refresh_token
,
675 'grant_type': 'refresh_token',
677 headers
={'Content-Type': 'application/json'})
678 except ExtractorError
as e
:
679 if isinstance(e
.cause
, HTTPError
):
680 error
= self
._read
_oauth
_error
_response
(e
.cause
.response
)
681 if error
== 'invalid_grant':
683 raise ExtractorError(
684 'Failed to refresh access token: Refresh token is invalid, revoked, or expired (invalid_grant)',
685 expected
=True, video_id
=self
._OAUTH
_DISPLAY
_ID
)
686 raise ExtractorError(
687 f
'Failed to refresh access token: Authorization server returned error {error}',
688 video_id
=self
._OAUTH
_DISPLAY
_ID
)
690 return token_response
693 def _oauth_authorize(self
):
694 code_response
= self
._download
_json
(
695 self
._OAUTH
_DEVICE
_AUTHORIZATION
_ENDPOINT
,
696 video_id
=self
._OAUTH
_DISPLAY
_ID
,
697 note
='Initializing authorization flow',
699 'client_id': self
._OAUTH
_CLIENT
_ID
,
700 'scope': self
._OAUTH
_SCOPE
,
702 headers
={'Content-Type': 'application/json'})
704 verification_url
= traverse_obj(code_response
, ('verification_url', {str}
))
705 user_code
= traverse_obj(code_response
, ('user_code', {str}
))
706 if not verification_url
or not user_code
:
707 raise ExtractorError(
708 'Authorization server did not provide verification_url or user_code', video_id
=self
._OAUTH
_DISPLAY
_ID
)
710 # note: The whitespace is intentional
712 f
'{self._OAUTH_DISPLAY_ID}: To give yt-dlp access to your account, '
713 f
'go to {verification_url} and enter code {user_code}')
715 # RFC8628 § 3.5: default poll interval is 5 seconds if not provided
716 poll_interval
= traverse_obj(code_response
, ('interval', {int}
), default
=5)
718 for retry
in self
.RetryManager():
721 token_response
= self
._download
_json
(
722 self
._OAUTH
_TOKEN
_ENDPOINT
,
723 video_id
=self
._OAUTH
_DISPLAY
_ID
,
725 errnote
='Failed to request access token',
727 'client_id': self
._OAUTH
_CLIENT
_ID
,
728 'client_secret': self
._OAUTH
_CLIENT
_SECRET
,
729 'device_code': code_response
['device_code'],
730 'grant_type': 'urn:ietf:params:oauth:grant-type:device_code',
732 headers
={'Content-Type': 'application/json'})
733 except ExtractorError
as e
:
734 if isinstance(e
.cause
, TransportError
):
737 elif isinstance(e
.cause
, HTTPError
):
738 error
= self
._read
_oauth
_error
_response
(e
.cause
.response
)
743 if error
== 'authorization_pending':
744 time
.sleep(poll_interval
)
746 elif error
== 'expired_token':
747 raise ExtractorError(
748 'Authorization timed out', expected
=True, video_id
=self
._OAUTH
_DISPLAY
_ID
)
749 elif error
== 'access_denied':
750 raise ExtractorError(
751 'You denied access to an account', expected
=True, video_id
=self
._OAUTH
_DISPLAY
_ID
)
752 elif error
== 'slow_down':
753 # RFC8628 § 3.5: add 5 seconds to the poll interval
755 time
.sleep(poll_interval
)
758 raise ExtractorError(
759 f
'Authorization server returned an error when fetching access token: {error}',
760 video_id
=self
._OAUTH
_DISPLAY
_ID
)
763 return token_response
765 def _update_oauth(self
):
766 token
= YoutubeBaseInfoExtractor
._OAUTH
_ACCESS
_TOKEN
_CACHE
.get(self
._OAUTH
_PROFILE
)
767 if token
is None or token
['expiry'] > time
.time():
770 self
._set
_oauth
_info
(self
._refresh
_token
(token
['refresh_token']))
773 def _youtube_login_hint(self
):
774 return ('Use --username=oauth[+PROFILE] --password="" to log in using oauth, '
775 f
'or else u{self._login_hint(method="cookies")[1:]}. '
776 'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth for more on how to use oauth. '
777 'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for help with cookies')
779 def _check_login_required(self
):
780 if self
._LOGIN
_REQUIRED
and not self
.is_authenticated
:
781 self
.raise_login_required(
782 f
'Login details are needed to download this content. {self._youtube_login_hint}', method
=None)
784 _YT_INITIAL_DATA_RE
= r
'(?:window\s*\[\s*["\']ytInitialData
["\']\s*\]|ytInitialData)\s*='
785 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
    """Return an independent deep copy of the `INNERTUBE_CLIENTS` entry for `client`.

    Copying keeps callers from mutating the shared module-level table.
    Raises KeyError if `client` is not a key of `INNERTUBE_CLIENTS`.
    """
    client_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_config)
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value from the `INNERTUBE_CLIENTS` entry for `client`."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """`try_get` on `ytcfg`, falling back to the default ytcfg of `default_client`.

    The fallback is used whenever the lookup on `ytcfg` gives a falsy result.
    The default config is only built in that case.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
798 def _extract_client_name(self, ytcfg, default_client='web'):
799 return self._ytcfg_get_safe(
800 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
801 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
803 def _extract_client_version(self, ytcfg, default_client='web'):
804 return self._ytcfg_get_safe(
805 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
806 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Choose the API hostname, in order of precedence.

    1. The 'innertube_host' configuration argument, if non-empty
    2. `req_api_hostname`, if truthy
    3. The innertube host of `default_client` ('web' when not given)
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
812 def _extract_context(self, ytcfg=None, default_client='web'):
814 (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
815 # Enforce language and tz for extraction
816 client_context = traverse_obj(context, 'client', expected_type=dict, default={})
817 client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
822 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
823 time_now = round(time.time())
824 if self._SAPISID is None:
825 yt_cookies = self._get_cookies('https://www.youtube.com')
826 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
827 # See: https://github.com/yt-dlp/yt-dlp/issues/393
828 sapisid_cookie = dict_get(
829 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
830 if sapisid_cookie and sapisid_cookie.value:
831 self._SAPISID = sapisid_cookie.value
832 self.write_debug('Extracted SAPISID cookie')
833 # SAPISID cookie is required if not already present
834 if not yt_cookies.get('SAPISID'):
835 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
837 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
839 self._SAPISID = False
840 if not self._SAPISID:
842 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
843 sapisidhash = hashlib.sha1(
844 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
845 return f'SAPISIDHASH {time_now}_{sapisidhash}'
847 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
848 note='Downloading API JSON', errnote='Unable to download API page',
849 context=None, api_key=None, api_hostname=None, default_client='web'):
851 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
853 real_headers = self.generate_api_headers(default_client=default_client)
854 real_headers.update({'content-type': 'application/json'})
856 real_headers.update(headers)
857 return self._download_json(
858 f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
859 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
860 data=json.dumps(data).encode('utf8'), headers=real_headers,
862 'key': self._configuration_arg(
863 'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0],
864 'prettyPrint': 'false',
865 }, cndn=lambda _, v: v))
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows a `_YT_INITIAL_DATA_RE` match.

    Thin wrapper around `_search_json`; `fatal` is forwarded unchanged.
    """
    start_pattern = self._YT_INITIAL_DATA_RE
    return self._search_json(start_pattern, webpage, 'yt initial data', item_id, fatal=fatal)
871 def _extract_session_index(*data):
873 Index of current account in account list.
874 See: https://github.com/yt-dlp/yt-dlp/pull/519
877 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
878 if session_index is not None:
881 def _data_sync_id_to_delegated_session_id(self, data_sync_id):
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
886 channel_syncid, _, user_syncid = data_sync_id.partition('||')
888 return channel_syncid
890 def _extract_account_syncid(self, *args):
892 Extract current session ID required to download private playlists of secondary channels
893 @params response and/or ytcfg
895 # ytcfg includes channel_syncid if on secondary channel
896 if delegated_sid := traverse_obj(args, (..., 'DELEGATED_SESSION_ID', {str}, any)):
899 data_sync_id = self._extract_data_sync_id(*args)
900 return self._data_sync_id_to_delegated_session_id(data_sync_id)
902 def _extract_data_sync_id(self, *args):
904 Extract current account dataSyncId.
905 In the format DELEGATED_SESSION_ID||USER_SESSION_ID or USER_SESSION_ID||
906 @params response and/or ytcfg
908 if data_sync_id := self._configuration_arg('data_sync_id', [None], ie_key=YoutubeIE, casesense=True)[0]:
912 args, (..., ('DATASYNC_ID', ('responseContext', 'mainAppWebResponseContext', 'datasyncId')), {str}, any))
914 def _extract_visitor_data(self, *args):
916 Extracts visitorData from an API response or ytcfg
917 Appears to be used to track session state
919 if visitor_data := self._configuration_arg('visitor_data', [None], ie_key=YoutubeIE, casesense=True)[0]:
922 args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))],
@functools.cached_property
def is_authenticated(self):
    """Truthy when an OAuth profile is set or a SAPISIDHASH header can be generated.

    Note that a set `_OAUTH_PROFILE` is returned as-is (not coerced to bool);
    otherwise the result is a real bool.
    """
    oauth_profile = self._OAUTH_PROFILE
    if oauth_profile:
        return oauth_profile
    return bool(self._generate_sapisidhash_header())
929 def extract_ytcfg(self, video_id, webpage):
932 return self._parse_json(
934 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
935 default='{}'), video_id, fatal=False) or {}
937 def _generate_oauth_headers(self):
939 oauth_token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE)
944 'Authorization': f'{oauth_token["token_type
"]} {oauth_token["access_token
"]}',
947 def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs):
949 account_syncid = account_syncid or self._extract_account_syncid(ytcfg)
951 headers['X-Goog-PageId'] = account_syncid
952 if session_index is None:
953 session_index = self._extract_session_index(ytcfg)
954 if account_syncid or session_index is not None:
955 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
957 auth = self._generate_sapisidhash_header(origin)
959 headers['Authorization'] = auth
960 headers['X-Origin'] = origin
964 def generate_api_headers(
965 self, *, ytcfg=None, account_syncid=None, session_index=None,
966 visitor_data=None, api_hostname=None, default_client='web', **kwargs):
968 origin = 'https://' + (self._select_api_hostname(api_hostname, default_client))
970 'X-YouTube-Client-Name': str(
971 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
972 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
974 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
975 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
976 **self._generate_oauth_headers(),
977 **self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin),
979 return filter_dict(headers)
981 def _generate_webpage_headers(self):
982 return self._generate_oauth_headers()
984 def _download_ytcfg(self, client, video_id):
986 'web': 'https://www.youtube.com',
987 'web_music': 'https://music.youtube.com',
988 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
992 webpage = self._download_webpage(
993 url, video_id, fatal=False, note=f'Downloading {client.replace("_
", " ").strip()} client config',
994 headers=self._generate_webpage_headers())
995 return self.extract_ytcfg(video_id, webpage) or {}
998 def _build_api_continuation_query(continuation, ctp=None):
1000 'continuation': continuation,
1002 # TODO: Inconsistency with clickTrackingParams.
1003 # Currently we have a fixed ctp contained within context (from ytcfg)
1004 # and a ctp in root query for continuation.
1006 query['clickTracking'] = {'clickTrackingParams': ctp}
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from a renderer's nextContinuationData/reloadContinuationData

    @returns  continuation query dict, or None if no such data or no token is present
    """
    next_continuation = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    if not next_continuation:
        return None
    continuation = next_continuation.get('continuation')
    if not continuation:
        return None
    ctp = next_continuation.get('clickTrackingParams')
    return cls._build_api_continuation_query(continuation, ctp)
1023 def _extract_continuation_ep_data(cls, continuation_ep: dict):
1024 if isinstance(continuation_ep, dict):
1025 continuation = try_get(
1026 continuation_ep, lambda x: x['continuationCommand']['token'], str)
1027 if not continuation:
1029 ctp = continuation_ep.get('clickTrackingParams')
1030 return cls._build_api_continuation_query(continuation, ctp)
def _extract_continuation(cls, renderer):
    """
    Find the continuation query for a renderer

    The nextContinuationData/reloadContinuationData form is tried first, then
    the continuationItemRenderer entries among the renderer's children.
    """
    if legacy_query := cls._extract_next_continuation_data(renderer):
        return legacy_query

    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return traverse_obj(
        renderer, endpoint_path,
        get_all=False, expected_type=cls._extract_continuation_ep_data)
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the entries of data['alerts']

    Entries that are not dicts, alerts without a 'type' and alerts without
    any text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # .get() below needs a dict; skip malformed values like the outer loop does
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
1056 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
1057 errors, warnings = [], []
1058 for alert_type, alert_message in alerts:
1059 if alert_type.lower() == 'error' and fatal:
1060 errors.append([alert_type, alert_message])
1061 elif alert_message not in self._IGNORED_WARNINGS:
1062 warnings.append([alert_type, alert_message])
1064 for alert_type, alert_message in (warnings + errors[:-1]):
1065 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
1067 raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected)
1069 def _extract_and_report_alerts(self, data, *args, **kwargs):
1070 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
1072 def _extract_badges(self, badge_list: list):
1074 Extract known BadgeType's from a list of badge renderers.
1075 @returns [{'type': BadgeType}]
1078 'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
1079 'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
1080 'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
1081 'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
1082 'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
1083 'CHECK': BadgeType.VERIFIED,
1087 'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
1088 'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
1089 'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
1090 'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
1091 'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
1095 'unlisted': BadgeType.AVAILABILITY_UNLISTED,
1096 'private': BadgeType.AVAILABILITY_PRIVATE,
1097 'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
1098 'live': BadgeType.LIVE_NOW,
1099 'premium': BadgeType.AVAILABILITY_PREMIUM,
1100 'verified': BadgeType.VERIFIED,
1101 'official artist channel': BadgeType.VERIFIED,
1105 for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
1107 icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
1108 or badge_style_map.get(traverse_obj(badge, 'style'))
1111 badges.append({'type': badge_type})
1114 # fallback, won't work in some languages
1115 label = traverse_obj(
1116 badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
1117 for match, label_badge_type in label_map.items():
1118 if match in label.lower():
1119 badges.append({'type': label_badge_type})
def _has_badge(badges, badge_type):
    """Return whether any entry of badges has badge_type as its 'type'"""
    matching_badges = traverse_obj(badges, lambda _, v: v['type'] == badge_type)
    return bool(matching_badges)
1129 def _get_text(data, *path_list, max_runs=None):
1130 for path in path_list or [None]:
1134 obj = traverse_obj(data, path, default=[])
1135 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
1138 text = try_get(item, lambda x: x['simpleText'], str)
1141 runs = try_get(item, lambda x: x['runs'], list) or []
1142 if not runs and isinstance(item, list):
1145 runs = runs[:min(len(runs), max_runs or len(runs))]
1146 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str))
1150 def _get_count(self, data, *path_list):
1151 count_text = self._get_text(data, *path_list) or ''
1152 count = parse_count(count_text)
1155 self._search_regex(r'^([\d,]+)', re.sub(r'\s', '', count_text), 'count', default=None))
1159 def _extract_thumbnails(data, *path_list, final_key='thumbnails'):
1161 Extract thumbnails from thumbnails dict
1162 @param path_list: path list to level that contains 'thumbnails' key
1165 for path in path_list or [()]:
1166 for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)):
1167 thumbnail_url = url_or_none(thumbnail.get('url'))
1168 if not thumbnail_url:
1170 # Sometimes youtube gives a wrong thumbnail URL. See:
1171 # https://github.com/yt-dlp/yt-dlp/issues/233
1172 # https://github.com/ytdl-org/youtube-dl/issues/28023
1173 if 'maxresdefault' in thumbnail_url:
1174 thumbnail_url = thumbnail_url.split('?')[0]
1176 'url': thumbnail_url,
1177 'height': int_or_none(thumbnail.get('height')),
1178 'width': int_or_none(thumbnail.get('width')),
1183 def extract_relative_time(relative_time_text):
1185 Extracts a relative time from string and converts to dt object
1186 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
1189 # XXX: this could be moved to a general function in utils/_utils.py
1190 # The relative time text strings are roughly the same as what
1191 # Javascript's Intl.RelativeTimeFormat function generates.
1192 # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
1194 r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
1197 start = mobj.group('start')
1199 return datetime_from_str(start)
1201 return datetime_from_str('now-{}{}'.format(mobj.group('time'), mobj.group('unit')))
1205 def _parse_time_text(self, text):
1208 dt_ = self.extract_relative_time(text)
1210 if isinstance(dt_, dt.datetime):
1211 timestamp = calendar.timegm(dt_.timetuple())
1213 if timestamp is None:
1215 unified_timestamp(text) or unified_timestamp(
1217 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
1218 text.lower(), 'time text', default=None)))
1220 if text and timestamp is None and self._preferred_lang in (None, 'en'):
1221 self.report_warning(
1222 f'Cannot parse localized time text "{text}
"', only_once=True)
1225 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
1226 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
1227 default_client='web'):
1228 raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
1229 # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
1230 icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
1231 icd_rm = next(icd_retries)
1232 main_retries = iter(self.RetryManager())
1233 main_rm = next(main_retries)
1234 # Manual retry loop for multiple RetryManagers
1235 # The proper RetryManager MUST be advanced after an error
1236 # and its result MUST be checked if the manager is non fatal
1239 response = self._call_api(
1240 ep=ep, fatal=True, headers=headers,
1241 video_id=item_id, query=query, note=note,
1242 context=self._extract_context(ytcfg, default_client),
1243 api_hostname=api_hostname, default_client=default_client)
1244 except ExtractorError as e:
1245 if not isinstance(e.cause, network_exceptions):
1246 return self._error_or_warning(e, fatal=fatal)
1247 elif not isinstance(e.cause, HTTPError):
1252 first_bytes = e.cause.response.read(512)
1253 if not is_html(first_bytes):
1256 self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
1257 lambda x: x['error']['message'], str)
1259 self._report_alerts([('ERROR', yt_error)], fatal=False)
1260 # Downloading page may result in intermittent 5xx HTTP error
1261 # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
1262 # We also want to catch all other network exceptions since errors in later pages can be troublesome
1263 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
1264 if e.cause.status not in (403, 429):
1268 return self._error_or_warning(e, fatal=fatal)
1271 self._extract_and_report_alerts(response, only_once=True)
1272 except ExtractorError as e:
1273 # YouTube's servers may return errors we want to retry on in a 200 OK response
1274 # See: https://github.com/yt-dlp/yt-dlp/issues/839
1275 if 'unknown error' in e.msg.lower():
1279 return self._error_or_warning(e, fatal=fatal)
1280 # Youtube sometimes sends incomplete data
1281 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
1282 if not traverse_obj(response, *variadic(check_get_keys)):
1283 icd_rm.error = ExtractorError('Incomplete data received', expected=True)
1284 should_retry = next(icd_retries, None)
1285 if not should_retry:
def is_music_url(url):
    """Return whether url starts with the music.youtube.com host (scheme optional)"""
    music_host_match = re.match(r'(https?://)?music\.youtube\.com/', url)
    return music_host_match is not None
1295 def _extract_video(self, renderer):
1296 video_id = renderer.get('videoId')
1298 reel_header_renderer = traverse_obj(renderer, (
1299 'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
1300 'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))
1302 title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText')
1303 description = self._get_text(renderer, 'descriptionSnippet')
1305 duration = int_or_none(renderer.get('lengthSeconds'))
1306 if duration is None:
1307 duration = parse_duration(self._get_text(
1308 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
1309 if duration is None:
1310 # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
1311 duration = parse_duration(self._search_regex(
1312 r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
1313 traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
1314 video_id, default=None, group='duration'))
1316 channel_id = traverse_obj(
1317 renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
1318 expected_type=str, get_all=False)
1320 channel_id = traverse_obj(reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
1322 channel_id = self.ucid_or_none(channel_id)
1324 overlay_style = traverse_obj(
1325 renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
1326 get_all=False, expected_type=str)
1327 badges = self._extract_badges(traverse_obj(renderer, 'badges'))
1328 owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
1329 navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
1330 renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
1331 expected_type=str)) or ''
1332 url = f'https://www.youtube.com/watch?v={video_id}'
1333 if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
1334 url = f'https://www.youtube.com/shorts/{video_id}'
1336 time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
1337 or self._get_text(reel_header_renderer, 'timestampText') or '')
1338 scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
1341 'is_upcoming' if scheduled_timestamp is not None
1342 else 'was_live' if 'streamed' in time_text.lower()
1343 else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
1346 # videoInfo is a string like '50K views • 10 years ago'.
1347 view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
1348 view_count = (0 if 'no views' in view_count_text.lower()
1349 else self._get_count({'simpleText': view_count_text}))
1350 view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'
1352 channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
1353 or self._get_text(reel_header_renderer, 'channelTitleText'))
1355 channel_handle = traverse_obj(renderer, (
1356 'shortBylineText', 'runs', ..., 'navigationEndpoint',
1357 (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
1358 expected_type=self.handle_from_url, get_all=False)
1361 'ie_key': YoutubeIE.ie_key(),
1365 'description': description,
1366 'duration': duration,
1367 'channel_id': channel_id,
1369 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
1370 'uploader': channel,
1371 'uploader_id': channel_handle,
1372 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
1373 'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
1374 'timestamp': (self._parse_time_text(time_text)
1375 if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
1377 'release_timestamp': scheduled_timestamp,
1379 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
1380 else self._availability(
1381 is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
1382 needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
1383 needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
1384 is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
1385 view_count_field: view_count,
1386 'live_status': live_status,
1387 'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None,
1391 class YoutubeIE(YoutubeBaseInfoExtractor):
1393 _VALID_URL = r'''(?x)^
1395 (?:https?://|//) # http(s):// or protocol-independent URL
1396 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1397 (?:www\.)?deturl\.com/www\.youtube\.com|
1398 (?:www\.)?pwnyoutube\.com|
1399 (?:www\.)?hooktube\.com|
1400 (?:www\.)?yourepeat\.com|
1401 tube\.majestyc\.net|
1403 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1404 (?:.*?\#/)? # handle anchor (#/) redirect urls
1405 (?: # the various things that can precede the ID:
1406 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1407 |(?: # or the v= param in all its forms
1408 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1409 (?:\?|\#!?) # the params delimiter ? or # or #!
1410 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY)
1415 youtu\.be| # just youtu.be/xxxx
1416 vid\.plus| # or vid.plus/xxxx
1417 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1420 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1422 )? # all until now is optional -> you can pass the naked ID
1423 (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID
1424 (?(1).+)? # if we found the ID, everything can follow
1426 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1431 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1439 (?P
<url
>(?
:https?
:)?
//(?
:www\
.)?
youtube(?
:-nocookie
)?\
.com
/
1440 (?
:embed|v|p
)/[0-9A
-Za
-z_
-]{11}
.*?
)
1442 # https://wordpress.org/plugins/lazy-load-for-videos/
1444 <a\s
[^
>]*\bhref
="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1445 \s
[^
>]*\bclass
="[^"]*\blazy
-load
-youtube
''',
# Result kind declared for this extractor ('video').
# NOTE(review): how this value is consumed is not visible in this excerpt — confirm against the base class.
1447 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1450 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1451 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1452 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
# Static table of known format properties, keyed by format ID string (the
# numeric keys are called "itag" in the comments below). Each value is a dict
# using fields such as ext, width/height, acodec/vcodec, abr, fps, container,
# format_note and preference.
# NOTE(review): presumably used as per-itag defaults when building format
# entries — the consuming code is outside this excerpt; confirm.
1454 _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
# Entries that list both an audio and a video codec
1455 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1456 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1457 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1458 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1459 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1460 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1461 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1462 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1463 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1464 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1465 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1466 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1467 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1468 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1469 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1470 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1471 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1472 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
# 3D entries (format_note '3D'); every one carries preference -20
1476 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1477 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1478 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1479 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1480 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1481 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1482 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1484 # Apple HTTP Live Streaming (format_note 'HLS'); every entry carries preference -10
1485 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1486 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1487 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1488 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1489 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1490 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1491 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1492 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
# DASH video entries in mp4 with h264 (no audio codec listed)
1495 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1496 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1497 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1498 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1499 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1500 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1501 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1502 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1503 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1504 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1505 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1506 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
# DASH audio entries in m4a (container 'm4a_dash'; no video codec listed)
1509 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1510 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1511 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1512 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1513 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1514 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1515 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
# DASH video entries in webm (vp8 first, then vp9)
1518 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1519 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1520 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1521 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1522 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1523 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1524 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1525 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1526 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1527 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1528 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1529 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1530 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1531 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1532 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1533 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1534 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1535 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1536 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1537 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1538 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1539 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
# DASH audio entries in webm with vorbis
1542 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1543 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1545 # DASH webm audio with opus inside
1546 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1547 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1548 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
# Special non-numeric key; the entry only sets the protocol to 'rtmp'
1551 '_rtmp': {'protocol': 'rtmp'},
1553 # av01 video only formats sometimes served with "unknown" codecs
1554 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1555 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1556 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1557 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1558 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1559 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1560 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1561 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
# Tuple of subtitle format identifiers, in a fixed order.
# NOTE(review): presumably the formats offered for each caption track — the code that reads this is outside this excerpt; confirm.
1563 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
# Client names used by default.
# NOTE(review): presumably keys of the InnerTube client table defined near the top of the file — the selection logic is not visible here; confirm.
1564 _DEFAULT_CLIENTS = ('ios', 'mweb')
1571 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1573 'id': 'BaW_jenozKc',
1575 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1576 'channel': 'Philipp Hagemeister',
1577 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1578 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1579 'upload_date': '20121002',
1580 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1581 'categories': ['Science & Technology'],
1582 'tags': ['youtube-dl'],
1586 'availability': 'public',
1587 'playable_in_embed': True,
1588 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1589 'live_status': 'not_live',
1593 'comment_count': int,
1594 'channel_follower_count': int,
1595 'uploader': 'Philipp Hagemeister',
1596 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1597 'uploader_id': '@PhilippHagemeister',
1598 'heatmap': 'count:100',
1599 'timestamp': 1349198244,
1603 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1604 'note': 'Embed-only video (#1746)',
1606 'id': 'yZIXLfi8CZQ',
1608 'upload_date': '20120608',
1609 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1610 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1613 'skip': 'Private video',
1616 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1617 'note': 'Use the first video ID in the URL',
1619 'id': 'BaW_jenozKc',
1621 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1622 'channel': 'Philipp Hagemeister',
1623 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1624 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1625 'upload_date': '20121002',
1626 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1627 'categories': ['Science & Technology'],
1628 'tags': ['youtube-dl'],
1632 'availability': 'public',
1633 'playable_in_embed': True,
1634 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1635 'live_status': 'not_live',
1637 'comment_count': int,
1638 'channel_follower_count': int,
1639 'uploader': 'Philipp Hagemeister',
1640 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1641 'uploader_id': '@PhilippHagemeister',
1642 'heatmap': 'count:100',
1643 'timestamp': 1349198244,
1646 'skip_download': True,
1650 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1651 'note': '256k DASH audio (format 141) via DASH manifest',
1653 'id': 'a9LDPn-MO4I',
1655 'upload_date': '20121002',
1657 'title': 'UHDTV TEST 8K VIDEO.mp4',
1660 'youtube_include_dash_manifest': True,
1663 'skip': 'format 141 not served anymore',
1665 # DASH manifest with encrypted signature
1667 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1669 'id': 'IB3lcPjvWLA',
1671 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1672 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1674 'upload_date': '20131011',
1677 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1678 'playable_in_embed': True,
1679 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1681 'track': 'The Spark',
1682 'live_status': 'not_live',
1683 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1684 'channel': 'Afrojack',
1686 'availability': 'public',
1687 'categories': ['Music'],
1689 'alt_title': 'The Spark',
1690 'channel_follower_count': int,
1691 'uploader': 'Afrojack',
1692 'uploader_url': 'https://www.youtube.com/@Afrojack',
1693 'uploader_id': '@Afrojack',
1696 'youtube_include_dash_manifest': True,
1697 'format': '141/bestaudio[ext=m4a]',
1700 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1702 'note': 'Embed allowed age-gate video',
1703 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1705 'id': 'HtVdAasjOgU',
1707 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1708 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1710 'upload_date': '20140605',
1712 'categories': ['Gaming'],
1713 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1714 'availability': 'needs_auth',
1715 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1717 'channel': 'The Witcher',
1718 'live_status': 'not_live',
1720 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1721 'playable_in_embed': True,
1723 'channel_follower_count': int,
1724 'uploader': 'The Witcher',
1725 'uploader_url': 'https://www.youtube.com/@thewitcher',
1726 'uploader_id': '@thewitcher',
1727 'comment_count': int,
1728 'channel_is_verified': True,
1729 'heatmap': 'count:100',
1730 'timestamp': 1401991663,
1732 'skip': 'Age-restricted; requires authentication',
1735 'note': 'Age-gate video with embed allowed in public site',
1736 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1738 'id': 'HsUATh_Nc2U',
1740 'title': 'Godzilla 2 (Official Video)',
1741 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1742 'upload_date': '20200408',
1744 'availability': 'needs_auth',
1745 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1746 'channel': 'FlyingKitty',
1747 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1749 'categories': ['Entertainment'],
1750 'live_status': 'not_live',
1751 'tags': ['Flyingkitty', 'godzilla 2'],
1752 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1755 'playable_in_embed': True,
1756 'channel_follower_count': int,
1757 'uploader': 'FlyingKitty',
1758 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1759 'uploader_id': '@FlyingKitty900',
1760 'comment_count': int,
1761 'channel_is_verified': True,
1763 'skip': 'Age-restricted; requires authentication',
1766 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1767 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1769 'id': 'Tq92D6wQ1mg',
1770 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1772 'upload_date': '20191228',
1773 'description': 'md5:17eccca93a786d51bc67646756894066',
1776 'availability': 'needs_auth',
1777 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1779 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1780 'channel': 'Projekt Melody',
1781 'live_status': 'not_live',
1782 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1783 'playable_in_embed': True,
1784 'categories': ['Entertainment'],
1786 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1787 'comment_count': int,
1788 'channel_follower_count': int,
1789 'uploader': 'Projekt Melody',
1790 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1791 'uploader_id': '@ProjektMelody',
1792 'timestamp': 1577508724,
1794 'skip': 'Age-restricted; requires authentication',
1797 'note': 'Non-Agegated non-embeddable video',
1798 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1800 'id': 'MeJVWBSsPAY',
1802 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1803 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1804 'upload_date': '20130730',
1805 'track': 'Such mich find mich',
1807 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1809 'playable_in_embed': False,
1810 'creator': 'OOMPH!',
1811 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1813 'alt_title': 'Such mich find mich',
1815 'channel': 'Herr Lurik',
1816 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1817 'categories': ['Music'],
1818 'availability': 'public',
1819 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1820 'live_status': 'not_live',
1822 'channel_follower_count': int,
1823 'uploader': 'Herr Lurik',
1824 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1825 'uploader_id': '@HerrLurik',
1829 'note': 'Non-bypassable age-gated video',
1830 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1831 'only_matching': True,
1833 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1834 # YouTube Red ad is not captured for creator
1836 'url': '__2ABJjxzNo',
1838 'id': '__2ABJjxzNo',
1841 'upload_date': '20100430',
1842 'creator': 'deadmau5',
1843 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1844 'title': 'Deadmau5 - Some Chords (HD)',
1845 'alt_title': 'Some Chords',
1846 'availability': 'public',
1848 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1850 'live_status': 'not_live',
1851 'channel': 'deadmau5',
1852 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1854 'track': 'Some Chords',
1855 'artist': 'deadmau5',
1856 'playable_in_embed': True,
1858 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1859 'categories': ['Music'],
1860 'album': 'Some Chords',
1861 'channel_follower_count': int,
1862 'uploader': 'deadmau5',
1863 'uploader_url': 'https://www.youtube.com/@deadmau5',
1864 'uploader_id': '@deadmau5',
1866 'expected_warnings': [
1867 'DASH manifest missing',
1870 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1872 'url': 'lqQg6PlCWgI',
1874 'id': 'lqQg6PlCWgI',
1877 'upload_date': '20150827',
1878 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1879 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1881 'release_timestamp': 1343767800,
1882 'playable_in_embed': True,
1883 'categories': ['Sports'],
1884 'release_date': '20120731',
1885 'channel': 'Olympics',
1886 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1887 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1888 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1890 'availability': 'public',
1891 'live_status': 'was_live',
1893 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1894 'channel_follower_count': int,
1895 'uploader': 'Olympics',
1896 'uploader_url': 'https://www.youtube.com/@Olympics',
1897 'uploader_id': '@Olympics',
1898 'channel_is_verified': True,
1899 'timestamp': 1440707674,
1902 'skip_download': 'requires avconv',
1907 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1909 'id': '_b-2C3KPAM0',
1911 'stretched_ratio': 16 / 9.,
1913 'upload_date': '20110310',
1914 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1915 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1916 'playable_in_embed': True,
1920 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1921 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1922 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1924 'categories': ['People & Blogs'],
1926 'live_status': 'not_live',
1927 'availability': 'unlisted',
1928 'comment_count': int,
1929 'channel_follower_count': int,
1931 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1932 'uploader_id': '@AllenMeow',
1933 'timestamp': 1299776999,
1936 # url_encoded_fmt_stream_map is empty string
1938 'url': 'qEJwOuvDf7I',
1940 'id': 'qEJwOuvDf7I',
1942 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1944 'upload_date': '20150404',
1947 'skip_download': 'requires avconv',
1949 'skip': 'This live event has ended.',
1951 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1953 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1955 'id': 'FIl7x6_3R5Y',
1957 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1958 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1960 'upload_date': '20150625',
1961 'formats': 'mincount:31',
1963 'skip': 'not actual anymore',
1965 # DASH manifest with segment_list
1967 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1968 'md5': '8ce563a1d667b599d21064e982ab9e31',
1970 'id': 'CsmdDsKjzN8',
1972 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1973 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1974 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1977 'youtube_include_dash_manifest': True,
1978 'format': '135', # bestvideo
1980 'skip': 'This live event has ended.',
1983 # Multifeed videos (multiple cameras); the URL can be that of any camera
1984 # TODO: fix multifeed titles
1985 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1987 'id': 'zaPI8MvL8pg',
1988 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1989 'description': 'md5:563ccbc698b39298481ca3c571169519',
1993 'id': 'j5yGuxZ8lLU',
1995 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1996 'description': 'md5:563ccbc698b39298481ca3c571169519',
1998 'channel_follower_count': int,
1999 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
2000 'availability': 'public',
2001 'playable_in_embed': True,
2002 'upload_date': '20131105',
2003 'categories': ['Gaming'],
2004 'live_status': 'was_live',
2006 'release_timestamp': 1383701910,
2007 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
2008 'comment_count': int,
2011 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
2012 'channel': 'WiiLikeToPlay',
2014 'release_date': '20131106',
2015 'uploader': 'WiiLikeToPlay',
2016 'uploader_id': '@WLTP',
2017 'uploader_url': 'https://www.youtube.com/@WLTP',
2021 'id': 'zaPI8MvL8pg',
2023 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
2024 'availability': 'public',
2025 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
2026 'channel': 'WiiLikeToPlay',
2027 'channel_follower_count': int,
2028 'description': 'md5:563ccbc698b39298481ca3c571169519',
2033 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
2034 'release_timestamp': 1383701915,
2035 'comment_count': int,
2036 'upload_date': '20131105',
2037 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
2038 'release_date': '20131106',
2039 'playable_in_embed': True,
2040 'live_status': 'was_live',
2041 'categories': ['Gaming'],
2043 'uploader': 'WiiLikeToPlay',
2044 'uploader_id': '@WLTP',
2045 'uploader_url': 'https://www.youtube.com/@WLTP',
2049 'id': 'R7r3vfO7Hao',
2051 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
2052 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
2053 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
2055 'availability': 'public',
2056 'playable_in_embed': True,
2057 'upload_date': '20131105',
2058 'description': 'md5:563ccbc698b39298481ca3c571169519',
2059 'channel_follower_count': int,
2061 'release_date': '20131106',
2062 'comment_count': int,
2063 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
2064 'channel': 'WiiLikeToPlay',
2065 'categories': ['Gaming'],
2066 'release_timestamp': 1383701914,
2067 'live_status': 'was_live',
2071 'uploader': 'WiiLikeToPlay',
2072 'uploader_id': '@WLTP',
2073 'uploader_url': 'https://www.youtube.com/@WLTP',
2076 'params': {'skip_download': True},
2077 'skip': 'Not multifeed anymore',
2080 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
2081 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
2083 'id': 'gVfLd0zydlo',
2084 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
2086 'playlist_count': 2,
2087 'skip': 'Not multifeed anymore',
2090 'url': 'https://vid.plus/FlRa-iH7PGw',
2091 'only_matching': True,
2094 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
2095 'only_matching': True,
2098 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
2099 # Also tests cut-off URL expansion in video description (see
2100 # https://github.com/ytdl-org/youtube-dl/issues/1892,
2101 # https://github.com/ytdl-org/youtube-dl/issues/8164)
2102 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
2104 'id': 'lsguqyKfVQg',
2106 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
2107 'alt_title': 'Dark Walk',
2108 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
2110 'upload_date': '20151119',
2111 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
2112 'track': 'Dark Walk',
2113 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
2114 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
2115 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
2116 'categories': ['Film & Animation'],
2118 'live_status': 'not_live',
2119 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
2120 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
2122 'availability': 'public',
2123 'channel': 'IronSoulElf',
2124 'playable_in_embed': True,
2127 'channel_follower_count': int,
2130 'skip_download': True,
2134 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
2135 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
2136 'only_matching': True,
2139 # Video with yt:stretch=17:0
2140 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
2142 'id': 'Q39EVAstoRM',
2144 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
2145 'description': 'md5:ee18a25c350637c8faff806845bddee9',
2146 'upload_date': '20151107',
2149 'skip_download': True,
2151 'skip': 'This video does not exist.',
2154 # Video with incomplete 'yt:stretch=16:'
2155 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
2156 'only_matching': True,
2159 # Video licensed under Creative Commons
2160 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
2162 'id': 'M4gD1WSo5mA',
2164 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
2165 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
2167 'upload_date': '20150128',
2168 'license': 'Creative Commons Attribution license (reuse allowed)',
2169 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
2170 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
2173 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
2174 'channel': 'The Berkman Klein Center for Internet & Society',
2175 'availability': 'public',
2177 'categories': ['Education'],
2178 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
2179 'live_status': 'not_live',
2180 'playable_in_embed': True,
2181 'channel_follower_count': int,
2183 'uploader': 'The Berkman Klein Center for Internet & Society',
2184 'uploader_id': '@BKCHarvard',
2185 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
2186 'timestamp': 1422422076,
2189 'skip_download': True,
2193 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
2195 'id': 'eQcmzGIKrzg',
2197 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
2198 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
2200 'upload_date': '20151120',
2201 'license': 'Creative Commons Attribution license (reuse allowed)',
2202 'playable_in_embed': True,
2205 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
2207 'availability': 'public',
2208 'categories': ['News & Politics'],
2209 'channel': 'Bernie Sanders',
2210 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
2212 'live_status': 'not_live',
2213 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
2214 'comment_count': int,
2215 'channel_follower_count': int,
2217 'uploader': 'Bernie Sanders',
2218 'uploader_url': 'https://www.youtube.com/@BernieSanders',
2219 'uploader_id': '@BernieSanders',
2220 'channel_is_verified': True,
2221 'heatmap': 'count:100',
2222 'timestamp': 1447987198,
2225 'skip_download': True,
2229 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
2230 'only_matching': True,
2233 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
2234 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
2235 'only_matching': True,
2238 # Rental video preview
2239 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
2241 'id': 'uGpuVWrhIzE',
2243 'title': 'Piku - Trailer',
2244 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
2245 'upload_date': '20150811',
2246 'license': 'Standard YouTube License',
2249 'skip_download': True,
2251 'skip': 'This video is not available.',
2254 # YouTube Red video with episode data
2255 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
2257 'id': 'iqKdEhx-dD4',
2259 'title': 'Isolation - Mind Field (Ep 1)',
2260 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
2262 'upload_date': '20170118',
2263 'series': 'Mind Field',
2265 'episode_number': 1,
2266 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
2269 'availability': 'public',
2271 'channel': 'Vsauce',
2272 'episode': 'Episode 1',
2273 'categories': ['Entertainment'],
2274 'season': 'Season 1',
2275 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
2276 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
2278 'playable_in_embed': True,
2279 'live_status': 'not_live',
2280 'channel_follower_count': int,
2281 'uploader': 'Vsauce',
2282 'uploader_url': 'https://www.youtube.com/@Vsauce',
2283 'uploader_id': '@Vsauce',
2284 'comment_count': int,
2285 'channel_is_verified': True,
2286 'timestamp': 1484761047,
2289 'skip_download': True,
2291 'expected_warnings': [
2292 'Skipping DASH manifest',
2296 # The following content has been identified by the YouTube community
2297 # as inappropriate or offensive to some audiences.
2298 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2300 'id': '6SJNVb0GnPI',
2302 'title': 'Race Differences in Intelligence',
2303 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2305 'upload_date': '20140124',
2308 'skip_download': True,
2310 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
2314 'url': '1t24XAntNCY',
2315 'only_matching': True,
2318 # geo restricted to JP
2319 'url': 'sJL6WA-aGkQ',
2320 'only_matching': True,
2323 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2324 'only_matching': True,
2327 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2328 'only_matching': True,
2331 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2332 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2333 'only_matching': True,
2337 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2338 'only_matching': True,
2341 # Video with unsupported adaptive stream type formats
2342 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2344 'id': 'Z4Vy8R84T1U',
2346 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2347 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2349 'upload_date': '20130923',
2350 'formats': 'maxcount:10',
2353 'skip_download': True,
2354 'youtube_include_dash_manifest': False,
2356 'skip': 'not actual anymore',
2359 # YouTube Music auto-generated description
2360 # TODO: fix metadata extraction
2361 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2363 'id': 'MgNrAu2pzNs',
2365 'title': 'Voyeur Girl',
2366 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2367 'upload_date': '20190312',
2368 'artists': ['Stephen'],
2369 'creators': ['Stephen'],
2370 'track': 'Voyeur Girl',
2371 'album': 'it\'s too much love to know my dear',
2372 'release_date': '20190313',
2373 'alt_title': 'Voyeur Girl',
2375 'playable_in_embed': True,
2377 'categories': ['Music'],
2378 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
2379 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2380 'uploader': 'Stephen',
2381 'availability': 'public',
2383 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2385 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2387 'live_status': 'not_live',
2388 'channel_follower_count': int,
2391 'skip_download': True,
2395 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2396 'only_matching': True,
2399 # invalid -> valid video id redirection
2400 'url': 'DJztXj2GPfl',
2402 'id': 'DJztXj2GPfk',
2404 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2405 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2406 'upload_date': '20090125',
2407 'artist': 'Panjabi MC',
2408 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2409 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2412 'skip_download': True,
2414 'skip': 'Video unavailable',
2417 # empty description results in an empty string
2418 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2420 'id': 'x41yOUIvK2k',
2422 'title': 'IMG 3456',
2424 'upload_date': '20170613',
2426 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2428 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2430 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2431 'availability': 'public',
2433 'categories': ['Pets & Animals'],
2435 'playable_in_embed': True,
2436 'live_status': 'not_live',
2437 'channel': 'l\'Or Vert asbl',
2438 'channel_follower_count': int,
2439 'uploader': 'l\'Or Vert asbl',
2440 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2441 'uploader_id': '@ElevageOrVert',
2442 'timestamp': 1497343210,
2445 'skip_download': True,
2449 # with '};' inside yt initial data (see [1])
2450 # see [2] for an example with '};' inside ytInitialPlayerResponse
2451 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2452 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2453 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2455 'id': 'CHqg6qOn4no',
2457 'title': 'Part 77 Sort a list of simple types in c#',
2458 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2459 'upload_date': '20130831',
2460 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2462 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2463 'live_status': 'not_live',
2464 'categories': ['Education'],
2465 'availability': 'public',
2466 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2468 'playable_in_embed': True,
2472 'channel': 'kudvenkat',
2473 'comment_count': int,
2474 'channel_follower_count': int,
2476 'uploader': 'kudvenkat',
2477 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2478 'uploader_id': '@Csharp-video-tutorialsBlogspot',
2479 'channel_is_verified': True,
2480 'heatmap': 'count:100',
2481 'timestamp': 1377976349,
2484 'skip_download': True,
2488 # another example of '};' in ytInitialData
2489 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2490 'only_matching': True,
2493 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2494 'only_matching': True,
2497 # https://github.com/ytdl-org/youtube-dl/pull/28094
2498 'url': 'OtqTfy26tG0',
2500 'id': 'OtqTfy26tG0',
2502 'title': 'Burn Out',
2503 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2504 'upload_date': '20141120',
2505 'artist': 'The Cinematic Orchestra',
2506 'track': 'Burn Out',
2507 'album': 'Every Day',
2509 'live_status': 'not_live',
2510 'alt_title': 'Burn Out',
2514 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2515 'creator': 'The Cinematic Orchestra',
2516 'channel': 'The Cinematic Orchestra',
2517 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2518 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2519 'availability': 'public',
2520 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2521 'categories': ['Music'],
2522 'playable_in_embed': True,
2523 'channel_follower_count': int,
2524 'uploader': 'The Cinematic Orchestra',
2525 'comment_count': int,
2528 'skip_download': True,
2532 # controversial video, only works with bpctr when authenticated with cookies
2533 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2534 'only_matching': True,
2537 # controversial video, requires bpctr/contentCheckOk
2538 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2540 'id': 'SZJvDhaSDnc',
2542 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2543 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2544 'upload_date': '20140716',
2545 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2547 'categories': ['News & Politics'],
2549 'channel': 'CBS Mornings',
2550 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2551 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2553 'availability': 'needs_auth',
2554 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2556 'live_status': 'not_live',
2557 'playable_in_embed': True,
2558 'channel_follower_count': int,
2559 'uploader': 'CBS Mornings',
2560 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2561 'uploader_id': '@CBSMornings',
2562 'comment_count': int,
2563 'channel_is_verified': True,
2564 'timestamp': 1405513526,
2566 'skip': 'Age-restricted; requires authentication',
2569 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2570 'url': 'cBvYw8_A0vQ',
2572 'id': 'cBvYw8_A0vQ',
2574 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2575 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2576 'upload_date': '20201120',
2578 'categories': ['Travel & Events'],
2579 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2581 'channel': 'Walk around Japan',
2582 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2583 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
2585 'availability': 'public',
2586 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2587 'live_status': 'not_live',
2588 'playable_in_embed': True,
2589 'channel_follower_count': int,
2590 'uploader': 'Walk around Japan',
2591 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2592 'uploader_id': '@walkaroundjapan7124',
2593 'timestamp': 1605884416,
2596 'skip_download': True,
2599 # Has multiple audio streams
2600 'url': 'WaOKSUlf4TM',
2601 'only_matching': True,
2603 # Requires Premium: has format 141 when requested using YTM url
2604 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2605 'only_matching': True,
2607 # multiple subtitles with same lang_code
2608 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2609 'only_matching': True,
2611 # Force use android client fallback
2612 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2614 'id': 'YOelRv7fMxY',
2615 'title': 'DIGGING A SECRET TUNNEL Part 1',
2617 'upload_date': '20210624',
2618 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2619 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2620 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2622 'categories': ['Entertainment'],
2624 'channel': 'colinfurze',
2625 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2626 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2628 'availability': 'public',
2630 'live_status': 'not_live',
2631 'playable_in_embed': True,
2632 'channel_follower_count': int,
2634 'uploader': 'colinfurze',
2635 'uploader_url': 'https://www.youtube.com/@colinfurze',
2636 'uploader_id': '@colinfurze',
2637 'comment_count': int,
2638 'channel_is_verified': True,
2639 'heatmap': 'count:100',
2642 'format': '17', # 3gp format available on android
2643 'extractor_args': {'youtube': {'player_client': ['android']}},
2645 'skip': 'android client broken',
2648 # Skip download of additional client configs (remix client config in this case)
2649 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2650 'only_matching': True,
2652 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2656 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2657 'only_matching': True,
2659 'note': 'Storyboards',
2660 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2662 'id': '5KLPxDtMqe8',
2665 'title': 'Your Brain is Plastic',
2666 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2667 'upload_date': '20140324',
2669 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2670 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2672 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2673 'playable_in_embed': True,
2675 'availability': 'public',
2676 'channel': 'SciShow',
2677 'live_status': 'not_live',
2679 'categories': ['Education'],
2681 'channel_follower_count': int,
2683 'uploader': 'SciShow',
2684 'uploader_url': 'https://www.youtube.com/@SciShow',
2685 'uploader_id': '@SciShow',
2686 'comment_count': int,
2687 'channel_is_verified': True,
2688 'heatmap': 'count:100',
2689 'timestamp': 1395685455,
2690 }, 'params': {'format': 'mhtml', 'skip_download': True},
2692 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2693 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2695 'id': '2NUZ8W2llS4',
2697 'title': 'The NP that test your phone performance 🙂',
2698 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2699 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2700 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2704 'categories': ['Gaming'],
2706 'playable_in_embed': True,
2707 'live_status': 'not_live',
2708 'upload_date': '20220103',
2710 'availability': 'public',
2711 'channel': 'Leon Nguyen',
2712 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2713 'comment_count': int,
2714 'channel_follower_count': int,
2715 'uploader': 'Leon Nguyen',
2716 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2717 'uploader_id': '@LeonNguyen',
2718 'heatmap': 'count:100',
2719 'timestamp': 1641170939,
2722 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2723 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2725 'id': 'mzZzzBU6lrM',
2727 'title': 'I Met GeorgeNotFound In Real Life...',
2728 'description': 'md5:978296ec9783a031738b684d4ebf302d',
2729 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2730 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2734 'categories': ['Entertainment'],
2736 'playable_in_embed': True,
2737 'live_status': 'not_live',
2738 'release_timestamp': 1641172509,
2739 'release_date': '20220103',
2740 'upload_date': '20220103',
2742 'availability': 'public',
2743 'channel': 'Quackity',
2744 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2745 'channel_follower_count': int,
2746 'uploader': 'Quackity',
2747 'uploader_id': '@Quackity',
2748 'uploader_url': 'https://www.youtube.com/@Quackity',
2749 'comment_count': int,
2750 'channel_is_verified': True,
2751 'heatmap': 'count:100',
2752 'timestamp': 1641172509,
2755 { # continuous livestream.
2756 # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
2757 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
2759 'id': 'jfKfPfyJRdk',
2761 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
2763 'uploader': 'Lofi Girl',
2764 'categories': ['Music'],
2765 'concurrent_view_count': int,
2766 'playable_in_embed': True,
2767 'timestamp': 1657627949,
2768 'release_date': '20220712',
2769 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
2770 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
2772 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
2773 'release_timestamp': 1657641570,
2774 'uploader_url': 'https://www.youtube.com/@LofiGirl',
2775 'channel_follower_count': int,
2776 'channel_is_verified': True,
2777 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
2779 'live_status': 'is_live',
2781 'channel': 'Lofi Girl',
2782 'availability': 'public',
2783 'upload_date': '20220712',
2784 'uploader_id': '@LofiGirl',
2786 'params': {'skip_download': True},
2788 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2790 'id': 'tjjjtzRLHvA',
2792 'title': 'ハッシュタグ無し };if window.ytcsi',
2793 'upload_date': '20220323',
2795 'availability': 'unlisted',
2796 'channel': 'Lesmiscore',
2797 'thumbnail': r're:^https?://.*\.jpg',
2799 'categories': ['Music'],
2802 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2803 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2804 'live_status': 'not_live',
2805 'playable_in_embed': True,
2806 'channel_follower_count': int,
2809 'uploader_id': '@lesmiscore',
2810 'uploader': 'Lesmiscore',
2811 'uploader_url': 'https://www.youtube.com/@lesmiscore',
2812 'timestamp': 1648005313,
2815 # Prefer primary title+description language metadata by default
2816 # Do not prefer translated description if primary is empty
2817 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2819 'id': 'el3E4MbxRqQ',
2821 'title': 'dlp test video 2 - primary sv no desc',
2823 'channel': 'cole-dlp-test-acc',
2826 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2828 'playable_in_embed': True,
2829 'availability': 'unlisted',
2830 'thumbnail': r're:^https?://.*\.jpg',
2833 'live_status': 'not_live',
2834 'upload_date': '20220908',
2835 'categories': ['People & Blogs'],
2836 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2837 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2838 'uploader_id': '@coletdjnz',
2839 'uploader': 'cole-dlp-test-acc',
2840 'timestamp': 1662677394,
2842 'params': {'skip_download': True},
2844 # Extractor argument: prefer translated title+description
2845 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2847 'id': 'gHKT4uU8Zng',
2849 'channel': 'cole-dlp-test-acc',
2852 'live_status': 'not_live',
2853 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2854 'upload_date': '20220729',
2856 'categories': ['People & Blogs'],
2857 'thumbnail': r're:^https?://.*\.jpg',
2858 'title': 'dlp test video title translated (fr)',
2859 'availability': 'public',
2861 'description': 'dlp test video description translated (fr)',
2862 'playable_in_embed': True,
2863 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2864 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2865 'uploader_id': '@coletdjnz',
2866 'uploader': 'cole-dlp-test-acc',
2867 'timestamp': 1659073275,
2870 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2871 'expected_warnings': [r'Preferring "fr" translated fields'],
2873 'note': '6 channel audio',
2874 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2875 'only_matching': True,
2877 'note': 'Multiple HLS formats with same itag',
2878 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2880 'id': 'kX3nB4PpJko',
2882 'categories': ['Entertainment'],
2883 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2884 'live_status': 'not_live',
2886 'channel_follower_count': int,
2887 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2888 'title': 'Last To Take Hand Off Jet, Keeps It!',
2889 'channel': 'MrBeast',
2890 'playable_in_embed': True,
2892 'upload_date': '20221112',
2893 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2895 'availability': 'public',
2896 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2899 'uploader': 'MrBeast',
2900 'uploader_url': 'https://www.youtube.com/@MrBeast',
2901 'uploader_id': '@MrBeast',
2902 'comment_count': int,
2903 'channel_is_verified': True,
2904 'heatmap': 'count:100',
2906 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2908 'note': 'Audio formats with Dynamic Range Compression',
2909 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2911 'id': 'Tq92D6wQ1mg',
2913 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2914 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2915 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2916 'channel_follower_count': int,
2917 'description': 'md5:17eccca93a786d51bc67646756894066',
2918 'upload_date': '20191228',
2919 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2920 'playable_in_embed': True,
2922 'categories': ['Entertainment'],
2923 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2925 'channel': 'Projekt Melody',
2927 'availability': 'needs_auth',
2928 'comment_count': int,
2929 'live_status': 'not_live',
2931 'uploader': 'Projekt Melody',
2932 'uploader_id': '@ProjektMelody',
2933 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
2934 'timestamp': 1577508724,
2936 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2937 'skip': 'Age-restricted; requires authentication',
2940 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2942 'id': 'qVv6vCqciTM',
2945 'comment_count': int,
2946 'chapters': 'count:13',
2947 'upload_date': '20221223',
2948 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2949 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2951 'release_date': '20221223',
2952 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2953 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2955 'playable_in_embed': True,
2957 'availability': 'public',
2958 'channel_follower_count': int,
2959 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2960 'categories': ['Entertainment'],
2961 'live_status': 'was_live',
2962 'release_timestamp': 1671793345,
2963 'channel': 'さなちゃんねる',
2964 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2965 'uploader': 'さなちゃんねる',
2966 'uploader_url': 'https://www.youtube.com/@sana_natori',
2967 'uploader_id': '@sana_natori',
2968 'channel_is_verified': True,
2969 'heatmap': 'count:100',
2970 'timestamp': 1671798112,
2974 # Fallbacks when webpage and web client is unavailable
2975 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2977 'id': 'wSSmNUl9Snw',
2979 # 'categories': ['Science & Technology'],
2981 'chapters': 'count:2',
2982 'channel': 'Scott Manley',
2985 # 'availability': 'public',
2986 'channel_follower_count': int,
2987 'live_status': 'not_live',
2988 'upload_date': '20170831',
2991 'uploader_url': 'https://www.youtube.com/@scottmanley',
2992 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2993 'uploader': 'Scott Manley',
2994 'uploader_id': '@scottmanley',
2995 'title': 'The Computer Hack That Saved Apollo 14',
2996 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2997 'thumbnail': r're:^https?://.*\.webp',
2998 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2999 'playable_in_embed': True,
3000 'comment_count': int,
3001 'channel_is_verified': True,
3002 'heatmap': 'count:100',
3005 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
3011 # YouTube <object> embed
3013 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
3014 'md5': '873c81d308b979f0e23ee7e620b312a3',
3016 'id': 'msN87y-iEx0',
3018 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
3019 'upload_date': '20080526',
3020 'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
3022 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
3023 'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
3024 'playable_in_embed': True,
3025 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
3027 'comment_count': int,
3028 'channel': 'Christopher Sykes',
3029 'live_status': 'not_live',
3030 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
3031 'availability': 'public',
3034 'categories': ['Science & Technology'],
3035 'channel_follower_count': int,
3036 'uploader': 'Christopher Sykes',
3037 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
3038 'uploader_id': '@ChristopherSykesDocumentaries',
3039 'heatmap': 'count:100',
3040 'timestamp': 1211825920,
3043 'skip_download': True,
# Report whether this extractor should handle `url`; the final visible
# statement delegates to the parent class's suitable().
# NOTE(review): the first parameter is `cls`, so this is presumably a
# classmethod - no decorator is visible in this listing; confirm.
3049 def suitable(cls, url):
# Imported at function scope rather than at module level.
3050 from ..utils import parse_qs
# NOTE(review): `qs` is not assigned in the visible lines - presumably
# parse_qs(url); confirm. The [None] default keeps the [0] index safe when
# the URL has no `list` parameter. The branch body is not visible here.
3053 if qs.get('list', [None])[0]:
3055 return super().suitable(url)
def __init__(self, *args, **kwargs):
    """Initialise the base extractor, then create the empty per-instance caches.

    All positional and keyword arguments are forwarded unchanged to the
    parent initialiser.
    """
    super().__init__(*args, **kwargs)
    # Player results (or the errors they raised), keyed by a cache-id tuple.
    self._player_cache = {}
    # Downloaded player JS source, keyed by player ID.
    self._code_cache = {}
# Prepare the formats flagged `is_from_start` for download: each one gets an
# `is_live` flag and a `fragments` source built on self._live_dash_fragments.
# The two nested helpers re-download the player responses and look up a
# format's current manifest.
3062 def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
# NOTE(review): the lock's use is not visible in this listing - presumably it
# serialises refetch_manifest() calls; confirm.
3063 lock = threading.Lock()
# Time of the last (re)fetch; refetch_manifest() compares against it.
3064 start_time = time.time()
# Keep only formats flagged as downloadable from the start of the stream.
3065 formats = [f for f in formats if f.get('is_from_start')]
# Re-download the player responses and rebuild `formats`, `is_live` and
# `start_time` in the enclosing scope (hence the nonlocal declaration).
3067 def refetch_manifest(format_id, delay):
3068 nonlocal formats, start_time, is_live
# True while fewer than `delay` seconds have passed since the last fetch.
# NOTE(review): the branch body is not visible - presumably an early return.
3069 if time.time() <= start_time + delay:
3072 _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
# Collect the `videoDetails` dict from every player response.
3073 video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
3074 microformats = traverse_obj(
3075 prs, (..., 'microformat', 'playerMicroformatRenderer'),
# Only the live status and the format list of the result are used.
3077 _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
3078 is_live = live_status == 'is_live'
# Restart the freshness timer after a successful refetch.
3079 start_time = time.time()
# Look up the manifest of `format_id`, refetching first and retrying through
# self.RetryManager(fatal=False).
3081 def mpd_feed(format_id, delay):
3083 @returns (manifest_url, manifest_stream_number, is_live) or None
3085 for retry in self.RetryManager(fatal=False):
3087 refetch_manifest(format_id, delay)
# First format with a matching ID, or None when it is absent.
3089 f = next((f for f in formats if f['format_id'] == format_id), None)
# Two distinct retry errors are recorded; the conditions that select between
# them are not visible in this listing.
3092 retry.error = f'{video_id}: Video is no longer live'
3094 retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
3096 return f['manifest_url'], f['manifest_stream_number'], is_live
3100 f['is_live'] = is_live
# Bind everything except the final `ctx` argument. The last bound argument is
# False while live, otherwise a copy of the format dict.
3101 gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
3102 live_start_time, mpd_feed, not is_live and f.copy())
# Two ways of attaching fragments appear below: the partial itself together
# with the generator protocol, or a LazyList over gen({}).
# NOTE(review): the condition choosing between them is not visible -
# presumably `is_live`; confirm.
3104 f['fragments'] = gen
3105 f['protocol'] = 'http_dash_segments_generator'
3107 f['fragments'] = LazyList(gen({}))
3108 del f['is_from_start']
# Poll a live DASH format for its newest sequence number and build fragment
# entries (`url`, `fragment_count`) for every sequence not yet handed out.
#   mpd_feed: callable(format_id, delay) returning
#       (manifest_url, stream_number, is_live) or a falsy value.
#   manifestless_orig_fmt: when truthy, used as the format info instead of
#       downloading the MPD.
#   ctx: dict read for 'start' and popped for 'last_error'.
# NOTE(review): several control-flow lines (loop header, try/else, yield,
# continue/break) are not visible in this listing; the comments below describe
# only the visible statements.
3110 def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
# FETCH_SPAN: minimum seconds per polling round (see the sleep at the end).
# MAX_DURATION: 432000 s = 120 h, the window named in the warning below.
3111 FETCH_SPAN, MAX_DURATION = 5, 432000
3113 mpd_url, stream_number, is_live = None, None, True
# Prefer a start time supplied through ctx; fall back to the current time.
3116 download_start_time = ctx.get('start') or time.time()
# True when the stream began more than MAX_DURATION before the download start.
# A missing live_start_time makes the difference 0, i.e. False.
3118 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
3119 if lack_early_segments:
3120 self.report_warning(bug_reports_message(
3121 'Starting download from the last 120 hours of the live stream since '
3122 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
# Redundant: the flag is already True inside this branch.
3123 lack_early_segments = True
# NOTE(review): `begin_index` is not assigned in the visible lines.
# known_idx: next sequence index to hand out; no_fragment_score: counter raised
# on unproductive rounds; last_segment_url: most recently built fragment URL.
3125 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
3126 fragments, fragment_base_url = None, None
# Refresh manifest state through mpd_feed and return
# (should_continue, last sequence number). `last_seq` is read from the
# enclosing scope on the early-return paths.
3128 def _extract_sequence_from_mpd(refresh_sequence, immediate):
3129 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
3130 # Obtain from MPD's maximum seq value
3131 old_mpd_url = mpd_url
3132 last_error = ctx.pop('last_error', None)
# Use the short delay when asked to, or when the last error was an HTTP 403.
3133 expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
# A falsy mpd_feed result keeps the previous URL and stream number and marks
# the stream as no longer live.
3134 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
3135 or (mpd_url, stream_number, False))
3136 if not refresh_sequence:
3137 if expire_fast and not is_live:
3138 return False, last_seq
# Manifest URL unchanged: nothing to re-parse.
3139 elif old_mpd_url == mpd_url:
3140 return True, last_seq
3141 if manifestless_orig_fmt:
3142 fmt_info = manifestless_orig_fmt
3145 fmts, _ = self._extract_mpd_formats_and_subtitles(
3146 mpd_url, None, note=False, errnote=False, fatal=False)
3147 except ExtractorError:
3150 no_fragment_score += 2
3151 return False, last_seq
# next() has no default here, so a missing stream number raises StopIteration.
3152 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
3153 fragments = fmt_info['fragments']
3154 fragment_base_url = fmt_info['fragment_base_url']
3155 assert fragment_base_url
# The newest sequence number is the integer after `sq/` in the last
# fragment's path.
3157 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
3158 return True, _last_seq
3160 self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
# Start of one polling round; fetch_time feeds the sleep at the bottom.
3162 fetch_time = time.time()
# Threshold on accumulated unproductive rounds (branch body not visible).
3163 if no_fragment_score > 30:
3165 if last_segment_url:
3166 # Obtain from "X-Head-Seqnum" header value from each segment
3168 urlh = self._request_webpage(
3169 last_segment_url, None, note=False, errnote=False, fatal=False)
3170 except ExtractorError:
3172 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
# No usable header: penalise and forget the segment URL.
3173 if last_seq is None:
3174 no_fragment_score += 2
3175 last_segment_url = None
# Obtain the sequence from the MPD instead; `immediate` is set once the score
# exceeds 15.
3178 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
3179 no_fragment_score += 2
3180 if not should_continue:
# Already past the reported head: drop the segment URL.
3183 if known_idx > last_seq:
3184 last_segment_url = None
3189 if begin_index < 0 and known_idx < 0:
3190 # skip from the start when it's negative value
3191 known_idx = last_seq + begin_index
# Do not start further back than MAX_DURATION worth of fragments, measured
# with the last fragment's duration.
3192 if lack_early_segments:
3193 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
3195 for idx in range(known_idx, last_seq):
3196 # do not refresh the sequence here, otherwise some fragments would be skipped
3197 should_continue, _ = _extract_sequence_from_mpd(False, False)
3198 if not should_continue:
# The exception is used as control flow; it is caught by the
# `except ExtractorError` below.
3200 raise ExtractorError('breaking out of outer loop')
3201 last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
# Entries of the fragment dict (the statement that emits it is not visible).
3203 'url': last_segment_url,
3204 'fragment_count': last_seq,
# No new sequence this round: raise the score; the reset to 0 below belongs to
# the other branch.
3206 if known_idx == last_seq:
3207 no_fragment_score += 5
3209 no_fragment_score = 0
3210 known_idx = last_seq
3211 except ExtractorError:
3214 if manifestless_orig_fmt:
3215 # Stop at the first iteration if running for post-live manifestless;
3216 # fragment count no longer increase since it starts
# Sleep out the remainder of FETCH_SPAN seconds for this round, never a
# negative amount.
3219 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
# Find the player JS URL in the given ytcfg dicts and return it resolved
# against https://www.youtube.com.
# `webpage` is accepted but not used in the visible lines.
3221 def _extract_player_url(self, *ytcfgs, webpage=None):
# Two candidate paths are tried across all ytcfgs; get_all=False keeps the
# first string found.
3222 player_url = traverse_obj(
3223 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
3224 get_all=False, expected_type=str)
# NOTE(review): any handling of a missing player_url is not visible in this
# listing.
3227 return urljoin('https://www.youtube.com', player_url)
# Derive the player JS URL from the iframe API script: download it, extract
# the player version and build the base.js URL from that version.
# `fatal` (default False) is forwarded to both the download and the regex
# search.
3229 def _download_player_url(self, video_id, fatal=False):
3230 res = self._download_webpage(
3231 'https://www.youtube.com/iframe_api',
3232 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
# The version is 8 hex digits between `player/` and `/`; either slash may be
# preceded by a backslash (escaped form).
3234 player_version = self._search_regex(
3235 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
# NOTE(review): guards for a failed download or search are not visible in this
# listing.
3237 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
3239 def _signature_cache_id(self, example_sig):
3240 """ Return a string representation of a signature """
3241 return '.'.join(str(len(part)) for part in example_sig.split('.'))
# Return the player ID found in `player_url`: each pattern in
# cls._PLAYER_INFO_RE is searched against the URL and the match's named group
# `id` is returned. ExtractorError('Cannot identify player ...') is raised on
# failure.
# NOTE(review): takes `cls`, so presumably a classmethod - the decorator is
# not visible in this listing. The statements that stop the loop on a match
# and guard the raise are not visible either.
3244 def _extract_player_info(cls, player_url):
3245 for player_re in cls._PLAYER_INFO_RE:
3246 id_m = re.search(player_re, player_url)
3250 raise ExtractorError(f'Cannot identify player {player_url!r}')
3251 return id_m.group('id')
# Return the player JS source for `player_url`, using self._code_cache keyed
# by player ID; the download is attempted only when the ID is not cached.
# `fatal` is forwarded to _download_webpage.
3253 def _load_player(self, video_id, player_url, fatal=True):
3254 player_id = self._extract_player_info(player_url)
3255 if player_id not in self._code_cache:
3256 code = self._download_webpage(
3257 player_url, video_id, fatal=fatal,
3258 note='Downloading player ' + player_id,
3259 errnote=f'Download of {player_url} failed',
3260 headers=self._generate_webpage_headers())
# NOTE(review): a guard before the store below may exist on a line not visible
# in this listing.
3262 self._code_cache[player_id] = code
# .get() returns None instead of raising when nothing was stored.
3263 return self._code_cache.get(player_id)
# Build a callable that rearranges the characters of a signature according to
# an index list (`cache_spec`). The list is loaded from the
# 'youtube-sigfuncs' cache, or derived from the player JS and stored there.
3265 def _extract_signature_function(self, video_id, player_url, example_sig):
3266 player_id = self._extract_player_info(player_url)
3268 # Read from filesystem cache
3269 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
# The ID must be a bare file name, i.e. contain no path components.
3270 assert os.path.basename(func_id) == func_id
3272 self.write_debug(f'Extracting signature function {func_id}')
3273 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
# NOTE(review): the conditions guarding the player download and the derivation
# below are not visible in this listing.
3276 code = self._load_player(video_id, player_url)
3278 res = self._parse_sig_js(code)
# Feed a string whose characters have code points 0..len-1 through the JS
# function; ord() of each output character is then the input index it came
# from.
3279 test_string = ''.join(map(chr, range(len(example_sig))))
3280 cache_spec = [ord(c) for c in res(test_string)]
3281 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
# Replay the recorded index list on any signature `s`.
3283 return lambda s: ''.join(s[i] for i in cache_spec)
# Print Python source that reproduces the signature function `func` as a sum
# of slices/indexes of `s`; gated on the 'youtube_print_sig_code' parameter.
# NOTE(review): most branch and assignment lines of gen_sig_code are not
# visible in this listing; only the visible statements are described.
3285 def _print_sig_code(self, func, example_sig):
# The branch body is not visible here.
3286 if not self.get_param('youtube_print_sig_code'):
# Generate the source fragments for an index list `idxs`.
3289 def gen_sig_code(idxs):
# Render one slice expression. `end + step` turns the inclusive end index into
# an exclusive bound; a negative bound is rendered as an open ':'.
3290 def _genslice(start, end, step):
3291 starts = '' if start == 0 else str(start)
3292 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
3293 steps = '' if step == 1 else (':%d' % step)
3294 return f's[{starts}{ends}{steps}]'
3297 # Quell pyflakes warnings - start will be set when step is set
3298 start = '(Never used)'
# Walk consecutive index pairs (prev, i).
3299 for i, prev in zip(idxs[1:], idxs[:-1]):
3300 if step is not None:
3301 if i - prev == step:
3303 yield _genslice(start, prev, step)
# Neighbouring indices, ascending or descending.
3306 if i - prev in [-1, 1]:
3311 yield 's[%d]' % prev
3315 yield _genslice(start, i, step)
# Characters with code points 0..len-1: ord() of each output character gives
# the input index it was taken from.
3317 test_string = ''.join(map(chr, range(len(example_sig))))
3318 cache_res = func(test_string)
3319 cache_spec = [ord(c) for c in cache_res]
3320 expr_code = ' + '.join(gen_sig_code(cache_spec))
# Tuple literal of the lengths of the dot-separated parts of the example.
3321 signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.')))
3322 code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n'
3323 f' return {expr_code}\n')
3324 self.to_screen('Extracted signature function:\n' + code)
# Find the signature function in the player JS and return a Python callable
# for it. The patterns below are passed to _search_regex as one tuple of
# alternatives; each captures the function name in the named group `sig`.
3326 def _parse_sig_js(self, jscode):
3327 funcname = self._search_regex(
3328 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
3329 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
3330 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
3331 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
3332 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
3333 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
3335 r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
3336 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
3337 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
3338 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
3339 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
3340 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
3341 jscode, 'Initial JS player signature function name', group='sig')
# Extract the named function with the built-in JS interpreter.
3343 jsi = JSInterpreter(jscode)
3344 initial_function = jsi.extract_function(funcname)
# The extracted function takes its arguments as a list, hence the [s] wrapper.
3345 return lambda s: initial_function([s])
3347 def _cached(self, func, *cache_id):
3348 def inner(*args, **kwargs):
3349 if cache_id not in self._player_cache:
3351 self._player_cache[cache_id] = func(*args, **kwargs)
3352 except ExtractorError as e:
3353 self._player_cache[cache_id] = e
3354 except Exception as e:
3355 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3357 ret = self._player_cache[cache_id]
3358 if isinstance(ret, Exception):
3363 def _decrypt_signature(self, s, video_id, player_url):
3364 """Turn the encrypted s field into a working signature"""
3365 extract_sig = self._cached(
3366 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
3367 func = extract_sig(video_id, player_url, s)
3368 self._print_sig_code(func, s)
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    The n function code is obtained via ``_extract_n_function_code`` and run
    through the native JS interpreter. If the interpreter raises
    ``JSInterpreter.Exception``, the same function code is executed with
    PhantomJS instead; if PhantomJS cannot be set up, the original
    interpreter exception is re-raised.

    Raises ExtractorError when ``player_url`` is None or when the function
    code cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The compiled function is cached per player URL
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # PhantomJS could not be set up; surface the native failure instead
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        # func_code is an (argument names, function body) pair
        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
def _extract_n_function_name(self, jscode, player_url=None):
    """Find the name of the n-parameter function in the player JS.

    Three outcomes:
      * the primary regex matches a plain function call: return its name;
      * it matches an array access (``narray[idx]``): look up the array
        literal in the JS and return the element at ``idx``;
      * it does not match: warn and fall back to a generic search for a
        one-argument function whose body contains ``enhanced_except_``.
    """
    # Examples (with placeholders nfunc, narray, idx):
    # *  .get("n"))&&(b=nfunc(b)
    # *  .get("n"))&&(b=narray[idx](b)
    # *  b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
    # *  a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
    # *  a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
    # *  a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
    funcname, idx = self._search_regex(
        r'''(?x)
        (?:
            \.get\("n"\)\)&&\(b=|
            (?:
                b=String\.fromCharCode\(110\)|
                (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
            )
            (?:
                ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
                (?:
                    get\(b\)|
                    [a-zA-Z0-9_$]+\[b\]\|\|null
                )\)&&\(c=|
            \b(?P<var>[a-zA-Z0-9_$]+)=
        )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
        (?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''',
        jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
    if not funcname:
        self.report_warning(join_nonempty(
            'Falling back to generic n function search',
            player_url and f' player = {player_url}', delim='\n'))
        return self._search_regex(
            r'''(?xs)
            ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
            \s*\{(?:(?!};).)+?["']enhanced_except_''',
            jscode, 'Initial JS player n function name', group='name')
    elif not idx:
        return funcname

    # funcname is an array variable here: parse its literal and index into it
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's n function.

    The function code is looked up in the ``youtube-nsig`` cache first. On a
    miss the player JS is loaded, the function is located and extracted, and
    the result is written back to the cache.
    """
    player_id = self._extract_player_info(player_url)
    n_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09')
    # On a cache hit the interpreter is built from the cached value, not the player JS
    jscode = n_code or self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if not n_code:
        n_func_name = self._extract_n_function_name(jscode, player_url=player_url)
        n_code = jsi.extract_function_code(n_func_name)
        self.cache.store('youtube-nsig', player_id, n_code)

    return jsi, player_id, n_code
3463 def _extract_n_function_from_code(self, jsi, func_code):
3464 func = jsi.extract_function_from_code(*func_code)
3466 def extract_nsig(s):
3469 except JSInterpreter.Exception:
3471 except Exception as e:
3472 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3474 if ret.startswith('enhanced_except_'):
3475 raise JSInterpreter.Exception('Signature function returned an exception')
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    ``ytcfg['STS']`` is used when present and truthy; otherwise the value is
    searched for in the player JS. Without a ``player_url`` this raises
    ExtractorError when ``fatal`` is set, else warns and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URLs so the video is marked as watched.

    Two URLs from ``playbackTracking`` are requested in turn:
    ``videostatsPlaybackUrl`` and then ``videostatsWatchtimeUrl`` (the
    "fully watched" one). Stops with a warning as soon as one is missing.
    The downloads themselves are non-fatal.
    """
    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))

        # # more consistent results setting it to right before the end
        # Falls back to a length of 1.5 when the URL carries no 'len' parameter
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': 0,
                'et': video_length,
            })

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False,
            headers=self._generate_webpage_headers())
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield YouTube url_results for videos referenced by a generic webpage.

    An Invidious-style ``<link rel="alternate">`` short-circuits everything
    else: its URL is yielded and extraction is stopped. Otherwise the
    parent class's embeds are yielded, followed by lazyYT embeds and the
    Wordpress "YouTube Video Importer" plugin markup.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    invidious_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if invidious_link:
        yield cls.url_result(invidious_link.group('url'), cls)
        raise cls.StopExtraction

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    lazy_ids = re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
    for raw_id in lazy_ids:
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
    for groups in importer_matches:
        # findall returns one tuple per match; the video id is the last group
        found_id = groups[-1]
        yield cls.url_result(found_id, cls, found_id)
@classmethod
def extract_id(cls, url):
    """Return the video id for *url*; raise ExtractorError if none is found."""
    if video_id := cls.get_temp_id(url):
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar in *data*.

    Start times come from ``timeRangeStartMillis`` (scaled to seconds) and
    titles from the chapter renderer's ``simpleText``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )

    def chapter_start(chapter):
        start_ms = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_ms, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from the macro-marker lists of the engagement panels.

    Each panel's marker list is tried in order; the first non-empty chapter
    list wins. Returns an empty list when no panel yields chapters.
    """
    panels_path = (
        'engagementPanels', ..., 'engagementPanelSectionListRenderer',
        'content', 'macroMarkersListRenderer', 'contents')
    content_list = traverse_obj(data, panels_path, expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters_helper(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
def _extract_heatmap(self, data):
    """Return heatmap markers as dicts with start_time, end_time and value.

    Only mutations whose marker list has type ``MARKER_TYPE_HEATMAP`` are
    used. Returns None when nothing is found.
    """
    def is_heatmap_mutation(_, mutation):
        markers_list = mutation['payload']['macroMarkersListEntity']['markersList']
        return markers_list['markerType'] == 'MARKER_TYPE_HEATMAP'

    def marker_end_seconds(marker):
        return (int(marker['startMillis']) + int(marker['durationMillis'])) / 1000

    marker_fields = {
        'start_time': ('startMillis', {float_or_none(scale=1000)}),
        'end_time': {marker_end_seconds},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap_path = (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations',
        is_heatmap_mutation,
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ...,
        marker_fields,
    )
    return traverse_obj(data, heatmap_path) or None
def _extract_comment(self, entities, parent=None):
    """Build a comment dict from entity payloads (new comment format).

    Returns None when the entities carry no comment id. ``is_favorited`` is
    None when there is no toolbar-state payload; ``timestamp`` is parsed
    from the published-time text.
    """
    payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
    comment_id = traverse_obj(payload, ('properties', 'commentId', {str}))
    if not comment_id:
        return None

    toolbar_state = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
    time_text = traverse_obj(payload, ('properties', 'publishedTime', {str})) or ''

    payload_fields = traverse_obj(payload, {
        'text': ('properties', 'content', 'content', {str}),
        'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
        'author_id': ('author', 'channelId', {self.ucid_or_none}),
        'author': ('author', 'displayName', {str}),
        'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
        'author_is_uploader': ('author', 'isCreator', {bool}),
        'author_is_verified': ('author', 'isVerified', {bool}),
        'author_url': ('author', 'channelCommand', 'innertubeCommand', (
            ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
        ), {urljoin('https://www.youtube.com')}),
    }, get_all=False)

    if toolbar_state is None:
        is_favorited = None
    else:
        is_favorited = toolbar_state.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'

    return {
        'id': comment_id,
        'parent': parent or 'root',
        **payload_fields,
        'is_favorited': is_favorited,
        '_time_text': time_text,  # FIXME: non-standard, but we need a way of showing that it is an estimate.
        'timestamp': self._parse_time_text(time_text),
    }
def _extract_comment_old(self, comment_renderer, parent=None):
    """Build a comment dict from a ``commentRenderer`` (old comment format).

    Returns None when the renderer has no ``commentId``. The optional keys
    (``author_is_uploader``, ``is_favorited``, ``author_is_verified``,
    ``is_pinned``) are only added when the renderer provides them.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    info = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
        'parent': parent or 'root',
    }

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    info['_time_text'] = time_text
    info['timestamp'] = self._parse_time_text(time_text)

    author_path = traverse_obj(comment_renderer, ('authorEndpoint', (
        ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
        expected_type=str, get_all=False)
    info['author_url'] = urljoin('https://www.youtube.com', author_path)

    is_channel_owner = traverse_obj(comment_renderer, 'authorIsChannelOwner')
    if is_channel_owner is not None:
        info['author_is_uploader'] = is_channel_owner

    action_buttons = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if action_buttons is not None:
        info['is_favorited'] = 'creatorHeart' in action_buttons

    author_badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(author_badges, BadgeType.VERIFIED):
        info['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts for a comment section or reply thread.

    Called with ``parent=None`` for the top-level comment section; it calls
    itself with ``parent`` set to a comment id to fetch that comment's
    replies. ``tracker`` is a dict of running counters and seen ids shared
    across the recursive calls; it is created on the first call.

    Limits come from the ``max_comments`` extractor argument (total,
    parents, replies, replies per thread); missing values default to
    ``sys.maxsize``.

    Raises ``self.CommentsDisabled`` if nothing was extracted for the
    top-level section and YouTube supplied a message.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the estimated comment count and returns the continuation for the
        # requested sort order (None if no header item provides one)
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen(f'Sorting comments by {sort_text.lower()}')
            break
        return _continuation

    def extract_thread(contents, entity_payloads):
        # Yields comments (and, recursively, their replies) for one page of items.
        # A bare `yield` (None) tells the caller to stop extraction.
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])

            # old comment format
            if not entity_payloads:
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment_old(comment_renderer, parent)

            # new comment format
            else:
                view_model = (
                    traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
                    or traverse_obj(content, ('commentViewModel', {dict})))
                comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
                if not comment_keys:
                    continue
                entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
                comment = self._extract_comment(entities, parent)
                if comment:
                    comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None

            if not comment:
                continue
            comment_id = comment['id']

            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by both the per-thread limit and the remaining overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': None,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
            'seen_comment_ids': set(),
            'pinned_comment_ids': set(),
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        continuation = None
        mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response only carries the header; its continuation leads to the comments
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items, mutations):
                if not entry:
                    # None is extract_thread's stop sentinel
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
3897 def _generate_comment_continuation(video_id
):
3899 Generates initial comment section continuation token from given video id
3901 token
= f
'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3902 return base64
.b64encode(token
.encode()).decode()
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction"""
    def _real_comment_extract(contents):
        # Use the first item section identified as the comment section (None if absent)
        renderer = None
        for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                renderer = item
                break
        yield from self._comment_entries(renderer, ytcfg, video_id)

    # First value of the max_comments extractor argument caps the total
    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(_real_comment_extract(contents), 0, limit)
3916 def _get_checkok_params():
3917 return {'contentCheckOk': True, 'racyCheckOk': True}
3920 def _generate_player_context(cls
, sts
=None):
3922 'html5Preference': 'HTML5_PREF_WANTS',
3925 context
['signatureTimestamp'] = sts
3927 'playbackContext': {
3928 'contentPlaybackContext': context
,
3930 **cls
._get
_checkok
_params
(),
def _get_config_po_token(self, client):
    """Return the user-configured PO Token for *client*, or None.

    Entries of the ``po_token`` extractor argument have the form
    ``client+po_token``; entries without a ``+`` are reported once and
    skipped. The first entry whose client matches wins.
    """
    for token_str in self._configuration_arg('po_token', [], ie_key=YoutubeIE, casesense=True):
        entry_client, plus, entry_token = token_str.partition('+')
        if plus:
            if entry_client == client:
                return entry_token
        else:
            self.report_warning(
                f'Invalid po_token configuration format. Expected "client+po_token", got "{token_str}"', only_once=True)
    return None
def fetch_po_token(self, client='web', visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
    """Return a PO Token for *client*, or None if one cannot be provided.

    A token configured by the user takes precedence; otherwise the request
    is passed to ``_fetch_po_token``. The missing-data checks only apply
    when a ``player_url`` is given.
    """
    # PO Token is bound to visitor_data / Visitor ID when logged out. Must have visitor_data for it to function.
    if not visitor_data and not self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Visitor Data. '
            f'You may need to pass Visitor Data with --extractor-args "youtube:visitor_data=XXX"')
        return None

    config_po_token = self._get_config_po_token(client)
    if config_po_token:
        # PO token is bound to data_sync_id / account Session ID when logged in. However, for the config po_token,
        # if using first channel in an account then we don't need the data_sync_id anymore...
        if not data_sync_id and self.is_authenticated and player_url:
            # The first literal now ends with a space so the two sentences no longer run together
            self.report_warning(
                f'Got a PO Token for {client} client, but missing Data Sync ID for account. Formats may not work. '
                f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')

        return config_po_token

    # Require PO Token if logged in for external fetching
    if not data_sync_id and self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Data Sync ID for account. '
            f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
        return None

    return self._fetch_po_token(
        client=client,
        visitor_data=visitor_data,
        data_sync_id=data_sync_id,
        player_url=player_url,
        **kwargs,
    )
3978 def _fetch_po_token(self
, client
, visitor_data
=None, data_sync_id
=None, player_url
=None, **kwargs
):
3979 """External PO Token fetch stub"""
@staticmethod
def _is_agegated(player_response):
    """Return True if the player response indicates an age gate.

    True when ``desktopLegacyAgeGateReason`` is set, or when the playability
    status or reason contains one of the known age-gate substrings.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    for expected in AGE_GATE_REASONS:
        for reason in reasons:
            if expected in reason:
                return True
    return False
@staticmethod
def _is_unplayable(player_response):
    """Return True if the playability status of the response is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token):
    """Request the player API response for *video_id* using *client*.

    The query carries the video id, optional player params (from the
    ``player_params`` extractor argument, defaulting to the client's
    ``PLAYER_PARAMS``), the PO Token when one is given, and the playback
    context. The request is fatal.
    """
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg,
        default_client=client,
        visitor_data=visitor_data,
        session_index=self._extract_session_index(master_ytcfg, player_ytcfg),
        account_syncid=(
            self._data_sync_id_to_delegated_session_id(data_sync_id)
            or self._extract_account_syncid(master_ytcfg, initial_pr, player_ytcfg)
        ),
    )

    yt_query = {
        'videoId': video_id,
    }

    default_pp = traverse_obj(
        INNERTUBE_CLIENTS, (_split_innertube_client(client)[0], 'PLAYER_PARAMS', {str}))
    if player_params := self._configuration_arg('player_params', [default_pp], casesense=True)[0]:
        yt_query['params'] = player_params

    if po_token:
        yt_query['serviceIntegrityDimensions'] = {'poToken': po_token}

    # The signature timestamp is only looked up when a player URL is available
    sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
    yt_query.update(self._generate_player_context(sts))
    return self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()),
    )
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the ``player_client`` extractor argument into a list of clients.

    ``default`` expands to ``self._DEFAULT_CLIENTS``, ``all`` to every
    client whose name does not start with ``_`` (highest priority first),
    and ``-name`` excludes a client. Unsupported names are skipped with a
    warning. For music URLs the ``*_music`` variant of each requested
    client is appended when it exists and is usable.

    Returns the clients de-duplicated in request order.
    Raises ExtractorError if nothing is left after exclusions.
    """
    requested_clients = []
    excluded_clients = []
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client == 'default':
            requested_clients.extend(self._DEFAULT_CLIENTS)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        elif client.startswith('-'):
            excluded_clients.append(client[1:])
        elif client not in allowed_clients:
            self.report_warning(f'Skipping unsupported client "{client}"')
        else:
            requested_clients.append(client)
    if not requested_clients:
        requested_clients.extend(self._DEFAULT_CLIENTS)

    # Drop every occurrence of an excluded client. list.remove() only removes the
    # first match, which left the client in place when it was requested more than
    # once (e.g. "default,all,-ios").
    requested_clients = [
        client for client in requested_clients if client not in excluded_clients]
    if not requested_clients:
        raise ExtractorError('No player clients have been requested', expected=True)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Iterate over a snapshot: the list is appended to inside the loop
        for requested_client in tuple(requested_clients):
            _, base_client, variant = _split_innertube_client(requested_client)
            music_client = f'{base_client}_music' if base_client != 'mweb' else 'web_music'
            if variant != 'music' and music_client in INNERTUBE_CLIENTS:
                if not INNERTUBE_CLIENTS[music_client]['REQUIRE_AUTH'] or self.is_authenticated:
                    requested_clients.append(music_client)

    return orderedSet(requested_clients)
def _invalid_player_response(self, pr, video_id):
    """Return the video id found in *pr* if it differs from *video_id*, else None."""
    # YouTube may return a different video player response than expected.
    # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    returned_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    return returned_id if returned_id != video_id else None
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for *video_id* from each requested client.

    Returns ``(prs, player_url)``. Responses from clients that require a PO
    Token but did not get one are placed after all other responses.
    Responses for a different video id are skipped.

    Raises ExtractorError when no usable player response was obtained.
    """
    initial_pr = None
    if webpage:
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    prs = []
    deprioritized_prs = []

    if initial_pr and not self._invalid_player_response(initial_pr, video_id):
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        prs.append({**initial_pr, 'streamingData': None})

    all_clients = set(clients)
    # Reversed so that clients.pop() below processes them in the requested order
    clients = clients[::-1]

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    tried_iframe_fallback = False
    # These are resolved once and reused for the following clients
    player_url = visitor_data = data_sync_id = None
    skipped_clients = {}
    while clients:
        deprioritize_pr = False
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = master_ytcfg if client == 'web' else {}
        if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
        data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)
        po_token = self.fetch_po_token(
            client=client, visitor_data=visitor_data,
            data_sync_id=data_sync_id if self.is_authenticated else None,
            player_url=player_url if require_js_player else None,
        )

        require_po_token = self._get_default_ytcfg(client).get('REQUIRE_PO_TOKEN')
        if not po_token and require_po_token:
            self.report_warning(
                f'No PO Token provided for {client} client, '
                f'which is required for working {client} formats. '
                f'You can manually pass a PO Token for this client with '
                f'--extractor-args "youtube:po_token={client}+XXX"',
                only_once=True)
            deprioritize_pr = True

        # The web client reuses the response embedded in the webpage when there is one
        pr = initial_pr if client == 'web' else None
        try:
            pr = pr or self._extract_player_response(
                client, video_id,
                master_ytcfg=player_ytcfg or master_ytcfg,
                player_ytcfg=player_ytcfg,
                player_url=player_url,
                initial_pr=initial_pr,
                visitor_data=visitor_data,
                data_sync_id=data_sync_id,
                po_token=po_token)
        except ExtractorError as e:
            self.report_warning(e)
            continue

        if pr_id := self._invalid_player_response(pr, video_id):
            skipped_clients[client] = pr_id
        elif pr:
            # Save client name for introspection later
            sd = traverse_obj(pr, ('streamingData', {dict})) or {}
            sd[STREAMING_DATA_CLIENT_NAME] = client
            sd[STREAMING_DATA_PO_TOKEN] = po_token
            for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                f[STREAMING_DATA_CLIENT_NAME] = client
                f[STREAMING_DATA_PO_TOKEN] = po_token
            if deprioritize_pr:
                deprioritized_prs.append(pr)
            else:
                prs.append(pr)

        # EU countries require age-verification for accounts to access age-restricted videos
        # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
        if self.is_authenticated and self._is_agegated(pr):
            self.to_screen(
                f'{video_id}: This video is age-restricted and YouTube is requiring '
                'account age-verification; some formats may be missing', only_once=True)
            # web_creator can work around the age-verification requirement
            # android_vr and mediaconnect may also be able to work around age-verification
            # tv_embedded may(?) still work around age-verification if the video is embeddable
            append_client('web_creator')

    prs.extend(deprioritized_prs)

    if skipped_clients:
        self.report_warning(
            f'Skipping player responses from {"/".join(skipped_clients)} clients '
            f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
        if not prs:
            raise ExtractorError(
                'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
    elif not prs:
        raise ExtractorError('Failed to extract any player response')
    return prs, player_url
def _needs_live_processing(self, live_status, duration):
    """Tell whether the video has to go through live-specific processing.

    Returns `live_status` unchanged (so callers can both test it for
    truthiness and compare it against 'is_live') when either
      * the video is currently live and the 'live_from_start' param is set, or
      * the video is in 'post_live' state and longer than two hours;
    otherwise returns None.

    @param live_status  Live status string, e.g. 'is_live' or 'post_live'
    @param duration     Duration in seconds; None is treated as 0
    """
    # `and` binds tighter than `or`; the two cases are named to make that explicit
    live_from_start = live_status == 'is_live' and self.get_param('live_from_start')
    long_post_live = live_status == 'post_live' and (duration or 0) > 2 * 3600
    if live_from_start or long_post_live:
        return live_status
    return None
4197 def _extract_formats_and_subtitles(self
, streaming_data
, video_id
, player_url
, live_status
, duration
):
4198 CHUNK_SIZE
= 10 << 20
4199 PREFERRED_LANG_VALUE
= 10
4200 original_language
= None
4201 itags
, stream_ids
= collections
.defaultdict(set), []
4202 itag_qualities
, res_qualities
= {}, {0: None}
4204 # Normally tiny is the smallest video-only formats. But
4205 # audio-only formats with unknown quality may get tagged as tiny
4207 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
4208 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
4210 streaming_formats
= traverse_obj(streaming_data
, (..., ('formats', 'adaptiveFormats'), ...))
4211 format_types
= self
._configuration
_arg
('formats')
4212 all_formats
= 'duplicate' in format_types
4213 if self
._configuration
_arg
('include_duplicate_formats'):
4215 self
._downloader
.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
4216 'Use formats=duplicate extractor argument instead')
4218 def build_fragments(f
):
4220 'url': update_url_query(f
['url'], {
4221 'range': f
'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}',
4223 } for range_start
in range(0, f
['filesize'], CHUNK_SIZE
))
4225 for fmt
in streaming_formats
:
4226 if fmt
.get('targetDurationSec'):
4229 itag
= str_or_none(fmt
.get('itag'))
4230 audio_track
= fmt
.get('audioTrack') or {}
4231 stream_id
= (itag
, audio_track
.get('id'), fmt
.get('isDrc'))
4233 if stream_id
in stream_ids
:
4236 quality
= fmt
.get('quality')
4237 height
= int_or_none(fmt
.get('height'))
4238 if quality
== 'tiny' or not quality
:
4239 quality
= fmt
.get('audioQuality', '').lower() or quality
4240 # The 3gp format (17) in android client has a quality of "small",
4241 # but is actually worse than other formats
4246 itag_qualities
[itag
] = quality
4248 res_qualities
[height
] = quality
4250 is_default
= audio_track
.get('audioIsDefault')
4251 is_descriptive
= 'descriptive' in (audio_track
.get('displayName') or '').lower()
4252 language_code
= audio_track
.get('id', '').split('.')[0]
4253 if language_code
and is_default
:
4254 original_language
= language_code
4256 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
4257 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
4258 # number of fragments that would subsequently be requested (with `&sq=N`)
4259 if fmt
.get('type') == 'FORMAT_STREAM_TYPE_OTF':
4262 fmt_url
= fmt
.get('url')
4264 sc
= urllib
.parse
.parse_qs(fmt
.get('signatureCipher'))
4265 fmt_url
= url_or_none(try_get(sc
, lambda x
: x
['url'][0]))
4266 encrypted_sig
= try_get(sc
, lambda x
: x
['s'][0])
4267 if not all((sc
, fmt_url
, player_url
, encrypted_sig
)):
4270 fmt_url
+= '&{}={}'.format(
4271 traverse_obj(sc
, ('sp', -1)) or 'signature',
4272 self
._decrypt
_signature
(encrypted_sig
, video_id
, player_url
),
4274 except ExtractorError
as e
:
4275 self
.report_warning('Signature extraction failed: Some formats may be missing',
4276 video_id
=video_id
, only_once
=True)
4277 self
.write_debug(e
, only_once
=True)
4280 query
= parse_qs(fmt_url
)
4283 decrypt_nsig
= self
._cached
(self
._decrypt
_nsig
, 'nsig', query
['n'][0])
4284 fmt_url
= update_url_query(fmt_url
, {
4285 'n': decrypt_nsig(query
['n'][0], video_id
, player_url
),
4287 except ExtractorError
as e
:
4289 if isinstance(e
, JSInterpreter
.Exception):
4290 phantomjs_hint
= (f
' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
4291 f
'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
4293 self
.report_warning(
4294 f
'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
4295 f
' n = {query["n"][0]} ; player = {player_url}', video_id
=video_id
, only_once
=True)
4296 self
.write_debug(e
, only_once
=True)
4298 self
.report_warning(
4299 'Cannot decrypt nsig without player_url: Some formats may be missing',
4300 video_id
=video_id
, only_once
=True)
4303 tbr
= float_or_none(fmt
.get('averageBitrate') or fmt
.get('bitrate'), 1000)
4304 format_duration
= traverse_obj(fmt
, ('approxDurationMs', {float_or_none(scale
=1000)}))
4305 # Some formats may have much smaller duration than others (possibly damaged during encoding)
4306 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
4307 # Make sure to avoid false positives with small duration differences.
4308 # E.g. __2ABJjxzNo, ySuUZEjARPY
4309 is_damaged
= try_call(lambda: format_duration
< duration
// 2)
4311 self
.report_warning(
4312 f
'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once
=True)
4314 client_name
= fmt
[STREAMING_DATA_CLIENT_NAME
]
4315 po_token
= fmt
.get(STREAMING_DATA_PO_TOKEN
)
4318 fmt_url
= update_url_query(fmt_url
, {'pot': po_token
})
4320 # Clients that require PO Token return videoplayback URLs that may return 403
4321 is_broken
= (not po_token
and self
._get
_default
_ytcfg
(client_name
).get('REQUIRE_PO_TOKEN'))
4323 self
.report_warning(
4324 f
'{video_id}: {client_name} client formats require a PO Token which was not provided. '
4325 'They will be deprioritized as they may yield HTTP Error 403', only_once
=True)
4327 name
= fmt
.get('qualityLabel') or quality
.replace('audio_quality_', '') or ''
4328 fps
= int_or_none(fmt
.get('fps')) or 0
4330 'asr': int_or_none(fmt
.get('audioSampleRate')),
4331 'filesize': int_or_none(fmt
.get('contentLength')),
4332 'format_id': f
'{itag}{"-drc" if fmt.get("isDrc") else ""}',
4333 'format_note': join_nonempty(
4334 join_nonempty(audio_track
.get('displayName'), is_default
and ' (default)', delim
=''),
4335 name
, fmt
.get('isDrc') and 'DRC',
4336 try_get(fmt
, lambda x
: x
['projectionType'].replace('RECTANGULAR', '').lower()),
4337 try_get(fmt
, lambda x
: x
['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
4338 is_damaged
and 'DAMAGED', is_broken
and 'BROKEN',
4339 (self
.get_param('verbose') or all_formats
) and short_client_name(client_name
),
4341 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
4342 'source_preference': (-5 if itag
== '22' else -1) + (100 if 'Premium' in name
else 0),
4343 'fps': fps
if fps
> 1 else None, # For some formats, fps is wrongly returned as 1
4344 'audio_channels': fmt
.get('audioChannels'),
4346 'quality': q(quality
) - bool(fmt
.get('isDrc')) / 2,
4347 'has_drm': bool(fmt
.get('drmFamilies')),
4349 'filesize_approx': filesize_from_tbr(tbr
, format_duration
),
4351 'width': int_or_none(fmt
.get('width')),
4352 'language': join_nonempty(language_code
, 'desc' if is_descriptive
else '') or None,
4353 'language_preference': PREFERRED_LANG_VALUE
if is_default
else -10 if is_descriptive
else -1,
4354 # Strictly de-prioritize broken, damaged and 3gp formats
4355 'preference': -20 if is_broken
else -10 if is_damaged
else -2 if itag
== '17' else None,
4357 mime_mobj
= re
.match(
4358 r
'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt
.get('mimeType') or '')
4360 dct
['ext'] = mimetype2ext(mime_mobj
.group(1))
4361 dct
.update(parse_codecs(mime_mobj
.group(2)))
4363 itags
[itag
].add(('https', dct
.get('language')))
4364 stream_ids
.append(stream_id
)
4365 single_stream
= 'none' in (dct
.get('acodec'), dct
.get('vcodec'))
4366 if single_stream
and dct
.get('ext'):
4367 dct
['container'] = dct
['ext'] + '_dash'
4369 if (all_formats
or 'dashy' in format_types
) and dct
['filesize']:
4372 'format_id': f
'{dct["format_id"]}-dashy' if all_formats
else dct
['format_id'],
4373 'protocol': 'http_dash_segments',
4374 'fragments': build_fragments(dct
),
4376 if all_formats
or 'dashy' not in format_types
:
4377 dct
['downloader_options'] = {'http_chunk_size': CHUNK_SIZE
}
4380 needs_live_processing
= self
._needs
_live
_processing
(live_status
, duration
)
4381 skip_bad_formats
= 'incomplete' not in format_types
4382 if self
._configuration
_arg
('include_incomplete_formats'):
4383 skip_bad_formats
= False
4384 self
._downloader
.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
4385 'Use formats=incomplete extractor argument instead')
4387 skip_manifests
= set(self
._configuration
_arg
('skip'))
4388 if (not self
.get_param('youtube_include_hls_manifest', True)
4389 or needs_live_processing
== 'is_live' # These will be filtered out by YoutubeDL anyway
4390 or needs_live_processing
and skip_bad_formats
):
4391 skip_manifests
.add('hls')
4393 if not self
.get_param('youtube_include_dash_manifest', True):
4394 skip_manifests
.add('dash')
4395 if self
._configuration
_arg
('include_live_dash'):
4396 self
._downloader
.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
4397 'Use formats=incomplete extractor argument instead')
4398 elif skip_bad_formats
and live_status
== 'is_live' and needs_live_processing
!= 'is_live':
4399 skip_manifests
.add('dash')
4401 def process_manifest_format(f
, proto
, client_name
, itag
, po_token
):
4402 key
= (proto
, f
.get('language'))
4403 if not all_formats
and key
in itags
[itag
]:
4405 itags
[itag
].add(key
)
4407 if f
.get('source_preference') is None:
4408 f
['source_preference'] = -1
4410 # Clients that require PO Token return videoplayback URLs that may return 403
4411 # hls does not currently require PO Token
4412 if (not po_token
and self
._get
_default
_ytcfg
(client_name
).get('REQUIRE_PO_TOKEN')) and proto
!= 'hls':
4413 self
.report_warning(
4414 f
'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. '
4415 'They will be deprioritized as they may yield HTTP Error 403', only_once
=True)
4416 f
['format_note'] = join_nonempty(f
.get('format_note'), 'BROKEN', delim
=' ')
4417 f
['source_preference'] -= 20
4419 if itag
and all_formats
:
4420 f
['format_id'] = f
'{itag}-{proto}'
4421 elif any(p
!= proto
for p
, _
in itags
[itag
]):
4422 f
['format_id'] = f
'{itag}-{proto}'
4424 f
['format_id'] = itag
4426 if original_language
and f
.get('language') == original_language
:
4427 f
['format_note'] = join_nonempty(f
.get('format_note'), '(default)', delim
=' ')
4428 f
['language_preference'] = PREFERRED_LANG_VALUE
4430 if itag
in ('616', '235'):
4431 f
['format_note'] = join_nonempty(f
.get('format_note'), 'Premium', delim
=' ')
4432 f
['source_preference'] += 100
4434 f
['quality'] = q(itag_qualities
.get(try_get(f
, lambda f
: f
['format_id'].split('-')[0]), -1))
4435 if f
['quality'] == -1 and f
.get('height'):
4436 f
['quality'] = q(res_qualities
[min(res_qualities
, key
=lambda x
: abs(x
- f
['height']))])
4437 if self
.get_param('verbose') or all_formats
:
4438 f
['format_note'] = join_nonempty(
4439 f
.get('format_note'), short_client_name(client_name
), delim
=', ')
4440 if f
.get('fps') and f
['fps'] <= 1:
4443 if proto
== 'hls' and f
.get('has_drm'):
4444 f
['has_drm'] = 'maybe'
4445 f
['source_preference'] -= 5
4449 for sd
in streaming_data
:
4450 client_name
= sd
[STREAMING_DATA_CLIENT_NAME
]
4451 po_token
= sd
.get(STREAMING_DATA_PO_TOKEN
)
4452 hls_manifest_url
= 'hls' not in skip_manifests
and sd
.get('hlsManifestUrl')
4453 if hls_manifest_url
:
4455 hls_manifest_url
= hls_manifest_url
.rstrip('/') + f
'/pot/{po_token}'
4456 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
4457 hls_manifest_url
, video_id
, 'mp4', fatal
=False, live
=live_status
== 'is_live')
4458 subtitles
= self
._merge
_subtitles
(subs
, subtitles
)
4460 if process_manifest_format(f
, 'hls', client_name
, self
._search
_regex
(
4461 r
'/itag/(\d+)', f
['url'], 'itag', default
=None), po_token
):
4464 dash_manifest_url
= 'dash' not in skip_manifests
and sd
.get('dashManifestUrl')
4465 if dash_manifest_url
:
4467 dash_manifest_url
= dash_manifest_url
.rstrip('/') + f
'/pot/{po_token}'
4468 formats
, subs
= self
._extract
_mpd
_formats
_and
_subtitles
(dash_manifest_url
, video_id
, fatal
=False)
4469 subtitles
= self
._merge
_subtitles
(subs
, subtitles
) # Prioritize HLS subs over DASH
4471 if process_manifest_format(f
, 'dash', client_name
, f
['format_id'], po_token
):
4472 f
['filesize'] = int_or_none(self
._search
_regex
(
4473 r
'/clen/(\d+)', f
.get('fragment_base_url') or f
['url'], 'file size', default
=None))
4474 if needs_live_processing
:
4475 f
['is_from_start'] = True
4480 def _extract_storyboard(self
, player_responses
, duration
):
4482 player_responses
, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default
='').split('|')[::-1]
4483 base_url
= url_or_none(urljoin('https://i.ytimg.com/', spec
.pop() or None))
4487 for i
, args
in enumerate(spec
):
4488 args
= args
.split('#')
4489 counts
= list(map(int_or_none
, args
[:5]))
4490 if len(args
) != 8 or not all(counts
):
4491 self
.report_warning(f
'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
4493 width
, height
, frame_count
, cols
, rows
= counts
4496 url
= base_url
.replace('$L', str(L
- i
)).replace('$N', N
) + f
'&sigh={sigh}'
4497 fragment_count
= frame_count
/ (cols
* rows
)
4498 fragment_duration
= duration
/ fragment_count
4500 'format_id': f
'sb{i}',
4501 'format_note': 'storyboard',
4503 'protocol': 'mhtml',
4509 'fps': frame_count
/ duration
,
4513 'url': url
.replace('$M', str(j
)),
4514 'duration': min(fragment_duration
, duration
- (j
* fragment_duration
)),
4515 } for j
in range(math
.ceil(fragment_count
))],
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses.

    @param url            URL passed on to _get_requested_clients
    @param smuggled_data  Smuggled data dict passed on to the client/player-response helpers
    @param video_id       Video id used for the downloads
    @param webpage_url    Watch page URL to download
    @returns              (webpage, master_ytcfg, player_responses, player_url);
                          webpage is None when 'webpage' is listed in the
                          'player_skip' extractor argument
    """
    # Bind the name up front: it is read below even when the download is skipped
    webpage = None
    if 'webpage' not in self._configuration_arg('player_skip'):
        query = {'bpctr': '9999999999', 'has_verified': '1'}
        # Optional 'player_params' extractor argument (case-sensitive), sent as `pp`
        pp = self._configuration_arg('player_params', [None], casesense=True)[0]
        if pp:
            query['pp'] = pp
        # fatal=False: a failed download must not abort extraction here
        webpage = self._download_webpage(
            webpage_url, video_id, fatal=False, query=query, headers=self._generate_webpage_headers())

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    player_responses, player_url = self._extract_player_responses(
        self._get_requested_clients(url, smuggled_data),
        video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status and collect formats and subtitles.

    @returns  (live_broadcast_details, live_status, streaming_data, formats, subtitles)
              live_status is one of 'post_live', 'is_live', 'is_upcoming',
              'was_live', 'not_live' or None when it cannot be determined
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    # Only fall back to the microformat when videoDetails gave no answer,
    # so that an explicit isLive value is not overwritten
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')
    live_status = ('post_live' if post_live
                   else 'is_live' if is_live
                   else 'is_upcoming' if is_upcoming
                   else 'was_live' if live_content
                   # An explicit False (as opposed to missing data) means "not live"
                   else 'not_live' if False in (is_live, live_content)
                   else None)
    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The last item produced is taken as the subtitles; everything before it is a format
    *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
    if all(f.get('has_drm') for f in formats):
        # If there are no formats that definitely don't have DRM, all have DRM
        for f in formats:
            f['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
4559 def _real_extract(self
, url
):
4560 url
, smuggled_data
= unsmuggle_url(url
, {})
4561 video_id
= self
._match
_id
(url
)
4563 base_url
= self
.http_scheme() + '//www.youtube.com/'
4564 webpage_url
= base_url
+ 'watch?v=' + video_id
4566 webpage
, master_ytcfg
, player_responses
, player_url
= self
._download
_player
_responses
(url
, smuggled_data
, video_id
, webpage_url
)
4568 playability_statuses
= traverse_obj(
4569 player_responses
, (..., 'playabilityStatus'), expected_type
=dict)
4571 trailer_video_id
= get_first(
4572 playability_statuses
,
4573 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
4575 if trailer_video_id
:
4576 return self
.url_result(
4577 trailer_video_id
, self
.ie_key(), trailer_video_id
)
4579 search_meta
= ((lambda x
: self
._html
_search
_meta
(x
, webpage
, default
=None))
4580 if webpage
else (lambda x
: None))
4582 video_details
= traverse_obj(player_responses
, (..., 'videoDetails'), expected_type
=dict)
4583 microformats
= traverse_obj(
4584 player_responses
, (..., 'microformat', 'playerMicroformatRenderer'),
4587 translated_title
= self
._get
_text
(microformats
, (..., 'title'))
4588 video_title
= (self
._preferred
_lang
and translated_title
4589 or get_first(video_details
, 'title') # primary
4591 or search_meta(['og:title', 'twitter:title', 'title']))
4592 translated_description
= self
._get
_text
(microformats
, (..., 'description'))
4593 original_description
= get_first(video_details
, 'shortDescription')
4594 video_description
= (
4595 self
._preferred
_lang
and translated_description
4596 # If original description is blank, it will be an empty string.
4597 # Do not prefer translated description in this case.
4598 or original_description
if original_description
is not None else translated_description
)
4600 multifeed_metadata_list
= get_first(
4602 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
4604 if multifeed_metadata_list
and not smuggled_data
.get('force_singlefeed'):
4605 if self
.get_param('noplaylist'):
4606 self
.to_screen(f
'Downloading just video {video_id} because of --no-playlist')
4610 for feed
in multifeed_metadata_list
.split(','):
4611 # Unquote should take place before split on comma (,) since textual
4612 # fields may contain comma as well (see
4613 # https://github.com/ytdl-org/youtube-dl/issues/8536)
4614 feed_data
= urllib
.parse
.parse_qs(
4615 urllib
.parse
.unquote_plus(feed
))
4617 def feed_entry(name
):
4619 feed_data
, lambda x
: x
[name
][0], str)
4621 feed_id
= feed_entry('id')
4624 feed_title
= feed_entry('title')
4627 title
+= f
' ({feed_title})'
4629 '_type': 'url_transparent',
4630 'ie_key': 'Youtube',
4632 '{}watch?v={}'.format(base_url
, feed_data
['id'][0]),
4633 {'force_singlefeed': True}),
4636 feed_ids
.append(feed_id
)
4638 'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format(
4639 ', '.join(feed_ids
), video_id
))
4640 return self
.playlist_result(
4641 entries
, video_id
, video_title
, video_description
)
4643 duration
= (int_or_none(get_first(video_details
, 'lengthSeconds'))
4644 or int_or_none(get_first(microformats
, 'lengthSeconds'))
4645 or parse_duration(search_meta('duration')) or None)
4647 live_broadcast_details
, live_status
, streaming_data
, formats
, automatic_captions
= \
4648 self
._list
_formats
(video_id
, microformats
, video_details
, player_responses
, player_url
, duration
)
4649 if live_status
== 'post_live':
4650 self
.write_debug(f
'{video_id}: Video is in Post-Live Manifestless mode')
4653 if not self
.get_param('allow_unplayable_formats') and traverse_obj(streaming_data
, (..., 'licenseInfos')):
4654 self
.report_drm(video_id
)
4656 playability_statuses
,
4657 ('errorScreen', 'playerErrorMessageRenderer'), expected_type
=dict) or {}
4658 reason
= self
._get
_text
(pemr
, 'reason') or get_first(playability_statuses
, 'reason')
4659 subreason
= clean_html(self
._get
_text
(pemr
, 'subreason') or '')
4661 if subreason
== 'The uploader has not made this video available in your country.':
4662 countries
= get_first(microformats
, 'availableCountries')
4664 regions_allowed
= search_meta('regionsAllowed')
4665 countries
= regions_allowed
.split(',') if regions_allowed
else None
4666 self
.raise_geo_restricted(subreason
, countries
, metadata_available
=True)
4667 reason
+= f
'. {subreason}'
4669 self
.raise_no_formats(reason
, expected
=True)
4671 keywords
= get_first(video_details
, 'keywords', expected_type
=list) or []
4672 if not keywords
and webpage
:
4674 unescapeHTML(m
.group('content'))
4675 for m
in re
.finditer(self
._meta
_regex
('og:video:tag'), webpage
)]
4676 for keyword
in keywords
:
4677 if keyword
.startswith('yt:stretch='):
4678 mobj
= re
.search(r
'(\d+)\s*:\s*(\d+)', keyword
)
4680 # NB: float is intentional for forcing float division
4681 w
, h
= (float(v
) for v
in mobj
.groups())
4685 if f
.get('vcodec') != 'none':
4686 f
['stretched_ratio'] = ratio
4688 thumbnails
= self
._extract
_thumbnails
((video_details
, microformats
), (..., ..., 'thumbnail'))
4689 thumbnail_url
= search_meta(['og:image', 'twitter:image'])
4692 'url': thumbnail_url
,
4694 original_thumbnails
= thumbnails
.copy()
4696 # The best-resolution thumbnails sometimes do not appear in the webpage
4697 # See: https://github.com/yt-dlp/yt-dlp/issues/340
4698 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
4700 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
4701 # in resolution, these are not the custom thumbnail. So de-prioritize them
4702 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
4703 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3',
4705 n_thumbnail_names
= len(thumbnail_names
)
4707 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
4708 video_id
=video_id
, name
=name
, ext
=ext
,
4709 webp
='_webp' if ext
== 'webp' else '', live
='_live' if live_status
== 'is_live' else ''),
4710 } for name
in thumbnail_names
for ext
in ('webp', 'jpg'))
4711 for thumb
in thumbnails
:
4712 i
= next((i
for i
, t
in enumerate(thumbnail_names
) if f
'/{video_id}/{t}' in thumb
['url']), n_thumbnail_names
)
4713 thumb
['preference'] = (0 if '.webp' in thumb
['url'] else -1) - (2 * i
)
4714 self
._remove
_duplicate
_formats
(thumbnails
)
4715 self
._downloader
._sort
_thumbnails
(original_thumbnails
)
4717 category
= get_first(microformats
, 'category') or search_meta('genre')
4718 channel_id
= self
.ucid_or_none(str_or_none(
4719 get_first(video_details
, 'channelId')
4720 or get_first(microformats
, 'externalChannelId')
4721 or search_meta('channelId')))
4722 owner_profile_url
= get_first(microformats
, 'ownerProfileUrl')
4724 live_start_time
= parse_iso8601(get_first(live_broadcast_details
, 'startTimestamp'))
4725 live_end_time
= parse_iso8601(get_first(live_broadcast_details
, 'endTimestamp'))
4726 if not duration
and live_end_time
and live_start_time
:
4727 duration
= live_end_time
- live_start_time
4729 needs_live_processing
= self
._needs
_live
_processing
(live_status
, duration
)
4731 def is_bad_format(fmt
):
4732 if needs_live_processing
and not fmt
.get('is_from_start'):
4734 elif (live_status
== 'is_live' and needs_live_processing
!= 'is_live'
4735 and fmt
.get('protocol') == 'http_dash_segments'):
4738 for fmt
in filter(is_bad_format
, formats
):
4739 fmt
['preference'] = (fmt
.get('preference') or -1) - 10
4740 fmt
['format_note'] = join_nonempty(fmt
.get('format_note'), '(Last 2 hours)', delim
=' ')
4742 if needs_live_processing
:
4743 self
._prepare
_live
_from
_start
_formats
(
4744 formats
, video_id
, live_start_time
, url
, webpage_url
, smuggled_data
, live_status
== 'is_live')
4746 formats
.extend(self
._extract
_storyboard
(player_responses
, duration
))
4748 channel_handle
= self
.handle_from_url(owner_profile_url
)
4752 'title': video_title
,
4754 'thumbnails': thumbnails
,
4755 # The best thumbnail that we are sure exists. Prevents unnecessary
4756 # URL checking if the user doesn't care about getting the best possible thumbnail
4757 'thumbnail': traverse_obj(original_thumbnails
, (-1, 'url')),
4758 'description': video_description
,
4759 'channel_id': channel_id
,
4760 'channel_url': format_field(channel_id
, None, 'https://www.youtube.com/channel/%s', default
=None),
4761 'duration': duration
,
4762 'view_count': int_or_none(
4763 get_first((video_details
, microformats
), (..., 'viewCount'))
4764 or search_meta('interactionCount')),
4765 'average_rating': float_or_none(get_first(video_details
, 'averageRating')),
4766 'age_limit': 18 if (
4767 get_first(microformats
, 'isFamilySafe') is False
4768 or search_meta('isFamilyFriendly') == 'false'
4769 or search_meta('og:restrictions:age') == '18+') else 0,
4770 'webpage_url': webpage_url
,
4771 'categories': [category
] if category
else None,
4773 'playable_in_embed': get_first(playability_statuses
, 'playableInEmbed'),
4774 'live_status': live_status
,
4775 'release_timestamp': live_start_time
,
4776 '_format_sort_fields': ( # source_preference is lower for potentially damaged formats
4777 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang', 'proto'),
4781 pctr
= traverse_obj(player_responses
, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type
=dict)
4783 def get_lang_code(track
):
4784 return (remove_start(track
.get('vssId') or '', '.').replace('.', '-')
4785 or track
.get('languageCode'))
4787 # Converted into dicts to remove duplicates
4789 get_lang_code(sub
): sub
4790 for sub
in traverse_obj(pctr
, (..., 'captionTracks', ...))}
4791 translation_languages
= {
4792 lang
.get('languageCode'): self
._get
_text
(lang
.get('languageName'), max_runs
=1)
4793 for lang
in traverse_obj(pctr
, (..., 'translationLanguages', ...))}
4795 def process_language(container
, base_url
, lang_code
, sub_name
, query
):
4796 lang_subs
= container
.setdefault(lang_code
, [])
4797 for fmt
in self
._SUBTITLE
_FORMATS
:
4803 'url': urljoin('https://www.youtube.com', update_url_query(base_url
, query
)),
4807 # NB: Constructing the full subtitle dictionary is slow
4808 get_translated_subs
= 'translated_subs' not in self
._configuration
_arg
('skip') and (
4809 self
.get_param('writeautomaticsub', False) or self
.get_param('listsubtitles'))
4810 for lang_code
, caption_track
in captions
.items():
4811 base_url
= caption_track
.get('baseUrl')
4812 orig_lang
= parse_qs(base_url
).get('lang', [None])[-1]
4815 lang_name
= self
._get
_text
(caption_track
, 'name', max_runs
=1)
4816 if caption_track
.get('kind') != 'asr':
4820 subtitles
, base_url
, lang_code
, lang_name
, {})
4821 if not caption_track
.get('isTranslatable'):
4823 for trans_code
, trans_name
in translation_languages
.items():
4826 orig_trans_code
= trans_code
4827 if caption_track
.get('kind') != 'asr' and trans_code
!= 'und':
4828 if not get_translated_subs
:
4830 trans_code
+= f
'-{lang_code}'
4831 trans_name
+= format_field(lang_name
, None, ' from %s')
4832 if lang_code
== f
'a-{orig_trans_code}':
4833 # Set audio language based on original subtitles
4835 if f
.get('acodec') != 'none' and not f
.get('language'):
4836 f
['language'] = orig_trans_code
4837 # Add an "-orig" label to the original language so that it can be distinguished.
4838 # The subs are returned without "-orig" as well for compatibility
4840 automatic_captions
, base_url
, f
'{trans_code}-orig', f
'{trans_name} (Original)', {})
4841 # Setting tlang=lang returns damaged subtitles.
4842 process_language(automatic_captions
, base_url
, trans_code
, trans_name
,
4843 {} if orig_lang
== orig_trans_code
else {'tlang': trans_code
})
4845 info
['automatic_captions'] = automatic_captions
4846 info
['subtitles'] = subtitles
4848 parsed_url
= urllib
.parse
.urlparse(url
)
4849 for component
in [parsed_url
.fragment
, parsed_url
.query
]:
4850 query
= urllib
.parse
.parse_qs(component
)
4851 for k
, v
in query
.items():
4852 for d_k
, s_ks
in [('start', ('start', 't')), ('end', ('end',))]:
4854 if d_k
not in info
and k
in s_ks
:
4855 info
[d_k
] = parse_duration(v
[0])
4857 # Youtube Music Auto-generated description
4858 if (video_description
or '').strip().endswith('\nAuto-generated by YouTube.'):
4859 # XXX: Causes catastrophic backtracking if description has "·"
4860 # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
4861 # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
4862 # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
4865 (?=(?P<track>[^\n·]+))(?P=track)·
4866 (?=(?P<artist>[^\n]+))(?P=artist)\n+
4867 (?=(?P<album>[^\n]+))(?P=album)\n
4868 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4869 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
4871 (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
4872 )?.+\nAuto-generated\ by\ YouTube\.\s*$
4873 ''', video_description
)
4875 release_year
= mobj
.group('release_year')
4876 release_date
= mobj
.group('release_date')
4878 release_date
= release_date
.replace('-', '')
4879 if not release_year
:
4880 release_year
= release_date
[:4]
4882 'album': mobj
.group('album'.strip()),
4883 'artists': ([a
] if (a
:= mobj
.group('clean_artist'))
4884 else [a
.strip() for a
in mobj
.group('artist').split('·')]),
4885 'track': mobj
.group('track').strip(),
4886 'release_date': release_date
,
4887 'release_year': int_or_none(release_year
),
4892 initial_data
= self
.extract_yt_initial_data(video_id
, webpage
, fatal
=False)
4893 if not traverse_obj(initial_data
, 'contents'):
4894 self
.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
4896 if not initial_data
:
4897 query
= {'videoId': video_id
}
4898 query
.update(self
._get
_checkok
_params
())
4899 initial_data
= self
._extract
_response
(
4900 item_id
=video_id
, ep
='next', fatal
=False,
4901 ytcfg
=master_ytcfg
, query
=query
, check_get_keys
='contents',
4902 headers
=self
.generate_api_headers(ytcfg
=master_ytcfg
),
4903 note
='Downloading initial data API JSON')
4905 COMMENTS_SECTION_IDS
= ('comment-item-section', 'engagement-panel-comments-section')
4906 info
['comment_count'] = traverse_obj(initial_data
, (
4907 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
4908 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
4910 'engagementPanels', lambda _
, v
: v
['engagementPanelSectionListRenderer']['panelIdentifier'] in COMMENTS_SECTION_IDS
,
4911 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
4912 ), expected_type
=self
._get
_count
, get_all
=False)
4914 try: # This will error if there is no livechat
4915 initial_data
['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
4916 except (KeyError, IndexError, TypeError):
4919 info
.setdefault('subtitles', {})['live_chat'] = [{
4920 # url is needed to set cookies
4921 'url': f
'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
4922 'video_id': video_id
,
4924 'protocol': ('youtube_live_chat' if live_status
in ('is_live', 'is_upcoming')
4925 else 'youtube_live_chat_replay'),
4929 info
['chapters'] = (
4930 self
._extract
_chapters
_from
_json
(initial_data
, duration
)
4931 or self
._extract
_chapters
_from
_engagement
_panel
(initial_data
, duration
)
4932 or self
._extract
_chapters
_from
_description
(video_description
, duration
)
4935 info
['heatmap'] = self
._extract
_heatmap
(initial_data
)
4937 contents
= traverse_obj(
4938 initial_data
, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4939 expected_type
=list, default
=[])
4941 vpir
= get_first(contents
, 'videoPrimaryInfoRenderer')
4943 stl
= vpir
.get('superTitleLink')
4945 stl
= self
._get
_text
(stl
)
4948 lambda x
: x
['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4949 info
['location'] = stl
4951 mobj
= re
.search(r
'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl
)
4954 'series': mobj
.group(1),
4955 'season_number': int(mobj
.group(2)),
4956 'episode_number': int(mobj
.group(3)),
4958 for tlb
in (try_get(
4960 lambda x
: x
['videoActions']['menuRenderer']['topLevelButtons'],
4964 tlb
, ('toggleButtonRenderer', ...),
4965 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
4967 for getter
, regex
in [(
4968 lambda x
: x
['defaultText']['accessibility']['accessibilityData'],
4969 r
'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4970 lambda x
: x
['accessibility'],
4971 lambda x
: x
['accessibilityData']['accessibilityData'],
4972 ], r
'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4973 label
= (try_get(tbr
, getter
, dict) or {}).get('label')
4975 mobj
= re
.match(regex
, label
)
4977 info
[mobj
.group('type') + '_count'] = str_to_int(mobj
.group('count'))
4980 info
['like_count'] = traverse_obj(vpir
, (
4981 'videoActions', 'menuRenderer', 'topLevelButtons', ...,
4982 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
4983 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
4984 'buttonViewModel', 'accessibilityText', {parse_count}
), get_all
=False)
4986 vcr
= traverse_obj(vpir
, ('viewCount', 'videoViewCountRenderer'))
4988 vc
= self
._get
_count
(vcr
, 'viewCount')
4989 # Upcoming premieres with waiting count are treated as live here
4990 if vcr
.get('isLive'):
4991 info
['concurrent_view_count'] = vc
4992 elif info
.get('view_count') is None:
4993 info
['view_count'] = vc
4995 vsir
= get_first(contents
, 'videoSecondaryInfoRenderer')
4997 vor
= traverse_obj(vsir
, ('owner', 'videoOwnerRenderer'))
4999 'channel': self
._get
_text
(vor
, 'title'),
5000 'channel_follower_count': self
._get
_count
(vor
, 'subscriberCountText')})
5002 if not channel_handle
:
5003 channel_handle
= self
.handle_from_url(
5005 ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
5006 (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
5007 {str}
), get_all
=False))
5011 lambda x
: x
['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
5013 multiple_songs
= False
5015 if try_get(row
, lambda x
: x
['metadataRowRenderer']['hasDividerLine']) is True:
5016 multiple_songs
= True
5019 mrr
= row
.get('metadataRowRenderer') or {}
5020 mrr_title
= mrr
.get('title')
5023 mrr_title
= self
._get
_text
(mrr
, 'title')
5024 mrr_contents_text
= self
._get
_text
(mrr
, ('contents', 0))
5025 if mrr_title
== 'License':
5026 info
['license'] = mrr_contents_text
5027 elif not multiple_songs
:
5028 if mrr_title
== 'Album':
5029 info
['album'] = mrr_contents_text
5030 elif mrr_title
== 'Artist':
5031 info
['artists'] = [mrr_contents_text
] if mrr_contents_text
else None
5032 elif mrr_title
== 'Song':
5033 info
['track'] = mrr_contents_text
5034 owner_badges
= self
._extract
_badges
(traverse_obj(vsir
, ('owner', 'videoOwnerRenderer', 'badges')))
5035 if self
._has
_badge
(owner_badges
, BadgeType
.VERIFIED
):
5036 info
['channel_is_verified'] = True
5039 'uploader': info
.get('channel'),
5040 'uploader_id': channel_handle
,
5041 'uploader_url': format_field(channel_handle
, None, 'https://www.youtube.com/%s', default
=None),
5044 # We only want timestamp IF it has time precision AND a timezone
5045 # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
5047 parse_iso8601(get_first(microformats
, 'uploadDate'), timezone
=NO_DEFAULT
)
5048 or parse_iso8601(search_meta('uploadDate'), timezone
=NO_DEFAULT
)
5051 dt
.datetime
.fromtimestamp(timestamp
, dt
.timezone
.utc
).strftime('%Y%m%d') if timestamp
else
5053 unified_strdate(get_first(microformats
, 'uploadDate'))
5054 or unified_strdate(search_meta('uploadDate'))
5057 # In the case we cannot get the timestamp:
5058 # The upload date for scheduled, live and past live streams / premieres in microformats
5059 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
5060 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
5061 if not upload_date
or (not timestamp
and live_status
in ('not_live', None)):
5062 # this should be in UTC, as configured in the cookie/client context
5063 upload_date
= strftime_or_none(
5064 self
._parse
_time
_text
(self
._get
_text
(vpir
, 'dateText'))) or upload_date
5066 info
['upload_date'] = upload_date
5067 info
['timestamp'] = timestamp
5069 if upload_date
and live_status
not in ('is_live', 'post_live', 'is_upcoming'):
5070 # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
5071 upload_datetime
= datetime_from_str(upload_date
).replace(tzinfo
=dt
.timezone
.utc
)
5072 if upload_datetime
>= datetime_from_str('today-2days'):
5073 for fmt
in info
['formats']:
5074 if fmt
.get('protocol') == 'm3u8_native':
5075 fmt
['__needs_testing'] = True
5077 for s_k
, d_k
in [('artists', 'creators'), ('track', 'alt_title')]:
5082 badges
= self
._extract
_badges
(traverse_obj(vpir
, 'badges'))
5084 is_private
= (self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_PRIVATE
)
5085 or get_first(video_details
, 'isPrivate', expected_type
=bool))
5087 info
['availability'] = (
5088 'public' if self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_PUBLIC
)
5089 else self
._availability
(
5090 is_private
=is_private
,
5092 self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_PREMIUM
)
5093 or False if initial_data
and is_private
is not None else None),
5094 needs_subscription
=(
5095 self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_SUBSCRIPTION
)
5096 or False if initial_data
and is_private
is not None else None),
5097 needs_auth
=info
['age_limit'] >= 18,
5098 is_unlisted
=None if is_private
is None else (
5099 self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_UNLISTED
)
5100 or get_first(microformats
, 'isUnlisted', expected_type
=bool))))
5102 info
['__post_extractor'] = self
.extract_comments(master_ytcfg
, video_id
, contents
, webpage
)
5104 self
.mark_watched(video_id
, player_responses
)
5109 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor
):
# Decorator for extractor methods called as func(self, url, smuggled_data).
# wrapper(): unsmuggles `url`, sets smuggled_data['is_music_url'] when
# self.is_music_url(url) is true, calls `func`, then runs the returned info
# dict -- and, through a lazy generator, each item of its 'entries' (each with
# its own copy of smuggled_data) -- through _smuggle().
# _smuggle(): tests the result's '_type' against 'url'/'url_transparent';
# when 'is_music_url' is set and the URL host is www.youtube.com or
# music.youtube.com, pops the flag and rewrites the host to music.youtube.com;
# assigns smuggle_url(info['url'], smuggled_data) back to info['url'].
# NOTE(review): embedded lines 5114, 5120, 5122, 5130 and 5134-5135 (branch
# bodies, guards and return statements) are absent from this extract; confirm
# the exact control flow against the full file.
5111 def passthrough_smuggled_data(func
):
5112 def _smuggle(info
, smuggled_data
):
5113 if info
.get('_type') not in ('url', 'url_transparent'):
5115 if smuggled_data
.get('is_music_url'):
5116 parsed_url
= urllib
.parse
.urlparse(info
['url'])
5117 if parsed_url
.netloc
in ('www.youtube.com', 'music.youtube.com'):
5118 smuggled_data
.pop('is_music_url')
5119 info
['url'] = urllib
.parse
.urlunparse(parsed_url
._replace
(netloc
='music.youtube.com'))
5121 info
['url'] = smuggle_url(info
['url'], smuggled_data
)
5124 @functools.wraps(func
)
5125 def wrapper(self
, url
):
5126 url
, smuggled_data
= unsmuggle_url(url
, {})
5127 if self
.is_music_url(url
):
5128 smuggled_data
['is_music_url'] = True
5129 info_dict
= func(self
, url
, smuggled_data
)
5131 _smuggle(info_dict
, smuggled_data
)
5132 if info_dict
.get('entries'):
5133 info_dict
['entries'] = (_smuggle(i
, smuggled_data
.copy()) for i
in info_dict
['entries'])
# Inspects the (key, value) pairs of `item` to locate a renderer dict.
# Visible tests, in order: the value is not a dict; the key is one of
# known_basic_renderers; the key starts with 'grid' and ends with 'Renderer'.
# NOTE(review): the branch bodies (embedded lines 5145, 5147, 5149) and the
# closing of the tuple (5142) are absent from this extract, so what each
# branch does or returns is not shown here.
5138 def _extract_basic_item_renderer(item
):
5139 # Modified from _extract_grid_item_renderer
5140 known_basic_renderers
= (
5141 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
5143 for key
, renderer
in item
.items():
5144 if not isinstance(renderer
, dict):
5146 elif key
in known_basic_renderers
:
5148 elif key
.startswith('grid') and key
.endswith('Renderer'):
# Builds result fields for a channel renderer.
# - channel_id: renderer['channelId'] passed through self.ucid_or_none
#   (a missing 'channelId' key raises KeyError, since it is indexed directly).
# - channel_url: 'https://www.youtube.com/channel/<channel_id>' or None.
# - channel_handle: taken from the navigationEndpoint web URL or
#   canonicalBaseUrl via self.handle_from_url; if that is empty, falls back to
#   self.handle_or_none on the 'subscriberCountText' text.
# - channel_follower_count: first of 'subscriberCountText'/'videoCountText'
#   that self._get_count accepts.
# - a count read from 'videoCountText' is only used when 'subscriberCountText'
#   also parses as a count (its dict key is on a line not shown here).
# NOTE(review): embedded lines 5163-5166, 5168-5169, 5172, 5180 and 5187 are
# absent from this extract (presumably the start of the returned dict and a
# few of its keys); confirm against the full file.
5151 def _extract_channel_renderer(self
, renderer
):
5152 channel_id
= self
.ucid_or_none(renderer
['channelId'])
5153 title
= self
._get
_text
(renderer
, 'title')
5154 channel_url
= format_field(channel_id
, None, 'https://www.youtube.com/channel/%s', default
=None)
5155 channel_handle
= self
.handle_from_url(
5156 traverse_obj(renderer
, (
5157 'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
5158 ('browseEndpoint', 'canonicalBaseUrl')),
5159 {str}
), get_all
=False))
5160 if not channel_handle
:
5161 # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
5162 channel_handle
= self
.handle_or_none(self
._get
_text
(renderer
, 'subscriberCountText'))
5167 'ie_key': YoutubeTabIE
.ie_key(),
5170 'channel_id': channel_id
,
5171 'channel_url': channel_url
,
5173 'uploader_id': channel_handle
,
5174 'uploader_url': format_field(channel_handle
, None, 'https://www.youtube.com/%s', default
=None),
5175 # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
5176 # However, in feed/channels this is set correctly to the subscriber count
5177 'channel_follower_count': traverse_obj(
5178 renderer
, 'subscriberCountText', 'videoCountText', expected_type
=self
._get
_count
),
5179 'thumbnails': self
._extract
_thumbnails
(renderer
, 'thumbnail'),
5181 # videoCountText may be the subscriber count
5182 self
._get
_count
(renderer
, 'videoCountText')
5183 if self
._get
_count
(renderer
, 'subscriberCountText') is not None else None),
5184 'description': self
._get
_text
(renderer
, 'descriptionSnippet'),
5185 'channel_is_verified': True if self
._has
_badge
(
5186 self
._extract
_badges
(traverse_obj(renderer
, 'ownerBadges')), BadgeType
.VERIFIED
) else None,
# Generates entries for the items of a grid-style renderer.
# Each element of grid_renderer['items'] is resolved with
# self._extract_basic_item_renderer(); from the resolved renderer the visible
# code yields:
#   - a YoutubeTab playlist URL result built from 'playlistId',
#   - self._extract_video(renderer) (next to a 'videoId' lookup),
#   - self._extract_channel_renderer(renderer) (next to a 'channelId' lookup),
#   - a URL result for the navigationEndpoint web URL, using the first of
#     YoutubeTabIE, YoutubePlaylistIE, YoutubeIE whose suitable() accepts it.
# NOTE(review): the isinstance branch bodies and the conditions guarding each
# yield (e.g. embedded lines 5192, 5195, 5200, 5208, 5213) are absent from
# this extract, so whether the yields are mutually exclusive cannot be told
# from here.
5189 def _grid_entries(self
, grid_renderer
):
5190 for item
in grid_renderer
['items']:
5191 if not isinstance(item
, dict):
5193 renderer
= self
._extract
_basic
_item
_renderer
(item
)
5194 if not isinstance(renderer
, dict):
5196 title
= self
._get
_text
(renderer
, 'title')
5199 playlist_id
= renderer
.get('playlistId')
5201 yield self
.url_result(
5202 f
'https://www.youtube.com/playlist?list={playlist_id}',
5203 ie
=YoutubeTabIE
.ie_key(), video_id
=playlist_id
,
5207 video_id
= renderer
.get('videoId')
5209 yield self
._extract
_video
(renderer
)
5212 channel_id
= renderer
.get('channelId')
5214 yield self
._extract
_channel
_renderer
(renderer
)
5216 # generic endpoint URL support
5217 ep_url
= urljoin('https://www.youtube.com/', try_get(
5218 renderer
, lambda x
: x
['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
5221 for ie
in (YoutubeTabIE
, YoutubePlaylistIE
, YoutubeIE
):
5222 if ie
.suitable(ep_url
):
5223 yield self
.url_result(
5224 ep_url
, ie
=ie
.ie_key(), video_id
=ie
._match
_id
(ep_url
), video_title
=title
)
# Maps a music list-item renderer to one URL result on music.youtube.com.
# Visible lookups and the result each one feeds:
#   - playlistItemData.videoId -> watch?v=<video_id> (YoutubeIE), titled with
#     the text of the first run of the first flex column;
#   - navigationEndpoint.watchEndpoint.playlistId, with watchEndpoint.videoId
#     -> watch?v=<video_id>&list=<playlist_id>, otherwise
#     playlist?list=<playlist_id> (both YoutubeTabIE);
#   - navigationEndpoint.browseEndpoint.browseId -> browse/<browse_id>
#     (YoutubeTabIE).
# NOTE(review): the `if` lines guarding each return (embedded lines 5229,
# 5236, 5238, 5244) are absent from this extract; the pairing above is read
# from statement order and should be confirmed against the full file.
5227 def _music_reponsive_list_entry(self
, renderer
):
5228 video_id
= traverse_obj(renderer
, ('playlistItemData', 'videoId'))
5230 title
= traverse_obj(renderer
, (
5231 'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
5232 'text', 'runs', 0, 'text'))
5233 return self
.url_result(f
'https://music.youtube.com/watch?v={video_id}',
5234 ie
=YoutubeIE
.ie_key(), video_id
=video_id
, title
=title
)
5235 playlist_id
= traverse_obj(renderer
, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
5237 video_id
= traverse_obj(renderer
, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
5239 return self
.url_result(f
'https://music.youtube.com/watch?v={video_id}&list={playlist_id}',
5240 ie
=YoutubeTabIE
.ie_key(), video_id
=playlist_id
)
5241 return self
.url_result(f
'https://music.youtube.com/playlist?list={playlist_id}',
5242 ie
=YoutubeTabIE
.ie_key(), video_id
=playlist_id
)
5243 browse_id
= traverse_obj(renderer
, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
5245 return self
.url_result(f
'https://music.youtube.com/browse/{browse_id}',
5246 ie
=YoutubeTabIE
.ie_key(), video_id
=browse_id
)
# Generates entries from the 'content' of a shelf renderer.
# Reads shelf_renderer['content'], checks that it is a dict, takes its
# 'gridRenderer' (or, failing that, 'expandedShelfContentsRenderer') and
# delegates to self._grid_entries(); afterwards it looks up
# 'horizontalListRenderer'.
# NOTE(review): embedded lines 5251, 5253 and 5259-5261 are absent from this
# extract, so the guard conditions and the handling of
# 'horizontalListRenderer' are not shown here.
5248 def _shelf_entries_from_content(self
, shelf_renderer
):
5249 content
= shelf_renderer
.get('content')
5250 if not isinstance(content
, dict):
5252 renderer
= content
.get('gridRenderer') or content
.get('expandedShelfContentsRenderer')
5254 # TODO: add support for nested playlists so each shelf is processed
5255 # as separate playlist
5256 # TODO: this includes only first N items
5257 yield from self
._grid
_entries
(renderer
)
5258 renderer
= content
.get('horizontalListRenderer')
# Generates entries for a shelf renderer.
# Reads the shelf's endpoint web URL, joins it onto https://www.youtube.com
# as shelf_url, and yields a URL result for it titled with the shelf title;
# when skip_channels is set, shelf URLs containing '/channels?' are tested
# for separately. The last visible statement delegates to
# self._shelf_entries_from_content() as the fallback for shelves without URL.
# NOTE(review): embedded lines 5264, 5266, 5268, 5271 and 5273 (the `ep`
# assignment, the `if shelf_url:` style guards and the skip branch body) are
# absent from this extract; confirm the exact flow against the full file.
5263 def _shelf_entries(self
, shelf_renderer
, skip_channels
=False):
5265 shelf_renderer
, lambda x
: x
['endpoint']['commandMetadata']['webCommandMetadata']['url'],
5267 shelf_url
= urljoin('https://www.youtube.com', ep
)
5269 # Skipping links to other channels; note that checking for
5270 # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
5272 if skip_channels
and '/channels?' in shelf_url
:
5274 title
= self
._get
_text
(shelf_renderer
, 'title')
5275 yield self
.url_result(shelf_url
, video_title
=title
)
5276 # Shelf may not contain shelf URL, fallback to extraction from content
5277 yield from self
._shelf
_entries
_from
_content
(shelf_renderer
)
# Generates video entries for a playlist video list.
# For each element of video_list_renderer['contents'] it takes the
# 'playlistVideoRenderer' (or, failing that, 'playlistPanelVideoRenderer')
# dict, reads its 'videoId', and yields self._extract_video(renderer).
# NOTE(review): embedded lines 5282, 5285 and 5287-5288 (the isinstance
# branch bodies and the guard on video_id) are absent from this extract.
5279 def _playlist_entries(self
, video_list_renderer
):
5280 for content
in video_list_renderer
['contents']:
5281 if not isinstance(content
, dict):
5283 renderer
= content
.get('playlistVideoRenderer') or content
.get('playlistPanelVideoRenderer')
5284 if not isinstance(renderer
, dict):
5286 video_id
= renderer
.get('videoId')
5289 yield self
._extract
_video
(renderer
)
# Generates entries for a rich item renderer.
# Picks the first of content.videoRenderer / reelItemRenderer /
# playlistRenderer / shortsLockupViewModel (empty dict if none), then:
#   - with a 'videoId': yields self._extract_video(renderer);
#   - with a 'playlistId': yields a YoutubeTab playlist URL result titled
#     with the renderer's title;
#   - for shortsLockupViewModel: reads 'entityId', takes the video id from
#     onTap.innertubeCommand.reelWatchEndpoint.videoId and yields a
#     /shorts/<video_id> URL result with title and view_count from
#     overlayMetadata plus thumbnails.
# NOTE(review): embedded lines 5293, 5296, 5298, 5300, 5305, 5308, 5310-5311
# and 5318 are absent from this extract (the traverse_obj subject and the
# guards/returns between the branches); confirm against the full file.
5291 def _rich_entries(self
, rich_grid_renderer
):
5292 renderer
= traverse_obj(
5294 ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'), any
)) or {}
5295 video_id
= renderer
.get('videoId')
5297 yield self
._extract
_video
(renderer
)
5299 playlist_id
= renderer
.get('playlistId')
5301 yield self
.url_result(
5302 f
'https://www.youtube.com/playlist?list={playlist_id}',
5303 ie
=YoutubeTabIE
.ie_key(), video_id
=playlist_id
,
5304 video_title
=self
._get
_text
(renderer
, 'title'))
5306 # shortsLockupViewModel extraction
5307 entity_id
= renderer
.get('entityId')
5309 video_id
= traverse_obj(renderer
, ('onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId', {str}
))
5312 yield self
.url_result(
5313 f
'https://www.youtube.com/shorts/{video_id}',
5314 ie
=YoutubeIE
, video_id
=video_id
,
5315 **traverse_obj(renderer
, ('overlayMetadata', {
5316 'title': ('primaryText', 'content', {str}
),
5317 'view_count': ('secondaryText', 'content', {parse_count}
),
5319 thumbnails
=self
._extract
_thumbnails
(renderer
, 'thumbnail', final_key
='sources'))
# Returns self._extract_video(video_renderer) for a video renderer.
# The renderer's 'videoId' is read first.
# NOTE(review): embedded line 5324, between the lookup and the return, is
# absent from this extract -- presumably a guard on video_id; confirm.
5322 def _video_entry(self
, video_renderer
):
5323 video_id
= video_renderer
.get('videoId')
5325 return self
._extract
_video
(video_renderer
)
# Returns a YoutubeTab URL result for a hashtag tile.
# The URL is the tile's onTapCommand web URL joined onto https://youtube.com;
# the result title is the text of the tile's 'hashtag' field.
# NOTE(review): embedded line 5330, between the URL lookup and the return, is
# absent from this extract -- presumably a guard on `url`; confirm.
5327 def _hashtag_tile_entry(self
, hashtag_tile_renderer
):
5328 url
= urljoin('https://youtube.com', traverse_obj(
5329 hashtag_tile_renderer
, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url')))
5331 return self
.url_result(
5332 url
, ie
=YoutubeTabIE
.ie_key(), title
=self
._get
_text
(hashtag_tile_renderer
, 'hashtag'))
# Generates entries found in a community post (backstagePostRenderer).
# Visible sources of entries:
#   - video attachment: backstageAttachment.videoRenderer, turned into an
#     entry with self._extract_video();
#   - playlist attachment: backstageAttachment.playlistRenderer.playlistId,
#     yielded as a YoutubeTab playlist URL result;
#   - inline links: each dict in contentText.runs whose
#     navigationEndpoint.urlEndpoint.url is accepted by YoutubeIE.suitable();
#     the link's video id is compared with the attached video's id before a
#     YoutubeIE URL result is yielded.
# NOTE(review): embedded lines 5338-5339, 5343, 5345-5346, 5350, 5356,
# 5358-5359, 5361-5362, 5364 and 5367 (early returns, guards, `continue`s
# and the yield of `entry`) are absent from this extract.
5334 def _post_thread_entries(self
, post_thread_renderer
):
5335 post_renderer
= try_get(
5336 post_thread_renderer
, lambda x
: x
['post']['backstagePostRenderer'], dict)
5337 if not post_renderer
:
5340 video_renderer
= try_get(
5341 post_renderer
, lambda x
: x
['backstageAttachment']['videoRenderer'], dict) or {}
5342 video_id
= video_renderer
.get('videoId')
5344 entry
= self
._extract
_video
(video_renderer
)
5347 # playlist attachment
5348 playlist_id
= try_get(
5349 post_renderer
, lambda x
: x
['backstageAttachment']['playlistRenderer']['playlistId'], str)
5351 yield self
.url_result(
5352 f
'https://www.youtube.com/playlist?list={playlist_id}',
5353 ie
=YoutubeTabIE
.ie_key(), video_id
=playlist_id
)
5354 # inline video links
5355 runs
= try_get(post_renderer
, lambda x
: x
['contentText']['runs'], list) or []
5357 if not isinstance(run
, dict):
5360 run
, lambda x
: x
['navigationEndpoint']['urlEndpoint']['url'], str)
5363 if not YoutubeIE
.suitable(ep_url
):
5365 ep_video_id
= YoutubeIE
._match
_id
(ep_url
)
5366 if video_id
== ep_video_id
:
5368 yield self
.url_result(ep_url
, ie
=YoutubeIE
.ie_key(), video_id
=ep_video_id
)
# Generates entries from a post-thread continuation.
# Requires post_thread_continuation['contents'] to be a list; for each
# element, a dict under 'backstagePostThreadRenderer' is delegated to
# self._post_thread_entries(), and a dict under 'videoRenderer' is yielded
# via self._video_entry().
# NOTE(review): embedded lines 5373 and 5378 (the body of the list check and
# the statement after the first branch) are absent from this extract.
5370 def _post_thread_continuation_entries(self
, post_thread_continuation
):
5371 contents
= post_thread_continuation
.get('contents')
5372 if not isinstance(contents
, list):
5374 for content
in contents
:
5375 renderer
= content
.get('backstagePostThreadRenderer')
5376 if isinstance(renderer
, dict):
5377 yield from self
._post
_thread
_entries
(renderer
)
5379 renderer
= content
.get('videoRenderer')
5380 if isinstance(renderer
, dict):
5381 yield self
._video
_entry
(renderer
)
# For each element of `contents`, reads richItemRenderer.content.videoRenderer
# (must be a dict) and builds an entry from it with self._video_entry().
# NOTE(review): embedded lines 5387 and 5389-5391 are absent from this
# extract, so the guards and what is done with `entry` are not shown here.
5384 def _rich_grid_entries(self, contents):
5385 for content in contents:
5386 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
5388 entry = self._video_entry(video_renderer)
def _report_history_entries(self, renderer):
    """Yield one YoutubeIE URL result per link found in a report-history table.

    The links are collected from ``renderer`` by walking every row, every
    cell and every text run down to the run's navigation endpoint web URL.
    Each collected URL is joined onto ``https://www.youtube.com`` before it
    is wrapped with ``self.url_result``.
    """
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    found_links = traverse_obj(renderer, link_path)
    for link in found_links:
        absolute_url = urljoin('https://www.youtube.com', link)
        yield self.url_result(absolute_url, YoutubeIE)
# Generates entries from parent_renderer['contents'] and reports the next
# continuation through continuation_list (mutated in place; slot 0 is reset
# to None on entry).
# For each dict in the contents list:
#   - 'richItemRenderer': entries come from self._rich_entries(); the
#     continuation is taken from parent_renderer;
#   - 'reportHistorySectionRenderer': entries come from
#     self._report_history_entries() on its table renderer; the continuation
#     is taken from that table;
#   - otherwise the item-section / music-shelf renderer's own 'contents' are
#     walked and each known key is dispatched through the known_renderers
#     mapping, with the continuation taken from the dispatched renderer.
# If slot 0 is still empty it falls back to the section renderer and then to
# parent_renderer.
# NOTE(review): many lines (e.g. embedded 5406, 5409-5410, 5413, 5419-5420,
# 5424-5426, 5438, 5441, 5443-5444, 5446-5447, 5450) are absent from this
# extract, including the yields and `continue`/`break` statements; confirm
# the exact nesting against the full file.
5400 def _extract_entries(self
, parent_renderer
, continuation_list
):
5401 # continuation_list is modified in-place with continuation_list = [continuation_token]
5402 continuation_list
[:] = [None]
5403 contents
= try_get(parent_renderer
, lambda x
: x
['contents'], list) or []
5404 for content
in contents
:
5405 if not isinstance(content
, dict):
5407 is_renderer
= traverse_obj(
5408 content
, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
5411 if content
.get('richItemRenderer'):
5412 for entry
in self
._rich
_entries
(content
['richItemRenderer']):
5414 continuation_list
[0] = self
._extract
_continuation
(parent_renderer
)
5415 elif content
.get('reportHistorySectionRenderer'): # https://www.youtube.com/reporthistory
5416 table
= traverse_obj(content
, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
5417 yield from self
._report
_history
_entries
(table
)
5418 continuation_list
[0] = self
._extract
_continuation
(table
)
5421 isr_contents
= try_get(is_renderer
, lambda x
: x
['contents'], list) or []
5422 for isr_content
in isr_contents
:
5423 if not isinstance(isr_content
, dict):
5427 'playlistVideoListRenderer': self
._playlist
_entries
,
5428 'gridRenderer': self
._grid
_entries
,
5429 'reelShelfRenderer': self
._grid
_entries
,
5430 'shelfRenderer': self
._shelf
_entries
,
5431 'musicResponsiveListItemRenderer': lambda x
: [self
._music
_reponsive
_list
_entry
(x
)],
5432 'backstagePostThreadRenderer': self
._post
_thread
_entries
,
5433 'videoRenderer': lambda x
: [self
._video
_entry
(x
)],
5434 'playlistRenderer': lambda x
: self
._grid
_entries
({'items': [{'playlistRenderer': x
}]}),
5435 'channelRenderer': lambda x
: self
._grid
_entries
({'items': [{'channelRenderer': x
}]}),
5436 'hashtagTileRenderer': lambda x
: [self
._hashtag
_tile
_entry
(x
)],
5437 'richGridRenderer': lambda x
: self
._extract
_entries
(x
, continuation_list
),
5439 for key
, renderer
in isr_content
.items():
5440 if key
not in known_renderers
:
5442 for entry
in known_renderers
[key
](renderer
):
5445 continuation_list
[0] = self
._extract
_continuation
(renderer
)
5448 if not continuation_list
[0]:
5449 continuation_list
[0] = self
._extract
_continuation
(is_renderer
)
5451 if not continuation_list
[0]:
5452 continuation_list
[0] = self
._extract
_continuation
(parent_renderer
)
# Generates all entries of a tab, following continuations page by page.
# First pass: entries of the tab content's 'sectionListRenderer' (or
# 'richGridRenderer') via self._extract_entries(), which also leaves the first
# continuation in continuation_list[0].
# Paging loop (page_num counts from 1):
#   - a continuation token already present in seen_continuations is reported
#     as feed looping ("assuming end of feed");
#   - the page is fetched with self._extract_response() using headers built
#     from ytcfg / account_syncid / visitor_data;
#   - visitor_data is refreshed from each response (see the linked issue);
#   - the continuation items are taken from
#     onResponseReceivedActions/Endpoints -> appendContinuationItemsAction ->
#     continuationItems, or from 'continuationContents';
#   - the first known key of the first item selects, via known_renderers, the
#     handler and the parent key the items are wrapped under (None = pass the
#     items through unwrapped);
#   - the next continuation is continuation_list[0], or else whatever
#     self._extract_continuation() finds on the wrapped items.
# NOTE(review): embedded lines 5458-5460, 5468, 5472, 5480-5482, 5486-5487,
# 5501-5502, 5508, 5512, 5518 and 5520-5521 (loop exits, the start of the
# known_renderers dict, `continue`/`break` statements) are absent from this
# extract; confirm the termination conditions against the full file.
5454 def _entries(self
, tab
, item_id
, ytcfg
, account_syncid
, visitor_data
):
5455 continuation_list
= [None]
5456 extract_entries
= lambda x
: self
._extract
_entries
(x
, continuation_list
)
5457 tab_content
= try_get(tab
, lambda x
: x
['content'], dict)
5461 try_get(tab_content
, lambda x
: x
['sectionListRenderer'], dict)
5462 or try_get(tab_content
, lambda x
: x
['richGridRenderer'], dict) or {})
5463 yield from extract_entries(parent_renderer
)
5464 continuation
= continuation_list
[0]
5465 seen_continuations
= set()
5466 for page_num
in itertools
.count(1):
5467 if not continuation
:
5469 continuation_token
= continuation
.get('continuation')
5470 if continuation_token
is not None and continuation_token
in seen_continuations
:
5471 self
.write_debug('Detected YouTube feed looping - assuming end of feed.')
5473 seen_continuations
.add(continuation_token
)
5474 headers
= self
.generate_api_headers(
5475 ytcfg
=ytcfg
, account_syncid
=account_syncid
, visitor_data
=visitor_data
)
5476 response
= self
._extract
_response
(
5477 item_id
=f
'{item_id} page {page_num}',
5478 query
=continuation
, headers
=headers
, ytcfg
=ytcfg
,
5479 check_get_keys
=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
5483 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
5484 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
5485 visitor_data
= self
._extract
_visitor
_data
(response
) or visitor_data
5488 'videoRenderer': (self
._grid
_entries
, 'items'), # for membership tab
5489 'gridPlaylistRenderer': (self
._grid
_entries
, 'items'),
5490 'gridVideoRenderer': (self
._grid
_entries
, 'items'),
5491 'gridChannelRenderer': (self
._grid
_entries
, 'items'),
5492 'playlistVideoRenderer': (self
._playlist
_entries
, 'contents'),
5493 'itemSectionRenderer': (extract_entries
, 'contents'), # for feeds
5494 'richItemRenderer': (extract_entries
, 'contents'), # for hashtag
5495 'backstagePostThreadRenderer': (self
._post
_thread
_continuation
_entries
, 'contents'),
5496 'reportHistoryTableRowRenderer': (self
._report
_history
_entries
, 'rows'),
5497 'playlistVideoListContinuation': (self
._playlist
_entries
, None),
5498 'gridContinuation': (self
._grid
_entries
, None),
5499 'itemSectionContinuation': (self
._post
_thread
_continuation
_entries
, None),
5500 'sectionListContinuation': (extract_entries
, None), # for feeds
5503 continuation_items
= traverse_obj(response
, (
5504 ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
5505 'appendContinuationItemsAction', 'continuationItems',
5506 ), 'continuationContents', get_all
=False)
5507 continuation_item
= traverse_obj(continuation_items
, 0, None, expected_type
=dict, default
={})
5509 video_items_renderer
= None
5510 for key
in continuation_item
:
5511 if key
not in known_renderers
:
5513 func
, parent_key
= known_renderers
[key
]
5514 video_items_renderer
= {parent_key
: continuation_items
} if parent_key
else continuation_items
5515 continuation_list
= [None]
5516 yield from func(video_items_renderer
)
5517 continuation
= continuation_list
[0] or self
._extract
_continuation
(video_items_renderer
)
5519 if not video_items_renderer
:
# Looks through `tabs` for a tab renderer whose 'selected' value is truthy;
# the visible failure path raises ExtractorError('Unable to find selected tab').
# NOTE(review): embedded lines 5526-5527 (what happens for the selected tab
# and how `fatal` gates the raise) are absent from this extract; there is no
# `self` parameter, so this is presumably a static method -- confirm.
5523 def _extract_selected_tab(tabs
, fatal
=True):
5524 for tab_renderer
in tabs
:
5525 if tab_renderer
.get('selected'):
5528 raise ExtractorError('Unable to find selected tab')
def _extract_tab_renderers(response):
    """Collect the tab renderer dicts of a browse response.

    Looks under ``contents -> twoColumnBrowseResultsRenderer -> tabs`` and,
    for every tab, picks its ``tabRenderer`` and ``expandableTabRenderer``
    values, keeping only those that are dicts.
    """
    # NOTE(review): no `self` parameter -- presumably decorated as a static
    # method on a line outside this block; confirm.
    renderer_keys = ('tabRenderer', 'expandableTabRenderer')
    tabs_path = ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., renderer_keys)
    return traverse_obj(response, tabs_path, expected_type=dict)
# Builds the playlist result for a tabbed page.
# Metadata comes from self._extract_metadata_from_tabs(); the selected tab's
# 'title' and 'expandedText' (when present) are appended to
# metadata['title'] as ' - <text>'. The result is returned through
# self.playlist_result(), with the selected tab, metadata['id'], ytcfg, the
# account sync id and the visitor data passed along for entry extraction.
# NOTE(review): embedded lines 5543 and 5547 (the call those arguments belong
# to, and the remaining playlist_result arguments) are absent from this
# extract; confirm against the full file.
5535 def _extract_from_tabs(self
, item_id
, ytcfg
, data
, tabs
):
5536 metadata
= self
._extract
_metadata
_from
_tabs
(item_id
, data
)
5538 selected_tab
= self
._extract
_selected
_tab
(tabs
)
5539 metadata
['title'] += format_field(selected_tab
, 'title', ' - %s')
5540 metadata
['title'] += format_field(selected_tab
, 'expandedText', ' - %s')
5542 return self
.playlist_result(
5544 selected_tab
, metadata
['id'], ytcfg
,
5545 self
._extract
_account
_syncid
(ytcfg
, data
),
5546 self
._extract
_visitor
_data
(data
, ytcfg
)),
# Collects playlist/channel level metadata from a browse response `data`.
# Starts from info = {'id': item_id}. Visible steps:
#   - channel pages: 'channel' and 'channel_id' from
#     metadata.channelMetadataRenderer (id from 'externalId' or, failing that,
#     from 'channelUrl'); a non-empty channel_id replaces info['id'];
#   - otherwise metadata.playlistMetadataRenderer is looked up;
#   - thumbnails: avatar and banner thumbnails, each extended with an
#     uncropped variant made by replacing everything from the first '=' in
#     the first thumbnail's URL with '=s0'; banners get preference -10;
#     playlist thumbnails come from the sidebar or the playlist header;
#   - title, availability, channel_follower_count, description, tags and the
#     combined thumbnails list;
#   - channel handle -> 'uploader_id' / 'uploader_url';
#   - 'channel_is_verified' when the header badges contain VERIFIED;
#   - playlist stats: 'modified_date', 'view_count' and 'playlist_count',
#     each with fallbacks to the playlist header (None triggers the fallback,
#     0 does not);
#   - when no channel_id was found, owner name/id/handle are read from the
#     playlist header's 'ownerText' or the deprecated secondary sidebar;
#   - 'uploader', 'channel_url' and 'uploader_url' are derived last.
# NOTE(review): many lines (dict openers such as info.update({ ... }),
# `else:` lines, guards, and the final return -- e.g. embedded 5556, 5559,
# 5562, 5581-5584, 5590-5591, 5608-5609, 5612, 5623-5625, 5628-5630,
# 5633-5634, 5642, 5647, 5668, 5672-5674, 5678-5680) are absent from this
# extract; the grouping above follows statement order and must be confirmed
# against the full file.
5549 def _extract_metadata_from_tabs(self
, item_id
, data
):
5550 info
= {'id': item_id
}
5552 metadata_renderer
= traverse_obj(data
, ('metadata', 'channelMetadataRenderer'), expected_type
=dict)
5553 if metadata_renderer
:
5554 channel_id
= traverse_obj(metadata_renderer
, ('externalId', {self
.ucid_or_none
}),
5555 ('channelUrl', {self
.ucid_from_url
}))
5557 'channel': metadata_renderer
.get('title'),
5558 'channel_id': channel_id
,
5560 if info
['channel_id']:
5561 info
['id'] = info
['channel_id']
5563 metadata_renderer
= traverse_obj(data
, ('metadata', 'playlistMetadataRenderer'), expected_type
=dict)
5565 # pageHeaderViewModel slow rollout began April 2024
5566 page_header_view_model
= traverse_obj(data
, (
5567 'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}
))
5569 # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
5570 # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
5571 def _get_uncropped(url
):
5572 return url_or_none((url
or '').split('=')[0] + '=s0')
5574 avatar_thumbnails
= self
._extract
_thumbnails
(metadata_renderer
, 'avatar')
5575 if avatar_thumbnails
:
5576 uncropped_avatar
= _get_uncropped(avatar_thumbnails
[0]['url'])
5577 if uncropped_avatar
:
5578 avatar_thumbnails
.append({
5579 'url': uncropped_avatar
,
5580 'id': 'avatar_uncropped',
5585 self
._extract
_thumbnails
(data
, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
5586 or self
._extract
_thumbnails
(
5587 page_header_view_model
, ('banner', 'imageBannerViewModel', 'image'), final_key
='sources'))
5588 for banner
in channel_banners
:
5589 banner
['preference'] = -10
5592 uncropped_banner
= _get_uncropped(channel_banners
[0]['url'])
5593 if uncropped_banner
:
5594 channel_banners
.append({
5595 'url': uncropped_banner
,
5596 'id': 'banner_uncropped',
5600 # Deprecated - remove primary_sidebar_renderer when layout discontinued
5601 primary_sidebar_renderer
= self
._extract
_sidebar
_info
_renderer
(data
, 'playlistSidebarPrimaryInfoRenderer')
5602 playlist_header_renderer
= traverse_obj(data
, ('header', 'playlistHeaderRenderer'), expected_type
=dict)
5604 primary_thumbnails
= self
._extract
_thumbnails
(
5605 primary_sidebar_renderer
, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
5606 playlist_thumbnails
= self
._extract
_thumbnails
(
5607 playlist_header_renderer
, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))
5610 'title': (traverse_obj(metadata_renderer
, 'title')
5611 or self
._get
_text
(data
, ('header', 'hashtagHeaderRenderer', 'hashtag'))
5613 'availability': self
._extract
_availability
(data
),
5614 'channel_follower_count': (
5615 self
._get
_count
(data
, ('header', ..., 'subscriberCountText'))
5616 or traverse_obj(page_header_view_model
, (
5617 'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
5618 lambda _
, v
: 'subscribers' in v
['text']['content'], 'text', 'content', {parse_count}
, any
))),
5619 'description': try_get(metadata_renderer
, lambda x
: x
.get('description', '')),
5620 'tags': (traverse_obj(data
, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}
))
5621 or traverse_obj(metadata_renderer
, ('keywords', {lambda x
: x
and shlex
.split(x
)}, ...))),
5622 'thumbnails': (primary_thumbnails
or playlist_thumbnails
) + avatar_thumbnails
+ channel_banners
,
5626 traverse_obj(metadata_renderer
, (('vanityChannelUrl', ('ownerUrls', ...)), {self
.handle_from_url
}), get_all
=False)
5627 or traverse_obj(data
, ('header', ..., 'channelHandleText', {self
.handle_or_none
}), get_all
=False))
5631 'uploader_id': channel_handle
,
5632 'uploader_url': format_field(channel_handle
, None, 'https://www.youtube.com/%s', default
=None),
5635 channel_badges
= self
._extract
_badges
(traverse_obj(data
, ('header', ..., 'badges'), get_all
=False))
5636 if self
._has
_badge
(channel_badges
, BadgeType
.VERIFIED
):
5637 info
['channel_is_verified'] = True
5638 # Playlist stats is a text runs array containing [video count, view count, last updated].
5639 # last updated or (view count and last updated) may be missing.
5640 playlist_stats
= get_first(
5641 (primary_sidebar_renderer
, playlist_header_renderer
), (('stats', 'briefStats', 'numVideosText'), ))
5643 last_updated_unix
= self
._parse
_time
_text
(
5644 self
._get
_text
(playlist_stats
, 2) # deprecated, remove when old layout discontinued
5645 or self
._get
_text
(playlist_header_renderer
, ('byline', 1, 'playlistBylineRenderer', 'text')))
5646 info
['modified_date'] = strftime_or_none(last_updated_unix
)
5648 info
['view_count'] = self
._get
_count
(playlist_stats
, 1)
5649 if info
['view_count'] is None: # 0 is allowed
5650 info
['view_count'] = self
._get
_count
(playlist_header_renderer
, 'viewCountText')
5651 if info
['view_count'] is None:
5652 info
['view_count'] = self
._get
_count
(data
, (
5653 'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
5654 'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
5656 info
['playlist_count'] = self
._get
_count
(playlist_stats
, 0)
5657 if info
['playlist_count'] is None: # 0 is allowed
5658 info
['playlist_count'] = self
._get
_count
(playlist_header_renderer
, ('byline', 0, 'playlistBylineRenderer', 'text'))
5660 if not info
.get('channel_id'):
5661 owner
= traverse_obj(playlist_header_renderer
, 'ownerText')
5662 if not owner
: # Deprecated
5663 owner
= traverse_obj(
5664 self
._extract
_sidebar
_info
_renderer
(data
, 'playlistSidebarSecondaryInfoRenderer'),
5665 ('videoOwner', 'videoOwnerRenderer', 'title'))
5666 owner_text
= self
._get
_text
(owner
)
5667 browse_ep
= traverse_obj(owner
, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
5669 'channel': self
._search
_regex
(r
'^by (.+) and \d+ others?$', owner_text
, 'uploader', default
=owner_text
),
5670 'channel_id': self
.ucid_or_none(browse_ep
.get('browseId')),
5671 'uploader_id': self
.handle_from_url(urljoin('https://www.youtube.com', browse_ep
.get('canonicalBaseUrl'))),
5675 'uploader': info
['channel'],
5676 'channel_url': format_field(info
.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default
=None),
5677 'uploader_url': format_field(info
.get('uploader_id'), None, 'https://www.youtube.com/%s', default
=None),
# Generates the videos of an inline (watch-page) playlist, page by page.
# Per page (page_num counts from 1):
#   - the current `playlist` dict is expanded with self._playlist_entries();
#   - `start` is the index just after the video whose id equals last_id (0 if
#     none matches); only videos[start:] are yielded, and start >= len(videos)
#     is tested for as the "nothing new" case;
#   - first_id keeps the id of the very first video, last_id the id of the
#     last video of this page;
#   - the next page is requested from the 'next' endpoint with playlistId,
#     videoId, index and params taken from the last entry's watchEndpoint,
#     falling back to last_id, len(videos) and 'OAE%3D';
#   - the response's contents.twoColumnWatchNextResults.playlist.playlist
#     dict is read for the following iteration.
# NOTE(review): embedded lines 5686-5687, 5690, 5699, 5704 and 5709-5710 (loop
# exits, the `query = {` opener and the `playlist = try_get(` assignment) are
# absent from this extract; watch_endpoint.get(...) is called without a
# visible `or {}` fallback -- confirm against the full file.
5682 def _extract_inline_playlist(self
, playlist
, playlist_id
, data
, ytcfg
):
5683 first_id
= last_id
= response
= None
5684 for page_num
in itertools
.count(1):
5685 videos
= list(self
._playlist
_entries
(playlist
))
5688 start
= next((i
for i
, v
in enumerate(videos
) if v
['id'] == last_id
), -1) + 1
5689 if start
>= len(videos
):
5691 yield from videos
[start
:]
5692 first_id
= first_id
or videos
[0]['id']
5693 last_id
= videos
[-1]['id']
5694 watch_endpoint
= try_get(
5695 playlist
, lambda x
: x
['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
5696 headers
= self
.generate_api_headers(
5697 ytcfg
=ytcfg
, account_syncid
=self
._extract
_account
_syncid
(ytcfg
, data
),
5698 visitor_data
=self
._extract
_visitor
_data
(response
, data
, ytcfg
))
5700 'playlistId': playlist_id
,
5701 'videoId': watch_endpoint
.get('videoId') or last_id
,
5702 'index': watch_endpoint
.get('index') or len(videos
),
5703 'params': watch_endpoint
.get('params') or 'OAE%3D',
5705 response
= self
._extract
_response
(
5706 item_id
=f
'{playlist_id} page {page_num}',
5707 query
=query
, ep
='next', headers
=headers
, ytcfg
=ytcfg
,
5708 check_get_keys
='contents',
5711 response
, lambda x
: x
['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist renderer found alongside a video.

    Returns a url_result handled by YoutubeTabIE when the renderer links to a
    different playlist page that is not known to be unviewable; otherwise a
    playlist_result whose entries come from _extract_inline_playlist.
    """
    title = playlist.get('title')
    if not title:
        title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    linked_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    playlist_url = urljoin(url, linked_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    if not playlist_url or playlist_url == url or is_known_unviewable:
        entries = self._extract_inline_playlist(playlist, playlist_id, data, ytcfg)
        return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
# Derives the availability of a playlist/tab from four signals read out of `data`:
# the sidebar badges, the playlist header 'privacy' field, the icon of the selected
# privacy dropdown entry and the microformat 'unlisted' flag.
5736 def _extract_availability(self
, data
):
5738 Gets the availability of a given playlist/tab.
5739 Note: Unless YouTube tells us explicitly, we do not assume it is public
5740 @param data: response
# Both renderers fall back to {} so the .get()/traversal calls below work when a section is absent
5742 sidebar_renderer
= self
._extract
_sidebar
_info
_renderer
(data
, 'playlistSidebarPrimaryInfoRenderer') or {}
5743 playlist_header_renderer
= traverse_obj(data
, ('header', 'playlistHeaderRenderer')) or {}
5744 player_header_privacy
= playlist_header_renderer
.get('privacy')
5746 badges
= self
._extract
_badges
(traverse_obj(sidebar_renderer
, 'badges'))
5748 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
# The icon type of the dropdown entry flagged 'isSelected' is looked up in the header first, then the sidebar
5749 privacy_setting_icon
= get_first(
5750 (playlist_header_renderer
, sidebar_renderer
),
5751 ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
5752 lambda _
, v
: v
['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
5755 microformats_is_unlisted
= traverse_obj(
5756 data
, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type
=bool)
# Any one explicit public signal (badge, header privacy or dropdown icon) is sufficient
5760 self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_PUBLIC
)
5761 or player_header_privacy
== 'PUBLIC'
5762 or privacy_setting_icon
== 'PRIVACY_PUBLIC')
5763 else self
._availability
(
# NOTE(review): a conditional expression binds looser than `or`, so the expression below
# reads as `(badge or header == 'PRIVATE') if header is not None else (icon check or None)`;
# the private badge is only consulted when the header privacy is present - confirm this is intended
5765 self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_PRIVATE
)
5766 or player_header_privacy
== 'PRIVATE' if player_header_privacy
is not None
5767 else privacy_setting_icon
== 'PRIVACY_PRIVATE' if privacy_setting_icon
is not None else None),
# Same shape as the private check, with the microformat flag as the last fallback;
# the result stays None when none of the signals is present
5769 self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_UNLISTED
)
5770 or player_header_privacy
== 'UNLISTED' if player_header_privacy
is not None
5771 else privacy_setting_icon
== 'PRIVACY_UNLISTED' if privacy_setting_icon
is not None
5772 else microformats_is_unlisted
if microformats_is_unlisted
is not None else None),
# `or None` turns a falsy badge check into None rather than passing it through as-is
5773 needs_subscription
=self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_SUBSCRIPTION
) or None,
5774 needs_premium
=self
._has
_badge
(badges
, BadgeType
.AVAILABILITY_PREMIUM
) or None,
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first non-empty `info_renderer` entry among the playlist sidebar items.

    @param data: response holding 'sidebar' -> 'playlistSidebarRenderer' -> 'items'
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the entry must have to be accepted
    @returns the matching entry, or None when the sidebar or the entry is absent
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
    for sidebar_item in sidebar_items or ():
        found = try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        if found:
            return found
    return None
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Request the playlist again, this time with unavailable videos
    (e.g. private videos, region blocked, etc.)

    Nothing is requested, and None is returned, unless `data` carries a
    playlist metadata renderer or a playlist header renderer.
    The request itself is non-fatal.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    return self._extract_response(
        item_id=item_id,
        headers=self.generate_api_headers(
            ytcfg=ytcfg,
            account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(data, ytcfg)),
        query={
            'params': 'wgYCCAA=',
            'browseId': f'VL{item_id}',
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` extractor argument of YoutubeTabIE (cached per instance)"""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
# Downloads the webpage for `url` and extracts its ytInitialData inside a
# self.RetryManager loop; the result is the pair (webpage, data).
# `fatal` is forwarded to the retry manager, the initial-data extraction and the error reporting.
5810 def _extract_webpage(self
, url
, item_id
, fatal
=True):
5811 webpage
, data
= None, None
5812 for retry
in self
.RetryManager(fatal
=fatal
):
5814 webpage
= self
._download
_webpage
(url
, item_id
, note
='Downloading webpage', headers
=self
._generate
_webpage
_headers
())
# `webpage or ''` keeps the extraction from receiving None; a falsy result is replaced by {}
5815 data
= self
.extract_yt_initial_data(item_id
, webpage
or '', fatal
=fatal
) or {}
5816 except ExtractorError
as e
:
# Network-level causes are singled out, except HTTP responses with status 403 or 429
# NOTE(review): the statements handling that case are not visible here - presumably a retry; confirm
5817 if isinstance(e
.cause
, network_exceptions
):
5818 if not isinstance(e
.cause
, HTTPError
) or e
.cause
.status
not in (403, 429):
5821 self
._error
_or
_warning
(e
, fatal
=fatal
)
# Alerts carried by the initial data are reported; an ExtractorError raised while doing so
# is passed to _error_or_warning with the caller's `fatal`
5825 self
._extract
_and
_report
_alerts
(data
)
5826 except ExtractorError
as e
:
5827 self
._error
_or
_warning
(e
, fatal
=fatal
)
5830 # Sometimes youtube returns a webpage with incomplete ytInitialData
5831 # See: https://github.com/yt-dlp/yt-dlp/issues/116
# The data counts as incomplete when none of the three keys below yields a value;
# the error is recorded on the retry object rather than raised here
5832 if not traverse_obj(data
, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
5833 retry
.error
= ExtractorError('Incomplete yt initial data received')
5837 return webpage
, data
5839 def _report_playlist_authcheck(self
, ytcfg
, fatal
=True):
5840 """Use if failed to extract ytcfg (and data) from initial webpage"""
5841 if not ytcfg
and self
.is_authenticated
:
5842 msg
= 'Playlists that require authentication may not extract correctly without a successful webpage download'
5843 if 'authcheck' not in self
._configuration
_arg
('skip', ie_key
=YoutubeTabIE
.ie_key()) and fatal
:
5844 raise ExtractorError(
5845 f
'{msg}. If you are not downloading private content, or '
5846 'your cookies are only for the first account and channel,'
5847 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
5849 self
.report_warning(msg
, only_once
=True)
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Obtain the initial data for `url`, from the webpage when possible and from the API otherwise.

    Unless the webpage is skipped, it is downloaded first (failing per `webpage_fatal`)
    and `ytcfg` is taken from it when none was passed in. When that leaves no data,
    the tab endpoint is queried instead (failing per `fatal`).

    @returns (data, ytcfg)
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: fall back to resolving the URL through the API
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through the `navigation/resolve_url` endpoint and fetch the page it points to.

    A resolved 'browseEndpoint' is fetched from `browse`, a 'watchEndpoint' from `next`;
    the browse endpoint is tried first.

    @returns the endpoint response, or None when nothing resolved and `fatal` is off
    @raises ExtractorError when nothing resolved and `fatal` is on
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if not fatal:
        self.report_warning(err_note, item_id)
        return None
    raise ExtractorError(err_note, expected=True)
# `params` value sent with search requests when _search_results is called without one
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries found for a search `query`, following continuations page by page.

    @param query: search text, sent as 'query'
    @param params: sent as 'params' when truthy; defaults to _SEARCH_PARAMS
    @param default_client: client used for the ytcfg download, the headers and the requests
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    request_body = {'query': query}
    if params:
        request_body['params'] = params

    # Alternative locations of the result list inside a search response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # A response is accepted when it has any of the top-level keys the paths above start with
    check_get_keys = tuple({path[0] for path in content_keys})
    if self.skip_webpage:
        ytcfg = {}
    else:
        ytcfg = self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # One-element list shared with _extract_entries; read back after each page for the next continuation
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        request_body.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=request_body,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        page_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(page_contents))}, continuation_list)
        if not continuation_list[0]:
            return
5924 class YoutubeTabIE(YoutubeTabBaseInfoExtractor
):
5925 IE_DESC
= 'YouTube Tabs'
5926 _VALID_URL
= r
'''(?x:
5928 (?!consent\.)(?:\w+\.)?
5930 youtube(?:kids)?\.com|
5934 (?P<channel_type>channel|c|user|browse)/|
5937 (?:playlist|watch)\?.*?\blist=
5939 (?!(?:{reserved_names})\b) # Direct URLs
5943 reserved_names
=YoutubeBaseInfoExtractor
._RESERVED
_NAMES
,
5944 invidious
='|'.join(YoutubeBaseInfoExtractor
._INVIDIOUS
_SITES
),
5946 IE_NAME
= 'youtube:tab'
5949 'note': 'playlists, multipage',
5950 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5951 'playlist_mincount': 94,
5953 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5954 'title': 'Igor Kleiner Ph.D. - Playlists',
5955 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5956 'uploader': 'Igor Kleiner Ph.D.',
5957 'uploader_id': '@IgorDataScience',
5958 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5959 'channel': 'Igor Kleiner Ph.D.',
5960 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5961 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5962 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5963 'channel_follower_count': int,
5966 'note': 'playlists, multipage, different order',
5967 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5968 'playlist_mincount': 94,
5970 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5971 'title': 'Igor Kleiner Ph.D. - Playlists',
5972 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5973 'uploader': 'Igor Kleiner Ph.D.',
5974 'uploader_id': '@IgorDataScience',
5975 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5976 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5977 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5978 'channel': 'Igor Kleiner Ph.D.',
5979 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5980 'channel_follower_count': int,
5983 'note': 'playlists, series',
5984 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5985 'playlist_mincount': 5,
5987 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5988 'title': '3Blue1Brown - Playlists',
5989 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5990 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5991 'channel': '3Blue1Brown',
5992 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5993 'uploader_id': '@3blue1brown',
5994 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5995 'uploader': '3Blue1Brown',
5996 'tags': ['Mathematics'],
5997 'channel_follower_count': int,
5998 'channel_is_verified': True,
6001 'note': 'playlists, singlepage',
6002 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
6003 'playlist_mincount': 4,
6005 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
6006 'title': 'ThirstForScience - Playlists',
6007 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
6008 'uploader': 'ThirstForScience',
6009 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
6010 'uploader_id': '@ThirstForScience',
6011 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
6012 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
6014 'channel': 'ThirstForScience',
6015 'channel_follower_count': int,
6018 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
6019 'only_matching': True,
6021 'note': 'basic, single video playlist',
6022 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
6024 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
6025 'title': 'youtube-dl public playlist',
6029 'modified_date': '20201130',
6030 'channel': 'Sergey M.',
6031 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
6032 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
6033 'availability': 'public',
6034 'uploader': 'Sergey M.',
6035 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
6036 'uploader_id': '@sergeym.6173',
6038 'playlist_count': 1,
6040 'note': 'empty playlist',
6041 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
6043 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
6044 'title': 'youtube-dl empty playlist',
6046 'channel': 'Sergey M.',
6048 'modified_date': '20230921',
6049 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
6050 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
6051 'availability': 'unlisted',
6052 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
6053 'uploader_id': '@sergeym.6173',
6054 'uploader': 'Sergey M.',
6056 'playlist_count': 0,
6059 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
6061 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6062 'title': 'lex will - Home',
6063 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6064 'uploader': 'lex will',
6065 'uploader_id': '@lexwill718',
6066 'channel': 'lex will',
6067 'tags': ['bible', 'history', 'prophesy'],
6068 'uploader_url': 'https://www.youtube.com/@lexwill718',
6069 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6070 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6071 'channel_follower_count': int,
6073 'playlist_mincount': 2,
6075 'note': 'Videos tab',
6076 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
6078 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6079 'title': 'lex will - Videos',
6080 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6081 'uploader': 'lex will',
6082 'uploader_id': '@lexwill718',
6083 'tags': ['bible', 'history', 'prophesy'],
6084 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6085 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6086 'uploader_url': 'https://www.youtube.com/@lexwill718',
6087 'channel': 'lex will',
6088 'channel_follower_count': int,
6090 'playlist_mincount': 975,
6092 'note': 'Videos tab, sorted by popular',
6093 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
6095 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6096 'title': 'lex will - Videos',
6097 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6098 'uploader': 'lex will',
6099 'uploader_id': '@lexwill718',
6100 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6101 'uploader_url': 'https://www.youtube.com/@lexwill718',
6102 'channel': 'lex will',
6103 'tags': ['bible', 'history', 'prophesy'],
6104 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6105 'channel_follower_count': int,
6107 'playlist_mincount': 199,
6109 'note': 'Playlists tab',
6110 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
6112 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6113 'title': 'lex will - Playlists',
6114 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6115 'uploader': 'lex will',
6116 'uploader_id': '@lexwill718',
6117 'uploader_url': 'https://www.youtube.com/@lexwill718',
6118 'channel': 'lex will',
6119 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6120 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6121 'tags': ['bible', 'history', 'prophesy'],
6122 'channel_follower_count': int,
6124 'playlist_mincount': 17,
6126 'note': 'Community tab',
6127 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
6129 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6130 'title': 'lex will - Community',
6131 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6132 'channel': 'lex will',
6133 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6134 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6135 'tags': ['bible', 'history', 'prophesy'],
6136 'channel_follower_count': int,
6137 'uploader_url': 'https://www.youtube.com/@lexwill718',
6138 'uploader_id': '@lexwill718',
6139 'uploader': 'lex will',
6141 'playlist_mincount': 18,
6143 'note': 'Channels tab',
6144 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
6146 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6147 'title': 'lex will - Channels',
6148 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6149 'channel': 'lex will',
6150 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6151 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6152 'tags': ['bible', 'history', 'prophesy'],
6153 'channel_follower_count': int,
6154 'uploader_url': 'https://www.youtube.com/@lexwill718',
6155 'uploader_id': '@lexwill718',
6156 'uploader': 'lex will',
6158 'playlist_mincount': 12,
6160 'note': 'Search tab',
6161 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
6162 'playlist_mincount': 40,
6164 'id': 'UCYO_jab_esuFRV4b17AJtAw',
6165 'title': '3Blue1Brown - Search - linear algebra',
6166 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
6167 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6168 'tags': ['Mathematics'],
6169 'channel': '3Blue1Brown',
6170 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6171 'channel_follower_count': int,
6172 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6173 'uploader_id': '@3blue1brown',
6174 'uploader': '3Blue1Brown',
6175 'channel_is_verified': True,
6178 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
6179 'only_matching': True,
6181 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
6182 'only_matching': True,
6184 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
6185 'only_matching': True,
6187 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
6188 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
6190 'title': '29C3: Not my department',
6191 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
6192 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
6195 'modified_date': '20150605',
6196 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
6197 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
6198 'channel': 'Christiaan008',
6199 'availability': 'public',
6200 'uploader_id': '@ChRiStIaAn008',
6201 'uploader': 'Christiaan008',
6202 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
6204 'playlist_count': 96,
6206 'note': 'Large playlist',
6207 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6209 'title': 'Uploads from Cauchemar',
6210 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
6211 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
6213 'modified_date': r
're:\d{8}',
6214 'channel': 'Cauchemar',
6217 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
6218 'availability': 'public',
6219 'uploader_id': '@Cauchemar89',
6220 'uploader': 'Cauchemar',
6221 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
6223 'playlist_mincount': 1123,
6224 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden'],
6226 'note': 'even larger playlist, 8832 videos',
6227 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
6228 'only_matching': True,
6230 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
6231 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
6233 'title': 'Uploads from Interstellar Movie',
6234 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
6237 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
6238 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
6239 'channel': 'Interstellar Movie',
6241 'modified_date': r
're:\d{8}',
6242 'availability': 'public',
6243 'uploader_id': '@InterstellarMovie',
6244 'uploader': 'Interstellar Movie',
6245 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
6247 'playlist_mincount': 21,
6249 'note': 'Playlist with "show unavailable videos" button',
6250 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
6252 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
6253 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
6255 'channel': 'Phim Siêu Nhân Nhật Bản',
6258 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
6259 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
6260 'modified_date': r
're:\d{8}',
6261 'availability': 'public',
6262 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
6263 'uploader_id': '@phimsieunhannhatban',
6264 'uploader': 'Phim Siêu Nhân Nhật Bản',
6266 'playlist_mincount': 200,
6267 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden'],
6269 'note': 'Playlist with unavailable videos in page 7',
6270 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
6272 'title': 'Uploads from BlankTV',
6273 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
6274 'channel': 'BlankTV',
6275 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
6276 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
6279 'modified_date': r
're:\d{8}',
6281 'availability': 'public',
6282 'uploader_id': '@blanktv',
6283 'uploader': 'BlankTV',
6284 'uploader_url': 'https://www.youtube.com/@blanktv',
6286 'playlist_mincount': 1000,
6287 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden'],
6289 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
6290 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
6292 'title': 'Data Analysis with Dr Mike Pound',
6293 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
6294 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
6297 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
6298 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
6299 'channel': 'Computerphile',
6300 'availability': 'public',
6301 'modified_date': '20190712',
6302 'uploader_id': '@Computerphile',
6303 'uploader': 'Computerphile',
6304 'uploader_url': 'https://www.youtube.com/@Computerphile',
6306 'playlist_mincount': 11,
6308 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
6309 'only_matching': True,
6311 'note': 'Playlist URL that does not actually serve a playlist',
6312 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
6314 'id': 'FqZTN594JQw',
6316 'title': "Smiley's People 01 detective, Adventure Series, Action",
6317 'upload_date': '20150526',
6318 'license': 'Standard YouTube License',
6319 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
6320 'categories': ['People & Blogs'],
6326 'skip_download': True,
6328 'skip': 'This video is not available.',
6329 'add_ie': [YoutubeIE
.ie_key()],
6331 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
6332 'only_matching': True,
6334 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
6335 'only_matching': True,
6337 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
6339 'id': 'hGkQjiJLjWQ', # This will keep changing
6342 'upload_date': r
're:\d{8}',
6344 'categories': ['News & Politics'],
6347 'release_timestamp': int,
6348 'channel': 'Sky News',
6349 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
6352 'thumbnail': r
're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
6353 'playable_in_embed': True,
6354 'release_date': r
're:\d+',
6355 'availability': 'public',
6356 'live_status': 'is_live',
6357 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6358 'channel_follower_count': int,
6359 'concurrent_view_count': int,
6360 'uploader_url': 'https://www.youtube.com/@SkyNews',
6361 'uploader_id': '@SkyNews',
6362 'uploader': 'Sky News',
6363 'channel_is_verified': True,
6366 'skip_download': True,
6368 'expected_warnings': ['Ignoring subtitle tracks found in '],
6370 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
6372 'id': 'a48o2S1cPoo',
6374 'title': 'The Young Turks - Live Main Show',
6375 'upload_date': '20150715',
6376 'license': 'Standard YouTube License',
6377 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
6378 'categories': ['News & Politics'],
6379 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
6383 'skip_download': True,
6385 'only_matching': True,
6387 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
6388 'only_matching': True,
6390 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
6391 'only_matching': True,
6393 'note': 'A channel that is not live. Should raise error',
6394 'url': 'https://www.youtube.com/user/numberphile/live',
6395 'only_matching': True,
6397 'url': 'https://www.youtube.com/feed/trending',
6398 'only_matching': True,
6400 'url': 'https://www.youtube.com/feed/library',
6401 'only_matching': True,
6403 'url': 'https://www.youtube.com/feed/history',
6404 'only_matching': True,
6406 'url': 'https://www.youtube.com/feed/subscriptions',
6407 'only_matching': True,
6409 'url': 'https://www.youtube.com/feed/watch_later',
6410 'only_matching': True,
6412 'note': 'Recommended - redirects to home page.',
6413 'url': 'https://www.youtube.com/feed/recommended',
6414 'only_matching': True,
6416 'note': 'inline playlist with not always working continuations',
6417 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
6418 'only_matching': True,
6420 'url': 'https://www.youtube.com/course',
6421 'only_matching': True,
6423 'url': 'https://www.youtube.com/zsecurity',
6424 'only_matching': True,
6426 'url': 'http://www.youtube.com/NASAgovVideo/videos',
6427 'only_matching': True,
6429 'url': 'https://www.youtube.com/TheYoungTurks/live',
6430 'only_matching': True,
6432 'url': 'https://www.youtube.com/hashtag/cctv9',
6435 'title': 'cctv9 - All',
6438 'playlist_mincount': 300, # not consistent but should be over 300
6440 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
6441 'only_matching': True,
6443 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
6444 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6445 'only_matching': True,
6447 'note': '/browse/ should redirect to /channel/',
6448 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
6449 'only_matching': True,
6451 'note': 'VLPL, should redirect to playlist?list=PL...',
6452 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6454 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6455 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
6456 'title': 'NCS : All Releases 💿',
6457 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
6458 'modified_date': r
're:\d{8}',
6460 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
6462 'channel': 'NoCopyrightSounds',
6463 'availability': 'public',
6464 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
6465 'uploader': 'NoCopyrightSounds',
6466 'uploader_id': '@NoCopyrightSounds',
6468 'playlist_mincount': 166,
6469 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
6471 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
6472 'note': 'Topic, should redirect to playlist?list=UU...',
6473 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6475 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6476 'title': 'Uploads from Royalty Free Music - Topic',
6478 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6479 'channel': 'Royalty Free Music - Topic',
6481 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6482 'modified_date': r
're:\d{8}',
6484 'availability': 'public',
6485 'uploader': 'Royalty Free Music - Topic',
6487 'playlist_mincount': 101,
6488 'expected_warnings': ['YouTube Music is not directly supported', r
'[Uu]navailable videos (are|will be) hidden'],
6490 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
6491 # Treat as a general feed
6492 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
6494 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
6495 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
6498 'playlist_mincount': 9,
6500 'note': 'Youtube music Album',
6501 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
6503 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
6504 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
6508 'availability': 'unlisted',
6509 'modified_date': r
're:\d{8}',
6511 'playlist_count': 50,
6512 'expected_warnings': ['YouTube Music is not directly supported'],
6514 'note': 'unlisted single video playlist',
6515 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6517 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6518 'title': 'yt-dlp unlisted playlist test',
6519 'availability': 'unlisted',
6521 'modified_date': '20220418',
6522 'channel': 'colethedj',
6525 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6526 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
6527 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6528 'uploader_id': '@colethedj1894',
6529 'uploader': 'colethedj',
6533 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6534 'id': 'BaW_jenozKc',
6536 'ie_key': 'Youtube',
6538 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6539 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6541 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6542 'channel': 'Philipp Hagemeister',
6543 'uploader_id': '@PhilippHagemeister',
6544 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6545 'uploader': 'Philipp Hagemeister',
6548 'playlist_count': 1,
6549 'params': {'extract_flat': True},
6551 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6552 'url': 'https://www.youtube.com/feed/recommended',
6554 'id': 'recommended',
6555 'title': 'recommended',
6558 'playlist_mincount': 50,
6560 'skip_download': True,
6561 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6564 'note': 'API Fallback: /videos tab, sorted by oldest first',
6565 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6567 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6568 'title': 'Cody\'sLab - Videos',
6569 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
6570 'channel': 'Cody\'sLab',
6571 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6573 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6574 'channel_follower_count': int,
6576 'playlist_mincount': 650,
6578 'skip_download': True,
6579 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6581 'skip': 'Query for sorting no longer works',
6583 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6584 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6586 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6587 'title': 'Uploads from Royalty Free Music - Topic',
6588 'modified_date': r
're:\d{8}',
6589 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6591 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6593 'channel': 'Royalty Free Music - Topic',
6595 'availability': 'public',
6596 'uploader': 'Royalty Free Music - Topic',
6598 'playlist_mincount': 101,
6600 'skip_download': True,
6601 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6603 'expected_warnings': ['YouTube Music is not directly supported', r
'[Uu]navailable videos (are|will be) hidden'],
6605 'note': 'non-standard redirect to regional channel',
6606 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6607 'only_matching': True,
6609 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6610 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6612 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6613 'modified_date': '20220407',
6614 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6616 'availability': 'unlisted',
6617 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6618 'channel': 'pukkandan',
6619 'description': 'Test for collaborative playlist',
6620 'title': 'yt-dlp test - collaborative playlist',
6622 'uploader_url': 'https://www.youtube.com/@pukkandan',
6623 'uploader_id': '@pukkandan',
6624 'uploader': 'pukkandan',
6626 'playlist_mincount': 2,
6628 'note': 'translated tab name',
6629 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6631 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6633 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6634 'description': 'test description',
6635 'title': 'cole-dlp-test-acc - 再生リスト',
6636 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6637 'channel': 'cole-dlp-test-acc',
6638 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6639 'uploader_id': '@coletdjnz',
6640 'uploader': 'cole-dlp-test-acc',
6642 'playlist_mincount': 1,
6643 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6644 'expected_warnings': ['Preferring "ja"'],
6646 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6647 'note': 'preferred lang set with playlist with translated video titles',
6648 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6650 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6653 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6654 'channel': 'cole-dlp-test-acc',
6655 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6656 'description': 'test',
6657 'title': 'dlp test playlist',
6658 'availability': 'public',
6659 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6660 'uploader_id': '@coletdjnz',
6661 'uploader': 'cole-dlp-test-acc',
6663 'playlist_mincount': 1,
6664 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6665 'expected_warnings': ['Preferring "ja"'],
6667 # shorts audio pivot for 2GtVksBMYFM.
6668 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6670 'id': 'sfv_audio_pivot',
6671 'title': 'sfv_audio_pivot',
6674 'playlist_mincount': 50,
6677 # Channel with a real live tab (not to be mistaken with streams tab)
6678 # Do not treat like it should redirect to live stream
6679 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6681 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6682 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6685 'playlist_mincount': 20,
6687 # Tab name is not the same as tab id
6688 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6690 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6691 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6694 'playlist_mincount': 8,
6696 # Home tab id is literally home. Not to get mistaken with featured
6697 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6699 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6700 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6703 'playlist_mincount': 8,
6705 # Should get three playlists for videos, shorts and streams tabs
6706 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6708 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6709 'title': 'Polka Ch. 尾丸ポルカ',
6710 'channel_follower_count': int,
6711 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6712 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6713 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
6714 'channel': 'Polka Ch. 尾丸ポルカ',
6716 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6717 'uploader': 'Polka Ch. 尾丸ポルカ',
6718 'uploader_id': '@OmaruPolka',
6719 'channel_is_verified': True,
6721 'playlist_count': 3,
6723 # Shorts tab with channel with handle
6724 # TODO: fix channel description
6725 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6727 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6728 'title': 'Not Just Bikes - Shorts',
6730 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
6731 'description': 'md5:5e82545b3a041345927a92d0585df247',
6732 'channel_follower_count': int,
6733 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
6734 'channel': 'Not Just Bikes',
6735 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6736 'uploader': 'Not Just Bikes',
6737 'uploader_id': '@NotJustBikes',
6738 'channel_is_verified': True,
6740 'playlist_mincount': 10,
6743 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6745 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6746 'title': '中村悠一 - Live',
6748 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6749 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
6751 'channel_follower_count': int,
6752 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
6753 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6754 'uploader_id': '@Yuichi-Nakamura',
6757 'playlist_mincount': 60,
6759 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6760 # See test_youtube_lists
6761 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6762 'only_matching': True,
6764 # No uploads and no UCID given. Should fail with no uploads error
6765 # See test_youtube_lists
6766 'url': 'https://www.youtube.com/news',
6767 'only_matching': True,
6769 # No videos tab but has a shorts tab
6770 'url': 'https://www.youtube.com/c/TKFShorts',
6772 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6773 'title': 'Shorts Break - Shorts',
6775 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6776 'channel': 'Shorts Break',
6777 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
6778 'channel_follower_count': int,
6779 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
6780 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6781 'uploader': 'Shorts Break',
6782 'uploader_id': '@ShortsBreak_Official',
6784 'playlist_mincount': 30,
6786 # Trending Now Tab. tab id is empty
6787 'url': 'https://www.youtube.com/feed/trending',
6790 'title': 'trending - Now',
6793 'playlist_mincount': 30,
6795 # Trending Gaming Tab. tab id is empty
6796 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6799 'title': 'trending - Gaming',
6802 'playlist_mincount': 30,
6804 # Shorts url result in shorts tab
6805 # TODO: Fix channel id extraction
6806 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6808 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6809 'title': 'cole-dlp-test-acc - Shorts',
6810 'channel': 'cole-dlp-test-acc',
6811 'description': 'test description',
6812 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6813 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6815 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6816 'uploader_id': '@coletdjnz',
6817 'uploader': 'cole-dlp-test-acc',
6821 # Channel data is not currently available for short renderers (as of 2023-03-01)
6823 'ie_key': 'Youtube',
6824 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6825 'id': 'sSM9J5YH_60',
6826 'title': 'SHORT short',
6831 'params': {'extract_flat': True},
6833 # Live video status should be extracted
6834 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6836 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6837 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
6843 'ie_key': 'Youtube',
6844 'url': 'startswith:https://www.youtube.com/watch?v=',
6847 'live_status': 'is_live',
6850 'concurrent_view_count': int,
6853 'uploader_url': str,
6855 'channel_is_verified': bool, # this will keep changing
6858 'params': {'extract_flat': True, 'playlist_items': '1'},
6859 'playlist_mincount': 1,
6861 # Channel renderer metadata. Contains number of videos on the channel
6862 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6864 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6865 'title': 'cole-dlp-test-acc - Channels',
6866 'channel': 'cole-dlp-test-acc',
6867 'description': 'test description',
6868 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6869 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6871 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6872 'uploader_id': '@coletdjnz',
6873 'uploader': 'cole-dlp-test-acc',
6878 'ie_key': 'YoutubeTab',
6879 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6880 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6881 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6882 'title': 'PewDiePie',
6883 'channel': 'PewDiePie',
6884 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6886 'channel_follower_count': int,
6887 'playlist_count': int,
6888 'uploader': 'PewDiePie',
6889 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6890 'uploader_id': '@PewDiePie',
6891 'channel_is_verified': True,
6894 'params': {'extract_flat': True},
6896 'url': 'https://www.youtube.com/@3blue1brown/about',
6898 'id': '@3blue1brown',
6899 'tags': ['Mathematics'],
6900 'title': '3Blue1Brown',
6901 'channel_follower_count': int,
6902 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6903 'channel': '3Blue1Brown',
6904 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6905 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
6906 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6907 'uploader_id': '@3blue1brown',
6908 'uploader': '3Blue1Brown',
6909 'channel_is_verified': True,
6911 'playlist_count': 0,
6913 # Podcasts tab, with rich entry playlistRenderers
6914 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6916 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6917 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6918 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6919 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6920 'title': '99 Percent Invisible - Podcasts',
6921 'uploader': '99 Percent Invisible',
6922 'channel_follower_count': int,
6923 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6925 'channel': '99 Percent Invisible',
6926 'uploader_id': '@99percentinvisiblepodcast',
6928 'playlist_count': 0,
6930 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6931 'url': 'https://www.youtube.com/@AHimitsu/releases',
6933 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6934 'channel': 'A Himitsu',
6935 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6936 'title': 'A Himitsu - Releases',
6937 'uploader_id': '@AHimitsu',
6938 'uploader': 'A Himitsu',
6939 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6941 'description': 'I make music',
6942 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6943 'channel_follower_count': int,
6944 'channel_is_verified': True,
6946 'playlist_mincount': 10,
6948 # Playlist with only shorts, shown as reel renderers
6949 # FIXME: future: YouTube currently doesn't give continuation for this,
6951 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6953 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6954 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6956 'uploader_id': '@BangyShorts',
6958 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6959 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6960 'channel': 'Bangy Shorts',
6961 'uploader': 'Bangy Shorts',
6963 'availability': 'public',
6964 'modified_date': r
're:\d{8}',
6965 'title': 'Uploads from Bangy Shorts',
6967 'playlist_mincount': 100,
6968 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden'],
6970 'note': 'Tags containing spaces',
6971 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6972 'playlist_count': 3,
6974 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6975 'channel': 'Markiplier',
6976 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6977 'title': 'Markiplier',
6978 'channel_follower_count': int,
6979 'description': 'md5:0c010910558658824402809750dc5d97',
6980 'uploader_id': '@markiplier',
6981 'uploader_url': 'https://www.youtube.com/@markiplier',
6982 'uploader': 'Markiplier',
6983 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6984 'channel_is_verified': True,
6985 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6986 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6987 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    URLs that ``YoutubeIE`` accepts are always declined here; every other
    URL is judged by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - one optional "/segment" path component; it is only attempted when
#          the "not_channel" group (presumably defined inside _VALID_URL) did
#          not take part in the match
#   post - whatever text remains up to the end of the string
_URL_RE = re.compile(
    rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6998 def _get_url_mobj(self
, url
):
6999 mobj
= self
._URL
_RE
.match(url
).groupdict()
7000 mobj
.update((k
, '') for k
, v
in mobj
.items() if v
is None)
# Derive a (tab id, tab name) pair from a tab renderer dict.
#   tab_name: the renderer's 'title', lower-cased ('' when absent).
#   tab_id:   the 'tab' group (leading '/' removed) of the tab's web URL resolved
#             against base_url, else the renderer's 'tabIdentifier' string.
# NOTE(review): some branch/mapping lines of this method are not visible in this
# excerpt; the comments below describe only what is shown.
7003 def _extract_tab_id_and_name(self
, tab
, base_url
='https://www.youtube.com'):
7004 tab_name
= (tab
.get('title') or '').lower()
7005 tab_url
= urljoin(base_url
, traverse_obj(
7006 tab
, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
7008 tab_id
= (tab_url
and self
._get
_url
_mobj
(tab_url
)['tab'][1:]
7009 or traverse_obj(tab
, 'tabIdentifier', expected_type
=str))
# Alias table for tab ids: 'TAB_ID_SPONSORSHIPS' is reported as 'membership',
# any other id is returned unchanged, paired with the tab name.
7012 'TAB_ID_SPONSORSHIPS': 'membership',
7013 }.get(tab_id
, tab_id
), tab_name
7015 # Fallback to tab name if we cannot get the tab id.
7016 # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
7017 # Note that in the case of translated tab name this may result in an empty string, which we don't want.
7019 self
.write_debug(f
'Falling back to selected tab name: {tab_name}')
# Name-based fallback: tab_name is looked up in a second alias mapping (its
# entries are not visible here) and also returned as the second element.
7023 }.get(tab_name
, tab_name
), tab_name
7025 def _has_tab(self
, tabs
, tab_id
):
7026 return any(self
._extract
_tab
_id
_and
_name
(tab
)[0] == tab_id
for tab
in tabs
)
7028 def _empty_playlist(self
, item_id
, data
):
7029 return self
.playlist_result([], item_id
, **self
._extract
_metadata
_from
_tabs
(item_id
, data
))
# Entry point for tab / channel / playlist / feed URLs.
# Visible flow: force the host to www.youtube.com, split the URL with _get_url_mobj,
# short-circuit YouTube Music and watch-style URLs through url_result, download the
# page data, follow regional redirects, work out which tab was selected, and finally
# build the result from the tab renderers, a watch-page playlist, or a single video.
# NOTE(review): a number of control-flow lines of this method are not visible in this
# excerpt; the comments below describe only what is shown.
7031 @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
7032 def _real_extract(self
, url
, smuggled_data
):
7033 item_id
= self
._match
_id
(url
)
7034 url
= urllib
.parse
.urlunparse(
7035 urllib
.parse
.urlparse(url
)._replace
(netloc
='www.youtube.com'))
7036 compat_opts
= self
.get_param('compat_opts', [])
7038 mobj
= self
._get
_url
_mobj
(url
)
7039 pre
, tab
, post
, is_channel
= mobj
['pre'], mobj
['tab'], mobj
['post'], not mobj
['not_channel']
# YouTube Music channel URLs are rewritten by id prefix: 'VL' ids map to a playlist,
# 'MP' albums are resolved through the microformat's canonical URL, and /browse/
# URLs are re-issued as /channel/ URLs.
7040 if is_channel
and smuggled_data
.get('is_music_url'):
7041 if item_id
[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
7042 return self
.url_result(
7043 f
'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE
, item_id
[2:])
7044 elif item_id
[:2] == 'MP': # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
7045 mdata
= self
._extract
_tab
_endpoint
(
7046 f
'https://music.youtube.com/channel/{item_id}', item_id
, default_client
='web_music')
7047 murl
= traverse_obj(mdata
, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
7048 get_all
=False, expected_type
=str)
7050 raise ExtractorError('Failed to resolve album to playlist')
7051 return self
.url_result(murl
, YoutubeTabIE
)
7052 elif mobj
['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
7053 return self
.url_result(
7054 f
'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE
, item_id
)
# original_tab_id is the requested tab without its leading '/'. A channel URL with no
# tab is pointed at /videos unless the 'no-youtube-channel-redirect' compat option is set.
7056 original_tab_id
, display_id
= tab
[1:], f
'{item_id}{tab}'
7057 if is_channel
and not tab
and 'no-youtube-channel-redirect' not in compat_opts
:
7058 url
= f
'{pre}/videos{post}'
7059 if smuggled_data
.get('is_music_url'):
7060 self
.report_warning(f
'YouTube Music is not directly supported. Redirecting to {url}')
7062 # Handle both video/playlist URLs
7064 video_id
, playlist_id
= (traverse_obj(qs
, (key
, 0)) for key
in ('v', 'list'))
7065 if not video_id
and mobj
['not_channel'].startswith('watch'):
7067 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
7068 raise ExtractorError('A video URL was given without video ID', expected
=True)
7069 # Common mistake: https://www.youtube.com/watch?list=playlist_id
7070 self
.report_warning(f
'A video URL was given without video ID. Trying to download playlist {playlist_id}')
7071 return self
.url_result(
7072 f
'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE
, playlist_id
)
# When _yes_playlist() is falsy, hand the plain watch URL to YoutubeIE instead.
7074 if not self
._yes
_playlist
(playlist_id
, video_id
):
7075 return self
.url_result(
7076 f
'https://www.youtube.com/watch?v={video_id}', YoutubeIE
, video_id
)
7078 data
, ytcfg
= self
._extract
_data
(url
, display_id
)
7080 # YouTube may provide a non-standard redirect to the regional channel
7081 # See: https://github.com/yt-dlp/yt-dlp/issues/2694
7082 # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
7083 redirect_url
= traverse_obj(
7084 data
, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all
=False)
7085 if redirect_url
and 'no-youtube-channel-redirect' not in compat_opts
:
7086 redirect_url
= ''.join((urljoin('https://www.youtube.com', redirect_url
), tab
, post
))
7087 self
.to_screen(f
'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
7088 return self
.url_result(redirect_url
, YoutubeTabIE
)
# extra_tabs collects the URLs of additional tabs (/streams, /shorts) that are
# extracted alongside the selected tab.
7090 tabs
, extra_tabs
= self
._extract
_tab
_renderers
(data
), []
7091 if is_channel
and tabs
and 'no-youtube-channel-redirect' not in compat_opts
:
7092 selected_tab
= self
._extract
_selected
_tab
(tabs
)
7093 selected_tab_id
, selected_tab_name
= self
._extract
_tab
_id
_and
_name
(selected_tab
, url
) # NB: Name may be translated
7094 self
.write_debug(f
'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
7096 # /about is no longer a tab
7097 if original_tab_id
== 'about':
7098 return self
._empty
_playlist
(item_id
, data
)
7100 if not original_tab_id
and selected_tab_name
:
7101 self
.to_screen('Downloading all uploads of the channel. '
7102 'To download only the videos in a specific tab, pass the tab\'s URL')
7103 if self
._has
_tab
(tabs
, 'streams'):
7104 extra_tabs
.append(''.join((pre
, '/streams', post
)))
7105 if self
._has
_tab
(tabs
, 'shorts'):
7106 extra_tabs
.append(''.join((pre
, '/shorts', post
)))
7107 # XXX: Members-only tab should also be extracted
7109 if not extra_tabs
and selected_tab_id
!= 'videos':
7110 # Channel does not have streams, shorts or videos tabs
7111 if item_id
[:2] != 'UC':
7112 return self
._empty
_playlist
(item_id
, data
)
7114 # Topic channels don't have /videos. Use the equivalent playlist instead
7115 pl_id
= f
'UU{item_id[2:]}'
7116 pl_url
= f
'https://www.youtube.com/playlist?list={pl_id}'
7118 data
, ytcfg
= self
._extract
_data
(pl_url
, pl_id
, ytcfg
=ytcfg
, fatal
=True, webpage_fatal
=True)
7119 except ExtractorError
:
7120 return self
._empty
_playlist
(item_id
, data
)
7122 item_id
, url
= pl_id
, pl_url
7124 f
'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')
7126 elif extra_tabs
and selected_tab_id
!= 'videos':
7127 # When there are shorts/live tabs but not videos tab
7128 url
, data
= f
'{pre}{post}', None
7130 elif (original_tab_id
or 'videos') != selected_tab_id
:
7131 if original_tab_id
== 'live':
7132 # Live tab should have redirected to the video
7133 # Except in the case the channel has an actual live tab
7134 # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
7135 raise UserNotLive(video_id
=item_id
)
7136 elif selected_tab_name
:
7137 raise ExtractorError(f
'This channel does not have a {original_tab_id} tab', expected
=True)
7139 # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
7140 url
= f
'{pre}{post}'
7142 # YouTube sometimes provides a button to reload playlist with unavailable videos.
7143 if 'no-youtube-unavailable-videos' not in compat_opts
:
7144 data
= self
._reload
_with
_unavailable
_videos
(display_id
, data
, ytcfg
) or data
7145 self
._extract
_and
_report
_alerts
(data
, only_once
=True)
7147 tabs
, entries
= self
._extract
_tab
_renderers
(data
), []
7149 entries
= [self
._extract
_from
_tabs
(item_id
, ytcfg
, data
, tabs
)]
7151 'extractor_key': YoutubeTabIE
.ie_key(),
7152 'extractor': YoutubeTabIE
.IE_NAME
,
# With playlist_items == '0' the extra tabs are returned as unresolved url_results;
# otherwise each extra tab is extracted immediately by calling _real_extract on it.
7155 if self
.get_param('playlist_items') == '0':
7156 entries
.extend(self
.url_result(u
, YoutubeTabIE
) for u
in extra_tabs
)
7157 else: # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
7158 entries
.extend(map(self
._real
_extract
, extra_tabs
))
7160 if len(entries
) == 1:
7163 metadata
= self
._extract
_metadata
_from
_tabs
(item_id
, data
)
7164 uploads_url
= 'the Uploads (UU) playlist URL'
7165 if try_get(metadata
, lambda x
: x
['channel_id'].startswith('UC')):
7166 uploads_url
= f
'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
7168 'Downloading as multiple playlists, separated by tabs. '
7169 f
'To download as a single playlist instead, pass {uploads_url}')
7170 return self
.playlist_result(entries
, item_id
, **metadata
)
# Watch-page layout: a playlist dict under twoColumnWatchNextResults is handed to
# _extract_from_playlist.
7173 playlist
= traverse_obj(
7174 data
, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type
=dict)
7176 return self
._extract
_from
_playlist
(item_id
, url
, data
, playlist
, ytcfg
)
# Last resort: the single video named by currentVideoEndpoint, else the ?v= id parsed earlier.
7178 video_id
= traverse_obj(
7179 data
, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type
=str) or video_id
7181 if tab
!= '/live': # live tab is expected to redirect to video
7182 self
.report_warning(f
'Unable to recognize playlist. Downloading just video {video_id}')
7183 return self
.url_result(f
'https://www.youtube.com/watch?v={video_id}', YoutubeIE
, video_id
)
7185 raise ExtractorError('Unable to recognize tab page')
# Extractor for playlist ids and playlist-style URLs (IE_NAME 'youtube:playlist').
# It performs no extraction itself: _real_extract rewrites the input into a
# https://www.youtube.com/playlist URL and delegates to YoutubeTabIE.
# NOTE(review): parts of the URL pattern, the test data and both methods are not
# visible in this excerpt.
7188 class YoutubePlaylistIE(YoutubeBaseInfoExtractor
):
7189 IE_DESC
= 'YouTube playlists'
7190 _VALID_URL
= r
'''(?x)(?:
7195 youtube(?:kids)?\.com|
7200 (?P<id>{playlist_id})
7202 playlist_id
=YoutubeBaseInfoExtractor
._PLAYLIST
_ID
_RE
,
7203 invidious
='|'.join(YoutubeBaseInfoExtractor
._INVIDIOUS
_SITES
),
7205 IE_NAME
= 'youtube:playlist'
7207 'note': 'issue #673',
7208 'url': 'PLBB231211A4F62143',
7210 'title': '[OLD]Team Fortress 2 (Class-based LP)',
7211 'id': 'PLBB231211A4F62143',
7212 'uploader': 'Wickman',
7213 'uploader_id': '@WickmanVT',
7214 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
7216 'uploader_url': 'https://www.youtube.com/@WickmanVT',
7217 'modified_date': r
're:\d{8}',
7218 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
7219 'channel': 'Wickman',
7221 'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
7222 'availability': 'public',
7224 'playlist_mincount': 29,
7226 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
7228 'title': 'YDL_safe_search',
7229 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
7231 'playlist_count': 2,
7232 'skip': 'This playlist is private',
7235 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
7236 'playlist_count': 4,
7239 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
7240 'uploader': 'milan',
7241 'uploader_id': '@milan5503',
7243 'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
7245 'modified_date': '20140919',
7248 'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
7249 'uploader_url': 'https://www.youtube.com/@milan5503',
7250 'availability': 'public',
7252 'expected_warnings': [r
'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
7254 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
7255 'playlist_mincount': 455,
7257 'title': '2018 Chinese New Singles (11/6 updated)',
7258 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
7260 'uploader_id': '@music_king',
7261 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
7264 'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
7266 'uploader_url': 'https://www.youtube.com/@music_king',
7267 'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
7268 'modified_date': r
're:\d{8}',
7269 'availability': 'public',
7271 'expected_warnings': [r
'[Uu]navailable videos (are|will be) hidden'],
7273 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
7274 'only_matching': True,
7276 # music album playlist
7277 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
7278 'only_matching': True,
# URL matcher override: consults YoutubeTabIE.suitable() and the URL's 'v' query value
# before falling back to the inherited check. The outcome of the first two tests is
# not visible in this excerpt.
7282 def suitable(cls
, url
):
7283 if YoutubeTabIE
.suitable(url
):
7285 from ..utils
import parse_qs
7287 if qs
.get('v', [None])[0]:
7289 return super().suitable(url
)
# Rebuild the request as https://www.youtube.com/playlist carrying the original query
# string (or just list=<playlist_id> when the input had no query), smuggle the
# 'is_music_url' marker into it, and delegate to YoutubeTabIE.
# NOTE(review): the condition guarding the smuggle_url call is not visible here.
7291 def _real_extract(self
, url
):
7292 playlist_id
= self
._match
_id
(url
)
7293 is_music_url
= YoutubeBaseInfoExtractor
.is_music_url(url
)
7294 url
= update_url_query(
7295 'https://www.youtube.com/playlist',
7296 parse_qs(url
) or {'list': playlist_id
})
7298 url
= smuggle_url(url
, {'is_music_url': True})
7299 return self
.url_result(url
, ie
=YoutubeTabIE
.ie_key(), video_id
=playlist_id
)
# Extractor for youtu.be short links that also carry a list=<playlist id> parameter.
# The 11-character video id and the playlist id are captured by _VALID_URL and the
# request is redirected to a www.youtube.com/watch URL handled by YoutubeTabIE.
7302 class YoutubeYtBeIE(YoutubeBaseInfoExtractor
):
7303 IE_DESC
= 'youtu.be'
7304 _VALID_URL
= rf
'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
7306 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
7308 'id': 'yeWKywCrFtk',
7310 'title': 'Small Scale Baler and Braiding Rugs',
7311 'uploader': 'Backus-Page House Museum',
7312 'uploader_id': '@backuspagemuseum',
7313 'uploader_url': r
're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
7314 'upload_date': '20161008',
7315 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
7316 'categories': ['Nonprofits & Activism'],
7320 'playable_in_embed': True,
7321 'thumbnail': r
're:^https?://.*\.webp',
7322 'channel': 'Backus-Page House Museum',
7323 'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
7324 'live_status': 'not_live',
7326 'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
7327 'availability': 'public',
7329 'comment_count': int,
7330 'channel_follower_count': int,
7334 'skip_download': True,
7337 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
7338 'only_matching': True,
# Build the watch URL query ('list', 'feature': 'youtu.be', plus an entry not visible
# in this excerpt) and delegate to YoutubeTabIE.
# Note that the url_result's video_id argument is the playlist id, not the video id.
7341 def _real_extract(self
, url
):
7342 mobj
= self
._match
_valid
_url
(url
)
7343 video_id
= mobj
.group('id')
7344 playlist_id
= mobj
.group('playlist_id')
7345 return self
.url_result(
7346 update_url_query('https://www.youtube.com/watch', {
7348 'list': playlist_id
,
7349 'feature': 'youtu.be',
7350 }), ie
=YoutubeTabIE
.ie_key(), video_id
=playlist_id
)
# Extractor for /embed/live_stream?channel=<id> URLs on any youtube.com subdomain.
# The channel id is taken from the 'channel' query parameter.
7353 class YoutubeLivestreamEmbedIE(YoutubeBaseInfoExtractor
):
7354 IE_DESC
= 'YouTube livestream embeds'
7355 _VALID_URL
= r
'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
7357 'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
7358 'only_matching': True,
# Redirect to the channel's /live page, handled by YoutubeTabIE, using the channel id
# as the result's video_id.
7361 def _real_extract(self
, url
):
7362 channel_id
= self
._match
_id
(url
)
7363 return self
.url_result(
7364 f
'https://www.youtube.com/channel/{channel_id}/live',
7365 ie
=YoutubeTabIE
.ie_key(), video_id
=channel_id
)
# Extractor for the "ytuser:<name>" pseudo-URL; everything after the prefix is the id.
7368 class YoutubeYtUserIE(YoutubeBaseInfoExtractor
):
7369 IE_DESC
= 'YouTube user videos; "ytuser:" prefix'
7370 IE_NAME
= 'youtube:user'
7371 _VALID_URL
= r
'ytuser:(?P<id>.+)'
7373 'url': 'ytuser:phihag',
7374 'only_matching': True,
# Redirect to https://www.youtube.com/user/<id>, handled by YoutubeTabIE.
7377 def _real_extract(self
, url
):
7378 user_id
= self
._match
_id
(url
)
7379 return self
.url_result(f
'https://www.youtube.com/user/{user_id}', YoutubeTabIE
, user_id
)
# Extractor for the ":ytfav" keyword family (":ytfav", ":ytfavorites", ":ytfavourites", ...).
# _LOGIN_REQUIRED is set, matching the "requires cookies" note in IE_DESC.
7382 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor
):
7383 IE_NAME
= 'youtube:favorites'
7384 IE_DESC
= 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
7385 _VALID_URL
= r
':ytfav(?:ou?rite)?s?'
7386 _LOGIN_REQUIRED
= True
7389 'only_matching': True,
7391 'url': ':ytfavorites',
7392 'only_matching': True,
# The keyword always resolves to the fixed playlist URL ...?list=LL, handled by
# YoutubeTabIE; the url argument itself is not used.
7395 def _real_extract(self
, url
):
7396 return self
.url_result(
7397 'https://www.youtube.com/playlist?list=LL',
7398 ie
=YoutubeTabIE
.ie_key())
7401 class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor
):
7402 IE_NAME
= 'youtube:notif'
7403 IE_DESC
= 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
7404 _VALID_URL
= r
':ytnotif(?:ication)?s?'
7405 _LOGIN_REQUIRED
= True
7408 'only_matching': True,
7410 'url': ':ytnotifications',
7411 'only_matching': True,
7414 def _extract_notification_menu(self
, response
, continuation_list
):
7415 notification_list
= traverse_obj(
7417 ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
7418 ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
7419 expected_type
=list) or []
7420 continuation_list
[0] = None
7421 for item
in notification_list
:
7422 entry
= self
._extract
_notification
_renderer
(item
.get('notificationRenderer'))
7425 continuation
= item
.get('continuationItemRenderer')
7427 continuation_list
[0] = continuation
7429 def _extract_notification_renderer(self
, notification
):
7430 video_id
= traverse_obj(
7431 notification
, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type
=str)
7432 url
= f
'https://www.youtube.com/watch?v={video_id}'
7435 browse_ep
= traverse_obj(
7436 notification
, ('navigationEndpoint', 'browseEndpoint'), expected_type
=dict)
7437 channel_id
= self
.ucid_or_none(traverse_obj(browse_ep
, 'browseId', expected_type
=str))
7438 post_id
= self
._search
_regex
(
7439 r
'/post/(.+)', traverse_obj(browse_ep
, 'canonicalBaseUrl', expected_type
=str),
7440 'post id', default
=None)
7441 if not channel_id
or not post_id
:
7443 # The direct /post url redirects to this in the browser
7444 url
= f
'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'
7446 channel
= traverse_obj(
7447 notification
, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
7449 notification_title
= self
._get
_text
(notification
, 'shortMessage')
7450 if notification_title
:
7451 notification_title
= notification_title
.replace('\xad', '') # remove soft hyphens
7452 # TODO: handle recommended videos
7453 title
= self
._search
_regex
(
7454 rf
'{re.escape(channel or "")}[^:]+: (.+)', notification_title
,
7455 'video title', default
=None)
7456 timestamp
= (self
._parse
_time
_text
(self
._get
_text
(notification
, 'sentTimeText'))
7457 if self
._configuration
_arg
('approximate_date', ie_key
=YoutubeTabIE
)
7462 'ie_key': (YoutubeIE
if video_id
else YoutubeTabIE
).ie_key(),
7463 'video_id': video_id
,
7465 'channel_id': channel_id
,
7467 'uploader': channel
,
7468 'thumbnails': self
._extract
_thumbnails
(notification
, 'videoThumbnail'),
7469 'timestamp': timestamp
,
def _notification_menu_entries(self, ytcfg):
    """Yield notification entries page by page until no continuation remains.

    Each iteration requests ``notification/get_notification_menu``, passing
    the ``ctoken`` taken from the continuation stored by the previous page
    (no ``ctoken`` on the first request), and delegates item extraction to
    ``_extract_notification_menu``.
    """
    pending = [None]  # one-slot holder updated by _extract_notification_menu
    response = None
    page = 1
    while True:
        ctoken = traverse_obj(
            pending, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
        query = {'ctoken': ctoken} if ctoken else {}
        # Visitor data is read from the previous page's response (None on the first page)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        response = self._extract_response(
            item_id=f'page {page}', query=query, ytcfg=ytcfg,
            ep='notification/get_notification_menu', check_get_keys='actions',
            headers=headers)
        yield from self._extract_notification_menu(response, pending)
        if not pending[0]:
            return
        page += 1
7486 def _real_extract(self
, url
):
7487 display_id
= 'notifications'
7488 ytcfg
= self
._download
_ytcfg
('web', display_id
) if not self
.skip_webpage
else {}
7489 self
._report
_playlist
_authcheck
(ytcfg
)
7490 return self
.playlist_result(self
._notification
_menu
_entries
(ytcfg
), display_id
, display_id
)
7493 class YoutubeSearchIE(YoutubeTabBaseInfoExtractor
, SearchInfoExtractor
):
7494 IE_DESC
= 'YouTube search'
7495 IE_NAME
= 'youtube:search'
7496 _SEARCH_KEY
= 'ytsearch'
7497 _SEARCH_PARAMS
= 'EgIQAfABAQ==' # Videos only
7499 'url': 'ytsearch5:youtube-dl test video',
7500 'playlist_count': 5,
7502 'id': 'youtube-dl test video',
7503 'title': 'youtube-dl test video',
7506 'note': 'Suicide/self-harm search warning',
7507 'url': 'ytsearch1:i hate myself and i wanna die',
7508 'playlist_count': 1,
7510 'id': 'i hate myself and i wanna die',
7511 'title': 'i hate myself and i wanna die',
7516 class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor
, SearchInfoExtractor
):
7517 IE_NAME
= YoutubeSearchIE
.IE_NAME
+ ':date'
7518 _SEARCH_KEY
= 'ytsearchdate'
7519 IE_DESC
= 'YouTube search, newest videos first'
7520 _SEARCH_PARAMS
= 'CAISAhAB8AEB' # Videos only, sorted by date
7522 'url': 'ytsearchdate5:youtube-dl test video',
7523 'playlist_count': 5,
7525 'id': 'youtube-dl test video',
7526 'title': 'youtube-dl test video',
7531 class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor
):
7532 IE_DESC
= 'YouTube search URLs with sorting and filter support'
7533 IE_NAME
= YoutubeSearchIE
.IE_NAME
+ '_url'
7534 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
7536 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
7537 'playlist_mincount': 5,
7539 'id': 'youtube-dl test video',
7540 'title': 'youtube-dl test video',
7543 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
7544 'playlist_mincount': 5,
7550 'url': 'https://www.youtube.com/results?search_query=%23cats',
7551 'playlist_mincount': 1,
7555 # The test suite does not have support for nested playlists
7557 # 'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
7563 'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
7566 'title': 'kurzgesagt',
7571 'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
7572 'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
7573 'ie_key': 'YoutubeTab',
7574 'channel': 'Kurzgesagt – In a Nutshell',
7575 'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
7576 'title': 'Kurzgesagt – In a Nutshell',
7577 'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
7578 # No longer available for search as it is set to the handle.
7579 # 'playlist_count': int,
7580 'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
7582 'uploader_id': '@kurzgesagt',
7583 'uploader_url': 'https://www.youtube.com/@kurzgesagt',
7584 'uploader': 'Kurzgesagt – In a Nutshell',
7585 'channel_is_verified': True,
7586 'channel_follower_count': int,
7589 'params': {'extract_flat': True, 'playlist_items': '1'},
7590 'playlist_mincount': 1,
7592 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
7593 'only_matching': True,
def _real_extract(self, url):
    """Return the search results for a results/search URL as a playlist.

    The search text is read from ``search_query`` (falling back to ``q``)
    and the optional ``sp`` value is forwarded to ``_search_results``.
    The search text doubles as playlist id and title.
    """
    qs = parse_qs(url)
    search_terms = qs.get('search_query') or qs.get('q')
    query = search_terms[0]
    # A missing ``sp`` parameter is forwarded as None
    search_params = qs.get('sp', (None,))[0]
    entries = self._search_results(query, search_params)
    return self.playlist_result(entries, query, query)
7602 class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor
):
7603 IE_DESC
= 'YouTube music search URLs with selectable sections, e.g. #songs'
7604 IE_NAME
= 'youtube:music:search_url'
7605 _VALID_URL
= r
'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
7607 'url': 'https://music.youtube.com/search?q=royalty+free+music',
7608 'playlist_count': 16,
7610 'id': 'royalty free music',
7611 'title': 'royalty free music',
7614 'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
7615 'playlist_mincount': 30,
7617 'id': 'royalty free music - songs',
7618 'title': 'royalty free music - songs',
7620 'params': {'extract_flat': 'in_playlist'},
7622 'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
7623 'playlist_mincount': 30,
7625 'id': 'royalty free music - community playlists',
7626 'title': 'royalty free music - community playlists',
7628 'params': {'extract_flat': 'in_playlist'},
7632 'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
7633 'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
7634 'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
7635 'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
7636 'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
7637 'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
def _real_extract(self, url):
    """Return the music search results for the URL as a playlist.

    The section is chosen either by an explicit ``sp`` query value or by
    the URL fragment (e.g. ``#songs``). The playlist id and title are
    ``"<query> - <section>"``, or just the query when no section applies.
    """
    qs = parse_qs(url)
    query = (qs.get('search_query') or qs.get('q'))[0]
    params = qs.get('sp', (None,))[0]
    if not params:
        # No explicit params: treat the URL fragment as a section name
        fragment = ([*url.split('#'), ''])[1]
        section = urllib.parse.unquote_plus(fragment).lower()
        params = self._SECTIONS.get(section)
        if not params:
            section = None
    else:
        # Map known params back to their section name; unknown params are used as-is
        section = next(
            (name for name, value in self._SECTIONS.items() if value == params), params)

    title = join_nonempty(query, section, delim=' - ')
    entries = self._search_results(query, params, default_client='web_music')
    return self.playlist_result(entries, title, title)
7655 class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor
):
7657 Base class for feed extractors
7658 Subclasses must re-define the _FEED_NAME property.
7660 _LOGIN_REQUIRED
= True
7661 _FEED_NAME
= 'feeds'
7665 return f
'youtube:{cls._FEED_NAME}'
def _real_extract(self, url):
    """Delegate to YoutubeTabIE using the feed URL built from ``_FEED_NAME``."""
    feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
    return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
7672 class YoutubeWatchLaterIE(YoutubeBaseInfoExtractor
):
7673 IE_NAME
= 'youtube:watchlater'
7674 IE_DESC
= 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
7675 _VALID_URL
= r
':ytwatchlater'
7677 'url': ':ytwatchlater',
7678 'only_matching': True,
def _real_extract(self, url):
    """Delegate to YoutubeTabIE using the fixed ``list=WL`` playlist URL."""
    watch_later_url = 'https://www.youtube.com/playlist?list=WL'
    return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
7686 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor
):
7687 IE_DESC
= 'YouTube recommended videos; ":ytrec" keyword'
7688 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
7689 _FEED_NAME
= 'recommended'
7690 _LOGIN_REQUIRED
= False
7693 'only_matching': True,
7695 'url': ':ytrecommended',
7696 'only_matching': True,
7698 'url': 'https://youtube.com',
7699 'only_matching': True,
7703 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor
):
7704 IE_DESC
= 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
7705 _VALID_URL
= r
':ytsub(?:scription)?s?'
7706 _FEED_NAME
= 'subscriptions'
7709 'only_matching': True,
7711 'url': ':ytsubscriptions',
7712 'only_matching': True,
7716 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor
):
7717 IE_DESC
= 'Youtube watch history; ":ythis" keyword (requires cookies)'
7718 _VALID_URL
= r
':ythis(?:tory)?'
7719 _FEED_NAME
= 'history'
7721 'url': ':ythistory',
7722 'only_matching': True,
7726 class YoutubeShortsAudioPivotIE(YoutubeBaseInfoExtractor
):
7727 IE_DESC
= 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
7728 IE_NAME
= 'youtube:shorts:pivot:audio'
7729 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
7731 'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
7732 'only_matching': True,
7736 def _generate_audio_pivot_params(video_id
):
7738 Generates sfv_audio_pivot browse params for this video id
7740 pb_params
= b
'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % ((video_id
.encode(),) * 3)
7741 return urllib
.parse
.quote(base64
.b64encode(pb_params
).decode())
7743 def _real_extract(self
, url
):
7744 video_id
= self
._match
_id
(url
)
7745 return self
.url_result(
7746 f
'https://www.youtube.com/feed/sfv_audio_pivot?bp={self._generate_audio_pivot_params(video_id)}',
7750 class YoutubeTruncatedURLIE(YoutubeBaseInfoExtractor
):
7751 IE_NAME
= 'youtube:truncated_url'
7752 IE_DESC
= False # Do not list
7753 _VALID_URL
= r
'''(?x)
7755 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
7758 annotation_id=annotation_[^&]+|
7764 attribution_link\?a=[^&]+
7770 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
7771 'only_matching': True,
7773 'url': 'https://www.youtube.com/watch?',
7774 'only_matching': True,
7776 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
7777 'only_matching': True,
7779 'url': 'https://www.youtube.com/watch?feature=foo',
7780 'only_matching': True,
7782 'url': 'https://www.youtube.com/watch?hl=en-GB',
7783 'only_matching': True,
7785 'url': 'https://www.youtube.com/watch?t=2372',
7786 'only_matching': True,
7789 def _real_extract(self
, url
):
7790 raise ExtractorError(
7791 'Did you forget to quote the URL? Remember that & is a meta '
7792 'character in most shells, so you want to put the URL in quotes, '
7794 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
7795 ' or simply yt-dlp BaW_jenozKc .',
7799 class YoutubeClipIE(YoutubeTabBaseInfoExtractor
):
7800 IE_NAME
= 'youtube:clip'
7801 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
7803 # FIXME: Other metadata should be extracted from the clip, not from the base video
7804 'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
7806 'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
7808 'section_start': 29.0,
7809 'section_end': 39.7,
7812 'availability': 'public',
7813 'categories': ['Gaming'],
7814 'channel': 'Scott The Woz',
7815 'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
7816 'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
7817 'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
7819 'playable_in_embed': True,
7821 'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
7822 'title': 'Mobile Games on Console - Scott The Woz',
7823 'upload_date': '20210920',
7824 'uploader': 'Scott The Woz',
7825 'uploader_id': '@ScottTheWoz',
7826 'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
7828 'live_status': 'not_live',
7829 'channel_follower_count': int,
7830 'chapters': 'count:20',
7831 'comment_count': int,
7832 'heatmap': 'count:100',
7836 def _real_extract(self
, url
):
7837 clip_id
= self
._match
_id
(url
)
7838 _
, data
= self
._extract
_webpage
(url
, clip_id
)
7840 video_id
= traverse_obj(data
, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
7842 raise ExtractorError('Unable to find video ID')
7844 clip_data
= traverse_obj(data
, (
7845 'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
7846 'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
7847 'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
7848 'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all
=False)
7851 '_type': 'url_transparent',
7852 'url': f
'https://www.youtube.com/watch?v={video_id}',
7853 'ie_key': YoutubeIE
.ie_key(),
7855 'section_start': int(clip_data
['startTimeMs']) / 1000,
7856 'section_end': int(clip_data
['endTimeMs']) / 1000,
7857 '_format_sort_fields': ( # https protocol is prioritized for ffmpeg compatibility
7858 'proto:https', 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang'),
7862 class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor
):
7863 IE_NAME
= 'youtube:consent'
7864 IE_DESC
= False # Do not list
7865 _VALID_URL
= r
'https?://consent\.youtube\.com/m\?'
7867 'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
7869 'id': 'qVv6vCqciTM',
7872 'uploader_id': '@sana_natori',
7873 'comment_count': int,
7874 'chapters': 'count:13',
7875 'upload_date': '20221223',
7876 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
7877 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
7878 'uploader_url': 'https://www.youtube.com/@sana_natori',
7880 'release_date': '20221223',
7881 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
7882 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
7884 'playable_in_embed': True,
7886 'availability': 'public',
7887 'channel_follower_count': int,
7888 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
7889 'categories': ['Entertainment'],
7890 'live_status': 'was_live',
7891 'release_timestamp': 1671793345,
7892 'channel': 'さなちゃんねる',
7893 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
7894 'uploader': 'さなちゃんねる',
7895 'channel_is_verified': True,
7896 'heatmap': 'count:100',
7898 'add_ie': ['Youtube'],
7899 'params': {'skip_download': 'Youtube'},
def _real_extract(self, url):
    """Follow a cookie-consent redirect to the URL in its ``continue`` parameter.

    The last ``continue`` value in the query string is used.

    Raises:
        ExtractorError: (expected) if ``continue`` is missing or is rejected
            by ``url_or_none``.
    """
    continue_values = parse_qs(url).get('continue', [None])
    redirect_url = url_or_none(continue_values[-1])
    if redirect_url:
        return self.url_result(redirect_url)
    raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
7909 class YoutubeTruncatedIDIE(YoutubeBaseInfoExtractor
):
7910 IE_NAME
= 'youtube:truncated_id'
7911 IE_DESC
= False # Do not list
7912 _VALID_URL
= r
'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
7915 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
7916 'only_matching': True,
7919 def _real_extract(self
, url
):
7920 video_id
= self
._match
_id
(url
)
7921 raise ExtractorError(
7922 f
'Incomplete YouTube ID {video_id}. URL {url} looks truncated.',