# Commit: [ie/youtube] Player client maintenance (#11528)
# Path: [yt-dlp3.git] / yt_dlp / extractor / youtube.py
# Blob: 2c57ee60050e4d350f40ea8b167739beeb87fe7d
1 import base64
2 import calendar
3 import collections
4 import copy
5 import datetime as dt
6 import enum
7 import functools
8 import hashlib
9 import itertools
10 import json
11 import math
12 import os.path
13 import random
14 import re
15 import shlex
16 import sys
17 import threading
18 import time
19 import traceback
20 import urllib.parse
22 from .common import InfoExtractor, SearchInfoExtractor
23 from .openload import PhantomJSwrapper
24 from ..jsinterp import JSInterpreter
25 from ..networking.exceptions import HTTPError, TransportError, network_exceptions
26 from ..utils import (
27 NO_DEFAULT,
28 ExtractorError,
29 LazyList,
30 UserNotLive,
31 bug_reports_message,
32 classproperty,
33 clean_html,
34 datetime_from_str,
35 dict_get,
36 filesize_from_tbr,
37 filter_dict,
38 float_or_none,
39 format_field,
40 get_first,
41 int_or_none,
42 is_html,
43 join_nonempty,
44 js_to_json,
45 mimetype2ext,
46 orderedSet,
47 parse_codecs,
48 parse_count,
49 parse_duration,
50 parse_iso8601,
51 parse_qs,
52 qualities,
53 remove_start,
54 smuggle_url,
55 str_or_none,
56 str_to_int,
57 strftime_or_none,
58 time_seconds,
59 traverse_obj,
60 try_call,
61 try_get,
62 unescapeHTML,
63 unified_strdate,
64 unified_timestamp,
65 unsmuggle_url,
66 update_url_query,
67 url_or_none,
68 urljoin,
69 variadic,
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'
STREAMING_DATA_PO_TOKEN = '__yt_dlp_po_token'

# Per-client Innertube configuration, keyed by the client name used in yt-dlp.
# Optional flags that an entry omits (INNERTUBE_HOST, REQUIRE_JS_PLAYER,
# REQUIRE_PO_TOKEN, REQUIRE_AUTH, PLAYER_PARAMS) are given defaults by
# build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
        'REQUIRE_PO_TOKEN': True,
    },
    # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
    'web_safari': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240726.00.00',
                'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
        'REQUIRE_PO_TOKEN': True,
    },
    'web_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20240723.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20240724.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    # This client now requires sign-in for every video
    'web_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20240723.03.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
        'REQUIRE_AUTH': True,
    },
    'android': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '19.44.38',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.44.38 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_PO_TOKEN': True,
    },
    # This client now requires sign-in for every video
    'android_music': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '7.27.52',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/7.27.52 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_PO_TOKEN': True,
        'REQUIRE_AUTH': True,
    },
    # This client now requires sign-in for every video
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '24.45.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/24.45.100 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_PO_TOKEN': True,
        'REQUIRE_AUTH': True,
    },
    # YouTube Kids videos aren't returned on this client for some reason
    'android_vr': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_VR',
                'clientVersion': '1.60.19',
                'deviceMake': 'Oculus',
                'deviceModel': 'Quest 3',
                'androidSdkVersion': 32,
                'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.60.19 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
                'osName': 'Android',
                'osVersion': '12L',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '19.45.4',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '18.1.0.22B83',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    # This client now requires sign-in for every video
    'ios_music': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '7.27.0',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtubemusic/7.27.0 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '18.1.0.22B83',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_AUTH': True,
    },
    # This client now requires sign-in for every video
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '24.45.100',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.ytcreator/24.45.100 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '18.1.0.22B83',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_AUTH': True,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20240726.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    'tv': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5',
                'clientVersion': '7.20240724.13.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
    },
    # This client now requires sign-in for every video
    # It was previously an age-gate workaround for videos that were `playable_in_embed`
    # It may still be useful if signed into an EU account that is not age-verified
    'tv_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
        'REQUIRE_AUTH': True,
    },
    # This client now requires sign-in for every video
    # It may be able to receive pre-merged video+audio 720p/1080p streams
    'mediaconnect': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MEDIA_CONNECT_FRONTEND',
                'clientVersion': '0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
        'REQUIRE_JS_PLAYER': False,
        'REQUIRE_AUTH': True,
    },
}
295 def _split_innertube_client(client_name):
296 variant, *base = client_name.rsplit('.', 1)
297 if base:
298 return variant, base[0], variant
299 base, *variant = client_name.split('_', 1)
300 return client_name, base, variant[0] if variant else None
def short_client_name(client_name):
    """Return a compact upper-case label for *client_name*.

    The full name reported by ``_split_innertube_client`` is cut at
    underscores; the first four characters of the leading segment and the
    initial of every following segment are combined with ``join_nonempty``
    and upper-cased.
    """
    main, *parts = _split_innertube_client(client_name)[0].split('_')
    # x[:1] instead of x[0]: an empty segment (doubled or trailing underscore)
    # contributes nothing rather than raising IndexError.
    return join_nonempty(main[:4], ''.join(x[:1] for x in parts)).upper()
def build_innertube_clients():
    """Fill in defaults and a ``priority`` for every ``INNERTUBE_CLIENTS`` entry.

    The module-level table is mutated in place:

    * missing ``INNERTUBE_HOST``, ``REQUIRE_JS_PLAYER``, ``REQUIRE_PO_TOKEN``,
      ``REQUIRE_AUTH`` and ``PLAYER_PARAMS`` keys receive defaults;
    * the client context gets ``hl='en'`` unless already set;
    * ``priority`` is ten times the rank of the base client, lowered by 2 for
      ``embedded`` variants (which also receive a ``thirdParty`` context) and
      by 3 for any other variant.
    """
    THIRD_PARTY = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    BASE_CLIENTS = ('ios', 'web', 'tv', 'mweb', 'android')
    # NOTE(review): `qualities` presumably ranks by position in the given
    # sequence, so reversing makes earlier BASE_CLIENTS rank higher - confirm.
    priority = qualities(BASE_CLIENTS[::-1])

    # Iterate over a snapshot of the items while the entries are updated.
    for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
        ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
        ytcfg.setdefault('REQUIRE_PO_TOKEN', False)
        ytcfg.setdefault('REQUIRE_AUTH', False)
        ytcfg.setdefault('PLAYER_PARAMS', None)
        ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        _, base_client, variant = _split_innertube_client(client)
        ytcfg['priority'] = 10 * priority(base_client)

        if variant == 'embedded':
            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            ytcfg['priority'] -= 2
        elif variant:
            ytcfg['priority'] -= 3


build_innertube_clients()
class BadgeType(enum.Enum):
    """Badge categories: availability states, live status and verification."""

    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
346 class YoutubeBaseInfoExtractor(InfoExtractor):
347 """Provide base functions for Youtube extractors"""
349 _RESERVED_NAMES = (
350 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
351 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
352 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
353 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
355 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
357 # _NETRC_MACHINE = 'youtube'
359 # If True it will raise an error if no login info is provided
360 _LOGIN_REQUIRED = False
362 _INVIDIOUS_SITES = (
363 # invidious-redirect websites
364 r'(?:www\.)?redirect\.invidious\.io',
365 r'(?:(?:www|dev)\.)?invidio\.us',
366 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
367 r'(?:www\.)?invidious\.pussthecat\.org',
368 r'(?:www\.)?invidious\.zee\.li',
369 r'(?:www\.)?invidious\.ethibox\.fr',
370 r'(?:www\.)?iv\.ggtyler\.dev',
371 r'(?:www\.)?inv\.vern\.i2p',
372 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
373 r'(?:www\.)?inv\.riverside\.rocks',
374 r'(?:www\.)?invidious\.silur\.me',
375 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
376 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
377 r'(?:www\.)?invidious\.slipfox\.xyz',
378 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
379 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
380 r'(?:www\.)?invidious\.tiekoetter\.com',
381 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
382 r'(?:www\.)?invidious\.nerdvpn\.de',
383 r'(?:www\.)?invidious\.weblibre\.org',
384 r'(?:www\.)?inv\.odyssey346\.dev',
385 r'(?:www\.)?invidious\.dhusch\.de',
386 r'(?:www\.)?iv\.melmac\.space',
387 r'(?:www\.)?watch\.thekitty\.zone',
388 r'(?:www\.)?invidious\.privacydev\.net',
389 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
390 r'(?:www\.)?invidious\.drivet\.xyz',
391 r'(?:www\.)?vid\.priv\.au',
392 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
393 r'(?:www\.)?inv\.vern\.cc',
394 r'(?:www\.)?invidious\.esmailelbob\.xyz',
395 r'(?:www\.)?invidious\.sethforprivacy\.com',
396 r'(?:www\.)?yt\.oelrichsgarcia\.de',
397 r'(?:www\.)?yt\.artemislena\.eu',
398 r'(?:www\.)?invidious\.flokinet\.to',
399 r'(?:www\.)?invidious\.baczek\.me',
400 r'(?:www\.)?y\.com\.sb',
401 r'(?:www\.)?invidious\.epicsite\.xyz',
402 r'(?:www\.)?invidious\.lidarshield\.cloud',
403 r'(?:www\.)?yt\.funami\.tech',
404 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
405 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
406 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
407 # youtube-dl invidious instances list
408 r'(?:(?:www|no)\.)?invidiou\.sh',
409 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
410 r'(?:www\.)?invidious\.kabi\.tk',
411 r'(?:www\.)?invidious\.mastodon\.host',
412 r'(?:www\.)?invidious\.zapashcanon\.fr',
413 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
414 r'(?:www\.)?invidious\.tinfoil-hat\.net',
415 r'(?:www\.)?invidious\.himiko\.cloud',
416 r'(?:www\.)?invidious\.reallyancient\.tech',
417 r'(?:www\.)?invidious\.tube',
418 r'(?:www\.)?invidiou\.site',
419 r'(?:www\.)?invidious\.site',
420 r'(?:www\.)?invidious\.xyz',
421 r'(?:www\.)?invidious\.nixnet\.xyz',
422 r'(?:www\.)?invidious\.048596\.xyz',
423 r'(?:www\.)?invidious\.drycat\.fr',
424 r'(?:www\.)?inv\.skyn3t\.in',
425 r'(?:www\.)?tube\.poal\.co',
426 r'(?:www\.)?tube\.connect\.cafe',
427 r'(?:www\.)?vid\.wxzm\.sx',
428 r'(?:www\.)?vid\.mint\.lgbt',
429 r'(?:www\.)?vid\.puffyan\.us',
430 r'(?:www\.)?yewtu\.be',
431 r'(?:www\.)?yt\.elukerio\.org',
432 r'(?:www\.)?yt\.lelux\.fi',
433 r'(?:www\.)?invidious\.ggc-project\.de',
434 r'(?:www\.)?yt\.maisputain\.ovh',
435 r'(?:www\.)?ytprivate\.com',
436 r'(?:www\.)?invidious\.13ad\.de',
437 r'(?:www\.)?invidious\.toot\.koeln',
438 r'(?:www\.)?invidious\.fdn\.fr',
439 r'(?:www\.)?watch\.nettohikari\.com',
440 r'(?:www\.)?invidious\.namazso\.eu',
441 r'(?:www\.)?invidious\.silkky\.cloud',
442 r'(?:www\.)?invidious\.exonip\.de',
443 r'(?:www\.)?invidious\.riverside\.rocks',
444 r'(?:www\.)?invidious\.blamefran\.net',
445 r'(?:www\.)?invidious\.moomoo\.de',
446 r'(?:www\.)?ytb\.trom\.tf',
447 r'(?:www\.)?yt\.cyberhost\.uk',
448 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
449 r'(?:www\.)?qklhadlycap4cnod\.onion',
450 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
451 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
452 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
453 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
454 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
455 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
456 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
457 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
458 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
459 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
460 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
461 r'(?:www\.)?piped\.kavin\.rocks',
462 r'(?:www\.)?piped\.tokhmi\.xyz',
463 r'(?:www\.)?piped\.syncpundit\.io',
464 r'(?:www\.)?piped\.mha\.fi',
465 r'(?:www\.)?watch\.whatever\.social',
466 r'(?:www\.)?piped\.garudalinux\.org',
467 r'(?:www\.)?piped\.rivo\.lol',
468 r'(?:www\.)?piped-libre\.kavin\.rocks',
469 r'(?:www\.)?yt\.jae\.fi',
470 r'(?:www\.)?piped\.mint\.lgbt',
471 r'(?:www\.)?il\.ax',
472 r'(?:www\.)?piped\.esmailelbob\.xyz',
473 r'(?:www\.)?piped\.projectsegfau\.lt',
474 r'(?:www\.)?piped\.privacydev\.net',
475 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
476 r'(?:www\.)?piped\.smnz\.de',
477 r'(?:www\.)?piped\.adminforge\.de',
478 r'(?:www\.)?watch\.whatevertinfoil\.de',
479 r'(?:www\.)?piped\.qdi\.fi',
480 r'(?:(?:www|cf)\.)?piped\.video',
481 r'(?:www\.)?piped\.aeong\.one',
482 r'(?:www\.)?piped\.moomoo\.me',
483 r'(?:www\.)?piped\.chauvet\.pro',
484 r'(?:www\.)?watch\.leptons\.xyz',
485 r'(?:www\.)?pd\.vern\.cc',
486 r'(?:www\.)?piped\.hostux\.net',
487 r'(?:www\.)?piped\.lunar\.icu',
488 # Hyperpipe instances from https://hyperpipe.codeberg.page/
489 r'(?:www\.)?hyperpipe\.surge\.sh',
490 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
491 r'(?:www\.)?listen\.whatever\.social',
492 r'(?:www\.)?music\.adminforge\.de',
495 # extracted from account/account_menu ep
496 # XXX: These are the supported YouTube UI and API languages,
497 # which is slightly different from languages supported for translation in YouTube studio
498 _SUPPORTED_LANG_CODES = [
499 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
500 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
501 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
502 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
503 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
504 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
507 _IGNORED_WARNINGS = {
508 'Unavailable videos will be hidden during playback',
509 'Unavailable videos are hidden',
512 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
513 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
515 _NETRC_MACHINE = 'youtube'
def ucid_or_none(self, ucid):
    """Search *ucid* for a full-string match of ``_YT_CHANNEL_UCID_RE``.

    ``default=None`` is passed to ``_search_regex``, so a non-matching value
    is expected to yield ``None`` instead of an error.
    """
    return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
def handle_or_none(self, handle):
    """Search *handle* for a full-string match of ``_YT_HANDLE_RE``.

    ``default=None`` is passed to ``_search_regex``, so a non-matching value
    is expected to yield ``None`` instead of an error.
    """
    return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None)
def handle_from_url(self, url):
    """Extract an ``@handle`` from the start of a youtube.com URL or path.

    The scheme/host prefix is optional, so a bare ``/@handle`` path is
    accepted too. ``default=None`` is passed to ``_search_regex``.
    """
    return self._search_regex(
        rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})',
        url, 'channel handle', default=None)
def ucid_from_url(self, url):
    """Extract a ``UC...`` channel id from the start of a youtube.com URL or path.

    The scheme/host prefix is optional, so a bare ``/UC...`` path is accepted
    too. ``default=None`` is passed to ``_search_regex``.
    """
    return self._search_regex(
        rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})',
        url, 'channel id', default=None)
@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.

    Raises ExtractorError (expected) when the configured code is not listed in
    _SUPPORTED_LANG_CODES; warns when a language other than "en" is chosen.
    """
    preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not preferred_lang:
        return
    if preferred_lang not in self._SUPPORTED_LANG_CODES:
        raise ExtractorError(
            f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
            expected=True)
    elif preferred_lang != 'en':
        self.report_warning(
            f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return preferred_lang
def _initialize_consent(self):
    """Set the youtube.com ``SOCS`` cookie to ``'CAI'`` when appropriate.

    Nothing is changed when a ``__Secure-3PSID`` cookie exists, or when an
    existing ``SOCS`` cookie holds a value that does not start with ``'CAA'``.
    """
    cookies = self._get_cookies('https://www.youtube.com/')
    if cookies.get('__Secure-3PSID'):
        return
    socs = cookies.get('SOCS')
    # A 'CAA' prefix is the "not consented" value; anything else is kept as is.
    if socs and not socs.value.startswith('CAA'):
        return
    self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)  # accept all (required for mixes)
def _initialize_pref(self):
    """Rewrite the ``PREF`` cookie so that ``hl`` and ``tz`` are pinned.

    Existing key/value pairs of the cookie are kept; ``hl`` is set to the
    preferred language (``'en'`` when none is configured) and ``tz`` to UTC.
    A cookie that fails to parse is reported and replaced.
    """
    cookies = self._get_cookies('https://www.youtube.com/')
    pref_cookie = cookies.get('PREF')
    pref = {}
    if pref_cookie:
        try:
            pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
def _real_initialize(self):
    """Prepare cookies (PREF, then consent) and enforce the login requirement."""
    self._initialize_pref()
    self._initialize_consent()
    self._check_login_required()
def _perform_login(self, username, password):
    """Start OAuth login for usernames of the form ``oauth[+PROFILE]``.

    The part after ``+`` is the profile name; *password* is forwarded to
    ``_initialize_oauth`` as the refresh token. Any other username raises an
    expected ExtractorError carrying the login hint.
    """
    auth_type, _, user = (username or '').partition('+')

    if auth_type != 'oauth':
        raise ExtractorError(self._youtube_login_hint, expected=True)

    self._initialize_oauth(user, password)
584 OAuth 2.0 Device Authorization Grant flow, used by the YouTube TV client (youtube.com/tv).
586 For more information regarding OAuth 2.0 and the Device Authorization Grant flow in general, see:
587 - https://developers.google.com/identity/protocols/oauth2/limited-input-device
588 - https://accounts.google.com/.well-known/openid-configuration
589 - https://www.rfc-editor.org/rfc/rfc8628
590 - https://www.rfc-editor.org/rfc/rfc6749
592 Note: The official client appears to use a proxied version of the oauth2 endpoints on youtube.com/o/oauth2,
593 which applies some modifications to the response (such as returning errors as 200 OK).
594 Since the client works with the standard API, we will use that as it is well-documented.
597 _OAUTH_PROFILE = None
598 _OAUTH_ACCESS_TOKEN_CACHE = {}
599 _OAUTH_DISPLAY_ID = 'oauth'
601 # YouTube TV (TVHTML5) client. You can find these at youtube.com/tv
602 _OAUTH_CLIENT_ID = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com'
603 _OAUTH_CLIENT_SECRET = 'SboVhoG9s0rNafixCSGGKXAT'
604 _OAUTH_SCOPE = 'http://gdata.youtube.com https://www.googleapis.com/auth/youtube-paid-content'
606 # From https://accounts.google.com/.well-known/openid-configuration
607 # Technically, these should be fetched dynamically and not hard-coded.
608 # However, as these endpoints rarely change, we can risk saving an extra request for every invocation.
609 _OAUTH_DEVICE_AUTHORIZATION_ENDPOINT = 'https://oauth2.googleapis.com/device/code'
610 _OAUTH_TOKEN_ENDPOINT = 'https://oauth2.googleapis.com/token'
@property
def _oauth_cache_key(self):
    """Cache key under which the refresh token of the current profile is stored."""
    return f'oauth_refresh_token_{self._OAUTH_PROFILE}'
def _read_oauth_error_response(self, response):
    """Return the string ``error`` field of a JSON error *response* body.

    The body is read non-fatally and decoded with ``json.loads`` inside
    ``traverse_obj``; only a ``str`` value of ``error`` is passed through.
    """
    return traverse_obj(
        self._webpage_read_content(response, self._OAUTH_TOKEN_ENDPOINT, self._OAUTH_DISPLAY_ID, fatal=False),
        ({json.loads}, 'error', {str}))
def _set_oauth_info(self, token_response):
    """Record a token response in the class-level access-token cache.

    Stores ``access_token``, ``token_type`` and an ``expiry`` computed from
    ``expires_in`` (300 when absent) minus 10 seconds. When the response
    carries a ``refresh_token`` it is written to the on-disk cache and to the
    in-memory entry as well.
    """
    YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.setdefault(self._OAUTH_PROFILE, {}).update({
        'access_token': token_response['access_token'],
        'token_type': token_response['token_type'],
        'expiry': time_seconds(
            seconds=traverse_obj(token_response, ('expires_in', {float_or_none}), default=300) - 10),
    })
    refresh_token = traverse_obj(token_response, ('refresh_token', {str}))
    if refresh_token:
        self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
        YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
def _initialize_oauth(self, user, refresh_token):
    """Log in with OAuth for profile *user* (``'default'`` when empty).

    A profile already present in the class-level cache is reused as is.
    Otherwise a refresh token is taken from *refresh_token* (and stored to the
    cache) or loaded from the cache; with a token a refresh is attempted,
    falling back to the interactive device authorization when that fails or
    when no token is available.
    """
    self._OAUTH_PROFILE = user or 'default'

    if self._OAUTH_PROFILE in YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE:
        self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Using cached access token for profile "{self._OAUTH_PROFILE}"')
        return

    YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {}

    if refresh_token:
        msg = f'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token'
        if self.get_param('cachedir') is not False:
            msg += ' and caching token to disk; you should supply an empty password next time'
        self.to_screen(msg)
        self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
    else:
        refresh_token = self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)

    if refresh_token:
        YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
        try:
            token_response = self._refresh_token(refresh_token)
        except ExtractorError as e:
            # _refresh_token prefixes its messages; strip the prefix so it is
            # not repeated in the warning below.
            error_msg = str(e.orig_msg).replace('Failed to refresh access token: ', '')
            self.report_warning(f'{self._OAUTH_DISPLAY_ID}: Failed to refresh access token: {error_msg}')
            # Property access: this runs the interactive authorization flow.
            token_response = self._oauth_authorize
    else:
        token_response = self._oauth_authorize

    self._set_oauth_info(token_response)
    self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Logged in using profile "{self._OAUTH_PROFILE}"')
def _refresh_token(self, refresh_token):
    """Exchange *refresh_token* for a new token response at the token endpoint.

    Returns the decoded JSON response. An HTTP error is turned into an
    ExtractorError whose message starts with
    ``'Failed to refresh access token: '`` (expected for ``invalid_grant``);
    any other failure is re-raised unchanged.
    """
    try:
        token_response = self._download_json(
            self._OAUTH_TOKEN_ENDPOINT,
            video_id=self._OAUTH_DISPLAY_ID,
            note='Refreshing access token',
            data=json.dumps({
                'client_id': self._OAUTH_CLIENT_ID,
                'client_secret': self._OAUTH_CLIENT_SECRET,
                'refresh_token': refresh_token,
                'grant_type': 'refresh_token',
            }).encode(),
            headers={'Content-Type': 'application/json'})
    except ExtractorError as e:
        if isinstance(e.cause, HTTPError):
            error = self._read_oauth_error_response(e.cause.response)
            if error == 'invalid_grant':
                # RFC6749 § 5.2
                raise ExtractorError(
                    'Failed to refresh access token: Refresh token is invalid, revoked, or expired (invalid_grant)',
                    expected=True, video_id=self._OAUTH_DISPLAY_ID)
            raise ExtractorError(
                f'Failed to refresh access token: Authorization server returned error {error}',
                video_id=self._OAUTH_DISPLAY_ID)
        raise
    return token_response
@property
def _oauth_authorize(self):
    """Run the device authorization flow and return the token response.

    Requests a device/user code, prints the verification URL and code for the
    user, then polls the token endpoint until a token is issued. Transport
    errors and HTTP errors without a readable ``error`` field are handed to
    the retry manager; ``authorization_pending`` and ``slow_down`` keep
    polling; ``expired_token`` and ``access_denied`` raise an expected
    ExtractorError; any other server error raises an ExtractorError.
    """
    code_response = self._download_json(
        self._OAUTH_DEVICE_AUTHORIZATION_ENDPOINT,
        video_id=self._OAUTH_DISPLAY_ID,
        note='Initializing authorization flow',
        data=json.dumps({
            'client_id': self._OAUTH_CLIENT_ID,
            'scope': self._OAUTH_SCOPE,
        }).encode(),
        headers={'Content-Type': 'application/json'})

    verification_url = traverse_obj(code_response, ('verification_url', {str}))
    user_code = traverse_obj(code_response, ('user_code', {str}))
    if not verification_url or not user_code:
        raise ExtractorError(
            'Authorization server did not provide verification_url or user_code', video_id=self._OAUTH_DISPLAY_ID)

    # note: The whitespace is intentional
    # NOTE(review): this copy of the file had runs of whitespace collapsed, so
    # any extra spacing this note refers to inside the message may have been
    # lost - confirm the literal against upstream.
    self.to_screen(
        f'{self._OAUTH_DISPLAY_ID}: To give yt-dlp access to your account, '
        f'go to {verification_url} and enter code {user_code}')

    # RFC8628 § 3.5: default poll interval is 5 seconds if not provided
    poll_interval = traverse_obj(code_response, ('interval', {int}), default=5)

    for retry in self.RetryManager():
        # Inner loop polls; `break` leaves it so the retry manager can decide
        # whether another attempt is made.
        while True:
            try:
                token_response = self._download_json(
                    self._OAUTH_TOKEN_ENDPOINT,
                    video_id=self._OAUTH_DISPLAY_ID,
                    note=False,
                    errnote='Failed to request access token',
                    data=json.dumps({
                        'client_id': self._OAUTH_CLIENT_ID,
                        'client_secret': self._OAUTH_CLIENT_SECRET,
                        'device_code': code_response['device_code'],
                        'grant_type': 'urn:ietf:params:oauth:grant-type:device_code',
                    }).encode(),
                    headers={'Content-Type': 'application/json'})
            except ExtractorError as e:
                if isinstance(e.cause, TransportError):
                    retry.error = e
                    break
                elif isinstance(e.cause, HTTPError):
                    error = self._read_oauth_error_response(e.cause.response)
                    if not error:
                        retry.error = e
                        break

                    if error == 'authorization_pending':
                        time.sleep(poll_interval)
                        continue
                    elif error == 'expired_token':
                        raise ExtractorError(
                            'Authorization timed out', expected=True, video_id=self._OAUTH_DISPLAY_ID)
                    elif error == 'access_denied':
                        raise ExtractorError(
                            'You denied access to an account', expected=True, video_id=self._OAUTH_DISPLAY_ID)
                    elif error == 'slow_down':
                        # RFC8628 § 3.5: add 5 seconds to the poll interval
                        poll_interval += 5
                        time.sleep(poll_interval)
                        continue
                    else:
                        raise ExtractorError(
                            f'Authorization server returned an error when fetching access token: {error}',
                            video_id=self._OAUTH_DISPLAY_ID)
                # Neither a transport nor an HTTP problem: propagate unchanged.
                raise

            # Reached only when the token request succeeded.
            return token_response
def _update_oauth(self):
    """Refresh the cached access token of the current profile once it has expired.

    Does nothing when the profile has no cache entry or when the stored
    ``expiry`` is still in the future.
    """
    token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE)
    if token is None or token['expiry'] > time.time():
        return

    self._set_oauth_info(self._refresh_token(token['refresh_token']))
@property
def _youtube_login_hint(self):
    """Help text describing the OAuth and cookie login options.

    The generic cookie hint from ``_login_hint`` is spliced in with its first
    character replaced by a lower-case ``u``.
    """
    return ('Use --username=oauth[+PROFILE] --password="" to log in using oauth, '
            f'or else u{self._login_hint(method="cookies")[1:]}. '
            'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth for more on how to use oauth. '
            'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for help with cookies')
779 def _check_login_required(self):
780 if self._LOGIN_REQUIRED and not self.is_authenticated:
781 self.raise_login_required(
782 f'Login details are needed to download this content. {self._youtube_login_hint}', method=None)
784 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
785 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the built-in configuration of *client*.

    The copy keeps callers from mutating the shared ``INNERTUBE_CLIENTS`` entry.
    """
    return copy.deepcopy(INNERTUBE_CLIENTS[client])
def _get_innertube_host(self, client='web'):
    """Return the ``INNERTUBE_HOST`` configured for *client*."""
    return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """Read a value from *ytcfg*, falling back to the default client config.

    try_get but with fallback to default ytcfg client values when present.
    The fallback is used whenever the value from *ytcfg* is falsy.
    """
    _func = lambda y: try_get(y, getter, expected_type)
    return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
def _extract_client_name(self, ytcfg, default_client='web'):
    """Return the client name from *ytcfg* or from the default client config.

    ``INNERTUBE_CLIENT_NAME`` is tried before the ``clientName`` of the
    Innertube context; only ``str`` values are accepted.
    """
    return self._ytcfg_get_safe(
        ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
    """Return the client version from *ytcfg* or from the default client config.

    ``INNERTUBE_CLIENT_VERSION`` is tried before the ``clientVersion`` of the
    Innertube context; only ``str`` values are accepted.
    """
    return self._ytcfg_get_safe(
        ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), str, default_client)
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API host name.

    Order of precedence: the ``innertube_host`` extractor argument, then
    *req_api_hostname*, then the host configured for *default_client*
    (``'web'`` when not given).
    """
    return (self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
            or req_api_hostname or self._get_innertube_host(default_client or 'web'))
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the Innertube context with language and time zone enforced.

    The context is taken from *ytcfg* when it provides one, otherwise from a
    copy of the default client config. Its ``client`` dict is updated in
    place, so a context owned by *ytcfg* is modified as well.
    """
    context = get_first(
        (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
    return context
820 _SAPISID = None
822 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
823 time_now = round(time.time())
824 if self._SAPISID is None:
825 yt_cookies = self._get_cookies('https://www.youtube.com')
826 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
827 # See: https://github.com/yt-dlp/yt-dlp/issues/393
828 sapisid_cookie = dict_get(
829 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
830 if sapisid_cookie and sapisid_cookie.value:
831 self._SAPISID = sapisid_cookie.value
832 self.write_debug('Extracted SAPISID cookie')
833 # SAPISID cookie is required if not already present
834 if not yt_cookies.get('SAPISID'):
835 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
836 self._set_cookie(
837 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
838 else:
839 self._SAPISID = False
840 if not self._SAPISID:
841 return None
842 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
843 sapisidhash = hashlib.sha1(
844 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
845 return f'SAPISIDHASH {time_now}_{sapisidhash}'
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST *query* to the ``/youtubei/v1/<ep>`` endpoint and return the JSON.

    The request body is *query* merged over ``{'context': ...}``, where the
    context is *context* when truthy and otherwise the one extracted for
    *default_client*. *headers* are merged over the generated API headers.
    The ``key`` URL parameter comes from the ``innertube_key`` extractor
    argument (defaulting to *api_key*) and is left out when empty.
    """
    data = {'context': context or self._extract_context(default_client=default_client)}
    data.update(query)
    real_headers = self.generate_api_headers(default_client=default_client)
    real_headers.update({'content-type': 'application/json'})
    if headers:
        real_headers.update(headers)
    return self._download_json(
        f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}',
        video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=json.dumps(data).encode('utf8'), headers=real_headers,
        query=filter_dict({
            'key': self._configuration_arg(
                'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0],
            'prettyPrint': 'false',
        }, cndn=lambda _, v: v))
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON object matched by `_YT_INITIAL_DATA_RE` and return it"""
    pattern = self._YT_INITIAL_DATA_RE
    initial_data = self._search_json(pattern, webpage, 'yt initial data', item_id, fatal=fatal)
    return initial_data
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    @params ytcfg-like mappings; the first usable 'SESSION_INDEX' wins
    @returns the index as an int, or None if none of the arguments has one
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
881 def _data_sync_id_to_delegated_session_id(self, data_sync_id):
882 if not data_sync_id:
883 return
884 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
885 # and just "user_syncid||" for primary channel. We only want the channel_syncid
886 channel_syncid, _, user_syncid = data_sync_id.partition('||')
887 if user_syncid:
888 return channel_syncid
def _extract_account_syncid(self, *args):
    """
    Extract current session ID required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    # ytcfg includes channel_syncid if on secondary channel
    delegated_sid = traverse_obj(args, (..., 'DELEGATED_SESSION_ID', {str}, any))
    if not delegated_sid:
        # Otherwise derive it from the account dataSyncId
        return self._data_sync_id_to_delegated_session_id(self._extract_data_sync_id(*args))
    return delegated_sid
def _extract_data_sync_id(self, *args):
    """
    Extract current account dataSyncId.
    In the format DELEGATED_SESSION_ID||USER_SESSION_ID or USER_SESSION_ID||
    @params response and/or ytcfg
    """
    # A `data_sync_id` extractor argument takes precedence over extracted values
    configured = self._configuration_arg('data_sync_id', [None], ie_key=YoutubeIE, casesense=True)[0]
    if configured:
        return configured

    candidate_keys = ('DATASYNC_ID', ('responseContext', 'mainAppWebResponseContext', 'datasyncId'))
    return traverse_obj(args, (..., candidate_keys, {str}, any))
def _extract_visitor_data(self, *args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    # A `visitor_data` extractor argument takes precedence over extracted values
    configured = self._configuration_arg('visitor_data', [None], ie_key=YoutubeIE, casesense=True)[0]
    if configured:
        return configured

    candidate_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [candidate_paths], expected_type=str)
@functools.cached_property
def is_authenticated(self):
    """Truthy when an OAuth profile is set or a SAPISIDHASH header can be generated"""
    if profile := self._OAUTH_PROFILE:
        return profile
    return bool(self._generate_sapisidhash_header())
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the object passed to `ytcfg.set(...)` in `webpage`.

    @returns the parsed mapping; {} when there is no webpage, no match or parsing fails
    """
    if not webpage:
        return {}
    raw_config = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed = self._parse_json(raw_config, video_id, fatal=False)
    return parsed or {}
def _generate_oauth_headers(self):
    """
    Build the Authorization header from the cached OAuth token of the current profile.

    @returns {'Authorization': '<token_type> <access_token>'}, or {} when no token is cached
    """
    self._update_oauth()
    token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE)
    if token:
        return {
            'Authorization': f'{token["token_type"]} {token["access_token"]}',
        }
    return {}
947 def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs):
948 headers = {}
949 account_syncid = account_syncid or self._extract_account_syncid(ytcfg)
950 if account_syncid:
951 headers['X-Goog-PageId'] = account_syncid
952 if session_index is None:
953 session_index = self._extract_session_index(ytcfg)
954 if account_syncid or session_index is not None:
955 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
957 auth = self._generate_sapisidhash_header(origin)
958 if auth is not None:
959 headers['Authorization'] = auth
960 headers['X-Origin'] = origin
962 return headers
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, api_hostname=None, default_client='web', **kwargs):
    """
    Build the HTTP headers for an innertube API request.

    Client name/version and user agent come from `ytcfg` (or the defaults of
    `default_client`); OAuth and cookie authentication headers are merged in last.
    The result is passed through filter_dict() before being returned.
    """
    hostname = self._select_api_hostname(api_hostname, default_client)
    origin = 'https://' + hostname

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    api_headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
        'Origin': origin,
        'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
        'User-Agent': self._ytcfg_get_safe(
            ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
    }
    # Authentication headers override any of the above on key collisions
    api_headers.update(self._generate_oauth_headers())
    api_headers.update(self._generate_cookie_auth_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin))
    return filter_dict(api_headers)
981 def _generate_webpage_headers(self):
982 return self._generate_oauth_headers()
984 def _download_ytcfg(self, client, video_id):
985 url = {
986 'web': 'https://www.youtube.com',
987 'web_music': 'https://music.youtube.com',
988 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
989 }.get(client)
990 if not url:
991 return {}
992 webpage = self._download_webpage(
993 url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config',
994 headers=self._generate_webpage_headers())
995 return self.extract_ytcfg(video_id, webpage) or {}
997 @staticmethod
998 def _build_api_continuation_query(continuation, ctp=None):
999 query = {
1000 'continuation': continuation,
1002 # TODO: Inconsistency with clickTrackingParams.
1003 # Currently we have a fixed ctp contained within context (from ytcfg)
1004 # and a ctp in root query for continuation.
1005 if ctp:
1006 query['clickTracking'] = {'clickTrackingParams': ctp}
1007 return query
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from the renderer's nextContinuationData
    (or reloadContinuationData).

    @returns the continuation query, or None when no continuation token is found
    """
    getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, getters, dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint.

    @returns the continuation query, or None if `continuation_ep` is not a dict
             or has no string token under continuationCommand
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
@classmethod
def _extract_continuation(cls, renderer):
    """
    Find the continuation query of a renderer.

    The renderer's own next/reload continuation data is preferred; otherwise the
    first continuationItemRenderer endpoint found in its contents/items/rows that
    yields a query is used.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data)
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) for each alert in the 'alerts' list of `data`.

    Entries that are not dicts, alerts without a type and alerts without any
    text are skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # The renderer value needs the same type check as its container,
            # otherwise a malformed (non-dict) value raises AttributeError on .get()
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
1056 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
1057 errors, warnings = [], []
1058 for alert_type, alert_message in alerts:
1059 if alert_type.lower() == 'error' and fatal:
1060 errors.append([alert_type, alert_message])
1061 elif alert_message not in self._IGNORED_WARNINGS:
1062 warnings.append([alert_type, alert_message])
1064 for alert_type, alert_message in (warnings + errors[:-1]):
1065 self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once)
1066 if errors:
1067 raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected)
1069 def _extract_and_report_alerts(self, data, *args, **kwargs):
1070 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.

    A badge is identified by its icon type, then by its style and finally
    (as a language dependent fallback) by its label text.

    @returns [{'type': BadgeType}]
    """
    icon_type_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    badges = []
    # Any value whose key ends in "badgeRenderer"/"BadgeRenderer" is treated as a badge
    for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
        badge_type = (
            icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            # Restrict to str like the icon lookup: an unhashable value (e.g. a dict)
            # would otherwise raise TypeError in dict.get()
            or badge_style_map.get(traverse_obj(badge, 'style', expected_type=str))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(
            badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
            get_all=False, expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                badges.append({'type': label_badge_type})
                break

    return badges
@staticmethod
def _has_badge(badges, badge_type):
    """Whether any entry of `badges` has the given `badge_type` as its 'type'"""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    matching = traverse_obj(badges, is_wanted)
    return bool(matching)
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Get the text of the first text object found at any of the given paths.

    A text object is either a dict with 'simpleText', a dict with a list of
    'runs', or a plain list of runs.

    @param path_list  Paths to try in order; `data` itself is used when none are given
    @param max_runs   Use at most this many runs when joining run texts
    @returns          The first non-empty text, or None
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            # Only paths containing `...` or a list/tuple key are taken to yield
            # several results already; anything else is wrapped into a list
            is_branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not is_branching:
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            run_limit = min(len(runs), max_runs or len(runs))
            joined = ''.join(traverse_obj(runs[:run_limit], (..., 'text'), expected_type=str))
            if joined:
                return joined
    return None
def _get_count(self, data, *path_list):
    """
    Parse a count from the text found at the given paths (see _get_text).

    When parse_count() cannot handle the text, the leading run of digits and
    commas (after removing all whitespace) is converted instead.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed
    leading_digits = self._search_regex(
        r'^([\d,]+)', re.sub(r'\s', '', text), 'count', default=None)
    return str_to_int(leading_digits)
@staticmethod
def _extract_thumbnails(data, *path_list, final_key='thumbnails'):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @param final_key: key holding the list of thumbnails at that level
    @returns list of {'url', 'height', 'width'} dicts; entries without a valid URL are skipped
    """
    collected = []
    for base_path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(base_path), final_key, ...))
        for entry in entries:
            link = url_or_none(entry.get('url'))
            if not link:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in link:
                link = link.partition('?')[0]
            collected.append({
                'url': link,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return collected
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'

    @returns the result of datetime_from_str(), or None when nothing matches
             or datetime_from_str() rejects the expression with a ValueError
    """

    # XXX: this could be moved to a general function in utils/_utils.py
    # The relative time text strings are roughly the same as what
    # Javascript's Intl.RelativeTimeFormat function generates.
    # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
    match = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
        relative_time_text)
    if not match:
        return None

    keyword = match.group('start')
    if keyword:
        return datetime_from_str(keyword)

    try:
        return datetime_from_str('now-{}{}'.format(match.group('time'), match.group('unit')))
    except ValueError:
        return None
1205 def _parse_time_text(self, text):
1206 if not text:
1207 return
1208 dt_ = self.extract_relative_time(text)
1209 timestamp = None
1210 if isinstance(dt_, dt.datetime):
1211 timestamp = calendar.timegm(dt_.timetuple())
1213 if timestamp is None:
1214 timestamp = (
1215 unified_timestamp(text) or unified_timestamp(
1216 self._search_regex(
1217 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
1218 text.lower(), 'time text', default=None)))
1220 if text and timestamp is None and self._preferred_lang in (None, 'en'):
1221 self.report_warning(
1222 f'Cannot parse localized time text "{text}"', only_once=True)
1223 return timestamp
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep`, retrying on errors and on incomplete responses.

    @param item_id         Passed to _call_api as video_id; also used when parsing error bodies
    @param query           API query, forwarded to _call_api
    @param note            Note forwarded to _call_api
    @param headers         Extra headers forwarded to _call_api
    @param ytcfg           Used to build the request context via _extract_context
    @param check_get_keys  traverse_obj path(s); a response for which the traversal
                           is falsy counts as incomplete data
    @param ep              API endpoint
    @param fatal           Passed to _error_or_warning for errors that are not retried
    @param api_hostname    Forwarded to _call_api
    @param default_client  Forwarded to _call_api and _extract_context
    @returns               The API response; None when the incomplete-data retry manager
                           stops yielding retries; otherwise whatever _error_or_warning returns
    """
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
    icd_rm = next(icd_retries)
    main_retries = iter(self.RetryManager())
    main_rm = next(main_retries)
    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    # NOTE(review): next(main_retries) is never given a default below, so this code relies on
    # the (default, presumably fatal) RetryManager raising once exhausted - confirm against RetryManager
    while True:
        try:
            # The API call itself is always fatal so that failures arrive here as ExtractorError
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem: do not retry
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, HTTPError):
                # Network error without an HTTP response: retry
                main_rm.error = e
                next(main_retries)
                continue

            # HTTP error: if the body is not HTML, try to report the error message from its JSON
            first_bytes = e.cause.response.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.status not in (403, 429):
                main_rm.error = e
                next(main_retries)
                continue
            # 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                main_rm.error = e
                next(main_retries)
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            icd_rm.error = ExtractorError('Incomplete data received', expected=True)
            # This manager may be non fatal, so its result has to be checked
            should_retry = next(icd_retries, None)
            if not should_retry:
                return None
            continue

        return response
1291 @staticmethod
1292 def is_music_url(url):
1293 return re.match(r'(https?://)?music\.youtube\.com/', url) is not None
def _extract_video(self, renderer):
    """
    Build a url-type result for YoutubeIE from a video renderer.

    Metadata is taken from the renderer itself and, for some fields, from the
    reel player header nested in its navigation endpoint.
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length texts, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
        if duration is None:
            # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
            accessibility_label = self._search_regex(
                r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
                traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
                video_id, default=None, group='duration')
            duration = parse_duration(accessibility_label)

    raw_channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not raw_channel_id:
        raw_channel_id = traverse_obj(reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
    channel_id = self.ucid_or_none(raw_channel_id)

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (
        self._get_text(renderer, 'publishedTimeText', 'videoInfo')
        or self._get_text(reel_header, 'timestampText')
        or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # The count of live and upcoming videos is reported as concurrent viewers
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    channel = (
        self._get_text(renderer, 'ownerText', 'shortBylineText')
        or self._get_text(reel_header, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        # The time text is only turned into a timestamp when `approximate_date` is requested
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        'availability':
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None,
    }
1391 class YoutubeIE(YoutubeBaseInfoExtractor):
1392 IE_DESC = 'YouTube'
1393 _VALID_URL = r'''(?x)^
1395 (?:https?://|//) # http(s):// or protocol-independent URL
1396 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1397 (?:www\.)?deturl\.com/www\.youtube\.com|
1398 (?:www\.)?pwnyoutube\.com|
1399 (?:www\.)?hooktube\.com|
1400 (?:www\.)?yourepeat\.com|
1401 tube\.majestyc\.net|
1402 {invidious}|
1403 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1404 (?:.*?\#/)? # handle anchor (#/) redirect urls
1405 (?: # the various things that can precede the ID:
1406 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1407 |(?: # or the v= param in all its forms
1408 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1409 (?:\?|\#!?) # the params delimiter ? or # or #!
1410 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
1414 |(?:
1415 youtu\.be| # just youtu.be/xxxx
1416 vid\.plus| # or vid.plus/xxxx
1417 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1418 {invidious}
1420 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1422 )? # all until now is optional -> you can pass the naked ID
1423 (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID
1424 (?(1).+)? # if we found the ID, everything can follow
1425 (?:\#|$)'''.format(
1426 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1428 _EMBED_REGEX = [
1429 r'''(?x)
1431 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1432 data-video-url=|
1433 <embed[^>]+?src=|
1434 embedSWF\(?:\s*|
1435 <object[^>]+data=|
1436 new\s+SWFObject\(
1438 (["\'])
1439 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1440 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1441 \1''',
1442 # https://wordpress.org/plugins/lazy-load-for-videos/
1443 r'''(?xs)
1444 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1445 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1447 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1449 _PLAYER_INFO_RE = (
1450 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1451 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1452 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1454 _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
1455 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1456 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1457 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1458 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1459 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1460 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1461 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1462 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1463 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1464 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1465 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1466 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1467 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1468 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1469 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1470 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1471 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1472 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1475 # 3D videos
1476 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1477 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1478 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1479 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1480 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1481 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1482 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1484 # Apple HTTP Live Streaming
1485 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1486 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1487 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1488 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1489 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1490 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1491 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1492 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1494 # DASH mp4 video
1495 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1496 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1497 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1498 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1499 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1500 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1501 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1502 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1503 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1504 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1505 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1506 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1508 # Dash mp4 audio
1509 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1510 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1511 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1512 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1513 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1514 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1515 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1517 # Dash webm
1518 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1519 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1520 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1521 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1522 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1523 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1524 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1525 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1526 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1527 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1528 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1529 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1530 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1531 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1532 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1533 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1534 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1535 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1536 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1537 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1538 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1539 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1541 # Dash webm audio
1542 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1543 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1545 # Dash webm audio with opus inside
1546 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1547 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1548 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1550 # RTMP (unnamed)
1551 '_rtmp': {'protocol': 'rtmp'},
1553 # av01 video only formats sometimes served with "unknown" codecs
1554 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1555 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1556 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1557 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1558 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1559 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1560 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1561 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
# Subtitle format names, kept as a tuple; the code that consumes it is outside this chunk.
1563 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
# NOTE(review): presumably the player clients tried when the user requests none, named by INNERTUBE_CLIENTS keys - confirm.
1564 _DEFAULT_CLIENTS = ('ios', 'mweb')
# NOTE(review): presumably turns off the base extractor's geo-bypass handling - confirm against InfoExtractor.
1566 _GEO_BYPASS = False
# NOTE(review): looks like the name this extractor is registered under - confirm how IE_NAME is consumed.
1568 IE_NAME = 'youtube'
1569 _TESTS = [
1571 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1572 'info_dict': {
1573 'id': 'BaW_jenozKc',
1574 'ext': 'mp4',
1575 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1576 'channel': 'Philipp Hagemeister',
1577 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1578 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1579 'upload_date': '20121002',
1580 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1581 'categories': ['Science & Technology'],
1582 'tags': ['youtube-dl'],
1583 'duration': 10,
1584 'view_count': int,
1585 'like_count': int,
1586 'availability': 'public',
1587 'playable_in_embed': True,
1588 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1589 'live_status': 'not_live',
1590 'age_limit': 0,
1591 'start_time': 1,
1592 'end_time': 9,
1593 'comment_count': int,
1594 'channel_follower_count': int,
1595 'uploader': 'Philipp Hagemeister',
1596 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1597 'uploader_id': '@PhilippHagemeister',
1598 'heatmap': 'count:100',
1599 'timestamp': 1349198244,
1603 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1604 'note': 'Embed-only video (#1746)',
1605 'info_dict': {
1606 'id': 'yZIXLfi8CZQ',
1607 'ext': 'mp4',
1608 'upload_date': '20120608',
1609 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1610 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1611 'age_limit': 18,
1613 'skip': 'Private video',
1616 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1617 'note': 'Use the first video ID in the URL',
1618 'info_dict': {
1619 'id': 'BaW_jenozKc',
1620 'ext': 'mp4',
1621 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1622 'channel': 'Philipp Hagemeister',
1623 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1624 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1625 'upload_date': '20121002',
1626 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1627 'categories': ['Science & Technology'],
1628 'tags': ['youtube-dl'],
1629 'duration': 10,
1630 'view_count': int,
1631 'like_count': int,
1632 'availability': 'public',
1633 'playable_in_embed': True,
1634 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1635 'live_status': 'not_live',
1636 'age_limit': 0,
1637 'comment_count': int,
1638 'channel_follower_count': int,
1639 'uploader': 'Philipp Hagemeister',
1640 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1641 'uploader_id': '@PhilippHagemeister',
1642 'heatmap': 'count:100',
1643 'timestamp': 1349198244,
1645 'params': {
1646 'skip_download': True,
1650 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1651 'note': '256k DASH audio (format 141) via DASH manifest',
1652 'info_dict': {
1653 'id': 'a9LDPn-MO4I',
1654 'ext': 'm4a',
1655 'upload_date': '20121002',
1656 'description': '',
1657 'title': 'UHDTV TEST 8K VIDEO.mp4',
1659 'params': {
1660 'youtube_include_dash_manifest': True,
1661 'format': '141',
1663 'skip': 'format 141 not served anymore',
1665 # DASH manifest with encrypted signature
1667 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1668 'info_dict': {
1669 'id': 'IB3lcPjvWLA',
1670 'ext': 'm4a',
1671 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1672 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1673 'duration': 244,
1674 'upload_date': '20131011',
1675 'abr': 129.495,
1676 'like_count': int,
1677 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1678 'playable_in_embed': True,
1679 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1680 'view_count': int,
1681 'track': 'The Spark',
1682 'live_status': 'not_live',
1683 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1684 'channel': 'Afrojack',
1685 'tags': 'count:19',
1686 'availability': 'public',
1687 'categories': ['Music'],
1688 'age_limit': 0,
1689 'alt_title': 'The Spark',
1690 'channel_follower_count': int,
1691 'uploader': 'Afrojack',
1692 'uploader_url': 'https://www.youtube.com/@Afrojack',
1693 'uploader_id': '@Afrojack',
1695 'params': {
1696 'youtube_include_dash_manifest': True,
1697 'format': '141/bestaudio[ext=m4a]',
1700 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1702 'note': 'Embed allowed age-gate video',
1703 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1704 'info_dict': {
1705 'id': 'HtVdAasjOgU',
1706 'ext': 'mp4',
1707 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1708 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1709 'duration': 142,
1710 'upload_date': '20140605',
1711 'age_limit': 18,
1712 'categories': ['Gaming'],
1713 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1714 'availability': 'needs_auth',
1715 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1716 'like_count': int,
1717 'channel': 'The Witcher',
1718 'live_status': 'not_live',
1719 'tags': 'count:17',
1720 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1721 'playable_in_embed': True,
1722 'view_count': int,
1723 'channel_follower_count': int,
1724 'uploader': 'The Witcher',
1725 'uploader_url': 'https://www.youtube.com/@thewitcher',
1726 'uploader_id': '@thewitcher',
1727 'comment_count': int,
1728 'channel_is_verified': True,
1729 'heatmap': 'count:100',
1730 'timestamp': 1401991663,
1732 'skip': 'Age-restricted; requires authentication',
1735 'note': 'Age-gate video with embed allowed in public site',
1736 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1737 'info_dict': {
1738 'id': 'HsUATh_Nc2U',
1739 'ext': 'mp4',
1740 'title': 'Godzilla 2 (Official Video)',
1741 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1742 'upload_date': '20200408',
1743 'age_limit': 18,
1744 'availability': 'needs_auth',
1745 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1746 'channel': 'FlyingKitty',
1747 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1748 'view_count': int,
1749 'categories': ['Entertainment'],
1750 'live_status': 'not_live',
1751 'tags': ['Flyingkitty', 'godzilla 2'],
1752 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1753 'like_count': int,
1754 'duration': 177,
1755 'playable_in_embed': True,
1756 'channel_follower_count': int,
1757 'uploader': 'FlyingKitty',
1758 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1759 'uploader_id': '@FlyingKitty900',
1760 'comment_count': int,
1761 'channel_is_verified': True,
1763 'skip': 'Age-restricted; requires authentication',
1766 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1767 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1768 'info_dict': {
1769 'id': 'Tq92D6wQ1mg',
1770 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1771 'ext': 'mp4',
1772 'upload_date': '20191228',
1773 'description': 'md5:17eccca93a786d51bc67646756894066',
1774 'age_limit': 18,
1775 'like_count': int,
1776 'availability': 'needs_auth',
1777 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1778 'view_count': int,
1779 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1780 'channel': 'Projekt Melody',
1781 'live_status': 'not_live',
1782 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1783 'playable_in_embed': True,
1784 'categories': ['Entertainment'],
1785 'duration': 106,
1786 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1787 'comment_count': int,
1788 'channel_follower_count': int,
1789 'uploader': 'Projekt Melody',
1790 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1791 'uploader_id': '@ProjektMelody',
1792 'timestamp': 1577508724,
1794 'skip': 'Age-restricted; requires authentication',
1797 'note': 'Non-Agegated non-embeddable video',
1798 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1799 'info_dict': {
1800 'id': 'MeJVWBSsPAY',
1801 'ext': 'mp4',
1802 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1803 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1804 'upload_date': '20130730',
1805 'track': 'Such mich find mich',
1806 'age_limit': 0,
1807 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1808 'like_count': int,
1809 'playable_in_embed': False,
1810 'creator': 'OOMPH!',
1811 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1812 'view_count': int,
1813 'alt_title': 'Such mich find mich',
1814 'duration': 210,
1815 'channel': 'Herr Lurik',
1816 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1817 'categories': ['Music'],
1818 'availability': 'public',
1819 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1820 'live_status': 'not_live',
1821 'artist': 'OOMPH!',
1822 'channel_follower_count': int,
1823 'uploader': 'Herr Lurik',
1824 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1825 'uploader_id': '@HerrLurik',
1829 'note': 'Non-bypassable age-gated video',
1830 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1831 'only_matching': True,
1833 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1834 # YouTube Red ad is not captured for creator
1836 'url': '__2ABJjxzNo',
1837 'info_dict': {
1838 'id': '__2ABJjxzNo',
1839 'ext': 'mp4',
1840 'duration': 266,
1841 'upload_date': '20100430',
1842 'creator': 'deadmau5',
1843 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1844 'title': 'Deadmau5 - Some Chords (HD)',
1845 'alt_title': 'Some Chords',
1846 'availability': 'public',
1847 'tags': 'count:14',
1848 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1849 'view_count': int,
1850 'live_status': 'not_live',
1851 'channel': 'deadmau5',
1852 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1853 'like_count': int,
1854 'track': 'Some Chords',
1855 'artist': 'deadmau5',
1856 'playable_in_embed': True,
1857 'age_limit': 0,
1858 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1859 'categories': ['Music'],
1860 'album': 'Some Chords',
1861 'channel_follower_count': int,
1862 'uploader': 'deadmau5',
1863 'uploader_url': 'https://www.youtube.com/@deadmau5',
1864 'uploader_id': '@deadmau5',
1866 'expected_warnings': [
1867 'DASH manifest missing',
1870 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1872 'url': 'lqQg6PlCWgI',
1873 'info_dict': {
1874 'id': 'lqQg6PlCWgI',
1875 'ext': 'mp4',
1876 'duration': 6085,
1877 'upload_date': '20150827',
1878 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1879 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1880 'like_count': int,
1881 'release_timestamp': 1343767800,
1882 'playable_in_embed': True,
1883 'categories': ['Sports'],
1884 'release_date': '20120731',
1885 'channel': 'Olympics',
1886 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1887 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1888 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1889 'age_limit': 0,
1890 'availability': 'public',
1891 'live_status': 'was_live',
1892 'view_count': int,
1893 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1894 'channel_follower_count': int,
1895 'uploader': 'Olympics',
1896 'uploader_url': 'https://www.youtube.com/@Olympics',
1897 'uploader_id': '@Olympics',
1898 'channel_is_verified': True,
1899 'timestamp': 1440707674,
1901 'params': {
1902 'skip_download': 'requires avconv',
1905 # Non-square pixels
1907 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1908 'info_dict': {
1909 'id': '_b-2C3KPAM0',
1910 'ext': 'mp4',
1911 'stretched_ratio': 16 / 9.,
1912 'duration': 85,
1913 'upload_date': '20110310',
1914 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1915 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1916 'playable_in_embed': True,
1917 'channel': '孫ᄋᄅ',
1918 'age_limit': 0,
1919 'tags': 'count:11',
1920 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1921 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1922 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1923 'view_count': int,
1924 'categories': ['People & Blogs'],
1925 'like_count': int,
1926 'live_status': 'not_live',
1927 'availability': 'unlisted',
1928 'comment_count': int,
1929 'channel_follower_count': int,
1930 'uploader': '孫ᄋᄅ',
1931 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1932 'uploader_id': '@AllenMeow',
1933 'timestamp': 1299776999,
1936 # url_encoded_fmt_stream_map is empty string
1938 'url': 'qEJwOuvDf7I',
1939 'info_dict': {
1940 'id': 'qEJwOuvDf7I',
1941 'ext': 'webm',
1942 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1943 'description': '',
1944 'upload_date': '20150404',
1946 'params': {
1947 'skip_download': 'requires avconv',
1949 'skip': 'This live event has ended.',
1951 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1953 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1954 'info_dict': {
1955 'id': 'FIl7x6_3R5Y',
1956 'ext': 'webm',
1957 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1958 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1959 'duration': 220,
1960 'upload_date': '20150625',
1961 'formats': 'mincount:31',
1963 'skip': 'not actual anymore',
1965 # DASH manifest with segment_list
1967 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1968 'md5': '8ce563a1d667b599d21064e982ab9e31',
1969 'info_dict': {
1970 'id': 'CsmdDsKjzN8',
1971 'ext': 'mp4',
1972 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1973 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1974 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1976 'params': {
1977 'youtube_include_dash_manifest': True,
1978 'format': '135', # bestvideo
1980 'skip': 'This live event has ended.',
1983 # Multifeed videos (multiple cameras), URL can be of any Camera
1984 # TODO: fix multifeed titles
1985 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1986 'info_dict': {
1987 'id': 'zaPI8MvL8pg',
1988 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1989 'description': 'md5:563ccbc698b39298481ca3c571169519',
1991 'playlist': [{
1992 'info_dict': {
1993 'id': 'j5yGuxZ8lLU',
1994 'ext': 'mp4',
1995 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1996 'description': 'md5:563ccbc698b39298481ca3c571169519',
1997 'duration': 10120,
1998 'channel_follower_count': int,
1999 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
2000 'availability': 'public',
2001 'playable_in_embed': True,
2002 'upload_date': '20131105',
2003 'categories': ['Gaming'],
2004 'live_status': 'was_live',
2005 'tags': 'count:24',
2006 'release_timestamp': 1383701910,
2007 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
2008 'comment_count': int,
2009 'age_limit': 0,
2010 'like_count': int,
2011 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
2012 'channel': 'WiiLikeToPlay',
2013 'view_count': int,
2014 'release_date': '20131106',
2015 'uploader': 'WiiLikeToPlay',
2016 'uploader_id': '@WLTP',
2017 'uploader_url': 'https://www.youtube.com/@WLTP',
2019 }, {
2020 'info_dict': {
2021 'id': 'zaPI8MvL8pg',
2022 'ext': 'mp4',
2023 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
2024 'availability': 'public',
2025 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
2026 'channel': 'WiiLikeToPlay',
2027 'channel_follower_count': int,
2028 'description': 'md5:563ccbc698b39298481ca3c571169519',
2029 'duration': 10108,
2030 'age_limit': 0,
2031 'like_count': int,
2032 'tags': 'count:24',
2033 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
2034 'release_timestamp': 1383701915,
2035 'comment_count': int,
2036 'upload_date': '20131105',
2037 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
2038 'release_date': '20131106',
2039 'playable_in_embed': True,
2040 'live_status': 'was_live',
2041 'categories': ['Gaming'],
2042 'view_count': int,
2043 'uploader': 'WiiLikeToPlay',
2044 'uploader_id': '@WLTP',
2045 'uploader_url': 'https://www.youtube.com/@WLTP',
2047 }, {
2048 'info_dict': {
2049 'id': 'R7r3vfO7Hao',
2050 'ext': 'mp4',
2051 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
2052 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
2053 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
2054 'like_count': int,
2055 'availability': 'public',
2056 'playable_in_embed': True,
2057 'upload_date': '20131105',
2058 'description': 'md5:563ccbc698b39298481ca3c571169519',
2059 'channel_follower_count': int,
2060 'tags': 'count:24',
2061 'release_date': '20131106',
2062 'comment_count': int,
2063 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
2064 'channel': 'WiiLikeToPlay',
2065 'categories': ['Gaming'],
2066 'release_timestamp': 1383701914,
2067 'live_status': 'was_live',
2068 'age_limit': 0,
2069 'duration': 10128,
2070 'view_count': int,
2071 'uploader': 'WiiLikeToPlay',
2072 'uploader_id': '@WLTP',
2073 'uploader_url': 'https://www.youtube.com/@WLTP',
2076 'params': {'skip_download': True},
2077 'skip': 'Not multifeed anymore',
2080 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
2081 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
2082 'info_dict': {
2083 'id': 'gVfLd0zydlo',
2084 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
2086 'playlist_count': 2,
2087 'skip': 'Not multifeed anymore',
2090 'url': 'https://vid.plus/FlRa-iH7PGw',
2091 'only_matching': True,
2094 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
2095 'only_matching': True,
2098 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
2099 # Also tests cut-off URL expansion in video description (see
2100 # https://github.com/ytdl-org/youtube-dl/issues/1892,
2101 # https://github.com/ytdl-org/youtube-dl/issues/8164)
2102 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
2103 'info_dict': {
2104 'id': 'lsguqyKfVQg',
2105 'ext': 'mp4',
2106 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
2107 'alt_title': 'Dark Walk',
2108 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
2109 'duration': 133,
2110 'upload_date': '20151119',
2111 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
2112 'track': 'Dark Walk',
2113 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
2114 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
2115 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
2116 'categories': ['Film & Animation'],
2117 'view_count': int,
2118 'live_status': 'not_live',
2119 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
2120 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
2121 'tags': 'count:13',
2122 'availability': 'public',
2123 'channel': 'IronSoulElf',
2124 'playable_in_embed': True,
2125 'like_count': int,
2126 'age_limit': 0,
2127 'channel_follower_count': int,
2129 'params': {
2130 'skip_download': True,
2134 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
2135 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
2136 'only_matching': True,
2139 # Video with yt:stretch=17:0
2140 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
2141 'info_dict': {
2142 'id': 'Q39EVAstoRM',
2143 'ext': 'mp4',
2144 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
2145 'description': 'md5:ee18a25c350637c8faff806845bddee9',
2146 'upload_date': '20151107',
2148 'params': {
2149 'skip_download': True,
2151 'skip': 'This video does not exist.',
2154 # Video with incomplete 'yt:stretch=16:'
2155 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
2156 'only_matching': True,
2159 # Video licensed under Creative Commons
2160 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
2161 'info_dict': {
2162 'id': 'M4gD1WSo5mA',
2163 'ext': 'mp4',
2164 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
2165 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
2166 'duration': 721,
2167 'upload_date': '20150128',
2168 'license': 'Creative Commons Attribution license (reuse allowed)',
2169 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
2170 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
2171 'like_count': int,
2172 'age_limit': 0,
2173 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
2174 'channel': 'The Berkman Klein Center for Internet & Society',
2175 'availability': 'public',
2176 'view_count': int,
2177 'categories': ['Education'],
2178 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
2179 'live_status': 'not_live',
2180 'playable_in_embed': True,
2181 'channel_follower_count': int,
2182 'chapters': list,
2183 'uploader': 'The Berkman Klein Center for Internet & Society',
2184 'uploader_id': '@BKCHarvard',
2185 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
2186 'timestamp': 1422422076,
2188 'params': {
2189 'skip_download': True,
2193 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
2194 'info_dict': {
2195 'id': 'eQcmzGIKrzg',
2196 'ext': 'mp4',
2197 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
2198 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
2199 'duration': 4060,
2200 'upload_date': '20151120',
2201 'license': 'Creative Commons Attribution license (reuse allowed)',
2202 'playable_in_embed': True,
2203 'tags': 'count:12',
2204 'like_count': int,
2205 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
2206 'age_limit': 0,
2207 'availability': 'public',
2208 'categories': ['News & Politics'],
2209 'channel': 'Bernie Sanders',
2210 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
2211 'view_count': int,
2212 'live_status': 'not_live',
2213 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
2214 'comment_count': int,
2215 'channel_follower_count': int,
2216 'chapters': list,
2217 'uploader': 'Bernie Sanders',
2218 'uploader_url': 'https://www.youtube.com/@BernieSanders',
2219 'uploader_id': '@BernieSanders',
2220 'channel_is_verified': True,
2221 'heatmap': 'count:100',
2222 'timestamp': 1447987198,
2224 'params': {
2225 'skip_download': True,
2229 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
2230 'only_matching': True,
2233 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
2234 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
2235 'only_matching': True,
2238 # Rental video preview
2239 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
2240 'info_dict': {
2241 'id': 'uGpuVWrhIzE',
2242 'ext': 'mp4',
2243 'title': 'Piku - Trailer',
2244 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
2245 'upload_date': '20150811',
2246 'license': 'Standard YouTube License',
2248 'params': {
2249 'skip_download': True,
2251 'skip': 'This video is not available.',
2254 # YouTube Red video with episode data
2255 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
2256 'info_dict': {
2257 'id': 'iqKdEhx-dD4',
2258 'ext': 'mp4',
2259 'title': 'Isolation - Mind Field (Ep 1)',
2260 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
2261 'duration': 2085,
2262 'upload_date': '20170118',
2263 'series': 'Mind Field',
2264 'season_number': 1,
2265 'episode_number': 1,
2266 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
2267 'tags': 'count:12',
2268 'view_count': int,
2269 'availability': 'public',
2270 'age_limit': 0,
2271 'channel': 'Vsauce',
2272 'episode': 'Episode 1',
2273 'categories': ['Entertainment'],
2274 'season': 'Season 1',
2275 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
2276 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
2277 'like_count': int,
2278 'playable_in_embed': True,
2279 'live_status': 'not_live',
2280 'channel_follower_count': int,
2281 'uploader': 'Vsauce',
2282 'uploader_url': 'https://www.youtube.com/@Vsauce',
2283 'uploader_id': '@Vsauce',
2284 'comment_count': int,
2285 'channel_is_verified': True,
2286 'timestamp': 1484761047,
2288 'params': {
2289 'skip_download': True,
2291 'expected_warnings': [
2292 'Skipping DASH manifest',
2296 # The following content has been identified by the YouTube community
2297 # as inappropriate or offensive to some audiences.
2298 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2299 'info_dict': {
2300 'id': '6SJNVb0GnPI',
2301 'ext': 'mp4',
2302 'title': 'Race Differences in Intelligence',
2303 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2304 'duration': 965,
2305 'upload_date': '20140124',
2307 'params': {
2308 'skip_download': True,
2310 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
2313 # itag 212
2314 'url': '1t24XAntNCY',
2315 'only_matching': True,
2318 # geo restricted to JP
2319 'url': 'sJL6WA-aGkQ',
2320 'only_matching': True,
2323 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2324 'only_matching': True,
2327 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2328 'only_matching': True,
2331 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2332 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2333 'only_matching': True,
2336 # DRM protected
2337 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2338 'only_matching': True,
2341 # Video with unsupported adaptive stream type formats
2342 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2343 'info_dict': {
2344 'id': 'Z4Vy8R84T1U',
2345 'ext': 'mp4',
2346 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2347 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2348 'duration': 433,
2349 'upload_date': '20130923',
2350 'formats': 'maxcount:10',
2352 'params': {
2353 'skip_download': True,
2354 'youtube_include_dash_manifest': False,
2356 'skip': 'not actual anymore',
2359 # Youtube Music Auto-generated description
2360 # TODO: fix metadata extraction
2361 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2362 'info_dict': {
2363 'id': 'MgNrAu2pzNs',
2364 'ext': 'mp4',
2365 'title': 'Voyeur Girl',
2366 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2367 'upload_date': '20190312',
2368 'artists': ['Stephen'],
2369 'creators': ['Stephen'],
2370 'track': 'Voyeur Girl',
2371 'album': 'it\'s too much love to know my dear',
2372 'release_date': '20190313',
2373 'alt_title': 'Voyeur Girl',
2374 'view_count': int,
2375 'playable_in_embed': True,
2376 'like_count': int,
2377 'categories': ['Music'],
2378 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
2379 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2380 'uploader': 'Stephen',
2381 'availability': 'public',
2382 'duration': 169,
2383 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2384 'age_limit': 0,
2385 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2386 'tags': 'count:11',
2387 'live_status': 'not_live',
2388 'channel_follower_count': int,
2390 'params': {
2391 'skip_download': True,
2395 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2396 'only_matching': True,
2399 # invalid -> valid video id redirection
2400 'url': 'DJztXj2GPfl',
2401 'info_dict': {
2402 'id': 'DJztXj2GPfk',
2403 'ext': 'mp4',
2404 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2405 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2406 'upload_date': '20090125',
2407 'artist': 'Panjabi MC',
2408 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2409 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2411 'params': {
2412 'skip_download': True,
2414 'skip': 'Video unavailable',
2417 # empty description results in an empty string
2418 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2419 'info_dict': {
2420 'id': 'x41yOUIvK2k',
2421 'ext': 'mp4',
2422 'title': 'IMG 3456',
2423 'description': '',
2424 'upload_date': '20170613',
2425 'view_count': int,
2426 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2427 'like_count': int,
2428 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2429 'tags': [],
2430 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2431 'availability': 'public',
2432 'age_limit': 0,
2433 'categories': ['Pets & Animals'],
2434 'duration': 7,
2435 'playable_in_embed': True,
2436 'live_status': 'not_live',
2437 'channel': 'l\'Or Vert asbl',
2438 'channel_follower_count': int,
2439 'uploader': 'l\'Or Vert asbl',
2440 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2441 'uploader_id': '@ElevageOrVert',
2442 'timestamp': 1497343210,
2444 'params': {
2445 'skip_download': True,
2449 # with '};' inside yt initial data (see [1])
2450 # see [2] for an example with '};' inside ytInitialPlayerResponse
2451 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2452 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2453 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2454 'info_dict': {
2455 'id': 'CHqg6qOn4no',
2456 'ext': 'mp4',
2457 'title': 'Part 77 Sort a list of simple types in c#',
2458 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2459 'upload_date': '20130831',
2460 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2461 'like_count': int,
2462 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2463 'live_status': 'not_live',
2464 'categories': ['Education'],
2465 'availability': 'public',
2466 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2467 'tags': 'count:12',
2468 'playable_in_embed': True,
2469 'age_limit': 0,
2470 'view_count': int,
2471 'duration': 522,
2472 'channel': 'kudvenkat',
2473 'comment_count': int,
2474 'channel_follower_count': int,
2475 'chapters': list,
2476 'uploader': 'kudvenkat',
2477 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2478 'uploader_id': '@Csharp-video-tutorialsBlogspot',
2479 'channel_is_verified': True,
2480 'heatmap': 'count:100',
2481 'timestamp': 1377976349,
2483 'params': {
2484 'skip_download': True,
2488 # another example of '};' in ytInitialData
2489 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2490 'only_matching': True,
2493 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2494 'only_matching': True,
2497 # https://github.com/ytdl-org/youtube-dl/pull/28094
2498 'url': 'OtqTfy26tG0',
2499 'info_dict': {
2500 'id': 'OtqTfy26tG0',
2501 'ext': 'mp4',
2502 'title': 'Burn Out',
2503 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2504 'upload_date': '20141120',
2505 'artist': 'The Cinematic Orchestra',
2506 'track': 'Burn Out',
2507 'album': 'Every Day',
2508 'like_count': int,
2509 'live_status': 'not_live',
2510 'alt_title': 'Burn Out',
2511 'duration': 614,
2512 'age_limit': 0,
2513 'view_count': int,
2514 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2515 'creator': 'The Cinematic Orchestra',
2516 'channel': 'The Cinematic Orchestra',
2517 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2518 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2519 'availability': 'public',
2520 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2521 'categories': ['Music'],
2522 'playable_in_embed': True,
2523 'channel_follower_count': int,
2524 'uploader': 'The Cinematic Orchestra',
2525 'comment_count': int,
2527 'params': {
2528 'skip_download': True,
2532 # controversial video, only works with bpctr when authenticated with cookies
2533 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2534 'only_matching': True,
2537 # controversial video, requires bpctr/contentCheckOk
2538 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2539 'info_dict': {
2540 'id': 'SZJvDhaSDnc',
2541 'ext': 'mp4',
2542 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2543 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2544 'upload_date': '20140716',
2545 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2546 'duration': 170,
2547 'categories': ['News & Politics'],
2548 'view_count': int,
2549 'channel': 'CBS Mornings',
2550 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2551 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2552 'age_limit': 18,
2553 'availability': 'needs_auth',
2554 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2555 'like_count': int,
2556 'live_status': 'not_live',
2557 'playable_in_embed': True,
2558 'channel_follower_count': int,
2559 'uploader': 'CBS Mornings',
2560 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2561 'uploader_id': '@CBSMornings',
2562 'comment_count': int,
2563 'channel_is_verified': True,
2564 'timestamp': 1405513526,
2566 'skip': 'Age-restricted; requires authentication',
2569 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2570 'url': 'cBvYw8_A0vQ',
2571 'info_dict': {
2572 'id': 'cBvYw8_A0vQ',
2573 'ext': 'mp4',
2574 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2575 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2576 'upload_date': '20201120',
2577 'duration': 1456,
2578 'categories': ['Travel & Events'],
2579 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2580 'view_count': int,
2581 'channel': 'Walk around Japan',
2582 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2583 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
2584 'age_limit': 0,
2585 'availability': 'public',
2586 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2587 'live_status': 'not_live',
2588 'playable_in_embed': True,
2589 'channel_follower_count': int,
2590 'uploader': 'Walk around Japan',
2591 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2592 'uploader_id': '@walkaroundjapan7124',
2593 'timestamp': 1605884416,
2595 'params': {
2596 'skip_download': True,
2598 }, {
2599 # Has multiple audio streams
2600 'url': 'WaOKSUlf4TM',
2601 'only_matching': True,
2602 }, {
2603 # Requires Premium: has format 141 when requested using YTM url
2604 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2605 'only_matching': True,
2606 }, {
2607 # multiple subtitles with same lang_code
2608 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2609 'only_matching': True,
2610 }, {
2611 # Force use android client fallback
2612 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2613 'info_dict': {
2614 'id': 'YOelRv7fMxY',
2615 'title': 'DIGGING A SECRET TUNNEL Part 1',
2616 'ext': '3gp',
2617 'upload_date': '20210624',
2618 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2619 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2620 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2621 'duration': 596,
2622 'categories': ['Entertainment'],
2623 'view_count': int,
2624 'channel': 'colinfurze',
2625 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2626 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2627 'age_limit': 0,
2628 'availability': 'public',
2629 'like_count': int,
2630 'live_status': 'not_live',
2631 'playable_in_embed': True,
2632 'channel_follower_count': int,
2633 'chapters': list,
2634 'uploader': 'colinfurze',
2635 'uploader_url': 'https://www.youtube.com/@colinfurze',
2636 'uploader_id': '@colinfurze',
2637 'comment_count': int,
2638 'channel_is_verified': True,
2639 'heatmap': 'count:100',
2641 'params': {
2642 'format': '17', # 3gp format available on android
2643 'extractor_args': {'youtube': {'player_client': ['android']}},
2645 'skip': 'android client broken',
2648 # Skip download of additional client configs (remix client config in this case)
2649 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2650 'only_matching': True,
2651 'params': {
2652 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2654 }, {
2655 # shorts
2656 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2657 'only_matching': True,
2658 }, {
2659 'note': 'Storyboards',
2660 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2661 'info_dict': {
2662 'id': '5KLPxDtMqe8',
2663 'ext': 'mhtml',
2664 'format_id': 'sb0',
2665 'title': 'Your Brain is Plastic',
2666 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2667 'upload_date': '20140324',
2668 'like_count': int,
2669 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2670 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2671 'view_count': int,
2672 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2673 'playable_in_embed': True,
2674 'tags': 'count:12',
2675 'availability': 'public',
2676 'channel': 'SciShow',
2677 'live_status': 'not_live',
2678 'duration': 248,
2679 'categories': ['Education'],
2680 'age_limit': 0,
2681 'channel_follower_count': int,
2682 'chapters': list,
2683 'uploader': 'SciShow',
2684 'uploader_url': 'https://www.youtube.com/@SciShow',
2685 'uploader_id': '@SciShow',
2686 'comment_count': int,
2687 'channel_is_verified': True,
2688 'heatmap': 'count:100',
2689 'timestamp': 1395685455,
2690 }, 'params': {'format': 'mhtml', 'skip_download': True},
2691 }, {
2692 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2693 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2694 'info_dict': {
2695 'id': '2NUZ8W2llS4',
2696 'ext': 'mp4',
2697 'title': 'The NP that test your phone performance 🙂',
2698 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2699 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2700 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2701 'duration': 21,
2702 'view_count': int,
2703 'age_limit': 0,
2704 'categories': ['Gaming'],
2705 'tags': 'count:23',
2706 'playable_in_embed': True,
2707 'live_status': 'not_live',
2708 'upload_date': '20220103',
2709 'like_count': int,
2710 'availability': 'public',
2711 'channel': 'Leon Nguyen',
2712 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2713 'comment_count': int,
2714 'channel_follower_count': int,
2715 'uploader': 'Leon Nguyen',
2716 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2717 'uploader_id': '@LeonNguyen',
2718 'heatmap': 'count:100',
2719 'timestamp': 1641170939,
2721 }, {
2722 # date text is premiered video, ensure upload date in UTC (published 1641172509)
2723 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2724 'info_dict': {
2725 'id': 'mzZzzBU6lrM',
2726 'ext': 'mp4',
2727 'title': 'I Met GeorgeNotFound In Real Life...',
2728 'description': 'md5:978296ec9783a031738b684d4ebf302d',
2729 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2730 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2731 'duration': 955,
2732 'view_count': int,
2733 'age_limit': 0,
2734 'categories': ['Entertainment'],
2735 'tags': 'count:26',
2736 'playable_in_embed': True,
2737 'live_status': 'not_live',
2738 'release_timestamp': 1641172509,
2739 'release_date': '20220103',
2740 'upload_date': '20220103',
2741 'like_count': int,
2742 'availability': 'public',
2743 'channel': 'Quackity',
2744 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2745 'channel_follower_count': int,
2746 'uploader': 'Quackity',
2747 'uploader_id': '@Quackity',
2748 'uploader_url': 'https://www.youtube.com/@Quackity',
2749 'comment_count': int,
2750 'channel_is_verified': True,
2751 'heatmap': 'count:100',
2752 'timestamp': 1641172509,
2755 { # continuous livestream.
2756 # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
2757 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
2758 'info_dict': {
2759 'id': 'jfKfPfyJRdk',
2760 'ext': 'mp4',
2761 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
2762 'like_count': int,
2763 'uploader': 'Lofi Girl',
2764 'categories': ['Music'],
2765 'concurrent_view_count': int,
2766 'playable_in_embed': True,
2767 'timestamp': 1657627949,
2768 'release_date': '20220712',
2769 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
2770 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
2771 'age_limit': 0,
2772 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
2773 'release_timestamp': 1657641570,
2774 'uploader_url': 'https://www.youtube.com/@LofiGirl',
2775 'channel_follower_count': int,
2776 'channel_is_verified': True,
2777 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
2778 'view_count': int,
2779 'live_status': 'is_live',
2780 'tags': 'count:32',
2781 'channel': 'Lofi Girl',
2782 'availability': 'public',
2783 'upload_date': '20220712',
2784 'uploader_id': '@LofiGirl',
2786 'params': {'skip_download': True},
2787 }, {
2788 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2789 'info_dict': {
2790 'id': 'tjjjtzRLHvA',
2791 'ext': 'mp4',
2792 'title': 'ハッシュタグ無し };if window.ytcsi',
2793 'upload_date': '20220323',
2794 'like_count': int,
2795 'availability': 'unlisted',
2796 'channel': 'Lesmiscore',
2797 'thumbnail': r're:^https?://.*\.jpg',
2798 'age_limit': 0,
2799 'categories': ['Music'],
2800 'view_count': int,
2801 'description': '',
2802 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2803 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2804 'live_status': 'not_live',
2805 'playable_in_embed': True,
2806 'channel_follower_count': int,
2807 'duration': 6,
2808 'tags': [],
2809 'uploader_id': '@lesmiscore',
2810 'uploader': 'Lesmiscore',
2811 'uploader_url': 'https://www.youtube.com/@lesmiscore',
2812 'timestamp': 1648005313,
2814 }, {
2815 # Prefer primary title+description language metadata by default
2816 # Do not prefer translated description if primary is empty
2817 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2818 'info_dict': {
2819 'id': 'el3E4MbxRqQ',
2820 'ext': 'mp4',
2821 'title': 'dlp test video 2 - primary sv no desc',
2822 'description': '',
2823 'channel': 'cole-dlp-test-acc',
2824 'tags': [],
2825 'view_count': int,
2826 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2827 'like_count': int,
2828 'playable_in_embed': True,
2829 'availability': 'unlisted',
2830 'thumbnail': r're:^https?://.*\.jpg',
2831 'age_limit': 0,
2832 'duration': 5,
2833 'live_status': 'not_live',
2834 'upload_date': '20220908',
2835 'categories': ['People & Blogs'],
2836 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2837 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2838 'uploader_id': '@coletdjnz',
2839 'uploader': 'cole-dlp-test-acc',
2840 'timestamp': 1662677394,
2842 'params': {'skip_download': True},
2843 }, {
2844 # Extractor argument: prefer translated title+description
2845 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2846 'info_dict': {
2847 'id': 'gHKT4uU8Zng',
2848 'ext': 'mp4',
2849 'channel': 'cole-dlp-test-acc',
2850 'tags': [],
2851 'duration': 5,
2852 'live_status': 'not_live',
2853 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2854 'upload_date': '20220729',
2855 'view_count': int,
2856 'categories': ['People & Blogs'],
2857 'thumbnail': r're:^https?://.*\.jpg',
2858 'title': 'dlp test video title translated (fr)',
2859 'availability': 'public',
2860 'age_limit': 0,
2861 'description': 'dlp test video description translated (fr)',
2862 'playable_in_embed': True,
2863 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2864 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2865 'uploader_id': '@coletdjnz',
2866 'uploader': 'cole-dlp-test-acc',
2867 'timestamp': 1659073275,
2868 'like_count': int,
2870 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2871 'expected_warnings': [r'Preferring "fr" translated fields'],
2872 }, {
2873 'note': '6 channel audio',
2874 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2875 'only_matching': True,
2876 }, {
2877 'note': 'Multiple HLS formats with same itag',
2878 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2879 'info_dict': {
2880 'id': 'kX3nB4PpJko',
2881 'ext': 'mp4',
2882 'categories': ['Entertainment'],
2883 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2884 'live_status': 'not_live',
2885 'duration': 937,
2886 'channel_follower_count': int,
2887 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2888 'title': 'Last To Take Hand Off Jet, Keeps It!',
2889 'channel': 'MrBeast',
2890 'playable_in_embed': True,
2891 'view_count': int,
2892 'upload_date': '20221112',
2893 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2894 'age_limit': 0,
2895 'availability': 'public',
2896 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2897 'like_count': int,
2898 'tags': [],
2899 'uploader': 'MrBeast',
2900 'uploader_url': 'https://www.youtube.com/@MrBeast',
2901 'uploader_id': '@MrBeast',
2902 'comment_count': int,
2903 'channel_is_verified': True,
2904 'heatmap': 'count:100',
2906 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2907 }, {
2908 'note': 'Audio formats with Dynamic Range Compression',
2909 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2910 'info_dict': {
2911 'id': 'Tq92D6wQ1mg',
2912 'ext': 'webm',
2913 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2914 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2915 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2916 'channel_follower_count': int,
2917 'description': 'md5:17eccca93a786d51bc67646756894066',
2918 'upload_date': '20191228',
2919 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2920 'playable_in_embed': True,
2921 'like_count': int,
2922 'categories': ['Entertainment'],
2923 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2924 'age_limit': 18,
2925 'channel': 'Projekt Melody',
2926 'view_count': int,
2927 'availability': 'needs_auth',
2928 'comment_count': int,
2929 'live_status': 'not_live',
2930 'duration': 106,
2931 'uploader': 'Projekt Melody',
2932 'uploader_id': '@ProjektMelody',
2933 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
2934 'timestamp': 1577508724,
2936 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2937 'skip': 'Age-restricted; requires authentication',
2940 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2941 'info_dict': {
2942 'id': 'qVv6vCqciTM',
2943 'ext': 'mp4',
2944 'age_limit': 0,
2945 'comment_count': int,
2946 'chapters': 'count:13',
2947 'upload_date': '20221223',
2948 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2949 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2950 'like_count': int,
2951 'release_date': '20221223',
2952 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2953 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2954 'view_count': int,
2955 'playable_in_embed': True,
2956 'duration': 4438,
2957 'availability': 'public',
2958 'channel_follower_count': int,
2959 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2960 'categories': ['Entertainment'],
2961 'live_status': 'was_live',
2962 'release_timestamp': 1671793345,
2963 'channel': 'さなちゃんねる',
2964 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2965 'uploader': 'さなちゃんねる',
2966 'uploader_url': 'https://www.youtube.com/@sana_natori',
2967 'uploader_id': '@sana_natori',
2968 'channel_is_verified': True,
2969 'heatmap': 'count:100',
2970 'timestamp': 1671798112,
2974 # Fallbacks when webpage and web client is unavailable
2975 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2976 'info_dict': {
2977 'id': 'wSSmNUl9Snw',
2978 'ext': 'mp4',
2979 # 'categories': ['Science & Technology'],
2980 'view_count': int,
2981 'chapters': 'count:2',
2982 'channel': 'Scott Manley',
2983 'like_count': int,
2984 'age_limit': 0,
2985 # 'availability': 'public',
2986 'channel_follower_count': int,
2987 'live_status': 'not_live',
2988 'upload_date': '20170831',
2989 'duration': 682,
2990 'tags': 'count:8',
2991 'uploader_url': 'https://www.youtube.com/@scottmanley',
2992 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2993 'uploader': 'Scott Manley',
2994 'uploader_id': '@scottmanley',
2995 'title': 'The Computer Hack That Saved Apollo 14',
2996 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2997 'thumbnail': r're:^https?://.*\.webp',
2998 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2999 'playable_in_embed': True,
3000 'comment_count': int,
3001 'channel_is_verified': True,
3002 'heatmap': 'count:100',
3004 'params': {
3005 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
# Test cases keyed by the URL of a third-party page that embeds a YouTube video
# (as opposed to direct YouTube URLs); each entry lists the metadata expected
# for the embedded video.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader': 'Christopher Sykes',
            'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
            'uploader_id': '@ChristopherSykesDocumentaries',
            'heatmap': 'count:100',
            'timestamp': 1211825920,
        },
        'params': {
            'skip_download': True,
        },
    },
]
@classmethod
def suitable(cls, url):
    """Return False for URLs with a non-empty `list` query value; otherwise defer to the parent check."""
    # NOTE(review): the function-scope import is kept exactly as in the
    # original; presumably deliberate - confirm before hoisting it.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    return False if list_values[0] else super().suitable(url)
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and create the per-instance player caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS source (filled by _load_player)
    # _player_cache: cache-id tuple -> computed value or stored exception (filled by _cached)
    self._code_cache, self._player_cache = {}, {}
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment generators to the formats flagged `is_from_start`.

    Only format dicts with a truthy 'is_from_start' are processed; they are
    mutated in place ('is_live', 'fragments' and, depending on `is_live`,
    'protocol' or removal of 'is_from_start'). Nothing is returned.
    """
    # Serialises manifest refreshes requested through mpd_feed()
    lock = threading.Lock()
    # Time of the last (re)fetch; refetch_manifest() compares against it
    start_time = time.time()
    # Rebinds the local name only; the caller's list object is not modified
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download player responses and re-list formats once more than `delay` seconds have passed."""
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        # Replaces the shared `formats`/`is_live` state seen by mpd_feed()
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            with lock:
                refetch_manifest(format_id, delay)

            f = next((f for f in formats if f['format_id'] == format_id), None)
            if not f:
                # Setting retry.error requests another attempt from the RetryManager
                if not is_live:
                    retry.error = f'{video_id}: Video is no longer live'
                else:
                    retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
                continue
            return f['manifest_url'], f['manifest_stream_number'], is_live
        return None

    for f in formats:
        f['is_live'] = is_live
        # Last bound argument (manifestless_orig_fmt) is False while live,
        # otherwise a copy of the format dict
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            # The partial itself is stored; it is called later with a ctx dict
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            # Not live: wrap the generator (called with an empty ctx) in a LazyList
            f['fragments'] = LazyList(gen({}))
            del f['is_from_start']
def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
    """Generate fragment descriptors for a live (or post-live manifestless) DASH format.

    Yields dicts with 'url' (the segment URL built from the fragment base URL
    and `sq/<index>`) and 'fragment_count' (the current head sequence + 1).

    @param mpd_feed               callable(format_id, delay) returning
                                  (manifest_url, manifest_stream_number, is_live) or None
    @param manifestless_orig_fmt  falsy, or a format dict used instead of
                                  re-parsing the MPD; when truthy the loop
                                  stops after the first pass
    @param ctx                    dict; 'start' is read and 'last_error' is popped
    """
    # FETCH_SPAN: minimum seconds between polling rounds
    # MAX_DURATION: 432000 s = 120 hours, the window mentioned in the warning below
    FETCH_SPAN, MAX_DURATION = 5, 432000

    mpd_url, stream_number, is_live = None, None, True

    begin_index = 0
    download_start_time = ctx.get('start') or time.time()

    lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
    if lack_early_segments:
        self.report_warning(bug_reports_message(
            'Starting download from the last 120 hours of the live stream since '
            'YouTube does not have data before that. If you think this is wrong,'), only_once=True)

    known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
    fragments, fragment_base_url = None, None
    # Bound up front: the helper below returns this name from the enclosing
    # scope, and would raise NameError if the very first MPD fetch failed
    # before the loop had ever assigned it.
    last_seq = None

    def _extract_sequence_from_mpd(refresh_sequence, immediate):
        """Refresh the manifest URL and return (should_continue, last sequence number)."""
        nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
        # Obtain from MPD's maximum seq value
        old_mpd_url = mpd_url
        last_error = ctx.pop('last_error', None)
        # Use the short refresh delay when asked to, or after an HTTP 403
        expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
        mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                           or (mpd_url, stream_number, False))
        if not refresh_sequence:
            if expire_fast and not is_live:
                return False, last_seq
            elif old_mpd_url == mpd_url:
                return True, last_seq
        if manifestless_orig_fmt:
            fmt_info = manifestless_orig_fmt
        else:
            try:
                fmts, _ = self._extract_mpd_formats_and_subtitles(
                    mpd_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                fmts = None
            if not fmts:
                no_fragment_score += 2
                return False, last_seq
            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
        fragments = fmt_info['fragments']
        fragment_base_url = fmt_info['fragment_base_url']
        assert fragment_base_url

        _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
        return True, _last_seq

    self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
    while is_live:
        fetch_time = time.time()
        # Give up once too many rounds in a row produced no progress
        if no_fragment_score > 30:
            return
        if last_segment_url:
            # Obtain from "X-Head-Seqnum" header value from each segment
            try:
                urlh = self._request_webpage(
                    last_segment_url, None, note=False, errnote=False, fatal=False)
            except ExtractorError:
                urlh = None
            last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
            if last_seq is None:
                no_fragment_score += 2
                last_segment_url = None
                continue
        else:
            should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
            no_fragment_score += 2
            if not should_continue:
                continue

        if known_idx > last_seq:
            last_segment_url = None
            continue

        last_seq += 1

        if begin_index < 0 and known_idx < 0:
            # skip from the start when it's negative value
            known_idx = last_seq + begin_index
        if lack_early_segments:
            known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
        try:
            for idx in range(known_idx, last_seq):
                # do not update sequence here or you'll get skipped some part of it
                should_continue, _ = _extract_sequence_from_mpd(False, False)
                if not should_continue:
                    known_idx = idx - 1
                    # Caught just below; used only to restart the outer loop
                    raise ExtractorError('breaking out of outer loop')
                last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
                yield {
                    'url': last_segment_url,
                    'fragment_count': last_seq,
                }
            if known_idx == last_seq:
                no_fragment_score += 5
            else:
                no_fragment_score = 0
            known_idx = last_seq
        except ExtractorError:
            continue

        if manifestless_orig_fmt:
            # Stop at the first iteration if running for post-live manifestless;
            # fragment count no longer increase since it starts
            break

        time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg dicts, or None if there is none."""
    # Candidate locations, tried in this order
    js_url_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *js_url_paths, get_all=False, expected_type=str)
    if found:
        return urljoin('https://www.youtube.com', found)
    return None
def _download_player_url(self, video_id, fatal=False):
    """Build the player JS URL from the player version found in the iframe API script.

    Returns None when the download or the version lookup yields nothing.
    """
    iframe_js = self._download_webpage(
        'https://www.youtube.com/iframe_api',
        note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
    if not iframe_js:
        return None

    version = self._search_regex(
        r'player\\?/([0-9a-fA-F]{8})\\?/', iframe_js, 'player version', fatal=fatal)
    if not version:
        return None
    return f'https://www.youtube.com/s/player/{version}/player_ias.vflset/en_US/base.js'
3239 def _signature_cache_id(self, example_sig):
3240 """ Return a string representation of a signature """
3241 return '.'.join(str(len(part)) for part in example_sig.split('.'))
@classmethod
def _extract_player_info(cls, player_url):
    """Return the `id` group of the first `_PLAYER_INFO_RE` pattern matching the player URL.

    Raises ExtractorError when no pattern matches.
    """
    for pattern in cls._PLAYER_INFO_RE:
        match = re.search(pattern, player_url)
        if match:
            return match.group('id')
    raise ExtractorError(f'Cannot identify player {player_url!r}')
def _load_player(self, video_id, player_url, fatal=True):
    """Return the player JS for `player_url`, downloading it on a cache miss.

    Only a truthy download result is cached, keyed by player id; None is
    returned when nothing is cached for that id afterwards.
    """
    player_id = self._extract_player_info(player_url)
    if player_id in self._code_cache:
        return self._code_cache[player_id]

    downloaded = self._download_webpage(
        player_url, video_id, fatal=fatal,
        note='Downloading player ' + player_id,
        errnote=f'Download of {player_url} failed',
        headers=self._generate_webpage_headers())
    if downloaded:
        self._code_cache[player_id] = downloaded
    return self._code_cache.get(player_id)
def _extract_signature_function(self, video_id, player_url, example_sig):
    """Return a callable that reorders a signature string per the player's scheme.

    The scheme is stored as a list of source indices (`cache_spec`), loaded
    from the 'youtube-sigfuncs' cache or derived by running the player's
    signature function over a probe string and then stored there.
    """
    player_id = self._extract_player_info(player_url)

    # Read from filesystem cache
    func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
    assert os.path.basename(func_id) == func_id

    self.write_debug(f'Extracting signature function {func_id}')
    cache_spec = self.cache.load('youtube-sigfuncs', func_id)

    if not cache_spec:
        player_code = self._load_player(video_id, player_url)
        if player_code:
            sig_func = self._parse_sig_js(player_code)
            # Probe whose n-th character has code point n, so the output
            # reveals which input position each output character came from
            probe = ''.join(chr(n) for n in range(len(example_sig)))
            cache_spec = [ord(ch) for ch in sig_func(probe)]
            self.cache.store('youtube-sigfuncs', func_id, cache_spec)

    return lambda s: ''.join(s[i] for i in cache_spec)
def _print_sig_code(self, func, example_sig):
    """Print Python-like code equivalent to the signature function `func`.

    Does nothing unless the 'youtube_print_sig_code' param is set. `func` is
    run over a probe string of the same length as `example_sig`, and the
    resulting index list is rendered as a sum of `s[...]` index and slice
    expressions.
    """
    if not self.get_param('youtube_print_sig_code'):
        return

    def gen_sig_code(idxs):
        """Yield `s[...]` expressions that together reproduce the index list `idxs`."""
        def _genslice(start, end, step):
            starts = '' if start == 0 else str(start)
            ends = (':%d' % (end + step)) if end + step >= 0 else ':'
            steps = '' if step == 1 else (':%d' % step)
            return f's[{starts}{ends}{steps}]'

        if not idxs:
            # Nothing to describe; avoids reading an unbound loop variable below
            return
        # Pre-bind so a single-index spec still yields 's[<idx>]' after the
        # (then empty) pairwise loop
        i = idxs[0]

        step = None
        # Quelch pyflakes warnings - start will be set when step is set
        start = '(Never used)'
        for i, prev in zip(idxs[1:], idxs[:-1]):
            if step is not None:
                if i - prev == step:
                    continue
                yield _genslice(start, prev, step)
                step = None
                continue
            if i - prev in [-1, 1]:
                # Start of a run of consecutive indices
                step = i - prev
                start = prev
                continue
            else:
                yield 's[%d]' % prev
        if step is None:
            yield 's[%d]' % i
        else:
            yield _genslice(start, i, step)

    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_res = func(test_string)
    cache_spec = [ord(c) for c in cache_res]
    expr_code = ' + '.join(gen_sig_code(cache_spec))
    signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.')))
    code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n'
            f' return {expr_code}\n')
    self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and return a Python callable wrapping it."""
    # Tried in order; the `sig` group captures the JS function name
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    sig_func_name = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    interpreter = JSInterpreter(jscode)
    js_sig_func = interpreter.extract_function(sig_func_name)

    def run_signature(s):
        # The extracted function takes its arguments as a list
        return js_sig_func([s])

    return run_signature
3347 def _cached(self, func, *cache_id):
3348 def inner(*args, **kwargs):
3349 if cache_id not in self._player_cache:
3350 try:
3351 self._player_cache[cache_id] = func(*args, **kwargs)
3352 except ExtractorError as e:
3353 self._player_cache[cache_id] = e
3354 except Exception as e:
3355 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3357 ret = self._player_cache[cache_id]
3358 if isinstance(ret, Exception):
3359 raise ret
3360 return ret
3361 return inner
def _decrypt_signature(self, s, video_id, player_url):
    """Turn the encrypted s field into a working signature"""
    # One extracted function is cached per player URL and signature shape
    cache_key = ('sig', player_url, self._signature_cache_id(s))
    cached_extractor = self._cached(self._extract_signature_function, *cache_key)
    sig_func = cached_extractor(video_id, player_url, s)
    self._print_sig_code(sig_func, s)
    return sig_func(s)
def _decrypt_nsig(self, s, video_id, player_url):
    """Decrypt the encrypted ``n`` value using the player's n function.

    The function code is extracted for ``player_url`` and run natively
    first. If the native interpreter raises, the same function body is
    executed through PhantomJS instead; when PhantomJS cannot be set up the
    native error is re-raised.

    Raises ExtractorError when ``player_url`` is None or the function code
    cannot be extracted.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as error:
        raise ExtractorError('Unable to extract nsig function code', cause=error)

    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        cached_builder = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        nsig_func = cached_builder(jsi, func_code)
        ret = nsig_func(s)
    except JSInterpreter.Exception as e:
        # Native interpretation failed; fall back to PhantomJS when available
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        args, func_body = func_code
        phantom_output = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code')
        ret = phantom_output.strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
def _extract_n_function_name(self, jscode, player_url=None):
    """Find the name of the n-parameter function in the player JS.

    A specific pattern is tried first. If it finds nothing, a warning is
    reported and a generic search for a function whose body contains
    ``enhanced_except_`` is used. If the specific pattern also captured an
    array index, the name is looked up at that index in the matching
    ``var <name> = [...]`` array found in ``jscode``.
    """
    # Examples (with placeholders nfunc, narray, idx):
    # * .get("n"))&&(b=nfunc(b)
    # * .get("n"))&&(b=narray[idx](b)
    # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
    # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
    # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
    # * a.D&&(b="nn"[+a.D],vL(a),c=a.j[b]||null)&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
    name, array_index = self._search_regex(
        r'''(?x)
        (?:
            \.get\("n"\)\)&&\(b=|
            (?:
                b=String\.fromCharCode\(110\)|
                (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
            )
            (?:
                ,[a-zA-Z0-9_$]+\(a\))?,c=a\.
                (?:
                    get\(b\)|
                    [a-zA-Z0-9_$]+\[b\]\|\|null
                )\)&&\(c=|
            \b(?P<var>[a-zA-Z0-9_$]+)=
        )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
        (?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''',
        jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))

    if not name:
        self.report_warning(join_nonempty(
            'Falling back to generic n function search',
            player_url and f' player = {player_url}', delim='\n'))
        return self._search_regex(
            r'''(?xs)
            ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
            \s*\{(?:(?!};).)+?["']enhanced_except_''',
            jscode, 'Initial JS player n function name', group='name')

    if not array_index:
        return name

    # The match was of the form narray[idx]: resolve through the array literal
    array_literal = self._search_regex(
        rf'var {re.escape(name)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
        f'Initial JS player n function list ({name}.{array_index})')
    return json.loads(js_to_json(array_literal))[int(array_index)]
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's n function.

    The function code is read from the ``youtube-nsig`` cache when present.
    Otherwise the player JS is loaded, the function is located and
    extracted, and the result is stored in the cache.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09')
    jscode = cached_code if cached_code else self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)

    if cached_code:
        return jsi, player_id, cached_code

    n_func_name = self._extract_n_function_name(jscode, player_url=player_url)
    extracted_code = jsi.extract_function_code(n_func_name)

    self.cache.store('youtube-nsig', player_id, extracted_code)
    return jsi, player_id, extracted_code
def _extract_n_function_from_code(self, jsi, func_code):
    """Build a callable that applies the extracted n function to a string.

    The returned callable raises ``JSInterpreter.Exception`` when the
    function fails (other exceptions are wrapped, keeping the traceback
    text) or when its result starts with ``enhanced_except_``.
    """
    js_function = jsi.extract_function_from_code(*func_code)

    def extract_nsig(s):
        try:
            result = js_function([s])
        except JSInterpreter.Exception:
            raise
        except Exception as error:
            raise JSInterpreter.Exception(traceback.format_exc(), cause=error)

        if not result.startswith('enhanced_except_'):
            return result
        raise JSInterpreter.Exception('Signature function returned an exception')

    return extract_nsig
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
    """
    Extract signatureTimestamp (sts)
    Required to tell API what sig/player version is in use.

    The ``STS`` entry of ``ytcfg`` is used when it yields a truthy value.
    Otherwise the player JS is loaded from ``player_url`` and searched.
    Without a ``player_url`` this raises ExtractorError if ``fatal``, else
    reports a warning and returns None.
    """
    sts = int_or_none(ytcfg.get('STS')) if isinstance(ytcfg, dict) else None
    if sts:
        return sts

    # Attempt to extract from player
    if player_url is None:
        error_msg = 'Cannot extract signature timestamp without player_url.'
        if fatal:
            raise ExtractorError(error_msg)
        self.report_warning(error_msg)
        return None

    code = self._load_player(video_id, player_url, fatal=fatal)
    if not code:
        return sts
    return int_or_none(self._search_regex(
        r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
        'JS player signature timestamp', group='sts', fatal=fatal))
def _mark_watched(self, video_id, player_responses):
    """Request the playback-tracking URLs so the video is marked as watched.

    ``videostatsPlaybackUrl`` and then ``videostatsWatchtimeUrl`` are looked
    up in ``player_responses`` and requested non-fatally with extra query
    parameters. If either URL is missing, a warning is reported and the
    method returns without requesting the remaining one.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # 16 characters drawn uniformly from the alphabet. The previous
        # `randint(0, 256) & 63` had an inclusive upper bound, so 257 values
        # were folded onto 64 slots and the first character was favoured.
        cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

        # # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': 0,
                'et': video_length,
            })

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False,
            headers=self._generate_webpage_headers())
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield URL results for YouTube videos referenced by ``webpage``.

    An alternate link to a youtube.com watch URL is yielded alone, after
    which ``cls.StopExtraction`` is raised. Otherwise the parent class
    results are yielded, followed by lazyYT embeds and WordPress
    "YouTube Video Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_match in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy_match.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    for importer_match in re.finditer(r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # Group 3 is the unnamed capture holding the video id
        embedded_id = importer_match.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
@classmethod
def extract_id(cls, url):
    """Return the video id that ``cls.get_temp_id`` derives from ``url``.

    Raises ExtractorError when no id is obtained.
    """
    if video_id := cls.get_temp_id(url):
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar entries in ``data``.

    The chapter list, together with accessors for each chapter's start
    value and title, is handed to ``self._extract_chapters_helper``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )

    def chapter_start(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    chapter_list = traverse_obj(data, chapters_path, expected_type=list)
    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Build chapters from macro-marker lists in the engagement panels.

    Each marker list is passed to ``self._extract_chapters_helper`` in turn.
    The first non-empty result is returned, or an empty list if there is
    none.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters_helper(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
def _extract_heatmap(self, data):
    """Collect heatmap markers from ``data``.

    Only mutations whose markers list has type ``MARKER_TYPE_HEATMAP`` are
    considered. Each marker becomes a dict with ``start_time``, ``end_time``
    and ``value``. Returns None when nothing is found.
    """
    def is_heatmap_mutation(_, mutation):
        markers_list = mutation['payload']['macroMarkersListEntity']['markersList']
        return markers_list['markerType'] == 'MARKER_TYPE_HEATMAP'

    def marker_end_time(marker):
        return (int(marker['startMillis']) + int(marker['durationMillis'])) / 1000

    marker_fields = {
        'start_time': ('startMillis', {float_or_none(scale=1000)}),
        'end_time': {marker_end_time},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap = traverse_obj(data, (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations',
        is_heatmap_mutation,
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., marker_fields))
    return heatmap or None
def _extract_comment(self, entities, parent=None):
    """Build a comment dict from entity payloads (new comment format).

    Returns None when the comment entity payload carries no string
    ``commentId``. ``is_favorited`` is None when no toolbar state payload is
    present. ``timestamp`` is parsed from the published-time text.
    """
    comment_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
    comment_id = traverse_obj(comment_payload, ('properties', 'commentId', {str}))
    if not comment_id:
        return None

    toolbar_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
    time_text = traverse_obj(comment_payload, ('properties', 'publishedTime', {str})) or ''

    comment = {
        'id': comment_id,
        'parent': parent or 'root',
    }
    comment.update(traverse_obj(comment_payload, {
        'text': ('properties', 'content', 'content', {str}),
        'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
        'author_id': ('author', 'channelId', {self.ucid_or_none}),
        'author': ('author', 'displayName', {str}),
        'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
        'author_is_uploader': ('author', 'isCreator', {bool}),
        'author_is_verified': ('author', 'isVerified', {bool}),
        'author_url': ('author', 'channelCommand', 'innertubeCommand', (
            ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
        ), {urljoin('https://www.youtube.com')}),
    }, get_all=False))

    if toolbar_payload is None:
        comment['is_favorited'] = None
    else:
        comment['is_favorited'] = toolbar_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'
    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    comment['_time_text'] = time_text
    comment['timestamp'] = self._parse_time_text(time_text)
    return comment
def _extract_comment_old(self, comment_renderer, parent=None):
    """Build a comment dict from a comment renderer (old comment format).

    Returns None when the renderer has no ``commentId``. The optional keys
    ``author_is_uploader``, ``is_favorited``, ``author_is_verified`` and
    ``is_pinned`` are only set when the renderer provides the matching data.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    comment = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(
            comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': traverse_obj(
            comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
        'parent': parent or 'root',
    }

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    comment['_time_text'] = time_text
    comment['timestamp'] = self._parse_time_text(time_text)

    author_path = traverse_obj(
        comment_renderer,
        ('authorEndpoint', (
            ('browseEndpoint', 'canonicalBaseUrl'),
            ('commandMetadata', 'webCommandMetadata', 'url'))),
        expected_type=str, get_all=False)
    comment['author_url'] = urljoin('https://www.youtube.com', author_path)

    if (owner_flag := traverse_obj(comment_renderer, 'authorIsChannelOwner')) is not None:
        comment['author_is_uploader'] = owner_flag

    action_buttons = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if action_buttons is not None:
        comment['is_favorited'] = 'creatorHeart' in action_buttons

    badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(badges, BadgeType.VERIFIED):
        comment['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        comment['is_pinned'] = True

    return comment
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generate comment dicts by following comment continuations.

    Top-level calls (``parent`` is None) walk the comment section; for every
    comment that has a replies renderer the method calls itself with
    ``parent`` set to that comment's id. ``tracker`` is a dict of counters
    and seen-id sets shared across those recursive calls; it is created here
    when not supplied.

    The ``max_comments`` extractor argument is read as up to four values:
    max comments, max parents, max replies and max replies per thread.
    Values that cannot be parsed as integers default to ``sys.maxsize``.

    Raises ``self.CommentsDisabled`` when the root data carries a message,
    this is a top-level call and no comment was yielded.
    """

    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the expected comment count and returns the continuation of
        # the requested sort order (None if no item provides one)
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen(f'Sorting comments by {sort_text.lower()}')
            break
        return _continuation

    def extract_thread(contents, entity_payloads):
        # Yields comment dicts. A bare `yield` (None) is a stop signal: the
        # consuming loop below returns as soon as it receives a falsy entry.
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])

            # old comment format
            if not entity_payloads:
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment_old(comment_renderer, parent)

            # new comment format
            else:
                view_model = (
                    traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
                    or traverse_obj(content, ('commentViewModel', {dict})))
                comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
                if not comment_keys:
                    continue
                entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
                comment = self._extract_comment(entities, parent)
                if comment:
                    comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None

            if not comment:
                continue
            comment_id = comment['id']

            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by the per-thread limit and by what is left of
                # the overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': None,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
            'seen_comment_ids': set(),
            'pinned_comment_ids': set(),
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # Pad with empty strings so that four limits can always be unpacked
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        # Reset; the loop ends unless this page provides a new continuation
        continuation = None
        mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response is only used for its header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items, mutations):
                if not entry:
                    # Stop signal from extract_thread
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
@staticmethod
def _generate_comment_continuation(video_id):
    """
    Generates initial comment section continuation token from given video id

    The token is the base64 text of a fixed byte layout that embeds
    ``video_id`` twice.
    """
    token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
    encoded = base64.b64encode(token.encode('utf-8'))
    return encoded.decode('utf-8')
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Picks the item section identified as ``comment-item-section`` from
    ``contents`` and returns an iterator over its comments, cut off at the
    first value of the ``max_comments`` extractor argument.
    """
    def comment_stream(section_contents):
        comment_section = None
        for item in traverse_obj(section_contents, (..., 'itemSectionRenderer'), default={}):
            if item.get('sectionIdentifier') == 'comment-item-section':
                comment_section = item
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(comment_stream(contents), 0, limit)
@staticmethod
def _get_checkok_params():
    """Return a new dict with ``contentCheckOk`` and ``racyCheckOk`` set to True."""
    return dict(contentCheckOk=True, racyCheckOk=True)
@classmethod
def _generate_player_context(cls, sts=None):
    """Build the playback-context part of a player query.

    ``signatureTimestamp`` is included only when ``sts`` is not None. The
    result of ``cls._get_checkok_params()`` is merged in at the top level.
    """
    playback = {'html5Preference': 'HTML5_PREF_WANTS'}
    if sts is not None:
        playback['signatureTimestamp'] = sts

    player_context = {
        'playbackContext': {
            'contentPlaybackContext': playback,
        },
    }
    player_context.update(cls._get_checkok_params())
    return player_context
def _get_config_po_token(self, client):
    """Return the PO Token configured for ``client``, if any.

    Entries of the ``po_token`` extractor argument have the form
    ``client+po_token``. Entries without a ``+`` trigger a warning and are
    skipped. Returns None when no entry matches ``client``.
    """
    configured = self._configuration_arg('po_token', [], ie_key=YoutubeIE, casesense=True)
    for entry in configured:
        entry_client, plus, entry_token = entry.partition('+')
        if plus:
            if entry_client == client:
                return entry_token
            continue
        self.report_warning(
            f'Invalid po_token configuration format. Expected "client+po_token", got "{entry}"', only_once=True)
    return None
def fetch_po_token(self, client='web', visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
    """Return a PO Token for ``client``, or None when none can be obtained.

    A token returned by ``self._get_config_po_token`` takes precedence;
    otherwise the call is delegated to ``self._fetch_po_token`` with the
    same arguments.

    When ``player_url`` is set, missing binding data causes a warning:
    logged out without ``visitor_data`` returns None; logged in without
    ``data_sync_id`` still returns a configured token but returns None
    instead of fetching externally.
    """
    # PO Token is bound to visitor_data / Visitor ID when logged out. Must have visitor_data for it to function.
    if not visitor_data and not self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Visitor Data. '
            f'You may need to pass Visitor Data with --extractor-args "youtube:visitor_data=XXX"')
        return

    config_po_token = self._get_config_po_token(client)
    if config_po_token:
        # PO token is bound to data_sync_id / account Session ID when logged in. However, for the config po_token,
        # if using first channel in an account then we don't need the data_sync_id anymore...
        if not data_sync_id and self.is_authenticated and player_url:
            # Trailing space keeps the two sentences apart in the joined message
            self.report_warning(
                f'Got a PO Token for {client} client, but missing Data Sync ID for account. Formats may not work. '
                f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')

        return config_po_token

    # Require Data Sync ID if logged in for external fetching
    if not data_sync_id and self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Data Sync ID for account. '
            f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
        return

    return self._fetch_po_token(
        client=client,
        visitor_data=visitor_data,
        data_sync_id=data_sync_id,
        player_url=player_url,
        **kwargs,
    )
def _fetch_po_token(self, client, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
    """External PO Token fetch stub

    This implementation fetches nothing and always returns None.
    """
    return None
@staticmethod
def _is_agegated(player_response):
    """Tell whether ``player_response`` reports the video as age-gated.

    True when ``playabilityStatus`` has a truthy
    ``desktopLegacyAgeGateReason``, or when its ``status`` or ``reason``
    string contains one of the known age-gate markers. Matching is
    case-insensitive and non-string values are ignored.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason'), {str}))
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # Status values are compared in upper case elsewhere in this file
    # (e.g. 'UNPLAYABLE'), so fold case before looking for the lower-case markers
    return any(
        expected in reason.lower()
        for reason in reasons
        for expected in AGE_GATE_REASONS)
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the playability status of ``player_response`` is ``UNPLAYABLE``."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, visitor_data, data_sync_id, po_token):
    """Request the ``player`` endpoint for ``video_id`` as ``client``.

    The query carries the video id, optional player params (the
    ``player_params`` extractor argument, defaulting to the client's
    ``PLAYER_PARAMS``), the PO Token when given, and the player context
    including the signature timestamp when ``player_url`` is set.
    Returns the response, or None when it is falsy.
    """
    session_index = self._extract_session_index(master_ytcfg, player_ytcfg)
    account_syncid = (
        self._data_sync_id_to_delegated_session_id(data_sync_id)
        or self._extract_account_syncid(master_ytcfg, initial_pr, player_ytcfg))
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg,
        default_client=client,
        visitor_data=visitor_data,
        session_index=session_index,
        account_syncid=account_syncid,
    )

    yt_query = {'videoId': video_id}

    default_pp = traverse_obj(
        INNERTUBE_CLIENTS, (_split_innertube_client(client)[0], 'PLAYER_PARAMS', {str}))
    player_params = self._configuration_arg('player_params', [default_pp], casesense=True)[0]
    if player_params:
        yt_query['params'] = player_params

    if po_token:
        yt_query['serviceIntegrityDimensions'] = {'poToken': po_token}

    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    yt_query.update(self._generate_player_context(sts))

    response = self._extract_response(
        item_id=video_id, ep='player', query=yt_query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()),
    )
    return response or None
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the ordered, de-duplicated player clients to query.

    Values of the ``player_client`` extractor argument are handled as
    follows: ``default`` adds ``self._DEFAULT_CLIENTS``, ``all`` adds every
    client whose name does not start with ``_`` (highest priority first),
    ``-name`` excludes ``name``, and unknown names are skipped with a
    warning. With nothing requested the defaults are used. For music URLs
    the matching ``*_music`` client of each requested client is added when
    it exists and either needs no authentication or the user is logged in.

    Raises ExtractorError when the exclusions leave no client.
    """
    requested_clients = []
    excluded_clients = set()
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client == 'default':
            requested_clients.extend(self._DEFAULT_CLIENTS)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        elif client.startswith('-'):
            excluded_clients.add(client[1:])
        elif client not in allowed_clients:
            self.report_warning(f'Skipping unsupported client "{client}"')
        else:
            requested_clients.append(client)
    if not requested_clients:
        requested_clients.extend(self._DEFAULT_CLIENTS)
    # Filter rather than list.remove(): remove() only drops the first
    # occurrence, so a client requested twice (e.g. "all,ios,-ios") would
    # otherwise survive its own exclusion
    requested_clients = [
        client for client in requested_clients if client not in excluded_clients]
    if not requested_clients:
        raise ExtractorError('No player clients have been requested', expected=True)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Iterate over a snapshot because the list is appended to in the loop
        for requested_client in tuple(requested_clients):
            _, base_client, variant = _split_innertube_client(requested_client)
            music_client = f'{base_client}_music' if base_client != 'mweb' else 'web_music'
            if variant != 'music' and music_client in INNERTUBE_CLIENTS:
                if not INNERTUBE_CLIENTS[music_client]['REQUIRE_AUTH'] or self.is_authenticated:
                    requested_clients.append(music_client)

    return orderedSet(requested_clients)
def _invalid_player_response(self, pr, video_id):
    """Return the video id carried by ``pr`` when it differs from ``video_id``.

    Returns None when the ids match.
    """
    # YouTube may return a different video player response than expected.
    # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    response_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    return response_id if response_id != video_id else None
4071 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
4072 initial_pr = None
4073 if webpage:
4074 initial_pr = self._search_json(
4075 self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)
4077 prs = []
4078 deprioritized_prs = []
4080 if initial_pr and not self._invalid_player_response(initial_pr, video_id):
4081 # Android player_response does not have microFormats which are needed for
4082 # extraction of some data. So we return the initial_pr with formats
4083 # stripped out even if not requested by the user
4084 # See: https://github.com/yt-dlp/yt-dlp/issues/501
4085 prs.append({**initial_pr, 'streamingData': None})
4087 all_clients = set(clients)
4088 clients = clients[::-1]
4090 def append_client(*client_names):
4091 """ Append the first client name that exists but not already used """
4092 for client_name in client_names:
4093 actual_client = _split_innertube_client(client_name)[0]
4094 if actual_client in INNERTUBE_CLIENTS:
4095 if actual_client not in all_clients:
4096 clients.append(client_name)
4097 all_clients.add(actual_client)
4098 return
4100 tried_iframe_fallback = False
4101 player_url = visitor_data = data_sync_id = None
4102 skipped_clients = {}
4103 while clients:
4104 deprioritize_pr = False
4105 client, base_client, variant = _split_innertube_client(clients.pop())
4106 player_ytcfg = master_ytcfg if client == 'web' else {}
4107 if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
4108 player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
4110 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
4111 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
4112 if 'js' in self._configuration_arg('player_skip'):
4113 require_js_player = False
4114 player_url = None
4116 if not player_url and not tried_iframe_fallback and require_js_player:
4117 player_url = self._download_player_url(video_id)
4118 tried_iframe_fallback = True
4120 visitor_data = visitor_data or self._extract_visitor_data(master_ytcfg, initial_pr, player_ytcfg)
4121 data_sync_id = data_sync_id or self._extract_data_sync_id(master_ytcfg, initial_pr, player_ytcfg)
4122 po_token = self.fetch_po_token(
4123 client=client, visitor_data=visitor_data,
4124 data_sync_id=data_sync_id if self.is_authenticated else None,
4125 player_url=player_url if require_js_player else None,
4128 require_po_token = self._get_default_ytcfg(client).get('REQUIRE_PO_TOKEN')
4129 if not po_token and require_po_token:
4130 self.report_warning(
4131 f'No PO Token provided for {client} client, '
4132 f'which is required for working {client} formats. '
4133 f'You can manually pass a PO Token for this client with '
4134 f'--extractor-args "youtube:po_token={client}+XXX"',
4135 only_once=True)
4136 deprioritize_pr = True
4138 pr = initial_pr if client == 'web' else None
4139 try:
4140 pr = pr or self._extract_player_response(
4141 client, video_id,
4142 master_ytcfg=player_ytcfg or master_ytcfg,
4143 player_ytcfg=player_ytcfg,
4144 player_url=player_url,
4145 initial_pr=initial_pr,
4146 visitor_data=visitor_data,
4147 data_sync_id=data_sync_id,
4148 po_token=po_token)
4149 except ExtractorError as e:
4150 self.report_warning(e)
4151 continue
4153 if pr_id := self._invalid_player_response(pr, video_id):
4154 skipped_clients[client] = pr_id
4155 elif pr:
4156 # Save client name for introspection later
4157 sd = traverse_obj(pr, ('streamingData', {dict})) or {}
4158 sd[STREAMING_DATA_CLIENT_NAME] = client
4159 sd[STREAMING_DATA_PO_TOKEN] = po_token
4160 for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
4161 f[STREAMING_DATA_CLIENT_NAME] = client
4162 f[STREAMING_DATA_PO_TOKEN] = po_token
4163 if deprioritize_pr:
4164 deprioritized_prs.append(pr)
4165 else:
4166 prs.append(pr)
4168 # EU countries require age-verification for accounts to access age-restricted videos
4169 # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
4170 if self.is_authenticated and self._is_agegated(pr):
4171 self.to_screen(
4172 f'{video_id}: This video is age-restricted and YouTube is requiring '
4173 'account age-verification; some formats may be missing', only_once=True)
4174 # web_creator can work around the age-verification requirement
4175 # android_vr and mediaconnect may also be able to work around age-verification
4176 # tv_embedded may(?) still work around age-verification if the video is embeddable
4177 append_client('web_creator')
4179 prs.extend(deprioritized_prs)
4181 if skipped_clients:
4182 self.report_warning(
4183 f'Skipping player responses from {"/".join(skipped_clients)} clients '
4184 f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
4185 if not prs:
4186 raise ExtractorError(
4187 'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
4188 elif not prs:
4189 raise ExtractorError('Failed to extract any player response')
4190 return prs, player_url
4192 def _needs_live_processing(self, live_status, duration):
4193 if (live_status == 'is_live' and self.get_param('live_from_start')
4194 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
4195 return live_status
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
    """Generate the formats of a video, followed by the subtitles from its manifests

    This is a generator. It first yields a dict for every usable https format listed
    directly in the streaming data, then the formats of the HLS and DASH manifests of
    each streaming data entry, and finally - as the very last item - the subtitles
    dict merged from those manifests.

    streaming_data: list of "streamingData" dicts. Each entry and each of its formats is
                    expected to carry STREAMING_DATA_CLIENT_NAME and may carry
                    STREAMING_DATA_PO_TOKEN
    video_id:       ID of the video; used in messages and by the decryption helpers
    player_url:     URL of the player used to decrypt signatures and nsig; may be None
    live_status:    live status of the video (e.g. 'is_live', 'post_live')
    duration:       duration of the video in seconds, or None if unknown
    """
    CHUNK_SIZE = 10 << 20
    PREFERRED_LANG_VALUE = 10
    original_language = None
    itags, stream_ids = collections.defaultdict(set), []
    # res_qualities is seeded with a dummy entry so that the nearest-height lookup
    # in process_manifest_format never has to deal with an empty mapping
    itag_qualities, res_qualities = {}, {0: None}
    q = qualities([
        # Normally tiny is the smallest video-only formats. But
        # audio-only formats with unknown quality may get tagged as tiny
        'tiny',
        'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
        'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
    ])
    streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
    format_types = self._configuration_arg('formats')
    all_formats = 'duplicate' in format_types
    if self._configuration_arg('include_duplicate_formats'):
        all_formats = True
        self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
                                            'Use formats=duplicate extractor argument instead')

    def build_fragments(f):
        # Split the file into CHUNK_SIZE byte ranges, one fragment per range
        return LazyList({
            'url': update_url_query(f['url'], {
                'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}',
            }),
        } for range_start in range(0, f['filesize'], CHUNK_SIZE))

    for fmt in streaming_formats:
        if fmt.get('targetDurationSec'):
            continue

        itag = str_or_none(fmt.get('itag'))
        audio_track = fmt.get('audioTrack') or {}
        stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
        if not all_formats:
            # The same stream may be returned by more than one client
            if stream_id in stream_ids:
                continue

        quality = fmt.get('quality')
        height = int_or_none(fmt.get('height'))
        if quality == 'tiny' or not quality:
            # `or ''` also covers an explicit null value
            quality = (fmt.get('audioQuality') or '').lower() or quality
        # The 3gp format (17) in android client has a quality of "small",
        # but is actually worse than other formats
        if itag == '17':
            quality = 'tiny'
        if quality:
            if itag:
                itag_qualities[itag] = quality
            if height:
                res_qualities[height] = quality

        is_default = audio_track.get('audioIsDefault')
        is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
        language_code = (audio_track.get('id') or '').split('.')[0]
        if language_code and is_default:
            original_language = language_code

        # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
        # (adding `&sq=0` to the URL) and parsing emsg box to determine the
        # number of fragment that would subsequently requested with (`&sq=N`)
        if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
            continue

        fmt_url = fmt.get('url')
        if not fmt_url:
            # No plain URL: build it from the signature cipher
            sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
            fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
            encrypted_sig = try_get(sc, lambda x: x['s'][0])
            if not all((sc, fmt_url, player_url, encrypted_sig)):
                continue
            try:
                fmt_url += '&{}={}'.format(
                    traverse_obj(sc, ('sp', -1)) or 'signature',
                    self._decrypt_signature(encrypted_sig, video_id, player_url),
                )
            except ExtractorError as e:
                self.report_warning('Signature extraction failed: Some formats may be missing',
                                    video_id=video_id, only_once=True)
                self.write_debug(e, only_once=True)
                continue

        query = parse_qs(fmt_url)
        if query.get('n'):
            try:
                decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
                fmt_url = update_url_query(fmt_url, {
                    'n': decrypt_nsig(query['n'][0], video_id, player_url),
                })
            except ExtractorError as e:
                phantomjs_hint = ''
                if isinstance(e, JSInterpreter.Exception):
                    phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
                                      f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
                if player_url:
                    self.report_warning(
                        f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
                        f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
                    self.write_debug(e, only_once=True)
                else:
                    self.report_warning(
                        'Cannot decrypt nsig without player_url: Some formats may be missing',
                        video_id=video_id, only_once=True)
                continue

        tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
        format_duration = traverse_obj(fmt, ('approxDurationMs', {float_or_none(scale=1000)}))
        # Some formats may have much smaller duration than others (possibly damaged during encoding)
        # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
        # Make sure to avoid false positives with small duration differences.
        # E.g. __2ABJjxzNo, ySuUZEjARPY
        is_damaged = try_call(lambda: format_duration < duration // 2)
        if is_damaged:
            self.report_warning(
                f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)

        client_name = fmt[STREAMING_DATA_CLIENT_NAME]
        po_token = fmt.get(STREAMING_DATA_PO_TOKEN)

        if po_token:
            fmt_url = update_url_query(fmt_url, {'pot': po_token})

        # Clients that require PO Token return videoplayback URLs that may return 403
        is_broken = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN'))
        if is_broken:
            self.report_warning(
                f'{video_id}: {client_name} client formats require a PO Token which was not provided. '
                'They will be deprioritized as they may yield HTTP Error 403', only_once=True)

        # quality is None when the format has neither "quality" nor "audioQuality"
        name = fmt.get('qualityLabel') or (quality or '').replace('audio_quality_', '') or ''
        fps = int_or_none(fmt.get('fps')) or 0
        dct = {
            'asr': int_or_none(fmt.get('audioSampleRate')),
            'filesize': int_or_none(fmt.get('contentLength')),
            'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
            'format_note': join_nonempty(
                join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
                name, fmt.get('isDrc') and 'DRC',
                try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                is_damaged and 'DAMAGED', is_broken and 'BROKEN',
                (self.get_param('verbose') or all_formats) and short_client_name(client_name),
                delim=', '),
            # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
            'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
            'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
            'audio_channels': fmt.get('audioChannels'),
            'height': height,
            'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
            'has_drm': bool(fmt.get('drmFamilies')),
            'tbr': tbr,
            'filesize_approx': filesize_from_tbr(tbr, format_duration),
            'url': fmt_url,
            'width': int_or_none(fmt.get('width')),
            'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
            'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
            # Strictly de-prioritize broken, damaged and 3gp formats
            'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
        }
        mime_mobj = re.match(
            r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
        if mime_mobj:
            dct['ext'] = mimetype2ext(mime_mobj.group(1))
            dct.update(parse_codecs(mime_mobj.group(2)))
        if itag:
            # Remember what was emitted so that duplicates can be skipped later
            itags[itag].add(('https', dct.get('language')))
            stream_ids.append(stream_id)
        single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
        if single_stream and dct.get('ext'):
            dct['container'] = dct['ext'] + '_dash'

        if (all_formats or 'dashy' in format_types) and dct['filesize']:
            yield {
                **dct,
                'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                'protocol': 'http_dash_segments',
                'fragments': build_fragments(dct),
            }
        if all_formats or 'dashy' not in format_types:
            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
            yield dct

    needs_live_processing = self._needs_live_processing(live_status, duration)
    skip_bad_formats = 'incomplete' not in format_types
    if self._configuration_arg('include_incomplete_formats'):
        skip_bad_formats = False
        self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')

    skip_manifests = set(self._configuration_arg('skip'))
    if (not self.get_param('youtube_include_hls_manifest', True)
            or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
            or needs_live_processing and skip_bad_formats):
        skip_manifests.add('hls')

    if not self.get_param('youtube_include_dash_manifest', True):
        skip_manifests.add('dash')
    if self._configuration_arg('include_live_dash'):
        self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
                                            'Use formats=incomplete extractor argument instead')
    elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
        skip_manifests.add('dash')

    def process_manifest_format(f, proto, client_name, itag, po_token):
        # Adjust a manifest format in-place. Returns False if it is a duplicate to be dropped
        key = (proto, f.get('language'))
        if not all_formats and key in itags[itag]:
            return False
        itags[itag].add(key)

        if f.get('source_preference') is None:
            f['source_preference'] = -1

        # Clients that require PO Token return videoplayback URLs that may return 403
        # hls does not currently require PO Token
        if (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) and proto != 'hls':
            self.report_warning(
                f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. '
                'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
            f['format_note'] = join_nonempty(f.get('format_note'), 'BROKEN', delim=' ')
            f['source_preference'] -= 20

        if itag and all_formats:
            f['format_id'] = f'{itag}-{proto}'
        elif any(p != proto for p, _ in itags[itag]):
            # The itag was already seen with another protocol; disambiguate the ID
            f['format_id'] = f'{itag}-{proto}'
        elif itag:
            f['format_id'] = itag

        if original_language and f.get('language') == original_language:
            f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
            f['language_preference'] = PREFERRED_LANG_VALUE

        if itag in ('616', '235'):
            f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
            f['source_preference'] += 100

        f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
        if f['quality'] == -1 and f.get('height'):
            # Fall back to the quality of the https format with the nearest height
            f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
        if self.get_param('verbose') or all_formats:
            f['format_note'] = join_nonempty(
                f.get('format_note'), short_client_name(client_name), delim=', ')
        if f.get('fps') and f['fps'] <= 1:
            del f['fps']

        if proto == 'hls' and f.get('has_drm'):
            f['has_drm'] = 'maybe'
            f['source_preference'] -= 5
        return True

    subtitles = {}
    for sd in streaming_data:
        client_name = sd[STREAMING_DATA_CLIENT_NAME]
        po_token = sd.get(STREAMING_DATA_PO_TOKEN)
        hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
        if hls_manifest_url:
            if po_token:
                hls_manifest_url = hls_manifest_url.rstrip('/') + f'/pot/{po_token}'
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
            subtitles = self._merge_subtitles(subs, subtitles)
            for f in fmts:
                if process_manifest_format(f, 'hls', client_name, self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None), po_token):
                    yield f

        dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
        if dash_manifest_url:
            if po_token:
                dash_manifest_url = dash_manifest_url.rstrip('/') + f'/pot/{po_token}'
            formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
            subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
            for f in formats:
                if process_manifest_format(f, 'dash', client_name, f['format_id'], po_token):
                    f['filesize'] = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
                    if needs_live_processing:
                        f['is_from_start'] = True

                    yield f
    # The subtitles are always the last item generated
    yield subtitles
def _extract_storyboard(self, player_responses, duration):
    """Generate the storyboard formats described by the player responses

    The storyboard spec is a "|"-separated string: the base URL template comes first
    and is followed by one entry per storyboard. Every entry has 8 "#"-separated
    fields, of which the first five are width, height, frame count, columns and rows
    and the last two are substituted into the URL.

    duration: duration of the video in seconds. If it is unknown (None or 0), the
              storyboards are still generated, but their "fps" and the "duration"
              of their fragments are None
    """
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    # spec is reversed, so pop() takes the leading base URL off the list
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is a grid of cols x rows frames; the last one may be partial
        fragment_count = frame_count / (cols * rows)
        # Without a usable duration the divisions below would raise
        fragment_duration = duration / fragment_count if duration else None
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration if duration else None,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                'duration': (
                    min(fragment_duration, duration - (j * fragment_duration))
                    if fragment_duration else None),
            } for j in range(math.ceil(fragment_count))],
        }
4518 def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
4519 webpage = None
4520 if 'webpage' not in self._configuration_arg('player_skip'):
4521 query = {'bpctr': '9999999999', 'has_verified': '1'}
4522 pp = self._configuration_arg('player_params', [None], casesense=True)[0]
4523 if pp:
4524 query['pp'] = pp
4525 webpage = self._download_webpage(
4526 webpage_url, video_id, fatal=False, query=query, headers=self._generate_webpage_headers())
4528 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
4530 player_responses, player_url = self._extract_player_responses(
4531 self._get_requested_clients(url, smuggled_data),
4532 video_id, webpage, master_ytcfg, smuggled_data)
4534 return webpage, master_ytcfg, player_responses, player_url
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status of the video and collect its formats and subtitles

    Returns a tuple (live_broadcast_details, live_status, streaming_data, formats, subtitles)
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # The checks are listed by precedence: the first one that applies wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        # Only an explicit False counts as "not live"; otherwise the status is unknown
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields all the formats first and the subtitles as its last item
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    drm_free_formats = [fmt for fmt in formats if not fmt.get('has_drm')]
    if not drm_free_formats:
        # If there are no formats that definitely don't have DRM, all have DRM
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
4559 def _real_extract(self, url):
4560 url, smuggled_data = unsmuggle_url(url, {})
4561 video_id = self._match_id(url)
4563 base_url = self.http_scheme() + '//www.youtube.com/'
4564 webpage_url = base_url + 'watch?v=' + video_id
4566 webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
4568 playability_statuses = traverse_obj(
4569 player_responses, (..., 'playabilityStatus'), expected_type=dict)
4571 trailer_video_id = get_first(
4572 playability_statuses,
4573 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
4574 expected_type=str)
4575 if trailer_video_id:
4576 return self.url_result(
4577 trailer_video_id, self.ie_key(), trailer_video_id)
4579 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
4580 if webpage else (lambda x: None))
4582 video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
4583 microformats = traverse_obj(
4584 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
4585 expected_type=dict)
4587 translated_title = self._get_text(microformats, (..., 'title'))
4588 video_title = (self._preferred_lang and translated_title
4589 or get_first(video_details, 'title') # primary
4590 or translated_title
4591 or search_meta(['og:title', 'twitter:title', 'title']))
4592 translated_description = self._get_text(microformats, (..., 'description'))
4593 original_description = get_first(video_details, 'shortDescription')
4594 video_description = (
4595 self._preferred_lang and translated_description
4596 # If original description is blank, it will be an empty string.
4597 # Do not prefer translated description in this case.
4598 or original_description if original_description is not None else translated_description)
4600 multifeed_metadata_list = get_first(
4601 player_responses,
4602 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
4603 expected_type=str)
4604 if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
4605 if self.get_param('noplaylist'):
4606 self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
4607 else:
4608 entries = []
4609 feed_ids = []
4610 for feed in multifeed_metadata_list.split(','):
4611 # Unquote should take place before split on comma (,) since textual
4612 # fields may contain comma as well (see
4613 # https://github.com/ytdl-org/youtube-dl/issues/8536)
4614 feed_data = urllib.parse.parse_qs(
4615 urllib.parse.unquote_plus(feed))
4617 def feed_entry(name):
4618 return try_get(
4619 feed_data, lambda x: x[name][0], str)
4621 feed_id = feed_entry('id')
4622 if not feed_id:
4623 continue
4624 feed_title = feed_entry('title')
4625 title = video_title
4626 if feed_title:
4627 title += f' ({feed_title})'
4628 entries.append({
4629 '_type': 'url_transparent',
4630 'ie_key': 'Youtube',
4631 'url': smuggle_url(
4632 '{}watch?v={}'.format(base_url, feed_data['id'][0]),
4633 {'force_singlefeed': True}),
4634 'title': title,
4636 feed_ids.append(feed_id)
4637 self.to_screen(
4638 'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format(
4639 ', '.join(feed_ids), video_id))
4640 return self.playlist_result(
4641 entries, video_id, video_title, video_description)
4643 duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
4644 or int_or_none(get_first(microformats, 'lengthSeconds'))
4645 or parse_duration(search_meta('duration')) or None)
4647 live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
4648 self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
4649 if live_status == 'post_live':
4650 self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
4652 if not formats:
4653 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
4654 self.report_drm(video_id)
4655 pemr = get_first(
4656 playability_statuses,
4657 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
4658 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
4659 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
4660 if subreason:
4661 if subreason == 'The uploader has not made this video available in your country.':
4662 countries = get_first(microformats, 'availableCountries')
4663 if not countries:
4664 regions_allowed = search_meta('regionsAllowed')
4665 countries = regions_allowed.split(',') if regions_allowed else None
4666 self.raise_geo_restricted(subreason, countries, metadata_available=True)
4667 reason += f'. {subreason}'
4668 if reason:
4669 self.raise_no_formats(reason, expected=True)
4671 keywords = get_first(video_details, 'keywords', expected_type=list) or []
4672 if not keywords and webpage:
4673 keywords = [
4674 unescapeHTML(m.group('content'))
4675 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
4676 for keyword in keywords:
4677 if keyword.startswith('yt:stretch='):
4678 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
4679 if mobj:
4680 # NB: float is intentional for forcing float division
4681 w, h = (float(v) for v in mobj.groups())
4682 if w > 0 and h > 0:
4683 ratio = w / h
4684 for f in formats:
4685 if f.get('vcodec') != 'none':
4686 f['stretched_ratio'] = ratio
4687 break
4688 thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
4689 thumbnail_url = search_meta(['og:image', 'twitter:image'])
4690 if thumbnail_url:
4691 thumbnails.append({
4692 'url': thumbnail_url,
4694 original_thumbnails = thumbnails.copy()
4696 # The best resolution thumbnails sometimes does not appear in the webpage
4697 # See: https://github.com/yt-dlp/yt-dlp/issues/340
4698 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
4699 thumbnail_names = [
4700 # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
4701 # in resolution, these are not the custom thumbnail. So de-prioritize them
4702 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
4703 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3',
4705 n_thumbnail_names = len(thumbnail_names)
4706 thumbnails.extend({
4707 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
4708 video_id=video_id, name=name, ext=ext,
4709 webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
4710 } for name in thumbnail_names for ext in ('webp', 'jpg'))
4711 for thumb in thumbnails:
4712 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
4713 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
4714 self._remove_duplicate_formats(thumbnails)
4715 self._downloader._sort_thumbnails(original_thumbnails)
4717 category = get_first(microformats, 'category') or search_meta('genre')
4718 channel_id = self.ucid_or_none(str_or_none(
4719 get_first(video_details, 'channelId')
4720 or get_first(microformats, 'externalChannelId')
4721 or search_meta('channelId')))
4722 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
4724 live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
4725 live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
4726 if not duration and live_end_time and live_start_time:
4727 duration = live_end_time - live_start_time
4729 needs_live_processing = self._needs_live_processing(live_status, duration)
4731 def is_bad_format(fmt):
4732 if needs_live_processing and not fmt.get('is_from_start'):
4733 return True
4734 elif (live_status == 'is_live' and needs_live_processing != 'is_live'
4735 and fmt.get('protocol') == 'http_dash_segments'):
4736 return True
4738 for fmt in filter(is_bad_format, formats):
4739 fmt['preference'] = (fmt.get('preference') or -1) - 10
4740 fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
4742 if needs_live_processing:
4743 self._prepare_live_from_start_formats(
4744 formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
4746 formats.extend(self._extract_storyboard(player_responses, duration))
4748 channel_handle = self.handle_from_url(owner_profile_url)
4750 info = {
4751 'id': video_id,
4752 'title': video_title,
4753 'formats': formats,
4754 'thumbnails': thumbnails,
4755 # The best thumbnail that we are sure exists. Prevents unnecessary
4756 # URL checking if user don't care about getting the best possible thumbnail
4757 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
4758 'description': video_description,
4759 'channel_id': channel_id,
4760 'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
4761 'duration': duration,
4762 'view_count': int_or_none(
4763 get_first((video_details, microformats), (..., 'viewCount'))
4764 or search_meta('interactionCount')),
4765 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
4766 'age_limit': 18 if (
4767 get_first(microformats, 'isFamilySafe') is False
4768 or search_meta('isFamilyFriendly') == 'false'
4769 or search_meta('og:restrictions:age') == '18+') else 0,
4770 'webpage_url': webpage_url,
4771 'categories': [category] if category else None,
4772 'tags': keywords,
4773 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
4774 'live_status': live_status,
4775 'release_timestamp': live_start_time,
4776 '_format_sort_fields': ( # source_preference is lower for potentially damaged formats
4777 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang', 'proto'),
4780 subtitles = {}
4781 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
4782 if pctr:
4783 def get_lang_code(track):
4784 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
4785 or track.get('languageCode'))
4787 # Converted into dicts to remove duplicates
4788 captions = {
4789 get_lang_code(sub): sub
4790 for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
4791 translation_languages = {
4792 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
4793 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}
4795 def process_language(container, base_url, lang_code, sub_name, query):
4796 lang_subs = container.setdefault(lang_code, [])
4797 for fmt in self._SUBTITLE_FORMATS:
4798 query.update({
4799 'fmt': fmt,
4801 lang_subs.append({
4802 'ext': fmt,
4803 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
4804 'name': sub_name,
4807 # NB: Constructing the full subtitle dictionary is slow
4808 get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
4809 self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
4810 for lang_code, caption_track in captions.items():
4811 base_url = caption_track.get('baseUrl')
4812 orig_lang = parse_qs(base_url).get('lang', [None])[-1]
4813 if not base_url:
4814 continue
4815 lang_name = self._get_text(caption_track, 'name', max_runs=1)
4816 if caption_track.get('kind') != 'asr':
4817 if not lang_code:
4818 continue
4819 process_language(
4820 subtitles, base_url, lang_code, lang_name, {})
4821 if not caption_track.get('isTranslatable'):
4822 continue
4823 for trans_code, trans_name in translation_languages.items():
4824 if not trans_code:
4825 continue
4826 orig_trans_code = trans_code
4827 if caption_track.get('kind') != 'asr' and trans_code != 'und':
4828 if not get_translated_subs:
4829 continue
4830 trans_code += f'-{lang_code}'
4831 trans_name += format_field(lang_name, None, ' from %s')
4832 if lang_code == f'a-{orig_trans_code}':
4833 # Set audio language based on original subtitles
4834 for f in formats:
4835 if f.get('acodec') != 'none' and not f.get('language'):
4836 f['language'] = orig_trans_code
4837 # Add an "-orig" label to the original language so that it can be distinguished.
4838 # The subs are returned without "-orig" as well for compatibility
4839 process_language(
4840 automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
4841 # Setting tlang=lang returns damaged subtitles.
4842 process_language(automatic_captions, base_url, trans_code, trans_name,
4843 {} if orig_lang == orig_trans_code else {'tlang': trans_code})
4845 info['automatic_captions'] = automatic_captions
4846 info['subtitles'] = subtitles
4848 parsed_url = urllib.parse.urlparse(url)
4849 for component in [parsed_url.fragment, parsed_url.query]:
4850 query = urllib.parse.parse_qs(component)
4851 for k, v in query.items():
4852 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
4853 d_k += '_time'
4854 if d_k not in info and k in s_ks:
4855 info[d_k] = parse_duration(v[0])
4857 # Youtube Music Auto-generated description
4858 if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
4859 # XXX: Causes catastrophic backtracking if description has "·"
4860 # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
4861 # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
4862 # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
4863 mobj = re.search(
4864 r'''(?xs)
4865 (?=(?P<track>[^\n·]+))(?P=track)·
4866 (?=(?P<artist>[^\n]+))(?P=artist)\n+
4867 (?=(?P<album>[^\n]+))(?P=album)\n
4868 (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
4869 (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
4870 (.+?\nArtist\s*:\s*
4871 (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
4872 )?.+\nAuto-generated\ by\ YouTube\.\s*$
4873 ''', video_description)
4874 if mobj:
4875 release_year = mobj.group('release_year')
4876 release_date = mobj.group('release_date')
4877 if release_date:
4878 release_date = release_date.replace('-', '')
4879 if not release_year:
4880 release_year = release_date[:4]
4881 info.update({
4882 'album': mobj.group('album'.strip()),
4883 'artists': ([a] if (a := mobj.group('clean_artist'))
4884 else [a.strip() for a in mobj.group('artist').split('·')]),
4885 'track': mobj.group('track').strip(),
4886 'release_date': release_date,
4887 'release_year': int_or_none(release_year),
4890 initial_data = None
4891 if webpage:
4892 initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
4893 if not traverse_obj(initial_data, 'contents'):
4894 self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
4895 initial_data = None
4896 if not initial_data:
4897 query = {'videoId': video_id}
4898 query.update(self._get_checkok_params())
4899 initial_data = self._extract_response(
4900 item_id=video_id, ep='next', fatal=False,
4901 ytcfg=master_ytcfg, query=query, check_get_keys='contents',
4902 headers=self.generate_api_headers(ytcfg=master_ytcfg),
4903 note='Downloading initial data API JSON')
4905 COMMENTS_SECTION_IDS = ('comment-item-section', 'engagement-panel-comments-section')
4906 info['comment_count'] = traverse_obj(initial_data, (
4907 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
4908 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
4909 ), (
4910 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] in COMMENTS_SECTION_IDS,
4911 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
4912 ), expected_type=self._get_count, get_all=False)
4914 try: # This will error if there is no livechat
4915 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
4916 except (KeyError, IndexError, TypeError):
4917 pass
4918 else:
4919 info.setdefault('subtitles', {})['live_chat'] = [{
4920 # url is needed to set cookies
4921 'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
4922 'video_id': video_id,
4923 'ext': 'json',
4924 'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
4925 else 'youtube_live_chat_replay'),
4928 if initial_data:
4929 info['chapters'] = (
4930 self._extract_chapters_from_json(initial_data, duration)
4931 or self._extract_chapters_from_engagement_panel(initial_data, duration)
4932 or self._extract_chapters_from_description(video_description, duration)
4933 or None)
4935 info['heatmap'] = self._extract_heatmap(initial_data)
4937 contents = traverse_obj(
4938 initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
4939 expected_type=list, default=[])
4941 vpir = get_first(contents, 'videoPrimaryInfoRenderer')
4942 if vpir:
4943 stl = vpir.get('superTitleLink')
4944 if stl:
4945 stl = self._get_text(stl)
4946 if try_get(
4947 vpir,
4948 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
4949 info['location'] = stl
4950 else:
4951 mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
4952 if mobj:
4953 info.update({
4954 'series': mobj.group(1),
4955 'season_number': int(mobj.group(2)),
4956 'episode_number': int(mobj.group(3)),
4958 for tlb in (try_get(
4959 vpir,
4960 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
4961 list) or []):
4962 tbrs = variadic(
4963 traverse_obj(
4964 tlb, ('toggleButtonRenderer', ...),
4965 ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
4966 for tbr in tbrs:
4967 for getter, regex in [(
4968 lambda x: x['defaultText']['accessibility']['accessibilityData'],
4969 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
4970 lambda x: x['accessibility'],
4971 lambda x: x['accessibilityData']['accessibilityData'],
4972 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
4973 label = (try_get(tbr, getter, dict) or {}).get('label')
4974 if label:
4975 mobj = re.match(regex, label)
4976 if mobj:
4977 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
4978 break
4980 info['like_count'] = traverse_obj(vpir, (
4981 'videoActions', 'menuRenderer', 'topLevelButtons', ...,
4982 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
4983 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
4984 'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
4986 vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
4987 if vcr:
4988 vc = self._get_count(vcr, 'viewCount')
4989 # Upcoming premieres with waiting count are treated as live here
4990 if vcr.get('isLive'):
4991 info['concurrent_view_count'] = vc
4992 elif info.get('view_count') is None:
4993 info['view_count'] = vc
4995 vsir = get_first(contents, 'videoSecondaryInfoRenderer')
4996 if vsir:
4997 vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
4998 info.update({
4999 'channel': self._get_text(vor, 'title'),
5000 'channel_follower_count': self._get_count(vor, 'subscriberCountText')})
5002 if not channel_handle:
5003 channel_handle = self.handle_from_url(
5004 traverse_obj(vor, (
5005 ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
5006 (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
5007 {str}), get_all=False))
5009 rows = try_get(
5010 vsir,
5011 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
5012 list) or []
5013 multiple_songs = False
5014 for row in rows:
5015 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
5016 multiple_songs = True
5017 break
5018 for row in rows:
5019 mrr = row.get('metadataRowRenderer') or {}
5020 mrr_title = mrr.get('title')
5021 if not mrr_title:
5022 continue
5023 mrr_title = self._get_text(mrr, 'title')
5024 mrr_contents_text = self._get_text(mrr, ('contents', 0))
5025 if mrr_title == 'License':
5026 info['license'] = mrr_contents_text
5027 elif not multiple_songs:
5028 if mrr_title == 'Album':
5029 info['album'] = mrr_contents_text
5030 elif mrr_title == 'Artist':
5031 info['artists'] = [mrr_contents_text] if mrr_contents_text else None
5032 elif mrr_title == 'Song':
5033 info['track'] = mrr_contents_text
5034 owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
5035 if self._has_badge(owner_badges, BadgeType.VERIFIED):
5036 info['channel_is_verified'] = True
5038 info.update({
5039 'uploader': info.get('channel'),
5040 'uploader_id': channel_handle,
5041 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
5044 # We only want timestamp IF it has time precision AND a timezone
5045 # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
5046 timestamp = (
5047 parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT)
5048 or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT)
5050 upload_date = (
5051 dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
5053 unified_strdate(get_first(microformats, 'uploadDate'))
5054 or unified_strdate(search_meta('uploadDate'))
5057 # In the case we cannot get the timestamp:
5058 # The upload date for scheduled, live and past live streams / premieres in microformats
5059 # may be different from the stream date. Although not in UTC, we will prefer it in this case.
5060 # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
5061 if not upload_date or (not timestamp and live_status in ('not_live', None)):
5062 # this should be in UTC, as configured in the cookie/client context
5063 upload_date = strftime_or_none(
5064 self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
5066 info['upload_date'] = upload_date
5067 info['timestamp'] = timestamp
5069 if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
5070 # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
5071 upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
5072 if upload_datetime >= datetime_from_str('today-2days'):
5073 for fmt in info['formats']:
5074 if fmt.get('protocol') == 'm3u8_native':
5075 fmt['__needs_testing'] = True
5077 for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
5078 v = info.get(s_k)
5079 if v:
5080 info[d_k] = v
5082 badges = self._extract_badges(traverse_obj(vpir, 'badges'))
5084 is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
5085 or get_first(video_details, 'isPrivate', expected_type=bool))
5087 info['availability'] = (
5088 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
5089 else self._availability(
5090 is_private=is_private,
5091 needs_premium=(
5092 self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
5093 or False if initial_data and is_private is not None else None),
5094 needs_subscription=(
5095 self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
5096 or False if initial_data and is_private is not None else None),
5097 needs_auth=info['age_limit'] >= 18,
5098 is_unlisted=None if is_private is None else (
5099 self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
5100 or get_first(microformats, 'isUnlisted', expected_type=bool))))
5102 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
5104 self.mark_watched(video_id, player_responses)
5106 return info
5109 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator that carries smuggled URL data through an extraction method.

    The decorated ``func`` is invoked as ``func(self, url, smuggled_data)`` with
    the un-smuggled URL, while the returned wrapper keeps the ``(self, url)``
    signature. Afterwards the smuggled data is re-attached to the URL of the
    result, and of each of its entries, when they are of type ``url`` or
    ``url_transparent``.
    """
    def _smuggle(info, smuggled_data):
        # Only results that still refer to another URL can carry smuggled data
        if info.get('_type') not in ('url', 'url_transparent'):
            return info
        if smuggled_data.get('is_music_url'):
            parsed_url = urllib.parse.urlparse(info['url'])
            if parsed_url.netloc in ('www.youtube.com', 'music.youtube.com'):
                # The flag is expressed through the music.youtube.com host instead,
                # so it is removed from the dict (this mutates the caller's dict)
                smuggled_data.pop('is_music_url')
                info['url'] = urllib.parse.urlunparse(parsed_url._replace(netloc='music.youtube.com'))
        if smuggled_data:
            info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if smuggled_data:
            _smuggle(info_dict, smuggled_data)
            if info_dict.get('entries'):
                # Entries are wrapped lazily; each one gets its own copy of the
                # data because _smuggle may pop keys from the dict it is given
                info_dict['entries'] = (_smuggle(i, smuggled_data.copy()) for i in info_dict['entries'])
        return info_dict
    return wrapper
5137 @staticmethod
5138 def _extract_basic_item_renderer(item):
5139 # Modified from _extract_grid_item_renderer
5140 known_basic_renderers = (
5141 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
5143 for key, renderer in item.items():
5144 if not isinstance(renderer, dict):
5145 continue
5146 elif key in known_basic_renderers:
5147 return renderer
5148 elif key.startswith('grid') and key.endswith('Renderer'):
5149 return renderer
def _extract_channel_renderer(self, renderer):
    """Build a ``url``-type result dict for a channel from its renderer.

    `renderer` must contain ``channelId`` (a missing key raises KeyError).
    The returned result is routed to YoutubeTabIE.
    """
    ucid = self.ucid_or_none(renderer['channelId'])
    name = self._get_text(renderer, 'title')
    channel_url = format_field(ucid, None, 'https://www.youtube.com/channel/%s', default=None)

    endpoint_url = traverse_obj(renderer, (
        'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                               ('browseEndpoint', 'canonicalBaseUrl')),
        {str}), get_all=False)
    # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
    handle = (
        self.handle_from_url(endpoint_url)
        or self.handle_or_none(self._get_text(renderer, 'subscriberCountText')))
    uploader_url = format_field(handle, None, 'https://www.youtube.com/%s', default=None)

    # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
    # However, in feed/channels subscriberCountText is set correctly to the subscriber count
    follower_count = traverse_obj(
        renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count)
    thumbnails = self._extract_thumbnails(renderer, 'thumbnail')

    # videoCountText may be the subscriber count, so it is only trusted
    # when subscriberCountText itself yields a count
    playlist_count = None
    if self._get_count(renderer, 'subscriberCountText') is not None:
        playlist_count = self._get_count(renderer, 'videoCountText')

    description = self._get_text(renderer, 'descriptionSnippet')
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))

    return {
        '_type': 'url',
        'url': channel_url,
        'id': ucid,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': name,
        'uploader': name,
        'channel_id': ucid,
        'channel_url': channel_url,
        'title': name,
        'uploader_id': handle,
        'uploader_url': uploader_url,
        'channel_follower_count': follower_count,
        'thumbnails': thumbnails,
        'playlist_count': playlist_count,
        'description': description,
        'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None,
    }
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Each item is resolved to a basic renderer and dispatched, in order of
    precedence, as a playlist, a video, a channel, or a generic endpoint URL
    handled by the first suitable extractor. Items that match none are skipped.
    """
    for item in grid_renderer['items']:
        renderer = self._extract_basic_item_renderer(item) if isinstance(item, dict) else None
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        if playlist_id := renderer.get('playlistId'):
            # playlist
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={playlist_id}',
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            endpoint_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not endpoint_url:
                continue
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(endpoint_url)),
                None)
            if matching_ie is not None:
                yield self.url_result(
                    endpoint_url, ie=matching_ie.ie_key(),
                    video_id=matching_ie._match_id(endpoint_url), video_title=title)
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list item renderer into a music.youtube.com url result.

    Checked in order: a video from ``playlistItemData``, a playlist (optionally
    with a starting video) from the watch endpoint, then a browse endpoint.
    Returns None when the renderer provides none of these.
    """
    if video_id := traverse_obj(renderer, ('playlistItemData', 'videoId')):
        track_title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'https://music.youtube.com/watch?v={video_id}',
            ie=YoutubeIE.ie_key(), video_id=video_id, title=track_title)

    watch_endpoint = ('navigationEndpoint', 'watchEndpoint')
    if playlist_id := traverse_obj(renderer, (*watch_endpoint, 'playlistId')):
        video_id = traverse_obj(renderer, (*watch_endpoint, 'videoId'))
        playlist_url = (
            f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}' if video_id
            else f'https://music.youtube.com/playlist?list={playlist_id}')
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    if browse_id := traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId')):
        return self.url_result(
            f'https://music.youtube.com/browse/{browse_id}',
            ie=YoutubeTabIE.ie_key(), video_id=browse_id)

    return None
5248 def _shelf_entries_from_content(self, shelf_renderer):
5249 content = shelf_renderer.get('content')
5250 if not isinstance(content, dict):
5251 return
5252 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
5253 if renderer:
5254 # TODO: add support for nested playlists so each shelf is processed
5255 # as separate playlist
5256 # TODO: this includes only first N items
5257 yield from self._grid_entries(renderer)
5258 renderer = content.get('horizontalListRenderer')
5259 if renderer:
5260 # TODO: handle case
5261 pass
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield results for a shelf: its endpoint URL (if any), then its content entries.

    With `skip_channels` set, a shelf whose URL contains ``/channels?`` yields
    nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    if shelf_url := urljoin('https://www.youtube.com', endpoint_path):
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # The shelf may not contain a shelf URL, so entries are extracted from its content as well
    yield from self._shelf_entries_from_content(shelf_renderer)
5279 def _playlist_entries(self, video_list_renderer):
5280 for content in video_list_renderer['contents']:
5281 if not isinstance(content, dict):
5282 continue
5283 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
5284 if not isinstance(renderer, dict):
5285 continue
5286 video_id = renderer.get('videoId')
5287 if not video_id:
5288 continue
5289 yield self._extract_video(renderer)
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one result for the content of a rich item renderer.

    The content renderer is treated as a video when it has ``videoId``, as a
    playlist when it has ``playlistId``, and as a Shorts lockup when it has
    ``entityId``; otherwise nothing is yielded.
    """
    renderer = traverse_obj(rich_grid_renderer, (
        'content',
        ('videoRenderer', 'reelItemRenderer', 'playlistRenderer', 'shortsLockupViewModel'),
        any)) or {}

    if renderer.get('videoId'):
        yield self._extract_video(renderer)
    elif playlist_id := renderer.get('playlistId'):
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))
    elif renderer.get('entityId'):
        # shortsLockupViewModel extraction
        shorts_id = traverse_obj(
            renderer, ('onTap', 'innertubeCommand', 'reelWatchEndpoint', 'videoId', {str}))
        if not shorts_id:
            return
        overlay_metadata = traverse_obj(renderer, ('overlayMetadata', {
            'title': ('primaryText', 'content', {str}),
            'view_count': ('secondaryText', 'content', {parse_count}),
        }))
        yield self.url_result(
            f'https://www.youtube.com/shorts/{shorts_id}',
            ie=YoutubeIE, video_id=shorts_id,
            **overlay_metadata,
            thumbnails=self._extract_thumbnails(renderer, 'thumbnail', final_key='sources'))
5322 def _video_entry(self, video_renderer):
5323 video_id = video_renderer.get('videoId')
5324 if video_id:
5325 return self._extract_video(video_renderer)
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a YoutubeTabIE url result for a hashtag tile, or None if it has no tap URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
def _post_thread_entries(self, post_thread_renderer):
    """Yield the media referenced by a community post.

    In order: the attached video, the attached playlist, and every video URL
    linked inline in the post text (except a link to the attached video itself).
    Nothing is yielded when the thread has no ``backstagePostRenderer``.
    """
    post = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post:
        return

    # video attachment
    attached_video = try_get(
        post, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    attached_video_id = attached_video.get('videoId')
    if attached_video_id:
        video_entry = self._extract_video(attached_video)
        if video_entry:
            yield video_entry

    # playlist attachment
    attached_playlist_id = try_get(
        post, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if attached_playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={attached_playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=attached_playlist_id)

    # inline video links
    for run in try_get(post, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        link = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not link or not YoutubeIE.suitable(link):
            continue
        linked_video_id = YoutubeIE._match_id(link)
        # The attached video has already been handled above
        if linked_video_id != attached_video_id:
            yield self.url_result(link, ie=YoutubeIE.ie_key(), video_id=linked_video_id)
5370 def _post_thread_continuation_entries(self, post_thread_continuation):
5371 contents = post_thread_continuation.get('contents')
5372 if not isinstance(contents, list):
5373 return
5374 for content in contents:
5375 renderer = content.get('backstagePostThreadRenderer')
5376 if isinstance(renderer, dict):
5377 yield from self._post_thread_entries(renderer)
5378 continue
5379 renderer = content.get('videoRenderer')
5380 if isinstance(renderer, dict):
5381 yield self._video_entry(renderer)
5383 r''' # unused
5384 def _rich_grid_entries(self, contents):
5385 for content in contents:
5386 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
5387 if video_renderer:
5388 entry = self._video_entry(video_renderer)
5389 if entry:
5390 yield entry
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE url result for every linked text run in a report history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for link in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', link), YoutubeIE)
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found in ``parent_renderer['contents']``.

    @param parent_renderer    renderer dict whose ``contents`` list is walked
    @param continuation_list  one-element list used as an output parameter: it is
                              reset to ``[None]`` and ``continuation_list[0]`` is
                              filled with whatever `_extract_continuation` returns
                              for the innermost renderer that provides one
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
    for content in contents:
        if not isinstance(content, dict):
            continue
        # First of the section-like renderers present in this content item
        is_renderer = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not is_renderer:
            # Items that are not wrapped in a section renderer
            if content.get('richItemRenderer'):
                for entry in self._rich_entries(content['richItemRenderer']):
                    yield entry
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
        for isr_content in isr_contents:
            if not isinstance(isr_content, dict):
                continue

            # Maps a renderer key to a callable returning an iterable of entries
            known_renderers = {
                'playlistVideoListRenderer': self._playlist_entries,
                'gridRenderer': self._grid_entries,
                'reelShelfRenderer': self._grid_entries,
                'shelfRenderer': self._shelf_entries,
                'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
                'backstagePostThreadRenderer': self._post_thread_entries,
                'videoRenderer': lambda x: [self._video_entry(x)],
                'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
                'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
            }
            # Only the first known renderer of each item is processed
            for key, renderer in isr_content.items():
                if key not in known_renderers:
                    continue
                for entry in known_renderers[key](renderer):
                    # The single-entry helpers above may return None
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        # Fall back to the continuation of the section renderer...
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(is_renderer)

    # ...and finally to that of the parent renderer
    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of a tab, following continuations page by page.

    The entries embedded in ``tab['content']`` are yielded first. Further pages
    are then requested through `_extract_response` for as long as a continuation
    is available, the response is non-empty and contains a known renderer, and
    the continuation token has not been seen before.

    @param tab              tab renderer dict; nothing is yielded without ``content``
    @param item_id          identifier used in the ``item_id`` of each page request
    @param ytcfg            passed on to the API header and response helpers
    @param account_syncid   passed on to `generate_api_headers`
    @param visitor_data     initial visitor data; replaced by the value extracted
                            from each response when there is one
    """
    continuation_list = [None]
    # The lambda looks up `continuation_list` when it is called, so it always
    # fills the list most recently bound to that name (see the rebinding below)
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]
    seen_continuations = set()
    for page_num in itertools.count(1):
        if not continuation:
            break
        continuation_token = continuation.get('continuation')
        if continuation_token is not None and continuation_token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(continuation_token)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Maps a key of the first continuation item to (entry generator, parent key).
        # With a parent key, the items are wrapped as {parent_key: items} before
        # being passed on; with None, they are passed on unchanged
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems',
        ), 'continuationContents', get_all=False)
        # The first item if `continuation_items` is a list, else the object itself
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh list for this page; only `extract_entries` writes to it
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer in this page: stop paging
        if not video_items_renderer:
            break
5522 @staticmethod
5523 def _extract_selected_tab(tabs, fatal=True):
5524 for tab_renderer in tabs:
5525 if tab_renderer.get('selected'):
5526 return tab_renderer
5527 if fatal:
5528 raise ExtractorError('Unable to find selected tab')
@staticmethod
def _extract_tab_renderers(response):
    """Collect the tab renderer dicts of a two-column browse response.

    Both ``tabRenderer`` and ``expandableTabRenderer`` values are looked up
    under ``contents.twoColumnBrowseResultsRenderer.tabs``.
    """
    tab_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_path, expected_type=dict)
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab of a tabbed page.

    The metadata comes from `_extract_metadata_from_tabs`, with its title
    extended by suffixes derived from the selected tab; the entries come
    from `_entries`.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)

    # Suffixes are formatted from the tab's `title` and `expandedText` fields, in that order
    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    entries = self._entries(selected_tab, metadata['id'], ytcfg, account_syncid, visitor_data)
    return self.playlist_result(entries, **metadata)
5549 def _extract_metadata_from_tabs(self, item_id, data):
5550 info = {'id': item_id}
5552 metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
5553 if metadata_renderer:
5554 channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
5555 ('channelUrl', {self.ucid_from_url}))
5556 info.update({
5557 'channel': metadata_renderer.get('title'),
5558 'channel_id': channel_id,
5560 if info['channel_id']:
5561 info['id'] = info['channel_id']
5562 else:
5563 metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
5565 # pageHeaderViewModel slow rollout began April 2024
5566 page_header_view_model = traverse_obj(data, (
5567 'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}))
5569 # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
5570 # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
5571 def _get_uncropped(url):
5572 return url_or_none((url or '').split('=')[0] + '=s0')
5574 avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
5575 if avatar_thumbnails:
5576 uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
5577 if uncropped_avatar:
5578 avatar_thumbnails.append({
5579 'url': uncropped_avatar,
5580 'id': 'avatar_uncropped',
5581 'preference': 1,
5584 channel_banners = (
5585 self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
5586 or self._extract_thumbnails(
5587 page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources'))
5588 for banner in channel_banners:
5589 banner['preference'] = -10
5591 if channel_banners:
5592 uncropped_banner = _get_uncropped(channel_banners[0]['url'])
5593 if uncropped_banner:
5594 channel_banners.append({
5595 'url': uncropped_banner,
5596 'id': 'banner_uncropped',
5597 'preference': -5,
5600 # Deprecated - remove primary_sidebar_renderer when layout discontinued
5601 primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
5602 playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)
5604 primary_thumbnails = self._extract_thumbnails(
5605 primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
5606 playlist_thumbnails = self._extract_thumbnails(
5607 playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))
5609 info.update({
5610 'title': (traverse_obj(metadata_renderer, 'title')
5611 or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
5612 or info['id']),
5613 'availability': self._extract_availability(data),
5614 'channel_follower_count': (
5615 self._get_count(data, ('header', ..., 'subscriberCountText'))
5616 or traverse_obj(page_header_view_model, (
5617 'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
5618 lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))),
5619 'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
5620 'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
5621 or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
5622 'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
5625 channel_handle = (
5626 traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
5627 or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))
5629 if channel_handle:
5630 info.update({
5631 'uploader_id': channel_handle,
5632 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
5635 channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
5636 if self._has_badge(channel_badges, BadgeType.VERIFIED):
5637 info['channel_is_verified'] = True
5638 # Playlist stats is a text runs array containing [video count, view count, last updated].
5639 # last updated or (view count and last updated) may be missing.
5640 playlist_stats = get_first(
5641 (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))
5643 last_updated_unix = self._parse_time_text(
5644 self._get_text(playlist_stats, 2) # deprecated, remove when old layout discontinued
5645 or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
5646 info['modified_date'] = strftime_or_none(last_updated_unix)
5648 info['view_count'] = self._get_count(playlist_stats, 1)
5649 if info['view_count'] is None: # 0 is allowed
5650 info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
5651 if info['view_count'] is None:
5652 info['view_count'] = self._get_count(data, (
5653 'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
5654 'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
5656 info['playlist_count'] = self._get_count(playlist_stats, 0)
5657 if info['playlist_count'] is None: # 0 is allowed
5658 info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
5660 if not info.get('channel_id'):
5661 owner = traverse_obj(playlist_header_renderer, 'ownerText')
5662 if not owner: # Deprecated
5663 owner = traverse_obj(
5664 self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
5665 ('videoOwner', 'videoOwnerRenderer', 'title'))
5666 owner_text = self._get_text(owner)
5667 browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
5668 info.update({
5669 'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
5670 'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
5671 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))),
5674 info.update({
5675 'uploader': info['channel'],
5676 'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
5677 'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
5680 return info
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """
    Yield the entries of an inline playlist panel, requesting further pages from the `next` endpoint.

    @param playlist:    playlist panel renderer; its entries are read with `_playlist_entries`
    @param playlist_id: value sent as `playlistId` in every follow-up query
    @param data:        initial response, used for the account sync ID and visitor data
    @param ytcfg:       ytcfg used to generate the API headers

    Iteration ends when a page has no entries, when a page has nothing after the
    last entry already yielded, or when a response does not contain a playlist panel.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip everything up to and including the last entry already yielded
        # (start is 0 when that entry is not present in this page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']

        # try_get gives None when the path is missing; fall back to an empty dict
        # so that the lookups below use their defaults instead of raising
        watch_endpoint = try_get(
            playlist,
            lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D',
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents',
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # No playlist panel in the response: there is nothing more to page through
            return
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """
    Build the result for a playlist panel found in a response.

    Returns a `url_result` pointing at the playlist's own page when the panel
    links to a page different from `url` and the playlist ID is not a known
    unviewable one; otherwise returns a `playlist_result` over the inline entries.

    @param item_id:  fallback playlist ID when the panel has no `playlistId`
    @param url:      URL being extracted; base for resolving the panel's link
    @param data:     response, used as a fallback source for the title
    @param playlist: playlist panel renderer
    @param ytcfg:    passed through to `_extract_inline_playlist`
    """
    playlist_title = playlist.get('title')
    if not playlist_title:
        playlist_title = try_get(data, lambda x: x['titleText']['simpleText'], str)
    list_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    web_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], str)
    tab_url = urljoin(url, web_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', list_id)

    if not tab_url or tab_url == url or known_unviewable:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, list_id, data, ytcfg),
            playlist_id=list_id, playlist_title=playlist_title)

    return self.url_result(
        tab_url, ie=YoutubeTabIE.ie_key(), video_id=list_id,
        video_title=playlist_title)
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    @returns: 'public' when a public badge, header privacy or privacy icon is found,
              otherwise whatever `_availability` derives from the collected flags
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def _privacy_matches(header_value, icon_value, fallback=None):
        # Prefer the header privacy, then the selected dropdown icon;
        # `fallback` is used only when neither of them is present
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    # The badge check is OR-ed with the whole header/icon chain. Previously the
    # conditional expression bound looser than `or`, so the badge was only
    # consulted when the header privacy was present.
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or _privacy_matches('PRIVATE', 'PRIVACY_PRIVATE')),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or _privacy_matches('UNLISTED', 'PRIVACY_UNLISTED', microformats_is_unlisted)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """
    Return the first truthy `info_renderer` entry among the playlist sidebar items.

    @param data:          response containing `sidebar.playlistSidebarRenderer.items`
    @param info_renderer: key to look up in each sidebar item
    @param expected_type: type the looked-up value must have (passed to `try_get`)
    @returns: the matching renderer, or None when there is no such item
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without making a request when `data` has neither playlist
    metadata nor a playlist header; otherwise returns the (non-fatal) browse response.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    api_headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    browse_query = {
        'params': 'wgYCCAA=',
        'browseId': f'VL{item_id}',
    }
    return self._extract_response(
        item_id=item_id, headers=api_headers, query=browse_query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is listed in the `skip` extractor argument of YoutubeTabIE"""
    skip_values = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skip_values
def _extract_webpage(self, url, item_id, fatal=True):
    """
    Download the webpage and extract its initial data, retrying via `RetryManager`.

    @param url:     page to download
    @param item_id: ID used for the download and for initial data extraction
    @param fatal:   passed to the retry manager, the initial data extraction
                    and `_error_or_warning`
    @returns: (webpage, data) tuple; either element may be None if the
              corresponding step did not succeed
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(
                url, item_id, note='Downloading webpage',
                headers=self._generate_webpage_headers())
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as err:
            cause = err.cause
            # Network errors are retried, except for HTTP 403 and 429 responses
            should_retry = isinstance(cause, network_exceptions) and (
                not isinstance(cause, HTTPError) or cause.status not in (403, 429))
            if should_retry:
                retry.error = err
                continue
            self._error_or_warning(err, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as err:
            self._error_or_warning(err, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_content = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_content:
            retry.error = ExtractorError('Incomplete yt initial data received')
            data = None
            continue

    return webpage, data
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """
    Use if failed to extract ytcfg (and data) from initial webpage

    Does nothing when `ytcfg` is available or the session is not authenticated.
    Otherwise raises ExtractorError if `fatal` is set and 'authcheck' is not in
    the `skip` extractor argument; in all other cases only a warning is reported.
    """
    if ytcfg or not self.is_authenticated:
        return

    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_enabled = 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if authcheck_enabled and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """
    Get the initial data for `url`, from the webpage if possible and from the API otherwise.

    @param url:            tab/playlist URL
    @param item_id:        ID used for the requests
    @param ytcfg:          existing ytcfg; extracted from the webpage when falsy
    @param fatal:          whether a home page redirect, the auth check and the API fallback raise
    @param webpage_fatal:  whether a failed webpage download raises
    @param default_client: client used for the API fallback
    @returns: (data, ytcfg) tuple
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        if not ytcfg:
            ytcfg = self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: fall back to resolving the URL through the API
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """
    Resolve `url` through `navigation/resolve_url` and fetch the endpoint it maps to.

    A `browseEndpoint` is requested from `browse`, a `watchEndpoint` from `next`.
    When neither is present, raises ExtractorError if `fatal`, else reports a
    warning and returns None.
    """
    api_headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolved = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=api_headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    # Checked in this order: browse endpoint first, then watch endpoint
    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        endpoint_params = try_get(resolved, lambda x: x['endpoint'][endpoint_key], dict)
        if not endpoint_params:
            continue
        return self._extract_response(
            item_id=item_id, query=endpoint_params, ep=api_ep, headers=api_headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
# Default value for the `params` field of the search query; `_search_results`
# uses it when called without `params` and omits the field when it is falsy
_SEARCH_PARAMS = None
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """
    Yield the entries of a search, requesting pages until no continuation is left.

    @param query:          search query string
    @param params:         value for the `params` field of the request;
                           defaults to `_SEARCH_PARAMS`, omitted when falsy
    @param default_client: client used for ytcfg download, headers and requests
    """
    search_params = self._SEARCH_PARAMS if params is NO_DEFAULT else params
    data = {'query': query}
    if search_params:
        data['params'] = search_params

    # Paths at which the result items may be found in a response
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # A response must contain at least the top-level key of one of the paths above
    check_get_keys = tuple({path[0] for path in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    # Single-element list so that `_extract_entries` can hand back the next continuation
    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        if continuation_list[0]:
            data.update(continuation_list[0])
        api_headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=api_headers)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            return
5924 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5925 IE_DESC = 'YouTube Tabs'
5926 _VALID_URL = r'''(?x:
5927 https?://
5928 (?!consent\.)(?:\w+\.)?
5930 youtube(?:kids)?\.com|
5931 {invidious}
5934 (?P<channel_type>channel|c|user|browse)/|
5935 (?P<not_channel>
5936 feed/|hashtag/|
5937 (?:playlist|watch)\?.*?\blist=
5939 (?!(?:{reserved_names})\b) # Direct URLs
5941 (?P<id>[^/?\#&]+)
5942 )'''.format(
5943 reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES,
5944 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5946 IE_NAME = 'youtube:tab'
5948 _TESTS = [{
5949 'note': 'playlists, multipage',
5950 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5951 'playlist_mincount': 94,
5952 'info_dict': {
5953 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5954 'title': 'Igor Kleiner Ph.D. - Playlists',
5955 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5956 'uploader': 'Igor Kleiner Ph.D.',
5957 'uploader_id': '@IgorDataScience',
5958 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5959 'channel': 'Igor Kleiner Ph.D.',
5960 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5961 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5962 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5963 'channel_follower_count': int,
5965 }, {
5966 'note': 'playlists, multipage, different order',
5967 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5968 'playlist_mincount': 94,
5969 'info_dict': {
5970 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5971 'title': 'Igor Kleiner Ph.D. - Playlists',
5972 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5973 'uploader': 'Igor Kleiner Ph.D.',
5974 'uploader_id': '@IgorDataScience',
5975 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5976 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5977 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5978 'channel': 'Igor Kleiner Ph.D.',
5979 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5980 'channel_follower_count': int,
5982 }, {
5983 'note': 'playlists, series',
5984 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5985 'playlist_mincount': 5,
5986 'info_dict': {
5987 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5988 'title': '3Blue1Brown - Playlists',
5989 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5990 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5991 'channel': '3Blue1Brown',
5992 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5993 'uploader_id': '@3blue1brown',
5994 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5995 'uploader': '3Blue1Brown',
5996 'tags': ['Mathematics'],
5997 'channel_follower_count': int,
5998 'channel_is_verified': True,
6000 }, {
6001 'note': 'playlists, singlepage',
6002 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
6003 'playlist_mincount': 4,
6004 'info_dict': {
6005 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
6006 'title': 'ThirstForScience - Playlists',
6007 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
6008 'uploader': 'ThirstForScience',
6009 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
6010 'uploader_id': '@ThirstForScience',
6011 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
6012 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
6013 'tags': 'count:12',
6014 'channel': 'ThirstForScience',
6015 'channel_follower_count': int,
6017 }, {
6018 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
6019 'only_matching': True,
6020 }, {
6021 'note': 'basic, single video playlist',
6022 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
6023 'info_dict': {
6024 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
6025 'title': 'youtube-dl public playlist',
6026 'description': '',
6027 'tags': [],
6028 'view_count': int,
6029 'modified_date': '20201130',
6030 'channel': 'Sergey M.',
6031 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
6032 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
6033 'availability': 'public',
6034 'uploader': 'Sergey M.',
6035 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
6036 'uploader_id': '@sergeym.6173',
6038 'playlist_count': 1,
6039 }, {
6040 'note': 'empty playlist',
6041 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
6042 'info_dict': {
6043 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
6044 'title': 'youtube-dl empty playlist',
6045 'tags': [],
6046 'channel': 'Sergey M.',
6047 'description': '',
6048 'modified_date': '20230921',
6049 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
6050 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
6051 'availability': 'unlisted',
6052 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
6053 'uploader_id': '@sergeym.6173',
6054 'uploader': 'Sergey M.',
6056 'playlist_count': 0,
6057 }, {
6058 'note': 'Home tab',
6059 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
6060 'info_dict': {
6061 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6062 'title': 'lex will - Home',
6063 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6064 'uploader': 'lex will',
6065 'uploader_id': '@lexwill718',
6066 'channel': 'lex will',
6067 'tags': ['bible', 'history', 'prophesy'],
6068 'uploader_url': 'https://www.youtube.com/@lexwill718',
6069 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6070 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6071 'channel_follower_count': int,
6073 'playlist_mincount': 2,
6074 }, {
6075 'note': 'Videos tab',
6076 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
6077 'info_dict': {
6078 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6079 'title': 'lex will - Videos',
6080 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6081 'uploader': 'lex will',
6082 'uploader_id': '@lexwill718',
6083 'tags': ['bible', 'history', 'prophesy'],
6084 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6085 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6086 'uploader_url': 'https://www.youtube.com/@lexwill718',
6087 'channel': 'lex will',
6088 'channel_follower_count': int,
6090 'playlist_mincount': 975,
6091 }, {
6092 'note': 'Videos tab, sorted by popular',
6093 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
6094 'info_dict': {
6095 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6096 'title': 'lex will - Videos',
6097 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6098 'uploader': 'lex will',
6099 'uploader_id': '@lexwill718',
6100 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6101 'uploader_url': 'https://www.youtube.com/@lexwill718',
6102 'channel': 'lex will',
6103 'tags': ['bible', 'history', 'prophesy'],
6104 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6105 'channel_follower_count': int,
6107 'playlist_mincount': 199,
6108 }, {
6109 'note': 'Playlists tab',
6110 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
6111 'info_dict': {
6112 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6113 'title': 'lex will - Playlists',
6114 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6115 'uploader': 'lex will',
6116 'uploader_id': '@lexwill718',
6117 'uploader_url': 'https://www.youtube.com/@lexwill718',
6118 'channel': 'lex will',
6119 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6120 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6121 'tags': ['bible', 'history', 'prophesy'],
6122 'channel_follower_count': int,
6124 'playlist_mincount': 17,
6125 }, {
6126 'note': 'Community tab',
6127 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
6128 'info_dict': {
6129 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6130 'title': 'lex will - Community',
6131 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6132 'channel': 'lex will',
6133 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6134 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6135 'tags': ['bible', 'history', 'prophesy'],
6136 'channel_follower_count': int,
6137 'uploader_url': 'https://www.youtube.com/@lexwill718',
6138 'uploader_id': '@lexwill718',
6139 'uploader': 'lex will',
6141 'playlist_mincount': 18,
6142 }, {
6143 'note': 'Channels tab',
6144 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
6145 'info_dict': {
6146 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6147 'title': 'lex will - Channels',
6148 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
6149 'channel': 'lex will',
6150 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
6151 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
6152 'tags': ['bible', 'history', 'prophesy'],
6153 'channel_follower_count': int,
6154 'uploader_url': 'https://www.youtube.com/@lexwill718',
6155 'uploader_id': '@lexwill718',
6156 'uploader': 'lex will',
6158 'playlist_mincount': 12,
6159 }, {
6160 'note': 'Search tab',
6161 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
6162 'playlist_mincount': 40,
6163 'info_dict': {
6164 'id': 'UCYO_jab_esuFRV4b17AJtAw',
6165 'title': '3Blue1Brown - Search - linear algebra',
6166 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
6167 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6168 'tags': ['Mathematics'],
6169 'channel': '3Blue1Brown',
6170 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6171 'channel_follower_count': int,
6172 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6173 'uploader_id': '@3blue1brown',
6174 'uploader': '3Blue1Brown',
6175 'channel_is_verified': True,
6177 }, {
6178 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
6179 'only_matching': True,
6180 }, {
6181 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
6182 'only_matching': True,
6183 }, {
6184 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
6185 'only_matching': True,
6186 }, {
6187 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
6188 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
6189 'info_dict': {
6190 'title': '29C3: Not my department',
6191 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
6192 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
6193 'tags': [],
6194 'view_count': int,
6195 'modified_date': '20150605',
6196 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
6197 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
6198 'channel': 'Christiaan008',
6199 'availability': 'public',
6200 'uploader_id': '@ChRiStIaAn008',
6201 'uploader': 'Christiaan008',
6202 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
6204 'playlist_count': 96,
6205 }, {
6206 'note': 'Large playlist',
6207 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
6208 'info_dict': {
6209 'title': 'Uploads from Cauchemar',
6210 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
6211 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
6212 'tags': [],
6213 'modified_date': r're:\d{8}',
6214 'channel': 'Cauchemar',
6215 'view_count': int,
6216 'description': '',
6217 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
6218 'availability': 'public',
6219 'uploader_id': '@Cauchemar89',
6220 'uploader': 'Cauchemar',
6221 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
6223 'playlist_mincount': 1123,
6224 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6225 }, {
6226 'note': 'even larger playlist, 8832 videos',
6227 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
6228 'only_matching': True,
6229 }, {
6230 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
6231 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
6232 'info_dict': {
6233 'title': 'Uploads from Interstellar Movie',
6234 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
6235 'tags': [],
6236 'view_count': int,
6237 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
6238 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
6239 'channel': 'Interstellar Movie',
6240 'description': '',
6241 'modified_date': r're:\d{8}',
6242 'availability': 'public',
6243 'uploader_id': '@InterstellarMovie',
6244 'uploader': 'Interstellar Movie',
6245 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
6247 'playlist_mincount': 21,
6248 }, {
6249 'note': 'Playlist with "show unavailable videos" button',
6250 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
6251 'info_dict': {
6252 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
6253 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
6254 'view_count': int,
6255 'channel': 'Phim Siêu Nhân Nhật Bản',
6256 'tags': [],
6257 'description': '',
6258 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
6259 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
6260 'modified_date': r're:\d{8}',
6261 'availability': 'public',
6262 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
6263 'uploader_id': '@phimsieunhannhatban',
6264 'uploader': 'Phim Siêu Nhân Nhật Bản',
6266 'playlist_mincount': 200,
6267 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6268 }, {
6269 'note': 'Playlist with unavailable videos in page 7',
6270 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
6271 'info_dict': {
6272 'title': 'Uploads from BlankTV',
6273 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
6274 'channel': 'BlankTV',
6275 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
6276 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
6277 'view_count': int,
6278 'tags': [],
6279 'modified_date': r're:\d{8}',
6280 'description': '',
6281 'availability': 'public',
6282 'uploader_id': '@blanktv',
6283 'uploader': 'BlankTV',
6284 'uploader_url': 'https://www.youtube.com/@blanktv',
6286 'playlist_mincount': 1000,
6287 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6288 }, {
6289 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
6290 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
6291 'info_dict': {
6292 'title': 'Data Analysis with Dr Mike Pound',
6293 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
6294 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
6295 'tags': [],
6296 'view_count': int,
6297 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
6298 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
6299 'channel': 'Computerphile',
6300 'availability': 'public',
6301 'modified_date': '20190712',
6302 'uploader_id': '@Computerphile',
6303 'uploader': 'Computerphile',
6304 'uploader_url': 'https://www.youtube.com/@Computerphile',
6306 'playlist_mincount': 11,
6307 }, {
6308 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
6309 'only_matching': True,
6310 }, {
6311 'note': 'Playlist URL that does not actually serve a playlist',
6312 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
6313 'info_dict': {
6314 'id': 'FqZTN594JQw',
6315 'ext': 'webm',
6316 'title': "Smiley's People 01 detective, Adventure Series, Action",
6317 'upload_date': '20150526',
6318 'license': 'Standard YouTube License',
6319 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
6320 'categories': ['People & Blogs'],
6321 'tags': list,
6322 'view_count': int,
6323 'like_count': int,
6325 'params': {
6326 'skip_download': True,
6328 'skip': 'This video is not available.',
6329 'add_ie': [YoutubeIE.ie_key()],
6330 }, {
6331 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
6332 'only_matching': True,
6333 }, {
6334 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
6335 'only_matching': True,
6336 }, {
6337 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
6338 'info_dict': {
6339 'id': 'hGkQjiJLjWQ', # This will keep changing
6340 'ext': 'mp4',
6341 'title': str,
6342 'upload_date': r're:\d{8}',
6343 'description': str,
6344 'categories': ['News & Politics'],
6345 'tags': list,
6346 'like_count': int,
6347 'release_timestamp': int,
6348 'channel': 'Sky News',
6349 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
6350 'age_limit': 0,
6351 'view_count': int,
6352 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
6353 'playable_in_embed': True,
6354 'release_date': r're:\d+',
6355 'availability': 'public',
6356 'live_status': 'is_live',
6357 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6358 'channel_follower_count': int,
6359 'concurrent_view_count': int,
6360 'uploader_url': 'https://www.youtube.com/@SkyNews',
6361 'uploader_id': '@SkyNews',
6362 'uploader': 'Sky News',
6363 'channel_is_verified': True,
6365 'params': {
6366 'skip_download': True,
6368 'expected_warnings': ['Ignoring subtitle tracks found in '],
6369 }, {
6370 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
6371 'info_dict': {
6372 'id': 'a48o2S1cPoo',
6373 'ext': 'mp4',
6374 'title': 'The Young Turks - Live Main Show',
6375 'upload_date': '20150715',
6376 'license': 'Standard YouTube License',
6377 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
6378 'categories': ['News & Politics'],
6379 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
6380 'like_count': int,
6382 'params': {
6383 'skip_download': True,
6385 'only_matching': True,
6386 }, {
6387 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
6388 'only_matching': True,
6389 }, {
6390 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
6391 'only_matching': True,
6392 }, {
6393 'note': 'A channel that is not live. Should raise error',
6394 'url': 'https://www.youtube.com/user/numberphile/live',
6395 'only_matching': True,
6396 }, {
6397 'url': 'https://www.youtube.com/feed/trending',
6398 'only_matching': True,
6399 }, {
6400 'url': 'https://www.youtube.com/feed/library',
6401 'only_matching': True,
6402 }, {
6403 'url': 'https://www.youtube.com/feed/history',
6404 'only_matching': True,
6405 }, {
6406 'url': 'https://www.youtube.com/feed/subscriptions',
6407 'only_matching': True,
6408 }, {
6409 'url': 'https://www.youtube.com/feed/watch_later',
6410 'only_matching': True,
6411 }, {
6412 'note': 'Recommended - redirects to home page.',
6413 'url': 'https://www.youtube.com/feed/recommended',
6414 'only_matching': True,
6415 }, {
6416 'note': 'inline playlist with not always working continuations',
6417 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
6418 'only_matching': True,
6419 }, {
6420 'url': 'https://www.youtube.com/course',
6421 'only_matching': True,
6422 }, {
6423 'url': 'https://www.youtube.com/zsecurity',
6424 'only_matching': True,
6425 }, {
6426 'url': 'http://www.youtube.com/NASAgovVideo/videos',
6427 'only_matching': True,
6428 }, {
6429 'url': 'https://www.youtube.com/TheYoungTurks/live',
6430 'only_matching': True,
6431 }, {
6432 'url': 'https://www.youtube.com/hashtag/cctv9',
6433 'info_dict': {
6434 'id': 'cctv9',
6435 'title': 'cctv9 - All',
6436 'tags': [],
6438 'playlist_mincount': 300, # not consistent but should be over 300
6439 }, {
6440 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
6441 'only_matching': True,
6442 }, {
6443 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
6444 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6445 'only_matching': True,
6446 }, {
6447 'note': '/browse/ should redirect to /channel/',
6448 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
6449 'only_matching': True,
6450 }, {
6451 'note': 'VLPL, should redirect to playlist?list=PL...',
6452 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6453 'info_dict': {
6454 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6455 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
6456 'title': 'NCS : All Releases 💿',
6457 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
6458 'modified_date': r're:\d{8}',
6459 'view_count': int,
6460 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
6461 'tags': [],
6462 'channel': 'NoCopyrightSounds',
6463 'availability': 'public',
6464 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
6465 'uploader': 'NoCopyrightSounds',
6466 'uploader_id': '@NoCopyrightSounds',
6468 'playlist_mincount': 166,
6469 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
6470 }, {
6471 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
6472 'note': 'Topic, should redirect to playlist?list=UU...',
6473 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6474 'info_dict': {
6475 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6476 'title': 'Uploads from Royalty Free Music - Topic',
6477 'tags': [],
6478 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6479 'channel': 'Royalty Free Music - Topic',
6480 'view_count': int,
6481 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6482 'modified_date': r're:\d{8}',
6483 'description': '',
6484 'availability': 'public',
6485 'uploader': 'Royalty Free Music - Topic',
6487 'playlist_mincount': 101,
6488 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6489 }, {
6490 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
6491 # Treat as a general feed
6492 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
6493 'info_dict': {
6494 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
6495 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
6496 'tags': [],
6498 'playlist_mincount': 9,
6499 }, {
6500 'note': 'Youtube music Album',
6501 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
6502 'info_dict': {
6503 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
6504 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
6505 'tags': [],
6506 'view_count': int,
6507 'description': '',
6508 'availability': 'unlisted',
6509 'modified_date': r're:\d{8}',
6511 'playlist_count': 50,
6512 'expected_warnings': ['YouTube Music is not directly supported'],
6513 }, {
6514 'note': 'unlisted single video playlist',
6515 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6516 'info_dict': {
6517 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6518 'title': 'yt-dlp unlisted playlist test',
6519 'availability': 'unlisted',
6520 'tags': [],
6521 'modified_date': '20220418',
6522 'channel': 'colethedj',
6523 'view_count': int,
6524 'description': '',
6525 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6526 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
6527 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6528 'uploader_id': '@colethedj1894',
6529 'uploader': 'colethedj',
6531 'playlist': [{
6532 'info_dict': {
6533 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6534 'id': 'BaW_jenozKc',
6535 '_type': 'url',
6536 'ie_key': 'Youtube',
6537 'duration': 10,
6538 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6539 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6540 'view_count': int,
6541 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6542 'channel': 'Philipp Hagemeister',
6543 'uploader_id': '@PhilippHagemeister',
6544 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6545 'uploader': 'Philipp Hagemeister',
6548 'playlist_count': 1,
6549 'params': {'extract_flat': True},
6550 }, {
6551 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6552 'url': 'https://www.youtube.com/feed/recommended',
6553 'info_dict': {
6554 'id': 'recommended',
6555 'title': 'recommended',
6556 'tags': [],
6558 'playlist_mincount': 50,
6559 'params': {
6560 'skip_download': True,
6561 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6563 }, {
6564 'note': 'API Fallback: /videos tab, sorted by oldest first',
6565 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6566 'info_dict': {
6567 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6568 'title': 'Cody\'sLab - Videos',
6569 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
6570 'channel': 'Cody\'sLab',
6571 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6572 'tags': [],
6573 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6574 'channel_follower_count': int,
6576 'playlist_mincount': 650,
6577 'params': {
6578 'skip_download': True,
6579 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6581 'skip': 'Query for sorting no longer works',
6582 }, {
6583 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6584 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6585 'info_dict': {
6586 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6587 'title': 'Uploads from Royalty Free Music - Topic',
6588 'modified_date': r're:\d{8}',
6589 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6590 'description': '',
6591 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6592 'tags': [],
6593 'channel': 'Royalty Free Music - Topic',
6594 'view_count': int,
6595 'availability': 'public',
6596 'uploader': 'Royalty Free Music - Topic',
6598 'playlist_mincount': 101,
6599 'params': {
6600 'skip_download': True,
6601 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6603 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6604 }, {
6605 'note': 'non-standard redirect to regional channel',
6606 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6607 'only_matching': True,
6608 }, {
6609 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6610 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6611 'info_dict': {
6612 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6613 'modified_date': '20220407',
6614 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6615 'tags': [],
6616 'availability': 'unlisted',
6617 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6618 'channel': 'pukkandan',
6619 'description': 'Test for collaborative playlist',
6620 'title': 'yt-dlp test - collaborative playlist',
6621 'view_count': int,
6622 'uploader_url': 'https://www.youtube.com/@pukkandan',
6623 'uploader_id': '@pukkandan',
6624 'uploader': 'pukkandan',
6626 'playlist_mincount': 2,
6627 }, {
6628 'note': 'translated tab name',
6629 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6630 'info_dict': {
6631 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6632 'tags': [],
6633 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6634 'description': 'test description',
6635 'title': 'cole-dlp-test-acc - 再生リスト',
6636 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6637 'channel': 'cole-dlp-test-acc',
6638 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6639 'uploader_id': '@coletdjnz',
6640 'uploader': 'cole-dlp-test-acc',
6642 'playlist_mincount': 1,
6643 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6644 'expected_warnings': ['Preferring "ja"'],
6645 }, {
6646 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6647 'note': 'preferred lang set with playlist with translated video titles',
6648 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6649 'info_dict': {
6650 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6651 'tags': [],
6652 'view_count': int,
6653 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6654 'channel': 'cole-dlp-test-acc',
6655 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6656 'description': 'test',
6657 'title': 'dlp test playlist',
6658 'availability': 'public',
6659 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6660 'uploader_id': '@coletdjnz',
6661 'uploader': 'cole-dlp-test-acc',
6663 'playlist_mincount': 1,
6664 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6665 'expected_warnings': ['Preferring "ja"'],
6666 }, {
6667 # shorts audio pivot for 2GtVksBMYFM.
6668 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6669 'info_dict': {
6670 'id': 'sfv_audio_pivot',
6671 'title': 'sfv_audio_pivot',
6672 'tags': [],
6674 'playlist_mincount': 50,
6676 }, {
6677 # Channel with a real live tab (not to be mistaken with streams tab)
6678 # Do not treat like it should redirect to live stream
6679 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6680 'info_dict': {
6681 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6682 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6683 'tags': [],
6685 'playlist_mincount': 20,
6686 }, {
6687 # Tab name is not the same as tab id
6688 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6689 'info_dict': {
6690 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6691 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6692 'tags': [],
6694 'playlist_mincount': 8,
6695 }, {
6696 # Home tab id is literally home. Not to get mistaken with featured
6697 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6698 'info_dict': {
6699 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6700 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6701 'tags': [],
6703 'playlist_mincount': 8,
6704 }, {
6705 # Should get three playlists for videos, shorts and streams tabs
6706 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6707 'info_dict': {
6708 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6709 'title': 'Polka Ch. 尾丸ポルカ',
6710 'channel_follower_count': int,
6711 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6712 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6713 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
6714 'channel': 'Polka Ch. 尾丸ポルカ',
6715 'tags': 'count:35',
6716 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6717 'uploader': 'Polka Ch. 尾丸ポルカ',
6718 'uploader_id': '@OmaruPolka',
6719 'channel_is_verified': True,
6721 'playlist_count': 3,
6722 }, {
6723 # Shorts tab with channel with handle
6724 # TODO: fix channel description
6725 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6726 'info_dict': {
6727 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6728 'title': 'Not Just Bikes - Shorts',
6729 'tags': 'count:10',
6730 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
6731 'description': 'md5:5e82545b3a041345927a92d0585df247',
6732 'channel_follower_count': int,
6733 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
6734 'channel': 'Not Just Bikes',
6735 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6736 'uploader': 'Not Just Bikes',
6737 'uploader_id': '@NotJustBikes',
6738 'channel_is_verified': True,
6740 'playlist_mincount': 10,
6741 }, {
6742 # Streams tab
6743 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6744 'info_dict': {
6745 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6746 'title': '中村悠一 - Live',
6747 'tags': 'count:7',
6748 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6749 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
6750 'channel': '中村悠一',
6751 'channel_follower_count': int,
6752 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
6753 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6754 'uploader_id': '@Yuichi-Nakamura',
6755 'uploader': '中村悠一',
6757 'playlist_mincount': 60,
6758 }, {
6759 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6760 # See test_youtube_lists
6761 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6762 'only_matching': True,
6763 }, {
6764 # No uploads and no UCID given. Should fail with no uploads error
6765 # See test_youtube_lists
6766 'url': 'https://www.youtube.com/news',
6767 'only_matching': True,
6768 }, {
6769 # No videos tab but has a shorts tab
6770 'url': 'https://www.youtube.com/c/TKFShorts',
6771 'info_dict': {
6772 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6773 'title': 'Shorts Break - Shorts',
6774 'tags': 'count:48',
6775 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6776 'channel': 'Shorts Break',
6777 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
6778 'channel_follower_count': int,
6779 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
6780 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6781 'uploader': 'Shorts Break',
6782 'uploader_id': '@ShortsBreak_Official',
6784 'playlist_mincount': 30,
6785 }, {
6786 # Trending Now Tab. tab id is empty
6787 'url': 'https://www.youtube.com/feed/trending',
6788 'info_dict': {
6789 'id': 'trending',
6790 'title': 'trending - Now',
6791 'tags': [],
6793 'playlist_mincount': 30,
6794 }, {
6795 # Trending Gaming Tab. tab id is empty
6796 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6797 'info_dict': {
6798 'id': 'trending',
6799 'title': 'trending - Gaming',
6800 'tags': [],
6802 'playlist_mincount': 30,
6803 }, {
6804 # Shorts url result in shorts tab
6805 # TODO: Fix channel id extraction
6806 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6807 'info_dict': {
6808 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6809 'title': 'cole-dlp-test-acc - Shorts',
6810 'channel': 'cole-dlp-test-acc',
6811 'description': 'test description',
6812 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6813 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6814 'tags': [],
6815 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6816 'uploader_id': '@coletdjnz',
6817 'uploader': 'cole-dlp-test-acc',
6819 'playlist': [{
6820 'info_dict': {
6821 # Channel data is not currently available for short renderers (as of 2023-03-01)
6822 '_type': 'url',
6823 'ie_key': 'Youtube',
6824 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6825 'id': 'sSM9J5YH_60',
6826 'title': 'SHORT short',
6827 'view_count': int,
6828 'thumbnails': list,
6831 'params': {'extract_flat': True},
6832 }, {
6833 # Live video status should be extracted
6834 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6835 'info_dict': {
6836 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6837 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
6838 'tags': [],
6840 'playlist': [{
6841 'info_dict': {
6842 '_type': 'url',
6843 'ie_key': 'Youtube',
6844 'url': 'startswith:https://www.youtube.com/watch?v=',
6845 'id': str,
6846 'title': str,
6847 'live_status': 'is_live',
6848 'channel_id': str,
6849 'channel_url': str,
6850 'concurrent_view_count': int,
6851 'channel': str,
6852 'uploader': str,
6853 'uploader_url': str,
6854 'uploader_id': str,
6855 'channel_is_verified': bool, # this will keep changing
6858 'params': {'extract_flat': True, 'playlist_items': '1'},
6859 'playlist_mincount': 1,
6860 }, {
6861 # Channel renderer metadata. Contains number of videos on the channel
6862 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6863 'info_dict': {
6864 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6865 'title': 'cole-dlp-test-acc - Channels',
6866 'channel': 'cole-dlp-test-acc',
6867 'description': 'test description',
6868 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6869 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6870 'tags': [],
6871 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6872 'uploader_id': '@coletdjnz',
6873 'uploader': 'cole-dlp-test-acc',
6875 'playlist': [{
6876 'info_dict': {
6877 '_type': 'url',
6878 'ie_key': 'YoutubeTab',
6879 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6880 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6881 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6882 'title': 'PewDiePie',
6883 'channel': 'PewDiePie',
6884 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6885 'thumbnails': list,
6886 'channel_follower_count': int,
6887 'playlist_count': int,
6888 'uploader': 'PewDiePie',
6889 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6890 'uploader_id': '@PewDiePie',
6891 'channel_is_verified': True,
6894 'params': {'extract_flat': True},
6895 }, {
6896 'url': 'https://www.youtube.com/@3blue1brown/about',
6897 'info_dict': {
6898 'id': '@3blue1brown',
6899 'tags': ['Mathematics'],
6900 'title': '3Blue1Brown',
6901 'channel_follower_count': int,
6902 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6903 'channel': '3Blue1Brown',
6904 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6905 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
6906 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6907 'uploader_id': '@3blue1brown',
6908 'uploader': '3Blue1Brown',
6909 'channel_is_verified': True,
6911 'playlist_count': 0,
6912 }, {
6913 # Podcasts tab, with rich entry playlistRenderers
6914 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6915 'info_dict': {
6916 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6917 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6918 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6919 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6920 'title': '99 Percent Invisible - Podcasts',
6921 'uploader': '99 Percent Invisible',
6922 'channel_follower_count': int,
6923 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6924 'tags': [],
6925 'channel': '99 Percent Invisible',
6926 'uploader_id': '@99percentinvisiblepodcast',
6928 'playlist_count': 0,
6929 }, {
6930 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6931 'url': 'https://www.youtube.com/@AHimitsu/releases',
6932 'info_dict': {
6933 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6934 'channel': 'A Himitsu',
6935 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6936 'title': 'A Himitsu - Releases',
6937 'uploader_id': '@AHimitsu',
6938 'uploader': 'A Himitsu',
6939 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6940 'tags': 'count:12',
6941 'description': 'I make music',
6942 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6943 'channel_follower_count': int,
6944 'channel_is_verified': True,
6946 'playlist_mincount': 10,
6947 }, {
6948 # Playlist with only shorts, shown as reel renderers
6949 # FIXME: future: YouTube currently doesn't give continuation for this,
6950 # may do in future.
6951 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6952 'info_dict': {
6953 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6954 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6955 'view_count': int,
6956 'uploader_id': '@BangyShorts',
6957 'description': '',
6958 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6959 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6960 'channel': 'Bangy Shorts',
6961 'uploader': 'Bangy Shorts',
6962 'tags': [],
6963 'availability': 'public',
6964 'modified_date': r're:\d{8}',
6965 'title': 'Uploads from Bangy Shorts',
6967 'playlist_mincount': 100,
6968 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6969 }, {
6970 'note': 'Tags containing spaces',
6971 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6972 'playlist_count': 3,
6973 'info_dict': {
6974 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6975 'channel': 'Markiplier',
6976 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6977 'title': 'Markiplier',
6978 'channel_follower_count': int,
6979 'description': 'md5:0c010910558658824402809750dc5d97',
6980 'uploader_id': '@markiplier',
6981 'uploader_url': 'https://www.youtube.com/@markiplier',
6982 'uploader': 'Markiplier',
6983 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6984 'channel_is_verified': True,
6985 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6986 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6987 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
6988 'mark fischbach'],
@classmethod
def suitable(cls, url):
    """Report whether this extractor should handle *url*.

    Any URL that ``YoutubeIE`` accepts is rejected here; every other URL
    is judged by the inherited ``suitable`` check.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
# Splits a URL into three named parts:
#   pre  - the portion matched by _VALID_URL
#   tab  - an optional single path segment (e.g. '/videos'); it is only
#          attempted when the `not_channel` group did not participate in the match
#          (`not_channel` is presumably defined inside _VALID_URL, outside this view)
#   post - everything that remains (query string, fragment, further path)
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6998 def _get_url_mobj(self, url):
6999 mobj = self._URL_RE.match(url).groupdict()
7000 mobj.update((k, '') for k, v in mobj.items() if v is None)
7001 return mobj
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` pair for a tab renderer dict.

    The name is the lower-cased ``title`` of *tab* ('' when absent).
    The id is looked up, in order, from:
      1. the tab segment of the tab's endpoint URL (resolved against *base_url*),
      2. the ``tabIdentifier`` field,
      3. the tab name itself, with a couple of known aliases mapped.
    """
    name = (tab.get('title') or '').lower()
    endpoint_url = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    absolute_url = urljoin(base_url, endpoint_url)

    # Drop the leading '/' of the matched tab segment, e.g. '/videos' -> 'videos'
    identifier = self._get_url_mobj(absolute_url)['tab'][1:] if absolute_url else None
    if not identifier:
        identifier = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if identifier:
        if identifier == 'TAB_ID_SPONSORSHIPS':
            identifier = 'membership'
        return identifier, name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if name:
        self.write_debug(f'Falling back to selected tab name: {name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(name, name), name
7025 def _has_tab(self, tabs, tab_id):
7026 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
7028 def _empty_playlist(self, item_id, data):
7029 return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel / tab / feed / playlist URL into a result dict.

    Depending on what the page turns out to be, this returns a
    ``url_result`` redirect (to YoutubeTabIE or YoutubeIE), an empty
    playlist, a single tab playlist, or a playlist of per-tab playlists.

    url           -- the URL to extract
    smuggled_data -- dict of extra data; only the 'is_music_url' key is read
                     here (NOTE(review): presumably supplied by the
                     passthrough_smuggled_data decorator -- defined outside this view)

    Raises ExtractorError when a watch URL carries neither video nor playlist
    id, when a music album cannot be resolved, when a requested tab does not
    exist, or when the page cannot be recognized; raises UserNotLive when a
    /live URL lands on a different tab.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com, keeping every other URL component
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    # The URL is treated as a channel URL when the `not_channel` group is empty
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` includes its leading slash (e.g. '/videos'); the tab id does not
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    # A bare channel URL is pointed at its /videos tab unless the compat option disables it
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list'))
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # Hand over to the single-video extractor when the playlist should not be downloaded
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Re-attach the originally requested tab and trailing part to the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of further tabs to extract alongside the main one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        # /about is no longer a tab
        if original_tab_id == 'about':
            return self._empty_playlist(item_id, data)

        # No tab was requested: gather uploads from the videos, streams and shorts tabs
        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    return self._empty_playlist(item_id, data)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    # `data` still holds the channel page here since the assignment above did not complete
                    return self._empty_playlist(item_id, data)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                # `data` is cleared so no main-tab entry is built below; only `extra_tabs` get extracted
                url, data = f'{pre}{post}', None

        # A tab was requested (or defaulted to 'videos') but a different one came back selected
        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Re-read the tab renderers: `data` may have been replaced or cleared above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        # Several tabs were extracted: wrap them in one outer playlist
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: fall back to the video the page points at, or the `v` query parameter
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
    """Match playlist URLs and bare playlist IDs, then delegate to YoutubeTabIE."""

    IE_DESC = 'YouTube playlists'
    # The host/path prefix is optional so that a bare playlist ID also matches
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                {invidious}
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>{playlist_id})
                     )'''.format(
        playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    )
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle `url`.

        URLs that YoutubeTabIE accepts, and URLs carrying a non-empty `v`
        query parameter, are rejected; everything else falls back to the
        regular `_VALID_URL` check of the parent class.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # `parse_qs` is imported at module level; no function-scope import needed
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Rewrite `url` to a canonical /playlist URL and hand it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be evaluated on the original URL, before it is rewritten below
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one; a bare ID has none, so
        # fall back to a query that only carries the playlist ID
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeYtBeIE(YoutubeBaseInfoExtractor):
    """Handle youtu.be short links that also carry a `list=` playlist ID."""

    IE_DESC = 'youtu.be'
    _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        # The result is identified by the playlist ID, not the video ID
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeLivestreamEmbedIE(YoutubeBaseInfoExtractor):
    """Map `embed/live_stream?channel=...` URLs to the channel's /live page."""

    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
class YoutubeYtUserIE(YoutubeBaseInfoExtractor):
    """Resolve the `ytuser:<name>` pseudo-URL to the matching /user/ page."""

    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the user's page URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolve the `:ytfav` keyword (and its spelling variants) to the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `LL` playlist URL."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    """Expose the notification menu as a playlist of video/community-post URLs."""

    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter:
        slot 0 is reset to None and then set to the last
        `continuationItemRenderer` encountered, if any.
        """
        # First page layout, then the layout used by continuation pages
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        continuation_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(
            response, first_page_path, continuation_page_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert one notification renderer into a `url` result dict.

        Returns None when the notification points neither to a video nor to
        a channel post (channel ID plus post ID).
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        short_message = self._get_text(notification, 'shortMessage')
        if short_message:
            short_message = short_message.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', short_message,
            'video title', default=None)

        # The sent-time text is only parsed when `approximate_date` is enabled
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield notification entries page by page until no continuation is left."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is taken from the previous page's response (None on page 1)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a lazily evaluated playlist."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """`ytsearch` keyword search; all behaviour comes from the base classes."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    """`ytsearchdate` keyword search; all behaviour comes from the base classes."""

    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle youtube.com /results and /search URLs."""

    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the search results as a playlist whose id and title are the query."""
        qs = parse_qs(url)
        # `search_query` takes precedence over `q`; _VALID_URL requires one of them
        query_values = qs.get('search_query') or qs.get('q')
        query = query_values[0]
        search_params = qs.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Handle music.youtube.com search URLs, optionally limited to one section."""

    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> `sp` search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled `<query>[ - <section>]`.

        An explicit `sp` query parameter wins; otherwise the URL fragment is
        looked up in `_SECTIONS`.
        """
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        params = qs.get('sp', (None,))[0]
        if params:
            # Use the section name for a known `sp` value, else the raw value itself
            section = next(
                (name for name, value in self._SECTIONS.items() if value == params), params)
        else:
            # Text after the first '#' (empty when the URL has no fragment)
            fragment = [*url.split('#'), ''][1]
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None
        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Common base for the feed extractors.
    Subclasses are expected to override `_FEED_NAME`; it determines both
    `IE_NAME` and the /feed/ URL that extraction is delegated to.
    """
    _LOGIN_REQUIRED = True
    _FEED_NAME = 'feeds'

    @classproperty
    def IE_NAME(cls):
        return f'youtube:{cls._FEED_NAME}'

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with this feed's /feed/ URL."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
class YoutubeWatchLaterIE(YoutubeBaseInfoExtractor):
    """Resolve the `:ytwatchlater` keyword to the `WL` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `:ytrec` keyword and the bare youtube.com home URL."""

    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Overrides the `True` default set by YoutubeFeedsInfoExtractor
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `:ytsubs` keyword and its spelling variants."""

    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `:ythis` / `:ythistory` keyword."""

    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
class YoutubeShortsAudioPivotIE(YoutubeBaseInfoExtractor):
    """Map /source/<video id>/shorts URLs to the `sfv_audio_pivot` feed."""

    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    IE_NAME = 'youtube:shorts:pivot:audio'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [{
        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
        'only_matching': True,
    }]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the sfv_audio_pivot browse params for this video id

        The id is embedded three times in a fixed byte template, which is
        then base64-encoded and percent-quoted.
        """
        encoded_id = video_id.encode()
        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_params = base64.b64encode(pb_params).decode()
        return urllib.parse.quote(b64_params)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the audio pivot feed URL."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
class YoutubeTruncatedURLIE(YoutubeBaseInfoExtractor):
    """Catch watch/attribution URLs that carry no video ID and explain why."""

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError hinting at an unquoted URL."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Resolve a /clip/ URL to its base video plus the clip's start/end section."""

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        },
    }]

    def _real_extract(self, url):
        """Return a `url_transparent` result for the clip's base video.

        Raises ExtractorError when the page data contains no video ID or no
        clip loop command (which carries the start/end times in ms).
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        # Without this guard a missing loop command surfaces as a bare
        # TypeError from subscripting None below
        if not clip_data:
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds; the sections are in seconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
            '_format_sort_fields': (  # https protocol is prioritized for ffmpeg compatibility
                'proto:https', 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec', 'channels', 'acodec', 'lang'),
        }
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    """Follow consent.youtube.com redirect URLs to their `continue` target."""

    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Delegate to whichever extractor matches the `continue` URL.

        Raises an expected ExtractorError when `continue` is missing or is
        not a valid URL.
        """
        # If `continue` is given several times, the last value is used
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
class YoutubeTruncatedIDIE(YoutubeBaseInfoExtractor):
    """Catch watch URLs whose `v` value is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete ID."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)