[cleanup] Misc (#10623)
[yt-dlp3.git] / yt_dlp / extractor / youtube.py
blob88e1a28ae3c03c5ca87a41d377115b5394f9f7b4
1 import base64
2 import calendar
3 import collections
4 import copy
5 import datetime as dt
6 import enum
7 import functools
8 import hashlib
9 import itertools
10 import json
11 import math
12 import os.path
13 import random
14 import re
15 import shlex
16 import sys
17 import threading
18 import time
19 import traceback
20 import urllib.parse
22 from .common import InfoExtractor, SearchInfoExtractor
23 from .openload import PhantomJSwrapper
24 from ..jsinterp import JSInterpreter
25 from ..networking.exceptions import HTTPError, network_exceptions
26 from ..utils import (
27 NO_DEFAULT,
28 ExtractorError,
29 LazyList,
30 UserNotLive,
31 bug_reports_message,
32 classproperty,
33 clean_html,
34 datetime_from_str,
35 dict_get,
36 filesize_from_tbr,
37 filter_dict,
38 float_or_none,
39 format_field,
40 get_first,
41 int_or_none,
42 is_html,
43 join_nonempty,
44 js_to_json,
45 mimetype2ext,
46 orderedSet,
47 parse_codecs,
48 parse_count,
49 parse_duration,
50 parse_iso8601,
51 parse_qs,
52 qualities,
53 remove_start,
54 smuggle_url,
55 str_or_none,
56 str_to_int,
57 strftime_or_none,
58 traverse_obj,
59 try_call,
60 try_get,
61 unescapeHTML,
62 unified_strdate,
63 unified_timestamp,
64 unsmuggle_url,
65 update_url_query,
66 url_or_none,
67 urljoin,
68 variadic,
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'

# Per-client InnerTube settings, keyed by the client name used inside this module.
# Each entry carries the request context sent to the API ('INNERTUBE_CONTEXT'),
# the numeric client id ('INNERTUBE_CONTEXT_CLIENT_NAME') and optional overrides
# ('INNERTUBE_HOST', 'REQUIRE_JS_PLAYER', 'PLAYER_PARAMS').
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
    'web_safari': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240726.00.00',
                'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20240723.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20240724.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20240723.03.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '19.29.37',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.29.37 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '7.11.50',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/7.11.50 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '24.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/24.30.100 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # YouTube Kids videos aren't returned on this client for some reason
    'android_vr': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_VR',
                'clientVersion': '1.57.29',
                'deviceMake': 'Oculus',
                'deviceModel': 'Quest 3',
                'androidSdkVersion': 32,
                'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.57.29 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
                'osName': 'Android',
                'osVersion': '12L',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_testsuite': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_TESTSUITE',
                'clientVersion': '1.9',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 30,
        'REQUIRE_JS_PLAYER': False,
        'PLAYER_PARAMS': '2AMB',
    },
    # This client only has legacy formats and storyboards
    'android_producer': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_PRODUCER',
                'clientVersion': '0.111.1',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 91,
        'REQUIRE_JS_PLAYER': False,
    },
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '19.29.1',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '17.5.1.21F90',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '7.08.2',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtubemusic/7.08.2 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '17.5.1.21F90',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '24.30.100',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.ytcreator/24.30.100 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '17.5.1.21F90',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20240726.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    'tv': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5',
                'clientVersion': '7.20240724.13.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
    # This client has pre-merged video+audio 720p/1080p streams
    'mediaconnect': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MEDIA_CONNECT_FRONTEND',
                'clientVersion': '0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
        'REQUIRE_JS_PLAYER': False,
    },
}
303 def _split_innertube_client(client_name):
304 variant, *base = client_name.rsplit('.', 1)
305 if base:
306 return variant, base[0], variant
307 base, *variant = client_name.split('_', 1)
308 return client_name, base, variant[0] if variant else None
def short_client_name(client_name):
    """
    Build an upper-cased abbreviation of a client name.

    Uses the first four characters of the leading '_'-separated segment and
    the first character of every following segment, combined via join_nonempty.
    """
    segments = _split_innertube_client(client_name)[0].split('_')
    prefix = segments[0][:4]
    initials = ''.join(segment[0] for segment in segments[1:])
    return join_nonempty(prefix, initials).upper()
def build_innertube_clients():
    """
    Fill in defaults and a 'priority' value for every INNERTUBE_CLIENTS entry, in place.

    Missing 'INNERTUBE_HOST', 'REQUIRE_JS_PLAYER', 'PLAYER_PARAMS' and the
    client 'hl' get default values. Priority is ten times the rank of the base
    client, minus 2 for 'embedded' variants (which also receive a 'thirdParty'
    context) or minus 3 for any other variant.
    """
    third_party_context = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'web', 'tv', 'mweb', 'android')
    # NOTE(review): presumably ranks earlier base_clients entries higher - confirm against qualities()
    rank = qualities(base_clients[::-1])

    for name, config in list(INNERTUBE_CLIENTS.items()):
        for key, fallback in (
            ('INNERTUBE_HOST', 'www.youtube.com'),
            ('REQUIRE_JS_PLAYER', True),
            ('PLAYER_PARAMS', None),
        ):
            config.setdefault(key, fallback)
        innertube_context = config['INNERTUBE_CONTEXT']
        innertube_context['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(name)[1:]
        base_priority = 10 * rank(base_client)

        if variant == 'embedded':
            # All embedded clients share the same thirdParty dict object
            innertube_context['thirdParty'] = third_party_context
            penalty = 2
        elif variant:
            penalty = 3
        else:
            penalty = 0
        config['priority'] = base_priority - penalty


build_innertube_clients()
class BadgeType(enum.Enum):
    """Kinds of badge recognised by the extractors; values are assigned by enum.auto()."""

    # Availability of the item the badge is attached to
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Status badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
352 class YoutubeBaseInfoExtractor(InfoExtractor):
353 """Provide base functions for Youtube extractors"""
355 _RESERVED_NAMES = (
356 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
357 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
358 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
359 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
361 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
363 # _NETRC_MACHINE = 'youtube'
365 # If True it will raise an error if no login info is provided
366 _LOGIN_REQUIRED = False
368 _INVIDIOUS_SITES = (
369 # invidious-redirect websites
370 r'(?:www\.)?redirect\.invidious\.io',
371 r'(?:(?:www|dev)\.)?invidio\.us',
372 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
373 r'(?:www\.)?invidious\.pussthecat\.org',
374 r'(?:www\.)?invidious\.zee\.li',
375 r'(?:www\.)?invidious\.ethibox\.fr',
376 r'(?:www\.)?iv\.ggtyler\.dev',
377 r'(?:www\.)?inv\.vern\.i2p',
378 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
379 r'(?:www\.)?inv\.riverside\.rocks',
380 r'(?:www\.)?invidious\.silur\.me',
381 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
382 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
383 r'(?:www\.)?invidious\.slipfox\.xyz',
384 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
385 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
386 r'(?:www\.)?invidious\.tiekoetter\.com',
387 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
388 r'(?:www\.)?invidious\.nerdvpn\.de',
389 r'(?:www\.)?invidious\.weblibre\.org',
390 r'(?:www\.)?inv\.odyssey346\.dev',
391 r'(?:www\.)?invidious\.dhusch\.de',
392 r'(?:www\.)?iv\.melmac\.space',
393 r'(?:www\.)?watch\.thekitty\.zone',
394 r'(?:www\.)?invidious\.privacydev\.net',
395 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
396 r'(?:www\.)?invidious\.drivet\.xyz',
397 r'(?:www\.)?vid\.priv\.au',
398 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
399 r'(?:www\.)?inv\.vern\.cc',
400 r'(?:www\.)?invidious\.esmailelbob\.xyz',
401 r'(?:www\.)?invidious\.sethforprivacy\.com',
402 r'(?:www\.)?yt\.oelrichsgarcia\.de',
403 r'(?:www\.)?yt\.artemislena\.eu',
404 r'(?:www\.)?invidious\.flokinet\.to',
405 r'(?:www\.)?invidious\.baczek\.me',
406 r'(?:www\.)?y\.com\.sb',
407 r'(?:www\.)?invidious\.epicsite\.xyz',
408 r'(?:www\.)?invidious\.lidarshield\.cloud',
409 r'(?:www\.)?yt\.funami\.tech',
410 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
411 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
412 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
413 # youtube-dl invidious instances list
414 r'(?:(?:www|no)\.)?invidiou\.sh',
415 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
416 r'(?:www\.)?invidious\.kabi\.tk',
417 r'(?:www\.)?invidious\.mastodon\.host',
418 r'(?:www\.)?invidious\.zapashcanon\.fr',
419 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
420 r'(?:www\.)?invidious\.tinfoil-hat\.net',
421 r'(?:www\.)?invidious\.himiko\.cloud',
422 r'(?:www\.)?invidious\.reallyancient\.tech',
423 r'(?:www\.)?invidious\.tube',
424 r'(?:www\.)?invidiou\.site',
425 r'(?:www\.)?invidious\.site',
426 r'(?:www\.)?invidious\.xyz',
427 r'(?:www\.)?invidious\.nixnet\.xyz',
428 r'(?:www\.)?invidious\.048596\.xyz',
429 r'(?:www\.)?invidious\.drycat\.fr',
430 r'(?:www\.)?inv\.skyn3t\.in',
431 r'(?:www\.)?tube\.poal\.co',
432 r'(?:www\.)?tube\.connect\.cafe',
433 r'(?:www\.)?vid\.wxzm\.sx',
434 r'(?:www\.)?vid\.mint\.lgbt',
435 r'(?:www\.)?vid\.puffyan\.us',
436 r'(?:www\.)?yewtu\.be',
437 r'(?:www\.)?yt\.elukerio\.org',
438 r'(?:www\.)?yt\.lelux\.fi',
439 r'(?:www\.)?invidious\.ggc-project\.de',
440 r'(?:www\.)?yt\.maisputain\.ovh',
441 r'(?:www\.)?ytprivate\.com',
442 r'(?:www\.)?invidious\.13ad\.de',
443 r'(?:www\.)?invidious\.toot\.koeln',
444 r'(?:www\.)?invidious\.fdn\.fr',
445 r'(?:www\.)?watch\.nettohikari\.com',
446 r'(?:www\.)?invidious\.namazso\.eu',
447 r'(?:www\.)?invidious\.silkky\.cloud',
448 r'(?:www\.)?invidious\.exonip\.de',
449 r'(?:www\.)?invidious\.riverside\.rocks',
450 r'(?:www\.)?invidious\.blamefran\.net',
451 r'(?:www\.)?invidious\.moomoo\.de',
452 r'(?:www\.)?ytb\.trom\.tf',
453 r'(?:www\.)?yt\.cyberhost\.uk',
454 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
455 r'(?:www\.)?qklhadlycap4cnod\.onion',
456 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
457 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
458 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
459 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
460 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
461 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
462 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
463 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
464 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
465 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
466 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
467 r'(?:www\.)?piped\.kavin\.rocks',
468 r'(?:www\.)?piped\.tokhmi\.xyz',
469 r'(?:www\.)?piped\.syncpundit\.io',
470 r'(?:www\.)?piped\.mha\.fi',
471 r'(?:www\.)?watch\.whatever\.social',
472 r'(?:www\.)?piped\.garudalinux\.org',
473 r'(?:www\.)?piped\.rivo\.lol',
474 r'(?:www\.)?piped-libre\.kavin\.rocks',
475 r'(?:www\.)?yt\.jae\.fi',
476 r'(?:www\.)?piped\.mint\.lgbt',
477 r'(?:www\.)?il\.ax',
478 r'(?:www\.)?piped\.esmailelbob\.xyz',
479 r'(?:www\.)?piped\.projectsegfau\.lt',
480 r'(?:www\.)?piped\.privacydev\.net',
481 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
482 r'(?:www\.)?piped\.smnz\.de',
483 r'(?:www\.)?piped\.adminforge\.de',
484 r'(?:www\.)?watch\.whatevertinfoil\.de',
485 r'(?:www\.)?piped\.qdi\.fi',
486 r'(?:(?:www|cf)\.)?piped\.video',
487 r'(?:www\.)?piped\.aeong\.one',
488 r'(?:www\.)?piped\.moomoo\.me',
489 r'(?:www\.)?piped\.chauvet\.pro',
490 r'(?:www\.)?watch\.leptons\.xyz',
491 r'(?:www\.)?pd\.vern\.cc',
492 r'(?:www\.)?piped\.hostux\.net',
493 r'(?:www\.)?piped\.lunar\.icu',
494 # Hyperpipe instances from https://hyperpipe.codeberg.page/
495 r'(?:www\.)?hyperpipe\.surge\.sh',
496 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
497 r'(?:www\.)?listen\.whatever\.social',
498 r'(?:www\.)?music\.adminforge\.de',
501 # extracted from account/account_menu ep
502 # XXX: These are the supported YouTube UI and API languages,
503 # which is slightly different from languages supported for translation in YouTube studio
504 _SUPPORTED_LANG_CODES = [
505 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
506 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
507 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
508 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
509 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
510 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
513 _IGNORED_WARNINGS = {
514 'Unavailable videos will be hidden during playback',
515 'Unavailable videos are hidden',
518 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
519 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
def ucid_or_none(self, ucid):
    """Search `ucid` for a whole-string _YT_CHANNEL_UCID_RE match, with None as the default."""
    anchored_pattern = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(anchored_pattern, ucid, 'UC-id', default=None)
def handle_or_none(self, handle):
    """Search `handle` for a whole-string _YT_HANDLE_RE match, with None as the default."""
    anchored_pattern = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(anchored_pattern, handle, '@-handle', default=None)
def handle_from_url(self, url):
    """
    Search the start of `url` for an @-handle path segment, with None as the default.

    The youtube.com scheme/host prefix is optional in the pattern, so a bare
    '/@handle' path is accepted as well.
    """
    handle_pattern = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(handle_pattern, url, 'channel handle', default=None)
def ucid_from_url(self, url):
    """
    Search the start of `url` for a UC-id path segment, with None as the default.

    The youtube.com scheme/host prefix is optional in the pattern, so a bare
    '/UC...' path is accepted as well.
    """
    ucid_pattern = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(ucid_pattern, url, 'channel id', default=None)
@functools.cached_property
def _preferred_lang(self):
    """
    Returns a language code supported by YouTube for the user preferred language.
    Returns None if no preferred language set.

    Raises ExtractorError when the configured code is not in _SUPPORTED_LANG_CODES,
    and reports a warning for any supported code other than 'en'.
    """
    lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not lang:
        return None

    if lang not in self._SUPPORTED_LANG_CODES:
        supported = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=', ')
        raise ExtractorError(
            f'Unsupported language code: {lang}. Supported language codes (case-sensitive): {supported}.',
            expected=True)

    if lang != 'en':
        self.report_warning(
            f'Preferring "{lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return lang
def _initialize_consent(self):
    """
    Set the SOCS cookie to 'CAI' unless the cookie jar makes that unnecessary.

    Nothing is changed when a '__Secure-3PSID' cookie exists, or when a SOCS
    cookie exists whose value does not start with 'CAA'.
    """
    yt_cookies = self._get_cookies('https://www.youtube.com/')
    if yt_cookies.get('__Secure-3PSID'):
        return
    socs_cookie = yt_cookies.get('SOCS')
    # A value starting with 'CAA' is treated as "not consented"; anything else is left alone
    needs_consent = not socs_cookie or socs_cookie.value.startswith('CAA')
    if needs_consent:
        # accept all (required for mixes)
        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)
def _initialize_pref(self):
    """
    Rewrite the PREF cookie so that 'hl' and 'tz' are forced.

    Existing PREF settings are kept when the cookie value parses as a query
    string; 'hl' becomes the preferred language (or 'en') and 'tz' becomes 'UTC'.
    """
    pref_cookie = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if pref_cookie:
        try:
            settings = dict(urllib.parse.parse_qsl(pref_cookie.value))
        except ValueError:
            # Fall back to an empty PREF rather than aborting initialization
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
def _real_initialize(self):
    """Run the initialization steps in order: PREF cookie, consent cookie, login check."""
    for step in (self._initialize_pref, self._initialize_consent, self._check_login_required):
        step()
def _check_login_required(self):
    """Call raise_login_required when _LOGIN_REQUIRED is set and no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
583 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
584 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`, so callers may mutate it."""
    client_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(client_config)
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' configured for `client` in INNERTUBE_CLIENTS."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """
    try_get but with fallback to default ytcfg client values when present.

    The default client config is only consulted when the lookup on `ytcfg`
    gives a falsy result.
    """
    found = try_get(ytcfg, getter, expected_type)
    if found:
        return found
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
def _extract_client_name(self, ytcfg, default_client='web'):
    """
    Look up the client name as a str, falling back to the default client config.

    Tries 'INNERTUBE_CLIENT_NAME' and the context's 'clientName' as getters.
    """
    name_getters = (
        lambda x: x['INNERTUBE_CLIENT_NAME'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
    """
    Look up the client version as a str, falling back to the default client config.

    Tries 'INNERTUBE_CLIENT_VERSION' and the context's 'clientVersion' as getters.
    """
    version_getters = (
        lambda x: x['INNERTUBE_CLIENT_VERSION'],
        lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """
    Choose the API hostname to use.

    Precedence: the 'innertube_host' configuration argument, then
    `req_api_hostname`, then the host of `default_client` (or 'web').
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
def _extract_context(self, ytcfg=None, default_client='web'):
    """
    Return the 'INNERTUBE_CONTEXT' dict from `ytcfg`, or from the default client config.

    The context's 'client' dict is updated in place with the enforced language
    and UTC timezone settings before the context is returned.
    """
    config_candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(config_candidates, 'INNERTUBE_CONTEXT', expected_type=dict)
    # Enforce language and tz for extraction
    client_context = traverse_obj(context, 'client', expected_type=dict, default={})
    enforced_settings = {
        'hl': self._preferred_lang or 'en',
        'timeZone': 'UTC',
        'utcOffsetMinutes': 0,
    }
    client_context.update(enforced_settings)
    return context
619 _SAPISID = None
621 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
622 time_now = round(time.time())
623 if self._SAPISID is None:
624 yt_cookies = self._get_cookies('https://www.youtube.com')
625 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
626 # See: https://github.com/yt-dlp/yt-dlp/issues/393
627 sapisid_cookie = dict_get(
628 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
629 if sapisid_cookie and sapisid_cookie.value:
630 self._SAPISID = sapisid_cookie.value
631 self.write_debug('Extracted SAPISID cookie')
632 # SAPISID cookie is required if not already present
633 if not yt_cookies.get('SAPISID'):
634 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
635 self._set_cookie(
636 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
637 else:
638 self._SAPISID = False
639 if not self._SAPISID:
640 return None
641 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
642 sapisidhash = hashlib.sha1(
643 f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest()
644 return f'SAPISIDHASH {time_now}_{sapisidhash}'
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """
    POST `query` as JSON to the /youtubei/v1/<ep> endpoint via _download_json.

    @param ep              endpoint path appended to /youtubei/v1/
    @param query           dict merged into the request body next to 'context'
    @param context         request context; extracted for `default_client` when falsy
    @param headers         extra headers applied on top of the generated API headers
    @param api_key         fallback for the 'innertube_key' configuration argument
    @param api_hostname    requested hostname, see _select_api_hostname
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    hostname = self._select_api_hostname(api_hostname, default_client)
    api_url = f'https://{hostname}/youtubei/v1/{ep}'
    body = json.dumps(payload).encode('utf8')
    configured_key = self._configuration_arg(
        'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0]
    # Drop query parameters with falsy values (e.g. no API key)
    url_query = filter_dict({
        'key': configured_key,
        'prettyPrint': 'false',
    }, cndn=lambda _, v: v)

    return self._download_json(
        api_url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=body, headers=request_headers, query=url_query)
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows _YT_INITIAL_DATA_RE, via _search_json."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id, fatal=fatal)
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that int_or_none does not
    turn into None, or None when there is none.
    """
    indexes = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indexes if index is not None), None)
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """
    Find the identity token in ytcfg['ID_TOKEN'], else by regex in `webpage`.

    Returns None when neither source is given or `ytcfg` has no token and
    there is no `webpage` to search.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_session_id = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_session_id:
            return delegated_session_id

        datasync_id = try_get(source, datasync_getters, str) or ''
        parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(parts) >= 2 and parts[1]:
            return parts[0]
    return None
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
@functools.cached_property
def is_authenticated(self):
    """Whether _generate_sapisidhash_header yields a header; cached after the first access."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
def extract_ytcfg(self, video_id, webpage):
    """
    Parse the object passed to the first `ytcfg.set(...)` call in `webpage`.

    Returns an empty dict when `webpage` is falsy or the parse result is falsy.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}')
    parsed_ytcfg = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed_ytcfg or {}
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """
    Build the HTTP headers for an InnerTube API request.

    Explicit keyword arguments take precedence over values extracted from
    `ytcfg`. Authorization and X-Origin are added only when a SAPISIDHASH
    header can be generated. The result is passed through filter_dict.
    """
    origin = f'https://{self._select_api_hostname(api_hostname, default_client)}'

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    token = identity_token or self._extract_identity_token(ytcfg)
    page_id = account_syncid or self._extract_account_syncid(ytcfg)
    visitor_id = visitor_data or self._extract_visitor_data(ytcfg)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': token,
        'X-Goog-PageId': page_id,
        'X-Goog-Visitor-Id': visitor_id,
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # With an account syncid but no known session index, index 0 is used
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    authorization = self._generate_sapisidhash_header(origin)
    if authorization is not None:
        headers['Authorization'] = authorization
        headers['X-Origin'] = origin
    return filter_dict(headers)
def _download_ytcfg(self, client, video_id):
    """
    Download the page that carries the ytcfg for `client` and extract the ytcfg.

    Only 'web', 'web_music' and 'web_embedded' have a page to download; any
    other client gives an empty dict, as does a failed extraction.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}
    display_name = client.replace('_', ' ').strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {display_name} client config')
    return self.extract_ytcfg(video_id, webpage) or {}
770 @staticmethod
771 def _build_api_continuation_query(continuation, ctp=None):
772 query = {
773 'continuation': continuation,
775 # TODO: Inconsistency with clickTrackingParams.
776 # Currently we have a fixed ctp contained within context (from ytcfg)
777 # and a ctp in root query for continuation.
778 if ctp:
779 query['clickTracking'] = {'clickTrackingParams': ctp}
780 return query
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """
    Build a continuation query from a renderer's next/reload continuation data.

    Returns None when the renderer has no such data or the data has no
    'continuation' token.
    """
    continuation_getters = (
        lambda x: x['continuations'][0]['nextContinuationData'],
        lambda x: x['continuation']['reloadContinuationData'],
    )
    continuation_data = try_get(renderer, continuation_getters, dict)
    if not continuation_data:
        return None
    token = continuation_data.get('continuation')
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_data.get('clickTrackingParams'))
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """
    Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or has no
    continuationCommand token.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(
        token, continuation_ep.get('clickTrackingParams'))
@classmethod
def _extract_continuation(cls, renderer):
    """
    Build a continuation query for `renderer`.

    The next/reload continuation data is preferred; otherwise the
    continuationItemRenderer entries under 'contents', 'items' or 'rows'
    are searched for a usable endpoint.
    """
    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return cls._extract_next_continuation_data(renderer) or traverse_obj(
        renderer, endpoint_path, get_all=False, expected_type=cls._extract_continuation_ep_data)
@classmethod
def _extract_alerts(cls, data):
    """
    Yield (alert_type, message) pairs for the renderers listed under data['alerts'].

    Entries that are not dicts, renderers without a 'type', and renderers
    whose 'text' yields no message via _get_text are all skipped.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # Same guard as for the outer entry: a non-dict renderer has no .get()
            # and would otherwise abort the whole generator with AttributeError
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """
    Report alerts as warnings and raise ExtractorError for the last fatal one.

    An alert is fatal when its type is 'error' (case-insensitive) and `fatal`
    is set. Non-fatal alerts are reported unless their message is in
    _IGNORED_WARNINGS. All fatal alerts except the last are reported as warnings.
    """
    fatal_alerts, soft_alerts = [], []
    for kind, text in alerts:
        if kind.lower() == 'error' and fatal:
            fatal_alerts.append((kind, text))
        elif text not in self._IGNORED_WARNINGS:
            soft_alerts.append((kind, text))

    for kind, text in [*soft_alerts, *fatal_alerts[:-1]]:
        self.report_warning(f'YouTube said: {kind} - {text}', only_once=only_once)

    if not fatal_alerts:
        return
    last_message = fatal_alerts[-1][1]
    raise ExtractorError(f'YouTube said: {last_message}', expected=expected)
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts from `data` and pass them, with any extra arguments, to _report_alerts."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.
    @returns [{'type': BadgeType}]

    A badge is classified by icon type first, then by style, and finally by
    matching known English words in its label text.
    """
    by_icon_type = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    by_style = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    by_label = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    found = []
    renderers = traverse_obj(
        badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
    for renderer in renderers:
        known_type = (
            by_icon_type.get(traverse_obj(renderer, ('icon', 'iconType'), expected_type=str))
            or by_style.get(traverse_obj(renderer, 'style'))
        )
        if not known_type:
            # fallback, won't work in some languages
            label_text = traverse_obj(
                renderer, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
                get_all=False, expected_type=str, default='').lower()
            # First label keyword (in dict order) contained in the text wins
            known_type = next(
                (candidate for keyword, candidate in by_label.items() if keyword in label_text),
                None)
        if known_type:
            found.append({'type': known_type})

    return found
@staticmethod
def _has_badge(badges, badge_type):
    """
    Check whether `badges` contains an entry whose 'type' equals `badge_type`.

    @param badges: collection of badge entries, each looked up via entry['type']
    @param badge_type: the type value to look for
    @returns True if at least one entry matches, False otherwise
    """
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    matching = traverse_obj(badges, is_wanted)
    return bool(matching)
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` under any of the given paths.

    A candidate object yields text either from its 'simpleText' string or from the
    concatenated 'text' values of its 'runs' list (a bare list is treated as runs).
    @param path_list: traversal paths tried in order; with no paths, `data` itself is used
    @param max_runs: if truthy, upper bound on how many runs are joined
    @returns the text, or None if no path produced any
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            branching = any(
                key is ... or isinstance(key, (list, tuple)) for key in variadic(path))
            if not branching:
                # A non-branching path gives a single object rather than a list of results
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list)
            if not runs:
                runs = candidate if isinstance(candidate, list) else []

            limit = max_runs or len(runs)
            runs = runs[:min(len(runs), limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str))
            if joined:
                return joined
    return None
def _get_count(self, data, *path_list):
    """
    Read a count from the text found in `data` at one of `path_list`.

    The text is first passed to parse_count; if that gives None, whitespace is
    removed and a leading run of digits/commas is converted with str_to_int.
    @returns the value from parse_count, otherwise the str_to_int result
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed

    compact = re.sub(r'\s', '', text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact, 'count', default=None)
    return str_to_int(leading_digits)
@staticmethod
def _extract_thumbnails(data, *path_list, final_key='thumbnails'):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains 'thumbnails' key
    @param final_key: name of the key holding the thumbnail entries
    @returns list of {'url', 'height', 'width'} dicts; entries without a valid URL are skipped
    """
    collected = []
    for path in path_list or [()]:
        entries = traverse_obj(data, (*variadic(path), final_key, ...))
        for entry in entries:
            url = url_or_none(entry.get('url'))
            if not url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in url:
                # Keep only the part before the query string
                url = url.partition('?')[0]
            collected.append({
                'url': url,
                'height': int_or_none(entry.get('height')),
                'width': int_or_none(entry.get('width')),
            })
    return collected
@staticmethod
def extract_relative_time(relative_time_text):
    """
    Extracts a relative time from string and converts to dt object
    e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'

    Abbreviated units accepted by the pattern ('s', 'sec', 'min', 'h', 'hr', 'd',
    'w', 'wk', 'mo', 'y', 'yr') are expanded to their long spelling before the
    offset string is handed to datetime_from_str.

    @param relative_time_text: text to search for a relative time expression
    @returns the result of datetime_from_str, or None when the text is empty,
             holds no relative time, or the offset is rejected with ValueError
    """

    # XXX: this could be moved to a general function in utils/_utils.py
    # The relative time text strings are roughly the same as what
    # Javascript's Intl.RelativeTimeFormat function generates.
    # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
    long_unit_names = {
        's': 'second', 'sec': 'second',
        'min': 'minute',
        'h': 'hour', 'hr': 'hour',
        'd': 'day',
        'w': 'week', 'wk': 'week',
        'mo': 'month',
        'y': 'year', 'yr': 'year',
    }

    if not relative_time_text:
        return None

    mobj = re.search(
        r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
        relative_time_text)
    if not mobj:
        return None

    start = mobj.group('start')
    if start:
        return datetime_from_str(start)

    # Build the offset with the long unit name so that parsing does not rely on
    # datetime_from_str understanding abbreviations; long names pass through as-is
    matched_unit = mobj.group('unit')
    unit = long_unit_names.get(matched_unit, matched_unit)
    amount = mobj.group('time')
    try:
        return datetime_from_str(f'now-{amount}{unit}')
    except ValueError:
        return None
def _parse_time_text(self, text):
    """
    Convert a time text into a timestamp.

    A relative time (via self.extract_relative_time) is tried first; otherwise the
    text, and then a date-looking fragment cut out of its lowercased form, is given
    to unified_timestamp.
    @returns the timestamp, or None if `text` is empty or could not be parsed
             (a warning is reported once when the preferred language is None or 'en')
    """
    if not text:
        return None

    relative = self.extract_relative_time(text)
    if isinstance(relative, dt.datetime):
        return calendar.timegm(relative.timetuple())

    timestamp = unified_timestamp(text)
    if not timestamp:
        date_fragment = self._search_regex(
            (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
            text.lower(), 'time text', default=None)
        timestamp = unified_timestamp(date_fragment)

    if timestamp is None and self._preferred_lang in (None, 'en'):
        self.report_warning(
            f'Cannot parse localized time text "{text}"', only_once=True)
    return timestamp
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` and return its response, retrying on transient failures.

    Two retry managers are driven by hand: one for network/"unknown error" failures
    and one for responses that lack the keys named by `check_get_keys`.
    @param check_get_keys: path(s) of which at least one must be present in the response
    @param fatal: passed to self._error_or_warning for errors that are not retried
    @returns the response; None when the data stays incomplete and the incomplete-data
             manager stops without raising; otherwise the result of self._error_or_warning
    """
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    incomplete_attempts = iter(self.RetryManager(fatal=raise_for_incomplete))
    incomplete_manager = next(incomplete_attempts)
    main_attempts = iter(self.RetryManager())
    main_manager = next(main_attempts)

    def schedule_main_retry(error):
        # Record the failure and advance the main manager to its next attempt
        main_manager.error = error
        next(main_attempts)

    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    while True:
        try:
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as err:
            cause = err.cause
            if not isinstance(cause, network_exceptions):
                return self._error_or_warning(err, fatal=fatal)

            if isinstance(cause, HTTPError):
                head = cause.response.read(512)
                if not is_html(head):
                    # A non-HTML error body may be JSON carrying an error message; surface it as a warning
                    body = self._webpage_read_content(cause.response, None, item_id, prefix=head) or '{}'
                    yt_error = try_get(
                        self._parse_json(body, item_id, fatal=False),
                        lambda x: x['error']['message'], str)
                    if yt_error:
                        self._report_alerts([('ERROR', yt_error)], fatal=False)
                # Downloading page may result in intermittent 5xx HTTP error
                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                # We also want to catch all other network exceptions since errors in later pages can be troublesome
                # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                if cause.status in (403, 429):
                    return self._error_or_warning(err, fatal=fatal)

            schedule_main_retry(err)
            continue

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as err:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' not in err.msg.lower():
                return self._error_or_warning(err, fatal=fatal)
            schedule_main_retry(err)
            continue

        if traverse_obj(response, *variadic(check_get_keys)):
            return response

        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        incomplete_manager.error = ExtractorError('Incomplete data received', expected=True)
        if not next(incomplete_attempts, None):
            return None
@staticmethod
def is_music_url(url):
    """
    Tell whether `url` starts with the music.youtube.com host followed by a slash.

    The http(s):// scheme prefix is optional.
    @returns True or False
    """
    return bool(re.match(r'(https?://)?music\.youtube\.com/', url))
def _extract_video(self, renderer):
    """
    Build a url-type result for a single video from its renderer dict.

    Fields are read from `renderer` itself and, as a fallback for some of them,
    from the reel player header renderer nested under its navigation endpoint.
    @param renderer: dict describing the video (must support .get)
    @returns dict with '_type': 'url' pointing at the watch (or shorts) page,
             plus whatever metadata could be read
    """
    video_id = renderer.get('videoId')

    reel_header = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: explicit seconds, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        length_text = self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))
        duration = parse_duration(length_text)
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        accessibility_label = traverse_obj(
            renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            accessibility_label, video_id, default=None, group='duration'))

    raw_channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not raw_channel_id:
        raw_channel_id = traverse_obj(
            reel_header, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))
    channel_id = self.ucid_or_none(raw_channel_id)

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))

    navigation_path = traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)
    navigation_url = urljoin('https://www.youtube.com/', navigation_path) or ''
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'
    else:
        url = f'https://www.youtube.com/watch?v={video_id}'

    time_text = (
        self._get_text(renderer, 'publishedTimeText', 'videoInfo')
        or self._get_text(reel_header, 'timestampText')
        or '')
    scheduled_timestamp = str_to_int(
        traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    if scheduled_timestamp is not None:
        live_status = 'is_upcoming'
    elif 'streamed' in time_text.lower():
        live_status = 'was_live'
    elif overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW):
        live_status = 'is_live'
    else:
        live_status = None

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    if 'no views' in view_count_text.lower():
        view_count = 0
    else:
        view_count = self._get_count({'simpleText': view_count_text})
    # Live and upcoming entries get their count stored under a different key
    if live_status in ('is_live', 'is_upcoming'):
        view_count_field = 'concurrent_view_count'
    else:
        view_count_field = 'view_count'

    channel = (
        self._get_text(renderer, 'ownerText', 'shortBylineText')
        or self._get_text(reel_header, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)

    # The time text is only converted when the 'approximate_date' extractor argument is set
    if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
        timestamp = self._parse_time_text(time_text)
    else:
        timestamp = None

    if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC):
        availability = 'public'
    else:
        availability = self._availability(
            is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)

    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'timestamp': timestamp,
        'release_timestamp': scheduled_timestamp,
        'availability': availability,
        view_count_field: view_count,
        'live_status': live_status,
        'channel_is_verified': self._has_badge(owner_badges, BadgeType.VERIFIED) or None,
    }
1164 class YoutubeIE(YoutubeBaseInfoExtractor):
1165 IE_DESC = 'YouTube'
1166 _VALID_URL = r'''(?x)^
1168 (?:https?://|//) # http(s):// or protocol-independent URL
1169 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1170 (?:www\.)?deturl\.com/www\.youtube\.com|
1171 (?:www\.)?pwnyoutube\.com|
1172 (?:www\.)?hooktube\.com|
1173 (?:www\.)?yourepeat\.com|
1174 tube\.majestyc\.net|
1175 {invidious}|
1176 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1177 (?:.*?\#/)? # handle anchor (#/) redirect urls
1178 (?: # the various things that can precede the ID:
1179 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1180 |(?: # or the v= param in all its forms
1181 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1182 (?:\?|\#!?) # the params delimiter ? or # or #!
1183 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
1187 |(?:
1188 youtu\.be| # just youtu.be/xxxx
1189 vid\.plus| # or vid.plus/xxxx
1190 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1191 {invidious}
1193 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1195 )? # all until now is optional -> you can pass the naked ID
1196 (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID
1197 (?(1).+)? # if we found the ID, everything can follow
1198 (?:\#|$)'''.format(
1199 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1201 _EMBED_REGEX = [
1202 r'''(?x)
1204 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1205 data-video-url=|
1206 <embed[^>]+?src=|
1207 embedSWF\(?:\s*|
1208 <object[^>]+data=|
1209 new\s+SWFObject\(
1211 (["\'])
1212 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1213 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1214 \1''',
1215 # https://wordpress.org/plugins/lazy-load-for-videos/
1216 r'''(?xs)
1217 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1218 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1220 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1222 _PLAYER_INFO_RE = (
1223 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1224 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1225 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1227 _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
1228 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1229 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1230 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1231 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1232 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1233 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1234 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1235 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1236 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1237 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1238 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1239 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1240 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1241 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1242 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1243 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1244 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1245 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1248 # 3D videos
1249 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1250 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1251 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1252 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1253 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1254 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1255 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1257 # Apple HTTP Live Streaming
1258 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1259 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1260 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1261 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1262 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1263 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1264 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1265 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1267 # DASH mp4 video
1268 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1269 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1270 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1271 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1272 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1273 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1274 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1275 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1276 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1277 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1278 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1279 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1281 # Dash mp4 audio
1282 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1283 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1284 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1285 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1286 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1287 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1288 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1290 # Dash webm
1291 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1292 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1293 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1294 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1295 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1296 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1297 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1298 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1299 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1300 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1301 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1302 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1303 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1304 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1305 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1306 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1307 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1308 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1309 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1310 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1311 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1312 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1314 # Dash webm audio
1315 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1316 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1318 # Dash webm audio with opus inside
1319 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1320 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1321 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1323 # RTMP (unnamed)
1324 '_rtmp': {'protocol': 'rtmp'},
1326 # av01 video only formats sometimes served with "unknown" codecs
1327 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1328 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1329 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1330 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1331 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1332 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1333 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1334 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1336 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1337 _POTOKEN_EXPERIMENTS = ('51217476', '51217102')
1338 _BROKEN_CLIENTS = {
1339 short_client_name(client): client
1340 for client in ('android', 'android_creator', 'android_music')
1343 _GEO_BYPASS = False
1345 IE_NAME = 'youtube'
1346 _TESTS = [
1348 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1349 'info_dict': {
1350 'id': 'BaW_jenozKc',
1351 'ext': 'mp4',
1352 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1353 'channel': 'Philipp Hagemeister',
1354 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1355 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1356 'upload_date': '20121002',
1357 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1358 'categories': ['Science & Technology'],
1359 'tags': ['youtube-dl'],
1360 'duration': 10,
1361 'view_count': int,
1362 'like_count': int,
1363 'availability': 'public',
1364 'playable_in_embed': True,
1365 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1366 'live_status': 'not_live',
1367 'age_limit': 0,
1368 'start_time': 1,
1369 'end_time': 9,
1370 'comment_count': int,
1371 'channel_follower_count': int,
1372 'uploader': 'Philipp Hagemeister',
1373 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1374 'uploader_id': '@PhilippHagemeister',
1375 'heatmap': 'count:100',
1376 'timestamp': 1349198244,
1380 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1381 'note': 'Embed-only video (#1746)',
1382 'info_dict': {
1383 'id': 'yZIXLfi8CZQ',
1384 'ext': 'mp4',
1385 'upload_date': '20120608',
1386 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1387 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1388 'age_limit': 18,
1390 'skip': 'Private video',
1393 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1394 'note': 'Use the first video ID in the URL',
1395 'info_dict': {
1396 'id': 'BaW_jenozKc',
1397 'ext': 'mp4',
1398 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1399 'channel': 'Philipp Hagemeister',
1400 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1401 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1402 'upload_date': '20121002',
1403 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1404 'categories': ['Science & Technology'],
1405 'tags': ['youtube-dl'],
1406 'duration': 10,
1407 'view_count': int,
1408 'like_count': int,
1409 'availability': 'public',
1410 'playable_in_embed': True,
1411 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1412 'live_status': 'not_live',
1413 'age_limit': 0,
1414 'comment_count': int,
1415 'channel_follower_count': int,
1416 'uploader': 'Philipp Hagemeister',
1417 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1418 'uploader_id': '@PhilippHagemeister',
1419 'heatmap': 'count:100',
1420 'timestamp': 1349198244,
1422 'params': {
1423 'skip_download': True,
1427 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1428 'note': '256k DASH audio (format 141) via DASH manifest',
1429 'info_dict': {
1430 'id': 'a9LDPn-MO4I',
1431 'ext': 'm4a',
1432 'upload_date': '20121002',
1433 'description': '',
1434 'title': 'UHDTV TEST 8K VIDEO.mp4',
1436 'params': {
1437 'youtube_include_dash_manifest': True,
1438 'format': '141',
1440 'skip': 'format 141 not served anymore',
1442 # DASH manifest with encrypted signature
1444 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1445 'info_dict': {
1446 'id': 'IB3lcPjvWLA',
1447 'ext': 'm4a',
1448 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1449 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1450 'duration': 244,
1451 'upload_date': '20131011',
1452 'abr': 129.495,
1453 'like_count': int,
1454 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1455 'playable_in_embed': True,
1456 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1457 'view_count': int,
1458 'track': 'The Spark',
1459 'live_status': 'not_live',
1460 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1461 'channel': 'Afrojack',
1462 'tags': 'count:19',
1463 'availability': 'public',
1464 'categories': ['Music'],
1465 'age_limit': 0,
1466 'alt_title': 'The Spark',
1467 'channel_follower_count': int,
1468 'uploader': 'Afrojack',
1469 'uploader_url': 'https://www.youtube.com/@Afrojack',
1470 'uploader_id': '@Afrojack',
1472 'params': {
1473 'youtube_include_dash_manifest': True,
1474 'format': '141/bestaudio[ext=m4a]',
1477 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1479 'note': 'Embed allowed age-gate video',
1480 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1481 'info_dict': {
1482 'id': 'HtVdAasjOgU',
1483 'ext': 'mp4',
1484 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1485 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1486 'duration': 142,
1487 'upload_date': '20140605',
1488 'age_limit': 18,
1489 'categories': ['Gaming'],
1490 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1491 'availability': 'needs_auth',
1492 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1493 'like_count': int,
1494 'channel': 'The Witcher',
1495 'live_status': 'not_live',
1496 'tags': 'count:17',
1497 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1498 'playable_in_embed': True,
1499 'view_count': int,
1500 'channel_follower_count': int,
1501 'uploader': 'The Witcher',
1502 'uploader_url': 'https://www.youtube.com/@thewitcher',
1503 'uploader_id': '@thewitcher',
1504 'comment_count': int,
1505 'channel_is_verified': True,
1506 'heatmap': 'count:100',
1507 'timestamp': 1401991663,
1511 'note': 'Age-gate video with embed allowed in public site',
1512 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1513 'info_dict': {
1514 'id': 'HsUATh_Nc2U',
1515 'ext': 'mp4',
1516 'title': 'Godzilla 2 (Official Video)',
1517 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1518 'upload_date': '20200408',
1519 'age_limit': 18,
1520 'availability': 'needs_auth',
1521 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1522 'channel': 'FlyingKitty',
1523 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1524 'view_count': int,
1525 'categories': ['Entertainment'],
1526 'live_status': 'not_live',
1527 'tags': ['Flyingkitty', 'godzilla 2'],
1528 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1529 'like_count': int,
1530 'duration': 177,
1531 'playable_in_embed': True,
1532 'channel_follower_count': int,
1533 'uploader': 'FlyingKitty',
1534 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1535 'uploader_id': '@FlyingKitty900',
1536 'comment_count': int,
1537 'channel_is_verified': True,
1541 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1542 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1543 'info_dict': {
1544 'id': 'Tq92D6wQ1mg',
1545 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1546 'ext': 'mp4',
1547 'upload_date': '20191228',
1548 'description': 'md5:17eccca93a786d51bc67646756894066',
1549 'age_limit': 18,
1550 'like_count': int,
1551 'availability': 'needs_auth',
1552 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1553 'view_count': int,
1554 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1555 'channel': 'Projekt Melody',
1556 'live_status': 'not_live',
1557 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1558 'playable_in_embed': True,
1559 'categories': ['Entertainment'],
1560 'duration': 106,
1561 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1562 'comment_count': int,
1563 'channel_follower_count': int,
1564 'uploader': 'Projekt Melody',
1565 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1566 'uploader_id': '@ProjektMelody',
1567 'timestamp': 1577508724,
1571 'note': 'Non-Agegated non-embeddable video',
1572 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1573 'info_dict': {
1574 'id': 'MeJVWBSsPAY',
1575 'ext': 'mp4',
1576 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1577 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1578 'upload_date': '20130730',
1579 'track': 'Such mich find mich',
1580 'age_limit': 0,
1581 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1582 'like_count': int,
1583 'playable_in_embed': False,
1584 'creator': 'OOMPH!',
1585 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1586 'view_count': int,
1587 'alt_title': 'Such mich find mich',
1588 'duration': 210,
1589 'channel': 'Herr Lurik',
1590 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1591 'categories': ['Music'],
1592 'availability': 'public',
1593 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1594 'live_status': 'not_live',
1595 'artist': 'OOMPH!',
1596 'channel_follower_count': int,
1597 'uploader': 'Herr Lurik',
1598 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1599 'uploader_id': '@HerrLurik',
1603 'note': 'Non-bypassable age-gated video',
1604 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1605 'only_matching': True,
1607 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1608 # YouTube Red ad is not captured for creator
1610 'url': '__2ABJjxzNo',
1611 'info_dict': {
1612 'id': '__2ABJjxzNo',
1613 'ext': 'mp4',
1614 'duration': 266,
1615 'upload_date': '20100430',
1616 'creator': 'deadmau5',
1617 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1618 'title': 'Deadmau5 - Some Chords (HD)',
1619 'alt_title': 'Some Chords',
1620 'availability': 'public',
1621 'tags': 'count:14',
1622 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1623 'view_count': int,
1624 'live_status': 'not_live',
1625 'channel': 'deadmau5',
1626 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1627 'like_count': int,
1628 'track': 'Some Chords',
1629 'artist': 'deadmau5',
1630 'playable_in_embed': True,
1631 'age_limit': 0,
1632 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1633 'categories': ['Music'],
1634 'album': 'Some Chords',
1635 'channel_follower_count': int,
1636 'uploader': 'deadmau5',
1637 'uploader_url': 'https://www.youtube.com/@deadmau5',
1638 'uploader_id': '@deadmau5',
1640 'expected_warnings': [
1641 'DASH manifest missing',
1644 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1646 'url': 'lqQg6PlCWgI',
1647 'info_dict': {
1648 'id': 'lqQg6PlCWgI',
1649 'ext': 'mp4',
1650 'duration': 6085,
1651 'upload_date': '20150827',
1652 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1653 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1654 'like_count': int,
1655 'release_timestamp': 1343767800,
1656 'playable_in_embed': True,
1657 'categories': ['Sports'],
1658 'release_date': '20120731',
1659 'channel': 'Olympics',
1660 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1661 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1662 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1663 'age_limit': 0,
1664 'availability': 'public',
1665 'live_status': 'was_live',
1666 'view_count': int,
1667 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1668 'channel_follower_count': int,
1669 'uploader': 'Olympics',
1670 'uploader_url': 'https://www.youtube.com/@Olympics',
1671 'uploader_id': '@Olympics',
1672 'channel_is_verified': True,
1673 'timestamp': 1440707674,
1675 'params': {
1676 'skip_download': 'requires avconv',
1679 # Non-square pixels
1681 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1682 'info_dict': {
1683 'id': '_b-2C3KPAM0',
1684 'ext': 'mp4',
1685 'stretched_ratio': 16 / 9.,
1686 'duration': 85,
1687 'upload_date': '20110310',
1688 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1689 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1690 'playable_in_embed': True,
1691 'channel': '孫ᄋᄅ',
1692 'age_limit': 0,
1693 'tags': 'count:11',
1694 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1695 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1696 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1697 'view_count': int,
1698 'categories': ['People & Blogs'],
1699 'like_count': int,
1700 'live_status': 'not_live',
1701 'availability': 'unlisted',
1702 'comment_count': int,
1703 'channel_follower_count': int,
1704 'uploader': '孫ᄋᄅ',
1705 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1706 'uploader_id': '@AllenMeow',
1707 'timestamp': 1299776999,
1710 # url_encoded_fmt_stream_map is empty string
1712 'url': 'qEJwOuvDf7I',
1713 'info_dict': {
1714 'id': 'qEJwOuvDf7I',
1715 'ext': 'webm',
1716 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1717 'description': '',
1718 'upload_date': '20150404',
1720 'params': {
1721 'skip_download': 'requires avconv',
1723 'skip': 'This live event has ended.',
1725 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1727 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1728 'info_dict': {
1729 'id': 'FIl7x6_3R5Y',
1730 'ext': 'webm',
1731 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1732 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1733 'duration': 220,
1734 'upload_date': '20150625',
1735 'formats': 'mincount:31',
1737 'skip': 'not actual anymore',
1739 # DASH manifest with segment_list
1741 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1742 'md5': '8ce563a1d667b599d21064e982ab9e31',
1743 'info_dict': {
1744 'id': 'CsmdDsKjzN8',
1745 'ext': 'mp4',
1746 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1747 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1748 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1750 'params': {
1751 'youtube_include_dash_manifest': True,
1752 'format': '135', # bestvideo
1754 'skip': 'This live event has ended.',
1757 # Multifeed videos (multiple cameras), URL can be of any Camera
1758 # TODO: fix multifeed titles
1759 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1760 'info_dict': {
1761 'id': 'zaPI8MvL8pg',
1762 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1763 'description': 'md5:563ccbc698b39298481ca3c571169519',
1765 'playlist': [{
1766 'info_dict': {
1767 'id': 'j5yGuxZ8lLU',
1768 'ext': 'mp4',
1769 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1770 'description': 'md5:563ccbc698b39298481ca3c571169519',
1771 'duration': 10120,
1772 'channel_follower_count': int,
1773 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1774 'availability': 'public',
1775 'playable_in_embed': True,
1776 'upload_date': '20131105',
1777 'categories': ['Gaming'],
1778 'live_status': 'was_live',
1779 'tags': 'count:24',
1780 'release_timestamp': 1383701910,
1781 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1782 'comment_count': int,
1783 'age_limit': 0,
1784 'like_count': int,
1785 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1786 'channel': 'WiiLikeToPlay',
1787 'view_count': int,
1788 'release_date': '20131106',
1789 'uploader': 'WiiLikeToPlay',
1790 'uploader_id': '@WLTP',
1791 'uploader_url': 'https://www.youtube.com/@WLTP',
1793 }, {
1794 'info_dict': {
1795 'id': 'zaPI8MvL8pg',
1796 'ext': 'mp4',
1797 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1798 'availability': 'public',
1799 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1800 'channel': 'WiiLikeToPlay',
1801 'channel_follower_count': int,
1802 'description': 'md5:563ccbc698b39298481ca3c571169519',
1803 'duration': 10108,
1804 'age_limit': 0,
1805 'like_count': int,
1806 'tags': 'count:24',
1807 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1808 'release_timestamp': 1383701915,
1809 'comment_count': int,
1810 'upload_date': '20131105',
1811 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1812 'release_date': '20131106',
1813 'playable_in_embed': True,
1814 'live_status': 'was_live',
1815 'categories': ['Gaming'],
1816 'view_count': int,
1817 'uploader': 'WiiLikeToPlay',
1818 'uploader_id': '@WLTP',
1819 'uploader_url': 'https://www.youtube.com/@WLTP',
1821 }, {
1822 'info_dict': {
1823 'id': 'R7r3vfO7Hao',
1824 'ext': 'mp4',
1825 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1826 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1827 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1828 'like_count': int,
1829 'availability': 'public',
1830 'playable_in_embed': True,
1831 'upload_date': '20131105',
1832 'description': 'md5:563ccbc698b39298481ca3c571169519',
1833 'channel_follower_count': int,
1834 'tags': 'count:24',
1835 'release_date': '20131106',
1836 'comment_count': int,
1837 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1838 'channel': 'WiiLikeToPlay',
1839 'categories': ['Gaming'],
1840 'release_timestamp': 1383701914,
1841 'live_status': 'was_live',
1842 'age_limit': 0,
1843 'duration': 10128,
1844 'view_count': int,
1845 'uploader': 'WiiLikeToPlay',
1846 'uploader_id': '@WLTP',
1847 'uploader_url': 'https://www.youtube.com/@WLTP',
1850 'params': {'skip_download': True},
1851 'skip': 'Not multifeed anymore',
1854 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1855 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1856 'info_dict': {
1857 'id': 'gVfLd0zydlo',
1858 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1860 'playlist_count': 2,
1861 'skip': 'Not multifeed anymore',
1864 'url': 'https://vid.plus/FlRa-iH7PGw',
1865 'only_matching': True,
1868 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1869 'only_matching': True,
1872 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1873 # Also tests cut-off URL expansion in video description (see
1874 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1875 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1876 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1877 'info_dict': {
1878 'id': 'lsguqyKfVQg',
1879 'ext': 'mp4',
1880 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1881 'alt_title': 'Dark Walk',
1882 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1883 'duration': 133,
1884 'upload_date': '20151119',
1885 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1886 'track': 'Dark Walk',
1887 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1888 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1889 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1890 'categories': ['Film & Animation'],
1891 'view_count': int,
1892 'live_status': 'not_live',
1893 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1894 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1895 'tags': 'count:13',
1896 'availability': 'public',
1897 'channel': 'IronSoulElf',
1898 'playable_in_embed': True,
1899 'like_count': int,
1900 'age_limit': 0,
1901 'channel_follower_count': int,
1903 'params': {
1904 'skip_download': True,
1908 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1909 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1910 'only_matching': True,
1913 # Video with yt:stretch=17:0
1914 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1915 'info_dict': {
1916 'id': 'Q39EVAstoRM',
1917 'ext': 'mp4',
1918 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1919 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1920 'upload_date': '20151107',
1922 'params': {
1923 'skip_download': True,
1925 'skip': 'This video does not exist.',
1928 # Video with incomplete 'yt:stretch=16:'
1929 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1930 'only_matching': True,
1933 # Video licensed under Creative Commons
1934 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1935 'info_dict': {
1936 'id': 'M4gD1WSo5mA',
1937 'ext': 'mp4',
1938 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1939 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1940 'duration': 721,
1941 'upload_date': '20150128',
1942 'license': 'Creative Commons Attribution license (reuse allowed)',
1943 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1944 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1945 'like_count': int,
1946 'age_limit': 0,
1947 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1948 'channel': 'The Berkman Klein Center for Internet & Society',
1949 'availability': 'public',
1950 'view_count': int,
1951 'categories': ['Education'],
1952 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1953 'live_status': 'not_live',
1954 'playable_in_embed': True,
1955 'channel_follower_count': int,
1956 'chapters': list,
1957 'uploader': 'The Berkman Klein Center for Internet & Society',
1958 'uploader_id': '@BKCHarvard',
1959 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
1960 'timestamp': 1422422076,
1962 'params': {
1963 'skip_download': True,
1967 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1968 'info_dict': {
1969 'id': 'eQcmzGIKrzg',
1970 'ext': 'mp4',
1971 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1972 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1973 'duration': 4060,
1974 'upload_date': '20151120',
1975 'license': 'Creative Commons Attribution license (reuse allowed)',
1976 'playable_in_embed': True,
1977 'tags': 'count:12',
1978 'like_count': int,
1979 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1980 'age_limit': 0,
1981 'availability': 'public',
1982 'categories': ['News & Politics'],
1983 'channel': 'Bernie Sanders',
1984 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1985 'view_count': int,
1986 'live_status': 'not_live',
1987 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1988 'comment_count': int,
1989 'channel_follower_count': int,
1990 'chapters': list,
1991 'uploader': 'Bernie Sanders',
1992 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1993 'uploader_id': '@BernieSanders',
1994 'channel_is_verified': True,
1995 'heatmap': 'count:100',
1996 'timestamp': 1447987198,
1998 'params': {
1999 'skip_download': True,
2003 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
2004 'only_matching': True,
2007 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
2008 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
2009 'only_matching': True,
2012 # Rental video preview
2013 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
2014 'info_dict': {
2015 'id': 'uGpuVWrhIzE',
2016 'ext': 'mp4',
2017 'title': 'Piku - Trailer',
2018 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
2019 'upload_date': '20150811',
2020 'license': 'Standard YouTube License',
2022 'params': {
2023 'skip_download': True,
2025 'skip': 'This video is not available.',
2028 # YouTube Red video with episode data
2029 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
2030 'info_dict': {
2031 'id': 'iqKdEhx-dD4',
2032 'ext': 'mp4',
2033 'title': 'Isolation - Mind Field (Ep 1)',
2034 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
2035 'duration': 2085,
2036 'upload_date': '20170118',
2037 'series': 'Mind Field',
2038 'season_number': 1,
2039 'episode_number': 1,
2040 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
2041 'tags': 'count:12',
2042 'view_count': int,
2043 'availability': 'public',
2044 'age_limit': 0,
2045 'channel': 'Vsauce',
2046 'episode': 'Episode 1',
2047 'categories': ['Entertainment'],
2048 'season': 'Season 1',
2049 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
2050 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
2051 'like_count': int,
2052 'playable_in_embed': True,
2053 'live_status': 'not_live',
2054 'channel_follower_count': int,
2055 'uploader': 'Vsauce',
2056 'uploader_url': 'https://www.youtube.com/@Vsauce',
2057 'uploader_id': '@Vsauce',
2058 'comment_count': int,
2059 'channel_is_verified': True,
2060 'timestamp': 1484761047,
2062 'params': {
2063 'skip_download': True,
2065 'expected_warnings': [
2066 'Skipping DASH manifest',
2070 # The following content has been identified by the YouTube community
2071 # as inappropriate or offensive to some audiences.
2072 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2073 'info_dict': {
2074 'id': '6SJNVb0GnPI',
2075 'ext': 'mp4',
2076 'title': 'Race Differences in Intelligence',
2077 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2078 'duration': 965,
2079 'upload_date': '20140124',
2081 'params': {
2082 'skip_download': True,
2084 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
2087 # itag 212
2088 'url': '1t24XAntNCY',
2089 'only_matching': True,
2092 # geo restricted to JP
2093 'url': 'sJL6WA-aGkQ',
2094 'only_matching': True,
2097 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2098 'only_matching': True,
2101 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2102 'only_matching': True,
2105 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2106 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2107 'only_matching': True,
2110 # DRM protected
2111 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2112 'only_matching': True,
2115 # Video with unsupported adaptive stream type formats
2116 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2117 'info_dict': {
2118 'id': 'Z4Vy8R84T1U',
2119 'ext': 'mp4',
2120 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2121 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2122 'duration': 433,
2123 'upload_date': '20130923',
2124 'formats': 'maxcount:10',
2126 'params': {
2127 'skip_download': True,
2128 'youtube_include_dash_manifest': False,
2130 'skip': 'not actual anymore',
2133 # YouTube Music auto-generated description
2134 # TODO: fix metadata extraction
2135 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2136 'info_dict': {
2137 'id': 'MgNrAu2pzNs',
2138 'ext': 'mp4',
2139 'title': 'Voyeur Girl',
2140 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2141 'upload_date': '20190312',
2142 'artists': ['Stephen'],
2143 'creators': ['Stephen'],
2144 'track': 'Voyeur Girl',
2145 'album': 'it\'s too much love to know my dear',
2146 'release_date': '20190313',
2147 'alt_title': 'Voyeur Girl',
2148 'view_count': int,
2149 'playable_in_embed': True,
2150 'like_count': int,
2151 'categories': ['Music'],
2152 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
2153 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2154 'uploader': 'Stephen',
2155 'availability': 'public',
2156 'duration': 169,
2157 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2158 'age_limit': 0,
2159 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2160 'tags': 'count:11',
2161 'live_status': 'not_live',
2162 'channel_follower_count': int,
2164 'params': {
2165 'skip_download': True,
2169 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2170 'only_matching': True,
2173 # invalid -> valid video id redirection
2174 'url': 'DJztXj2GPfl',
2175 'info_dict': {
2176 'id': 'DJztXj2GPfk',
2177 'ext': 'mp4',
2178 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2179 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2180 'upload_date': '20090125',
2181 'artist': 'Panjabi MC',
2182 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2183 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2185 'params': {
2186 'skip_download': True,
2188 'skip': 'Video unavailable',
2191 # empty description results in an empty string
2192 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2193 'info_dict': {
2194 'id': 'x41yOUIvK2k',
2195 'ext': 'mp4',
2196 'title': 'IMG 3456',
2197 'description': '',
2198 'upload_date': '20170613',
2199 'view_count': int,
2200 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2201 'like_count': int,
2202 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2203 'tags': [],
2204 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2205 'availability': 'public',
2206 'age_limit': 0,
2207 'categories': ['Pets & Animals'],
2208 'duration': 7,
2209 'playable_in_embed': True,
2210 'live_status': 'not_live',
2211 'channel': 'l\'Or Vert asbl',
2212 'channel_follower_count': int,
2213 'uploader': 'l\'Or Vert asbl',
2214 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2215 'uploader_id': '@ElevageOrVert',
2216 'timestamp': 1497343210,
2218 'params': {
2219 'skip_download': True,
2223 # with '};' inside yt initial data (see [1])
2224 # see [2] for an example with '};' inside ytInitialPlayerResponse
2225 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2226 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2227 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2228 'info_dict': {
2229 'id': 'CHqg6qOn4no',
2230 'ext': 'mp4',
2231 'title': 'Part 77 Sort a list of simple types in c#',
2232 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2233 'upload_date': '20130831',
2234 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2235 'like_count': int,
2236 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2237 'live_status': 'not_live',
2238 'categories': ['Education'],
2239 'availability': 'public',
2240 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2241 'tags': 'count:12',
2242 'playable_in_embed': True,
2243 'age_limit': 0,
2244 'view_count': int,
2245 'duration': 522,
2246 'channel': 'kudvenkat',
2247 'comment_count': int,
2248 'channel_follower_count': int,
2249 'chapters': list,
2250 'uploader': 'kudvenkat',
2251 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2252 'uploader_id': '@Csharp-video-tutorialsBlogspot',
2253 'channel_is_verified': True,
2254 'heatmap': 'count:100',
2255 'timestamp': 1377976349,
2257 'params': {
2258 'skip_download': True,
2262 # another example of '};' in ytInitialData
2263 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2264 'only_matching': True,
2267 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2268 'only_matching': True,
2271 # https://github.com/ytdl-org/youtube-dl/pull/28094
2272 'url': 'OtqTfy26tG0',
2273 'info_dict': {
2274 'id': 'OtqTfy26tG0',
2275 'ext': 'mp4',
2276 'title': 'Burn Out',
2277 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2278 'upload_date': '20141120',
2279 'artist': 'The Cinematic Orchestra',
2280 'track': 'Burn Out',
2281 'album': 'Every Day',
2282 'like_count': int,
2283 'live_status': 'not_live',
2284 'alt_title': 'Burn Out',
2285 'duration': 614,
2286 'age_limit': 0,
2287 'view_count': int,
2288 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2289 'creator': 'The Cinematic Orchestra',
2290 'channel': 'The Cinematic Orchestra',
2291 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2292 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2293 'availability': 'public',
2294 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2295 'categories': ['Music'],
2296 'playable_in_embed': True,
2297 'channel_follower_count': int,
2298 'uploader': 'The Cinematic Orchestra',
2299 'comment_count': int,
2301 'params': {
2302 'skip_download': True,
2306 # controversial video, only works with bpctr when authenticated with cookies
2307 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2308 'only_matching': True,
2311 # controversial video, requires bpctr/contentCheckOk
2312 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2313 'info_dict': {
2314 'id': 'SZJvDhaSDnc',
2315 'ext': 'mp4',
2316 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2317 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2318 'upload_date': '20140716',
2319 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2320 'duration': 170,
2321 'categories': ['News & Politics'],
2322 'view_count': int,
2323 'channel': 'CBS Mornings',
2324 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2325 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2326 'age_limit': 18,
2327 'availability': 'needs_auth',
2328 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2329 'like_count': int,
2330 'live_status': 'not_live',
2331 'playable_in_embed': True,
2332 'channel_follower_count': int,
2333 'uploader': 'CBS Mornings',
2334 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2335 'uploader_id': '@CBSMornings',
2336 'comment_count': int,
2337 'channel_is_verified': True,
2338 'timestamp': 1405513526,
2342 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2343 'url': 'cBvYw8_A0vQ',
2344 'info_dict': {
2345 'id': 'cBvYw8_A0vQ',
2346 'ext': 'mp4',
2347 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2348 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2349 'upload_date': '20201120',
2350 'duration': 1456,
2351 'categories': ['Travel & Events'],
2352 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2353 'view_count': int,
2354 'channel': 'Walk around Japan',
2355 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2356 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
2357 'age_limit': 0,
2358 'availability': 'public',
2359 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2360 'live_status': 'not_live',
2361 'playable_in_embed': True,
2362 'channel_follower_count': int,
2363 'uploader': 'Walk around Japan',
2364 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2365 'uploader_id': '@walkaroundjapan7124',
2366 'timestamp': 1605884416,
2368 'params': {
2369 'skip_download': True,
2371 }, {
2372 # Has multiple audio streams
2373 'url': 'WaOKSUlf4TM',
2374 'only_matching': True,
2375 }, {
2376 # Requires Premium: has format 141 when requested using YTM url
2377 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2378 'only_matching': True,
2379 }, {
2380 # multiple subtitles with same lang_code
2381 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2382 'only_matching': True,
2383 }, {
2384 # Force use of the android client fallback
2385 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2386 'info_dict': {
2387 'id': 'YOelRv7fMxY',
2388 'title': 'DIGGING A SECRET TUNNEL Part 1',
2389 'ext': '3gp',
2390 'upload_date': '20210624',
2391 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2392 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2393 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2394 'duration': 596,
2395 'categories': ['Entertainment'],
2396 'view_count': int,
2397 'channel': 'colinfurze',
2398 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2399 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2400 'age_limit': 0,
2401 'availability': 'public',
2402 'like_count': int,
2403 'live_status': 'not_live',
2404 'playable_in_embed': True,
2405 'channel_follower_count': int,
2406 'chapters': list,
2407 'uploader': 'colinfurze',
2408 'uploader_url': 'https://www.youtube.com/@colinfurze',
2409 'uploader_id': '@colinfurze',
2410 'comment_count': int,
2411 'channel_is_verified': True,
2412 'heatmap': 'count:100',
2414 'params': {
2415 'format': '17', # 3gp format available on android
2416 'extractor_args': {'youtube': {'player_client': ['android']}},
2418 'skip': 'android client broken',
2421 # Skip download of additional client configs (remix client config in this case)
2422 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2423 'only_matching': True,
2424 'params': {
2425 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2427 }, {
2428 # shorts
2429 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2430 'only_matching': True,
2431 }, {
2432 'note': 'Storyboards',
2433 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2434 'info_dict': {
2435 'id': '5KLPxDtMqe8',
2436 'ext': 'mhtml',
2437 'format_id': 'sb0',
2438 'title': 'Your Brain is Plastic',
2439 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2440 'upload_date': '20140324',
2441 'like_count': int,
2442 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2443 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2444 'view_count': int,
2445 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2446 'playable_in_embed': True,
2447 'tags': 'count:12',
2448 'availability': 'public',
2449 'channel': 'SciShow',
2450 'live_status': 'not_live',
2451 'duration': 248,
2452 'categories': ['Education'],
2453 'age_limit': 0,
2454 'channel_follower_count': int,
2455 'chapters': list,
2456 'uploader': 'SciShow',
2457 'uploader_url': 'https://www.youtube.com/@SciShow',
2458 'uploader_id': '@SciShow',
2459 'comment_count': int,
2460 'channel_is_verified': True,
2461 'heatmap': 'count:100',
2462 'timestamp': 1395685455,
2463 }, 'params': {'format': 'mhtml', 'skip_download': True},
2464 }, {
2465 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2466 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2467 'info_dict': {
2468 'id': '2NUZ8W2llS4',
2469 'ext': 'mp4',
2470 'title': 'The NP that test your phone performance 🙂',
2471 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2472 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2473 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2474 'duration': 21,
2475 'view_count': int,
2476 'age_limit': 0,
2477 'categories': ['Gaming'],
2478 'tags': 'count:23',
2479 'playable_in_embed': True,
2480 'live_status': 'not_live',
2481 'upload_date': '20220103',
2482 'like_count': int,
2483 'availability': 'public',
2484 'channel': 'Leon Nguyen',
2485 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2486 'comment_count': int,
2487 'channel_follower_count': int,
2488 'uploader': 'Leon Nguyen',
2489 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2490 'uploader_id': '@LeonNguyen',
2491 'heatmap': 'count:100',
2492 'timestamp': 1641170939,
2494 }, {
2495 # Date text is that of a premiered video; ensure upload date is in UTC (published 1641172509)
2496 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2497 'info_dict': {
2498 'id': 'mzZzzBU6lrM',
2499 'ext': 'mp4',
2500 'title': 'I Met GeorgeNotFound In Real Life...',
2501 'description': 'md5:978296ec9783a031738b684d4ebf302d',
2502 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2503 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2504 'duration': 955,
2505 'view_count': int,
2506 'age_limit': 0,
2507 'categories': ['Entertainment'],
2508 'tags': 'count:26',
2509 'playable_in_embed': True,
2510 'live_status': 'not_live',
2511 'release_timestamp': 1641172509,
2512 'release_date': '20220103',
2513 'upload_date': '20220103',
2514 'like_count': int,
2515 'availability': 'public',
2516 'channel': 'Quackity',
2517 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2518 'channel_follower_count': int,
2519 'uploader': 'Quackity',
2520 'uploader_id': '@Quackity',
2521 'uploader_url': 'https://www.youtube.com/@Quackity',
2522 'comment_count': int,
2523 'channel_is_verified': True,
2524 'heatmap': 'count:100',
2525 'timestamp': 1641172509,
2528 { # continuous livestream.
2529 # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
2530 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
2531 'info_dict': {
2532 'id': 'jfKfPfyJRdk',
2533 'ext': 'mp4',
2534 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
2535 'like_count': int,
2536 'uploader': 'Lofi Girl',
2537 'categories': ['Music'],
2538 'concurrent_view_count': int,
2539 'playable_in_embed': True,
2540 'timestamp': 1657627949,
2541 'release_date': '20220712',
2542 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
2543 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
2544 'age_limit': 0,
2545 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
2546 'release_timestamp': 1657641570,
2547 'uploader_url': 'https://www.youtube.com/@LofiGirl',
2548 'channel_follower_count': int,
2549 'channel_is_verified': True,
2550 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
2551 'view_count': int,
2552 'live_status': 'is_live',
2553 'tags': 'count:32',
2554 'channel': 'Lofi Girl',
2555 'availability': 'public',
2556 'upload_date': '20220712',
2557 'uploader_id': '@LofiGirl',
2559 'params': {'skip_download': True},
2560 }, {
2561 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2562 'info_dict': {
2563 'id': 'tjjjtzRLHvA',
2564 'ext': 'mp4',
2565 'title': 'ハッシュタグ無し };if window.ytcsi',
2566 'upload_date': '20220323',
2567 'like_count': int,
2568 'availability': 'unlisted',
2569 'channel': 'Lesmiscore',
2570 'thumbnail': r're:^https?://.*\.jpg',
2571 'age_limit': 0,
2572 'categories': ['Music'],
2573 'view_count': int,
2574 'description': '',
2575 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2576 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2577 'live_status': 'not_live',
2578 'playable_in_embed': True,
2579 'channel_follower_count': int,
2580 'duration': 6,
2581 'tags': [],
2582 'uploader_id': '@lesmiscore',
2583 'uploader': 'Lesmiscore',
2584 'uploader_url': 'https://www.youtube.com/@lesmiscore',
2585 'timestamp': 1648005313,
2587 }, {
2588 # Prefer primary title+description language metadata by default
2589 # Do not prefer translated description if primary is empty
2590 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2591 'info_dict': {
2592 'id': 'el3E4MbxRqQ',
2593 'ext': 'mp4',
2594 'title': 'dlp test video 2 - primary sv no desc',
2595 'description': '',
2596 'channel': 'cole-dlp-test-acc',
2597 'tags': [],
2598 'view_count': int,
2599 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2600 'like_count': int,
2601 'playable_in_embed': True,
2602 'availability': 'unlisted',
2603 'thumbnail': r're:^https?://.*\.jpg',
2604 'age_limit': 0,
2605 'duration': 5,
2606 'live_status': 'not_live',
2607 'upload_date': '20220908',
2608 'categories': ['People & Blogs'],
2609 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2610 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2611 'uploader_id': '@coletdjnz',
2612 'uploader': 'cole-dlp-test-acc',
2613 'timestamp': 1662677394,
2615 'params': {'skip_download': True},
2616 }, {
2617 # Extractor argument: prefer translated title+description
2618 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2619 'info_dict': {
2620 'id': 'gHKT4uU8Zng',
2621 'ext': 'mp4',
2622 'channel': 'cole-dlp-test-acc',
2623 'tags': [],
2624 'duration': 5,
2625 'live_status': 'not_live',
2626 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2627 'upload_date': '20220729',
2628 'view_count': int,
2629 'categories': ['People & Blogs'],
2630 'thumbnail': r're:^https?://.*\.jpg',
2631 'title': 'dlp test video title translated (fr)',
2632 'availability': 'public',
2633 'age_limit': 0,
2634 'description': 'dlp test video description translated (fr)',
2635 'playable_in_embed': True,
2636 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2637 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2638 'uploader_id': '@coletdjnz',
2639 'uploader': 'cole-dlp-test-acc',
2640 'timestamp': 1659073275,
2641 'like_count': int,
2643 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2644 'expected_warnings': [r'Preferring "fr" translated fields'],
2645 }, {
2646 'note': '6 channel audio',
2647 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2648 'only_matching': True,
2649 }, {
2650 'note': 'Multiple HLS formats with same itag',
2651 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2652 'info_dict': {
2653 'id': 'kX3nB4PpJko',
2654 'ext': 'mp4',
2655 'categories': ['Entertainment'],
2656 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2657 'live_status': 'not_live',
2658 'duration': 937,
2659 'channel_follower_count': int,
2660 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2661 'title': 'Last To Take Hand Off Jet, Keeps It!',
2662 'channel': 'MrBeast',
2663 'playable_in_embed': True,
2664 'view_count': int,
2665 'upload_date': '20221112',
2666 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2667 'age_limit': 0,
2668 'availability': 'public',
2669 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2670 'like_count': int,
2671 'tags': [],
2672 'uploader': 'MrBeast',
2673 'uploader_url': 'https://www.youtube.com/@MrBeast',
2674 'uploader_id': '@MrBeast',
2675 'comment_count': int,
2676 'channel_is_verified': True,
2677 'heatmap': 'count:100',
2679 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2680 }, {
2681 'note': 'Audio formats with Dynamic Range Compression',
2682 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2683 'info_dict': {
2684 'id': 'Tq92D6wQ1mg',
2685 'ext': 'webm',
2686 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2687 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2688 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2689 'channel_follower_count': int,
2690 'description': 'md5:17eccca93a786d51bc67646756894066',
2691 'upload_date': '20191228',
2692 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2693 'playable_in_embed': True,
2694 'like_count': int,
2695 'categories': ['Entertainment'],
2696 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2697 'age_limit': 18,
2698 'channel': 'Projekt Melody',
2699 'view_count': int,
2700 'availability': 'needs_auth',
2701 'comment_count': int,
2702 'live_status': 'not_live',
2703 'duration': 106,
2704 'uploader': 'Projekt Melody',
2705 'uploader_id': '@ProjektMelody',
2706 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
2707 'timestamp': 1577508724,
2709 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2712 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2713 'info_dict': {
2714 'id': 'qVv6vCqciTM',
2715 'ext': 'mp4',
2716 'age_limit': 0,
2717 'comment_count': int,
2718 'chapters': 'count:13',
2719 'upload_date': '20221223',
2720 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2721 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2722 'like_count': int,
2723 'release_date': '20221223',
2724 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2725 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2726 'view_count': int,
2727 'playable_in_embed': True,
2728 'duration': 4438,
2729 'availability': 'public',
2730 'channel_follower_count': int,
2731 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2732 'categories': ['Entertainment'],
2733 'live_status': 'was_live',
2734 'release_timestamp': 1671793345,
2735 'channel': 'さなちゃんねる',
2736 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2737 'uploader': 'さなちゃんねる',
2738 'uploader_url': 'https://www.youtube.com/@sana_natori',
2739 'uploader_id': '@sana_natori',
2740 'channel_is_verified': True,
2741 'heatmap': 'count:100',
2742 'timestamp': 1671798112,
2746 # Fallbacks when webpage and web client is unavailable
2747 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2748 'info_dict': {
2749 'id': 'wSSmNUl9Snw',
2750 'ext': 'mp4',
2751 # 'categories': ['Science & Technology'],
2752 'view_count': int,
2753 'chapters': 'count:2',
2754 'channel': 'Scott Manley',
2755 'like_count': int,
2756 'age_limit': 0,
2757 # 'availability': 'public',
2758 'channel_follower_count': int,
2759 'live_status': 'not_live',
2760 'upload_date': '20170831',
2761 'duration': 682,
2762 'tags': 'count:8',
2763 'uploader_url': 'https://www.youtube.com/@scottmanley',
2764 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2765 'uploader': 'Scott Manley',
2766 'uploader_id': '@scottmanley',
2767 'title': 'The Computer Hack That Saved Apollo 14',
2768 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2769 'thumbnail': r're:^https?://.*\.webp',
2770 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2771 'playable_in_embed': True,
2772 'comment_count': int,
2773 'channel_is_verified': True,
2774 'heatmap': 'count:100',
2776 'params': {
2777 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
# Test cases keyed on third-party page URLs rather than YouTube URLs.
# NOTE(review): presumably consumed by the generic webpage-embed test runner,
# which should find the embedded video described by 'info_dict' - confirm.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader': 'Christopher Sykes',
            'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
            'uploader_id': '@ChristopherSykesDocumentaries',
            'heatmap': 'count:100',
            'timestamp': 1211825920,
        },
        'params': {
            'skip_download': True,
        },
    },
]
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    A URL whose query string carries a non-empty ``list`` value is always
    rejected here; every other URL is judged by the parent class.
    """
    # NOTE(review): parse_qs is also imported at module level; the local
    # import is kept on purpose since it may be required when this method is
    # used outside this module's namespace - confirm before removing.
    from ..utils import parse_qs

    list_values = parse_qs(url).get('list', [None])
    if list_values[0]:
        return False
    return super().suitable(url)
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and start with empty player caches."""
    super().__init__(*args, **kwargs)
    # _code_cache: player id -> downloaded player JS source.
    # _player_cache: cache-id tuple -> memoised result (or stored exception).
    self._code_cache, self._player_cache = {}, {}
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment sources to every format flagged ``is_from_start``.

    Only formats whose ``is_from_start`` entry is truthy are processed. Each
    of those dicts is modified in place: ``is_live`` is set, ``fragments`` is
    filled in and the ``is_from_start`` key is deleted. While the stream is
    live, ``fragments`` is a callable taking a context dict and ``protocol``
    becomes ``http_dash_segments_generator``; otherwise ``fragments`` is a
    ``LazyList`` built from the same generator called with an empty context.

    Nothing is returned.
    """
    # Guards refetch_manifest so that only one refetch runs at a time
    lock = threading.Lock()
    # Time of the most recent manifest fetch; compared against `delay` below
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        """Re-download the format list once ``delay`` seconds have elapsed since the last fetch."""
        # Rebinds the enclosing state so that mpd_feed sees the refreshed values
        nonlocal formats, start_time, is_live
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            with lock:
                refetch_manifest(format_id, delay)

            # Look the format up in the (possibly just refreshed) list
            f = next((f for f in formats if f['format_id'] == format_id), None)
            if not f:
                if not is_live:
                    retry.error = f'{video_id}: Video is no longer live'
                else:
                    retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
                continue
            return f['manifest_url'], f['manifest_stream_number'], is_live
        return None

    # This loop walks the list object bound above; a later rebinding of
    # `formats` inside refetch_manifest does not affect the iteration.
    for f in formats:
        f['is_live'] = is_live
        # The last bound argument is False while live, else a snapshot copy of the format
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            f['fragments'] = LazyList(gen({}))
        del f['is_from_start']
2882 def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
2883 FETCH_SPAN, MAX_DURATION = 5, 432000
2885 mpd_url, stream_number, is_live = None, None, True
2887 begin_index = 0
2888 download_start_time = ctx.get('start') or time.time()
2890 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2891 if lack_early_segments:
2892 self.report_warning(bug_reports_message(
2893 'Starting download from the last 120 hours of the live stream since '
2894 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2895 lack_early_segments = True
2897 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2898 fragments, fragment_base_url = None, None
2900 def _extract_sequence_from_mpd(refresh_sequence, immediate):
2901 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2902 # Obtain from MPD's maximum seq value
2903 old_mpd_url = mpd_url
2904 last_error = ctx.pop('last_error', None)
2905 expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
2906 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2907 or (mpd_url, stream_number, False))
2908 if not refresh_sequence:
2909 if expire_fast and not is_live:
2910 return False, last_seq
2911 elif old_mpd_url == mpd_url:
2912 return True, last_seq
2913 if manifestless_orig_fmt:
2914 fmt_info = manifestless_orig_fmt
2915 else:
2916 try:
2917 fmts, _ = self._extract_mpd_formats_and_subtitles(
2918 mpd_url, None, note=False, errnote=False, fatal=False)
2919 except ExtractorError:
2920 fmts = None
2921 if not fmts:
2922 no_fragment_score += 2
2923 return False, last_seq
2924 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2925 fragments = fmt_info['fragments']
2926 fragment_base_url = fmt_info['fragment_base_url']
2927 assert fragment_base_url
2929 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2930 return True, _last_seq
2932 self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
2933 while is_live:
2934 fetch_time = time.time()
2935 if no_fragment_score > 30:
2936 return
2937 if last_segment_url:
2938 # Obtain from "X-Head-Seqnum" header value from each segment
2939 try:
2940 urlh = self._request_webpage(
2941 last_segment_url, None, note=False, errnote=False, fatal=False)
2942 except ExtractorError:
2943 urlh = None
2944 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2945 if last_seq is None:
2946 no_fragment_score += 2
2947 last_segment_url = None
2948 continue
2949 else:
2950 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2951 no_fragment_score += 2
2952 if not should_continue:
2953 continue
2955 if known_idx > last_seq:
2956 last_segment_url = None
2957 continue
2959 last_seq += 1
2961 if begin_index < 0 and known_idx < 0:
2962 # skip from the start when it's negative value
2963 known_idx = last_seq + begin_index
2964 if lack_early_segments:
2965 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2966 try:
2967 for idx in range(known_idx, last_seq):
2968 # do not update sequence here or you'll get skipped some part of it
2969 should_continue, _ = _extract_sequence_from_mpd(False, False)
2970 if not should_continue:
2971 known_idx = idx - 1
2972 raise ExtractorError('breaking out of outer loop')
2973 last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
2974 yield {
2975 'url': last_segment_url,
2976 'fragment_count': last_seq,
2978 if known_idx == last_seq:
2979 no_fragment_score += 5
2980 else:
2981 no_fragment_score = 0
2982 known_idx = last_seq
2983 except ExtractorError:
2984 continue
2986 if manifestless_orig_fmt:
2987 # Stop at the first iteration if running for post-live manifestless;
2988 # fragment count no longer increase since it starts
2989 break
2991 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg dicts, or None.

    ``PLAYER_JS_URL`` is tried first, then any ``jsUrl`` below
    ``WEB_PLAYER_CONTEXT_CONFIGS``. ``webpage`` is accepted but not used.
    """
    candidate_paths = (
        (..., 'PLAYER_JS_URL'),
        (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
    )
    found = traverse_obj(ytcfgs, *candidate_paths, get_all=False, expected_type=str)
    if found:
        return urljoin('https://www.youtube.com', found)
    return None
3001 def _download_player_url(self, video_id, fatal=False):
3002 res = self._download_webpage(
3003 'https://www.youtube.com/iframe_api',
3004 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
3005 if res:
3006 player_version = self._search_regex(
3007 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
3008 if player_version:
3009 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
3011 def _signature_cache_id(self, example_sig):
3012 """ Return a string representation of a signature """
3013 return '.'.join(str(len(part)) for part in example_sig.split('.'))
@classmethod
def _extract_player_info(cls, player_url):
    """Return the player id captured by the first ``_PLAYER_INFO_RE`` pattern matching ``player_url``.

    Raises ExtractorError when none of the patterns matches.
    """
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    # The generator is lazy, so later patterns are not tried after a hit
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError(f'Cannot identify player {player_url!r}')
    return id_m.group('id')
3025 def _load_player(self, video_id, player_url, fatal=True):
3026 player_id = self._extract_player_info(player_url)
3027 if player_id not in self._code_cache:
3028 code = self._download_webpage(
3029 player_url, video_id, fatal=fatal,
3030 note='Downloading player ' + player_id,
3031 errnote=f'Download of {player_url} failed')
3032 if code:
3033 self._code_cache[player_id] = code
3034 return self._code_cache.get(player_id)
3036 def _extract_signature_function(self, video_id, player_url, example_sig):
3037 player_id = self._extract_player_info(player_url)
3039 # Read from filesystem cache
3040 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
3041 assert os.path.basename(func_id) == func_id
3043 self.write_debug(f'Extracting signature function {func_id}')
3044 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
3046 if not cache_spec:
3047 code = self._load_player(video_id, player_url)
3048 if code:
3049 res = self._parse_sig_js(code)
3050 test_string = ''.join(map(chr, range(len(example_sig))))
3051 cache_spec = [ord(c) for c in res(test_string)]
3052 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
3054 return lambda s: ''.join(s[i] for i in cache_spec)
3056 def _print_sig_code(self, func, example_sig):
3057 if not self.get_param('youtube_print_sig_code'):
3058 return
3060 def gen_sig_code(idxs):
3061 def _genslice(start, end, step):
3062 starts = '' if start == 0 else str(start)
3063 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
3064 steps = '' if step == 1 else (':%d' % step)
3065 return f's[{starts}{ends}{steps}]'
3067 step = None
3068 # Quelch pyflakes warnings - start will be set when step is set
3069 start = '(Never used)'
3070 for i, prev in zip(idxs[1:], idxs[:-1]):
3071 if step is not None:
3072 if i - prev == step:
3073 continue
3074 yield _genslice(start, prev, step)
3075 step = None
3076 continue
3077 if i - prev in [-1, 1]:
3078 step = i - prev
3079 start = prev
3080 continue
3081 else:
3082 yield 's[%d]' % prev
3083 if step is None:
3084 yield 's[%d]' % i
3085 else:
3086 yield _genslice(start, i, step)
3088 test_string = ''.join(map(chr, range(len(example_sig))))
3089 cache_res = func(test_string)
3090 cache_spec = [ord(c) for c in cache_res]
3091 expr_code = ' + '.join(gen_sig_code(cache_spec))
3092 signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.')))
3093 code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n'
3094 f' return {expr_code}\n')
3095 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Find the signature function in the player JS and return a Python callable for it.

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its only argument.
    """
    name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
3118 def _cached(self, func, *cache_id):
3119 def inner(*args, **kwargs):
3120 if cache_id not in self._player_cache:
3121 try:
3122 self._player_cache[cache_id] = func(*args, **kwargs)
3123 except ExtractorError as e:
3124 self._player_cache[cache_id] = e
3125 except Exception as e:
3126 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3128 ret = self._player_cache[cache_id]
3129 if isinstance(ret, Exception):
3130 raise ret
3131 return ret
3132 return inner
3134 def _decrypt_signature(self, s, video_id, player_url):
3135 """Turn the encrypted s field into a working signature"""
3136 extract_sig = self._cached(
3137 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
3138 func = extract_sig(video_id, player_url, s)
3139 self._print_sig_code(func, s)
3140 return func(s)
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into a working signature

    The n function code is obtained for the player and run natively first.
    If the native run raises JSInterpreter.Exception, the same code is
    executed through PhantomJS instead.

    Raises ExtractorError when player_url is None or the function code
    cannot be extracted. The JSInterpreter.Exception itself is re-raised
    when the PhantomJS wrapper cannot be set up.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The built function is memoised per player URL
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No fallback could be set up: surface the native failure instead
            raise e
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        # func_code is unpacked as (argument names, function body)
        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
def _extract_n_function_name(self, jscode, player_url=None):
    """Return the name of the n function found in the player JS ``jscode``.

    The call site is searched first. When it is not found, a warning is
    emitted (mentioning ``player_url`` if given) and a generic search for a
    function containing 'enhanced_except_' is used instead. When the call
    site goes through an array element, the name is read from that element
    of the array literal.
    """
    # Examples (with placeholders nfunc, narray, idx):
    # * .get("n"))&&(b=nfunc(b)
    # * .get("n"))&&(b=narray[idx](b)
    # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
    # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
    # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
    funcname, idx = self._search_regex(
        r'''(?x)
        (?:
            \.get\("n"\)\)&&\(b=|
            (?:
                b=String\.fromCharCode\(110\)|
                (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
            ),c=a\.get\(b\)\)&&\(c=|
            \b(?P<var>[a-zA-Z0-9_$]+)=
        )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
        (?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''',
        jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
    if not funcname:
        self.report_warning(join_nonempty(
            'Falling back to generic n function search',
            player_url and f' player = {player_url}', delim='\n'))
        return self._search_regex(
            r'''(?xs)
            ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
            \s*\{(?:(?!};).)+?["']enhanced_except_''',
            jscode, 'Initial JS player n function name', group='name')
    elif not idx:
        # Direct call: the matched identifier is the function itself
        return funcname

    # Indexed call: the matched identifier names an array, so its literal
    # is parsed and the element at idx is returned
    return json.loads(js_to_json(self._search_regex(
        rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
        f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(jsi, player_id, func_code)`` for the player's n function.

    The function code comes from the 'youtube-nsig' cache when an entry is
    present. Otherwise the player JS is loaded, the function is located and
    its code is extracted and stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09')
    if cached_code:
        # The interpreter is built from the cached value itself in this
        # case, not from the player source
        return JSInterpreter(cached_code), player_id, cached_code

    jscode = self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)
    func_name = self._extract_n_function_name(jscode, player_url=player_url)
    fresh_code = jsi.extract_function_code(func_name)

    self.cache.store('youtube-nsig', player_id, fresh_code)
    return jsi, player_id, fresh_code
3227 def _extract_n_function_from_code(self, jsi, func_code):
3228 func = jsi.extract_function_from_code(*func_code)
3230 def extract_nsig(s):
3231 try:
3232 ret = func([s])
3233 except JSInterpreter.Exception:
3234 raise
3235 except Exception as e:
3236 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3238 if ret.startswith('enhanced_except_'):
3239 raise JSInterpreter.Exception('Signature function returned an exception')
3240 return ret
3242 return extract_nsig
3244 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
3246 Extract signatureTimestamp (sts)
3247 Required to tell API what sig/player version is in use.
3249 sts = None
3250 if isinstance(ytcfg, dict):
3251 sts = int_or_none(ytcfg.get('STS'))
3253 if not sts:
3254 # Attempt to extract from player
3255 if player_url is None:
3256 error_msg = 'Cannot extract signature timestamp without player_url.'
3257 if fatal:
3258 raise ExtractorError(error_msg)
3259 self.report_warning(error_msg)
3260 return
3261 code = self._load_player(video_id, player_url, fatal=fatal)
3262 if code:
3263 sts = int_or_none(self._search_regex(
3264 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
3265 'JS player signature timestamp', group='sts', fatal=fatal))
3266 return sts
def _mark_watched(self, video_id, player_responses):
    """Request the playback tracking URLs so that the video counts as watched.

    Two URLs from 'playbackTracking' are requested in order:
    'videostatsPlaybackUrl', then 'videostatsWatchtimeUrl' (labelled
    "fully" watched). A warning is emitted and the method returns as soon
    as one of them is missing. The requests themselves are non-fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # 16 symbols drawn uniformly from the alphabet. The former
        # `randint(0, 256) & 63` had an inclusive upper bound, i.e. 257
        # possible values, which favoured the first symbol.
        cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

        # more consistent results setting it to right before the end
        # A 'len' value that is not a number falls back to the same default
        # as a missing one instead of raising ValueError
        reported_length = float_or_none((qs.get('len') or ['1.5'])[0], default=1.5)
        video_length = [str(reported_length - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': 0,
                'et': video_length,
            })

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url results for the YouTube videos referenced by ``webpage``.

    A watch URL in a ``<link rel="alternate">`` tag is yielded on its own
    and ends the extraction. Otherwise the parent class results come first,
    followed by lazyYT embeds and "YouTube Video Importer" players.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_match in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy_match.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    importer_re = r'''(?x)<div[^>]+
        class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
        data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)'''
    for importer_match in re.finditer(importer_re, webpage):
        # group 3 is the value of data-video_id
        importer_id = importer_match.group(3)
        yield cls.url_result(importer_id, cls, importer_id)
@classmethod
def extract_id(cls, url):
    """Return the video id that ``get_temp_id`` finds in ``url``; raise ExtractorError if it finds none."""
    video_id = cls.get_temp_id(url)
    if video_id:
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
def _extract_chapters_from_json(self, data, duration):
    """Extract chapters from the chaptered player bar found in ``data``.

    The chapter renderers are handed to ``_extract_chapters_helper`` with
    accessors for the start time (milliseconds scaled to seconds) and the
    title.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_start(chapter):
        start_millis = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_millis, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Extract chapters from the macro marker lists of the engagement panels in ``data``.

    The marker lists are tried in order; the first non-empty chapter list
    is returned, or ``[]`` when none of them yields chapters.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        markers = traverse_obj(contents, (..., 'macroMarkersListItemRenderer'))
        chapters = self._extract_chapters_helper(markers, chapter_time, chapter_title, duration)
        if chapters:
            return chapters
    return []
def _extract_heatmap(self, data):
    """Return the heatmap markers found in ``data`` as a list of dicts, or None.

    Each dict is built from one marker: 'start_time' and 'end_time' (both
    divided by 1000) and 'value' taken from 'intensityScoreNormalized'.
    """
    def is_heatmap_mutation(_, mutation):
        markers_list = mutation['payload']['macroMarkersListEntity']['markersList']
        return markers_list['markerType'] == 'MARKER_TYPE_HEATMAP'

    def marker_end_seconds(marker):
        return (int(marker['startMillis']) + int(marker['durationMillis'])) / 1000

    marker_fields = {
        'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {marker_end_seconds},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap = traverse_obj(data, (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations',
        is_heatmap_mutation,
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., marker_fields))
    return heatmap or None
def _extract_comment(self, entities, parent=None):
    """Build a comment dict from the entity payloads in ``entities``.

    Returns None when no comment id is present. ``parent`` defaults to
    'root'. 'timestamp' is derived from the published-time text, which is
    also kept under '_time_text'.
    """
    comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
    comment_id = traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))
    if not comment_id:
        return

    toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
    time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or ''

    comment = {
        'id': comment_id,
        'parent': parent or 'root',
    }
    comment.update(traverse_obj(comment_entity_payload, {
        'text': ('properties', 'content', 'content', {str}),
        'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
        'author_id': ('author', 'channelId', {self.ucid_or_none}),
        'author': ('author', 'displayName', {str}),
        'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
        'author_is_uploader': ('author', 'isCreator', {bool}),
        'author_is_verified': ('author', 'isVerified', {bool}),
        'author_url': ('author', 'channelCommand', 'innertubeCommand', (
            ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
        ), {lambda x: urljoin('https://www.youtube.com', x)}),
    }, get_all=False))

    # Stays None (unknown) when there is no toolbar state for the comment
    if toolbar_entity_payload is None:
        comment['is_favorited'] = None
    else:
        comment['is_favorited'] = toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'
    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    comment['_time_text'] = time_text
    comment['timestamp'] = self._parse_time_text(time_text)
    return comment
def _extract_comment_old(self, comment_renderer, parent=None):
    """Build a comment info dict from a `commentRenderer` style dict.

    Returns None when the renderer has no `commentId`. Optional keys
    (`author_is_uploader`, `is_favorited`, `author_is_verified`, `is_pinned`)
    are only added when the renderer carries the corresponding data.
    """
    comment_id = comment_renderer.get('commentId')
    if not comment_id:
        return None

    info = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
        'parent': parent or 'root',
        # FIXME: non-standard, but we need a way of showing that it is an estimate.
        '_time_text': (time_text := self._get_text(comment_renderer, 'publishedTimeText') or ''),
        # Timestamp is an estimate calculated from the current time and time_text
        'timestamp': self._parse_time_text(time_text),
        'author_url': urljoin(
            'https://www.youtube.com', traverse_obj(comment_renderer, ('authorEndpoint', (
                ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
                expected_type=str, get_all=False)),
    }

    if (is_uploader := traverse_obj(comment_renderer, 'authorIsChannelOwner')) is not None:
        info['author_is_uploader'] = is_uploader

    action_buttons = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if action_buttons is not None:
        info['is_favorited'] = 'creatorHeart' in action_buttons

    author_badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(author_badges, BadgeType.VERIFIED):
        info['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Generator yielding comment dicts for a comment section or a reply thread.

    The function calls itself for reply threads, passing the parent comment id
    as `parent` and sharing the same `tracker` dict so counts persist across
    the recursion. When `parent` is None this is the top-level call.

    Raises `self.CommentsDisabled` when the root data carries a message, this
    is the top-level call and no comment was extracted. Re-raises (or replaces)
    `ExtractorError` from the API request as described inline.
    """

    # First value of an extractor argument, or '' when it is not set
    get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0]

    def extract_header(contents):
        # Reads the estimated comment count and returns the continuation
        # for the requested sort order (None if no item provides one).
        _continuation = None
        for content in contents:
            comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer')
            expected_comment_count = self._get_count(
                comments_header_renderer, 'countText', 'commentsCount')

            if expected_comment_count is not None:
                tracker['est_total'] = expected_comment_count
                self.to_screen(f'Downloading ~{expected_comment_count} comments')
            comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top

            sort_menu_item = try_get(
                comments_header_renderer,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
            sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

            _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
            if not _continuation:
                continue

            sort_text = str_or_none(sort_menu_item.get('title'))
            if not sort_text:
                sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
            self.to_screen(f'Sorting comments by {sort_text.lower()}')
            break
        return _continuation

    def extract_thread(contents, entity_payloads):
        # Yields comment dicts. A bare `yield` (None) is a stop signal:
        # the consuming loop below returns as soon as it sees a falsy entry.
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])

            # old comment format
            if not entity_payloads:
                comment_renderer = get_first(
                    (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})

                comment = self._extract_comment_old(comment_renderer, parent)

            # new comment format
            else:
                view_model = (
                    traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
                    or traverse_obj(content, ('commentViewModel', {dict})))
                comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
                if not comment_keys:
                    continue
                # Select the entity payloads whose key belongs to this comment
                entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys)
                comment = self._extract_comment(entities, parent)
                if comment:
                    comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None

            if not comment:
                continue
            comment_id = comment['id']

            if comment.get('is_pinned'):
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id in tracker['seen_comment_ids']:
                if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                # NOTE(review): when `parent` is falsy the f-string part is empty,
                # so the message contains two consecutive spaces.
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield
            else:
                tracker['seen_comment_ids'].add(comment['id'])

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            comment_replies_renderer = try_get(
                comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

            if comment_replies_renderer:
                tracker['current_page_thread'] += 1
                comment_entries_iter = self._comment_entries(
                    comment_replies_renderer, ytcfg, video_id,
                    parent=comment.get('id'), tracker=tracker)
                # Cap replies by the per-thread limit and by what is left of the overall reply budget
                yield from itertools.islice(comment_entries_iter, min(
                    max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments'])))

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = {
            'running_total': 0,
            'est_total': None,
            'current_page_thread': 0,
            'total_parent_comments': 0,
            'total_reply_comments': 0,
            'seen_comment_ids': set(),
            'pinned_comment_ids': set(),
        }

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # The 'max_comments' argument is padded with '' so that four values can always be unpacked;
    # presumably int_or_none falls back to sys.maxsize for the padding - confirm against its definition.
    # `max_comments` itself is not used in this function.
    max_comments, max_parents, max_replies, max_replies_per_thread, *_ = (
        int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4)

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    # One API request per iteration; stops once no further continuation is found
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num == 0:
            if is_first_continuation:
                note_prefix = 'Downloading comment section API JSON'
            else:
                note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                    tracker['current_page_thread'], comment_prog_str)
        else:
            note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        check_get_keys = None
        if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
            check_get_keys = [[*continuation_items_path, ..., (
                'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if 'incomplete data' in str(e).lower() and parent:
                if self.get_param('ignoreerrors') in (True, 'only_download'):
                    self.report_warning(
                        'Received incomplete data for a comment reply thread and retrying did not help. '
                        'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
                    return
                else:
                    raise ExtractorError(
                        'Incomplete data received for comment reply thread. '
                        'Pass --ignore-errors to ignore and allow rest of comments to download.',
                        expected=True)
            raise
        is_forced_continuation = False
        continuation = None
        mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response of the top-level call is only used for its header
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items, mutations):
                # A falsy entry is the stop signal from extract_thread
                if not entry:
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
3659 @staticmethod
3660 def _generate_comment_continuation(video_id):
3662 Generates initial comment section continuation token from given video id
3664 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3665 return base64.b64encode(token.encode()).decode()
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the 'comment-item-section'
    item section, limited to the first value of the `max_comments` extractor
    argument when that value is set.
    """
    comment_limit = int_or_none(self._configuration_arg('max_comments', [''])[0])

    def iter_comments():
        # Locate the comment section renderer; None is passed on when it is absent
        section = None
        for candidate in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if candidate.get('sectionIdentifier') == 'comment-item-section':
                section = candidate
                break
        yield from self._comment_entries(section, ytcfg, video_id)

    return itertools.islice(iter_comments(), 0, comment_limit)
3678 @staticmethod
3679 def _get_checkok_params():
3680 return {'contentCheckOk': True, 'racyCheckOk': True}
3682 @classmethod
3683 def _generate_player_context(cls, sts=None):
3684 context = {
3685 'html5Preference': 'HTML5_PREF_WANTS',
3687 if sts is not None:
3688 context['signatureTimestamp'] = sts
3689 return {
3690 'playbackContext': {
3691 'contentPlaybackContext': context,
3693 **cls._get_checkok_params(),
@staticmethod
def _is_agegated(player_response):
    """Tell whether the playability status of `player_response` indicates an age gate.

    True when `desktopLegacyAgeGateReason` is truthy, or when the status or
    reason value contains one of the known age-gate markers.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    age_gate_markers = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    observed = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')))
    # NOTE(review): the substring match is case-sensitive - confirm the
    # status/reason values arrive in the casing the markers expect.
    for marker in age_gate_markers:
        for text in observed:
            if marker in text:
                return True
    return False
@staticmethod
def _is_unplayable(player_response):
    """Return True when the playability status of `player_response` is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the 'player' API endpoint for `video_id` on behalf of `client`.

    The signature timestamp is only looked up when `player_url` is given.
    Player params come from the `player_params` extractor argument, falling
    back to the client's `PLAYER_PARAMS` entry in `INNERTUBE_CLIENTS`.
    Returns the response, or None when it is falsy.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    else:
        sts = None
    api_headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    client_key = _split_innertube_client(client)[0]
    fallback_params = traverse_obj(INNERTUBE_CLIENTS, (client_key, 'PLAYER_PARAMS', {str}))
    chosen_params = self._configuration_arg('player_params', [fallback_params], casesense=True)[0]

    query = {'videoId': video_id}
    if chosen_params:
        query['params'] = chosen_params
    query.update(self._generate_player_context(sts))

    client_label = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=api_headers, fatal=True,
        default_client=client,
        note=f'Downloading {client_label} player API JSON',
    )
    return response or None
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into an ordered, de-duplicated client list.

    'default' expands to the default clients and 'all' to every client in
    `INNERTUBE_CLIENTS` whose name does not start with '_' (sorted by
    descending 'priority'). Unsupported names are skipped with a warning.
    Clients listed in `_BROKEN_CLIENTS` are moved to the end. When nothing
    usable was requested the defaults are used. For music URLs the
    `<base>_music` counterpart of each requested client is appended when it exists.
    """
    requested_clients = []
    broken_clients = []
    default = ['ios', 'tv']
    allowed_clients = sorted(
        (client for client in INNERTUBE_CLIENTS if client[:1] != '_'),
        key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
    for client in self._configuration_arg('player_client'):
        if client == 'default':
            requested_clients.extend(default)
        elif client == 'all':
            requested_clients.extend(allowed_clients)
        elif client not in allowed_clients:
            self.report_warning(f'Skipping unsupported client {client}')
        elif client in self._BROKEN_CLIENTS.values():
            broken_clients.append(client)
        else:
            requested_clients.append(client)
    # Force deprioritization of _BROKEN_CLIENTS for format de-duplication
    requested_clients.extend(broken_clients)
    if not requested_clients:
        # Copy so that later appends never touch the defaults list itself
        requested_clients = list(default)

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # Iterate over a snapshot: the list is extended inside the loop, and
        # growing a list while iterating over it is fragile. The result is the
        # same, since an appended '*_music' client has variant 'music' and
        # never triggers a further append.
        for requested_client in tuple(requested_clients):
            _, base_client, variant = _split_innertube_client(requested_client)
            music_client = f'{base_client}_music'
            if variant != 'music' and music_client in INNERTUBE_CLIENTS:
                requested_clients.append(music_client)

    return orderedSet(requested_clients)
def _invalid_player_response(self, pr, video_id):
    """Return the video id found in `pr` when it differs from `video_id`, else None."""
    # YouTube may return a different video player response than expected.
    # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    returned_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    return returned_id if returned_id != video_id else None
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for `video_id` from the webpage and the requested clients.

    `clients` is used as a work queue that may grow while it is processed:
    fallback clients are appended when a response looks age-gated.

    Returns a tuple `(prs, player_url)`: the list of accepted player responses
    and the last resolved player URL (may be None).
    Raises ExtractorError when no player response could be collected.
    """
    initial_pr = ignore_initial_response = None
    if webpage:
        if 'web' in clients:
            experiments = traverse_obj(master_ytcfg, (
                'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...))
            # Only when every id in _POTOKEN_EXPERIMENTS is present is the webpage's response ignored for 'web'
            if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
                self.report_warning(
                    'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response')
                ignore_initial_response = True
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    prs = []
    if initial_pr and not self._invalid_player_response(initial_pr, video_id):
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        prs.append({**initial_pr, 'streamingData': None})

    all_clients = set(clients)
    # Reversed so that clients.pop() below processes them in the requested order
    clients = clients[::-1]

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for client_name in client_names:
            actual_client = _split_innertube_client(client_name)[0]
            if actual_client in INNERTUBE_CLIENTS:
                if actual_client not in all_clients:
                    clients.append(client_name)
                    all_clients.add(actual_client)
                    return

    tried_iframe_fallback = False
    player_url = None
    # Maps client name -> the unexpected video id its response was for
    skipped_clients = {}
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())
        player_ytcfg = {}
        if client == 'web':
            player_ytcfg = self._get_default_ytcfg() if ignore_initial_response else master_ytcfg
        elif 'configs' not in self._configuration_arg('player_skip'):
            player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg

        # A player URL found for an earlier client is reused
        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        # The fallback download of the player URL is attempted at most once per call
        if not player_url and not tried_iframe_fallback and require_js_player:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        pr = initial_pr if client == 'web' and not ignore_initial_response else None
        for retry in self.RetryManager(fatal=False):
            try:
                pr = pr or self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
            except ExtractorError as e:
                self.report_warning(e)
                break
            experiments = traverse_obj(pr, (
                'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK',
                'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...))
            # Discard the response and flag the retry when all poToken experiment ids are present
            if all(x in experiments for x in self._POTOKEN_EXPERIMENTS):
                pr = None
                retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True)
        if not pr:
            continue

        if pr_id := self._invalid_player_response(pr, video_id):
            skipped_clients[client] = pr_id
        elif pr:
            # Save client name for introspection later
            name = short_client_name(client)
            sd = traverse_obj(pr, ('streamingData', {dict})) or {}
            sd[STREAMING_DATA_CLIENT_NAME] = name
            for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                f[STREAMING_DATA_CLIENT_NAME] = name
            prs.append(pr)

        # tv_embedded can work around age-gate and age-verification IF the video is embeddable
        if self._is_agegated(pr) and variant != 'tv_embedded':
            append_client(f'tv_embedded.{base_client}')

        # Unauthenticated users will only get tv_embedded client formats if age-gated
        if self._is_agegated(pr) and not self.is_authenticated:
            self.to_screen(
                f'{video_id}: This video is age-restricted; some formats may be missing '
                f'without authentication. {self._login_hint()}', only_once=True)

        # EU countries require age-verification for accounts to access age-restricted videos
        # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
        # If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded
        embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr)
        if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled):
            self.to_screen(
                f'{video_id}: This video is age-restricted and YouTube is requiring '
                'account age-verification; some formats may be missing', only_once=True)
            # web_creator and mediaconnect can work around the age-verification requirement
            # _producer, _testsuite, & _vr variants can also work around age-verification
            append_client('web_creator', 'mediaconnect')

    if skipped_clients:
        self.report_warning(
            f'Skipping player responses from {"/".join(skipped_clients)} clients '
            f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
        if not prs:
            raise ExtractorError(
                'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
    elif not prs:
        raise ExtractorError('Failed to extract any player response')
    return prs, player_url
3892 def _needs_live_processing(self, live_status, duration):
3893 if (live_status == 'is_live' and self.get_param('live_from_start')
3894 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
3895 return live_status
3897 def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
3898 CHUNK_SIZE = 10 << 20
3899 PREFERRED_LANG_VALUE = 10
3900 original_language = None
3901 itags, stream_ids = collections.defaultdict(set), []
3902 itag_qualities, res_qualities = {}, {0: None}
3903 q = qualities([
3904 # Normally tiny is the smallest video-only formats. But
3905 # audio-only formats with unknown quality may get tagged as tiny
3906 'tiny',
3907 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
3908 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
3910 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
3911 format_types = self._configuration_arg('formats')
3912 all_formats = 'duplicate' in format_types
3913 if self._configuration_arg('include_duplicate_formats'):
3914 all_formats = True
3915 self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
3916 'Use formats=duplicate extractor argument instead')
3918 def build_fragments(f):
3919 return LazyList({
3920 'url': update_url_query(f['url'], {
3921 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}',
3923 } for range_start in range(0, f['filesize'], CHUNK_SIZE))
3925 for fmt in streaming_formats:
3926 if fmt.get('targetDurationSec'):
3927 continue
3929 itag = str_or_none(fmt.get('itag'))
3930 audio_track = fmt.get('audioTrack') or {}
3931 stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
3932 if not all_formats:
3933 if stream_id in stream_ids:
3934 continue
3936 quality = fmt.get('quality')
3937 height = int_or_none(fmt.get('height'))
3938 if quality == 'tiny' or not quality:
3939 quality = fmt.get('audioQuality', '').lower() or quality
3940 # The 3gp format (17) in android client has a quality of "small",
3941 # but is actually worse than other formats
3942 if itag == '17':
3943 quality = 'tiny'
3944 if quality:
3945 if itag:
3946 itag_qualities[itag] = quality
3947 if height:
3948 res_qualities[height] = quality
3950 is_default = audio_track.get('audioIsDefault')
3951 is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
3952 language_code = audio_track.get('id', '').split('.')[0]
3953 if language_code and is_default:
3954 original_language = language_code
3956 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
3957 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
3958 # number of fragment that would subsequently requested with (`&sq=N`)
3959 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
3960 continue
3962 fmt_url = fmt.get('url')
3963 if not fmt_url:
3964 sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
3965 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
3966 encrypted_sig = try_get(sc, lambda x: x['s'][0])
3967 if not all((sc, fmt_url, player_url, encrypted_sig)):
3968 continue
3969 try:
3970 fmt_url += '&{}={}'.format(
3971 traverse_obj(sc, ('sp', -1)) or 'signature',
3972 self._decrypt_signature(encrypted_sig, video_id, player_url),
3974 except ExtractorError as e:
3975 self.report_warning('Signature extraction failed: Some formats may be missing',
3976 video_id=video_id, only_once=True)
3977 self.write_debug(e, only_once=True)
3978 continue
3980 query = parse_qs(fmt_url)
3981 if query.get('n'):
3982 try:
3983 decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
3984 fmt_url = update_url_query(fmt_url, {
3985 'n': decrypt_nsig(query['n'][0], video_id, player_url),
3987 except ExtractorError as e:
3988 phantomjs_hint = ''
3989 if isinstance(e, JSInterpreter.Exception):
3990 phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
3991 f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
3992 if player_url:
3993 self.report_warning(
3994 f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
3995 f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
3996 self.write_debug(e, only_once=True)
3997 else:
3998 self.report_warning(
3999 'Cannot decrypt nsig without player_url: Some formats may be missing',
4000 video_id=video_id, only_once=True)
4001 continue
4003 tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
4004 format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
4005 # Some formats may have much smaller duration than others (possibly damaged during encoding)
4006 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
4007 # Make sure to avoid false positives with small duration differences.
4008 # E.g. __2ABJjxzNo, ySuUZEjARPY
4009 is_damaged = try_call(lambda: format_duration < duration // 2)
4010 if is_damaged:
4011 self.report_warning(
4012 f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
4014 client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
4015 # _BROKEN_CLIENTS return videoplayback URLs that expire after 30 seconds
4016 # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
4017 is_broken = client_name in self._BROKEN_CLIENTS
4018 if is_broken:
4019 self.report_warning(
4020 f'{video_id}: {self._BROKEN_CLIENTS[client_name]} client formats are broken '
4021 'and may yield HTTP Error 403. They will be deprioritized', only_once=True)
4023 name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
4024 fps = int_or_none(fmt.get('fps')) or 0
4025 dct = {
4026 'asr': int_or_none(fmt.get('audioSampleRate')),
4027 'filesize': int_or_none(fmt.get('contentLength')),
4028 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
4029 'format_note': join_nonempty(
4030 join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
4031 name, fmt.get('isDrc') and 'DRC',
4032 try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
4033 try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
4034 is_damaged and 'DAMAGED', is_broken and 'BROKEN',
4035 (self.get_param('verbose') or all_formats) and client_name,
4036 delim=', '),
4037 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
4038 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
4039 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
4040 'audio_channels': fmt.get('audioChannels'),
4041 'height': height,
4042 'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
4043 'has_drm': bool(fmt.get('drmFamilies')),
4044 'tbr': tbr,
4045 'filesize_approx': filesize_from_tbr(tbr, format_duration),
4046 'url': fmt_url,
4047 'width': int_or_none(fmt.get('width')),
4048 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
4049 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
4050 # Strictly de-prioritize broken, damaged and 3gp formats
4051 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
4053 mime_mobj = re.match(
4054 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
4055 if mime_mobj:
4056 dct['ext'] = mimetype2ext(mime_mobj.group(1))
4057 dct.update(parse_codecs(mime_mobj.group(2)))
4058 if itag:
4059 itags[itag].add(('https', dct.get('language')))
4060 stream_ids.append(stream_id)
4061 single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
4062 if single_stream and dct.get('ext'):
4063 dct['container'] = dct['ext'] + '_dash'
4065 if (all_formats or 'dashy' in format_types) and dct['filesize']:
4066 yield {
4067 **dct,
4068 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
4069 'protocol': 'http_dash_segments',
4070 'fragments': build_fragments(dct),
4072 if all_formats or 'dashy' not in format_types:
4073 dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
4074 yield dct
4076 needs_live_processing = self._needs_live_processing(live_status, duration)
4077 skip_bad_formats = 'incomplete' not in format_types
4078 if self._configuration_arg('include_incomplete_formats'):
4079 skip_bad_formats = False
4080 self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
4081 'Use formats=incomplete extractor argument instead')
4083 skip_manifests = set(self._configuration_arg('skip'))
4084 if (not self.get_param('youtube_include_hls_manifest', True)
4085 or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
4086 or needs_live_processing and skip_bad_formats):
4087 skip_manifests.add('hls')
4089 if not self.get_param('youtube_include_dash_manifest', True):
4090 skip_manifests.add('dash')
4091 if self._configuration_arg('include_live_dash'):
4092 self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
4093 'Use formats=incomplete extractor argument instead')
4094 elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
4095 skip_manifests.add('dash')
4097 def process_manifest_format(f, proto, client_name, itag):
4098 key = (proto, f.get('language'))
4099 if not all_formats and key in itags[itag]:
4100 return False
4101 itags[itag].add(key)
4103 if itag and all_formats:
4104 f['format_id'] = f'{itag}-{proto}'
4105 elif any(p != proto for p, _ in itags[itag]):
4106 f['format_id'] = f'{itag}-{proto}'
4107 elif itag:
4108 f['format_id'] = itag
4110 if original_language and f.get('language') == original_language:
4111 f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
4112 f['language_preference'] = PREFERRED_LANG_VALUE
4114 if f.get('source_preference') is None:
4115 f['source_preference'] = -1
4117 if itag in ('616', '235'):
4118 f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
4119 f['source_preference'] += 100
4121 f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
4122 if f['quality'] == -1 and f.get('height'):
4123 f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
4124 if self.get_param('verbose') or all_formats:
4125 f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
4126 if f.get('fps') and f['fps'] <= 1:
4127 del f['fps']
4129 if proto == 'hls' and f.get('has_drm'):
4130 f['has_drm'] = 'maybe'
4131 f['source_preference'] -= 5
4132 return True
4134 subtitles = {}
4135 for sd in streaming_data:
4136 client_name = sd.get(STREAMING_DATA_CLIENT_NAME)
4138 hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
4139 if hls_manifest_url:
4140 fmts, subs = self._extract_m3u8_formats_and_subtitles(
4141 hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
4142 subtitles = self._merge_subtitles(subs, subtitles)
4143 for f in fmts:
4144 if process_manifest_format(f, 'hls', client_name, self._search_regex(
4145 r'/itag/(\d+)', f['url'], 'itag', default=None)):
4146 yield f
4148 dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
4149 if dash_manifest_url:
4150 formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
4151 subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
4152 for f in formats:
4153 if process_manifest_format(f, 'dash', client_name, f['format_id']):
4154 f['filesize'] = int_or_none(self._search_regex(
4155 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
4156 if needs_live_processing:
4157 f['is_from_start'] = True
4159 yield f
4160 yield subtitles
def _extract_storyboard(self, player_responses, duration):
    """Yield one mhtml "storyboard" format per usable level of the storyboard spec.

    The spec is taken from the first player response that provides
    storyboards.playerStoryboardSpecRenderer.spec. It is a '|'-separated string
    whose first field is the URL template (with $L, $N and $M placeholders) and
    whose remaining fields each describe one storyboard level as 8 '#'-separated values.

    Nothing is yielded when there is no usable URL template or when `duration`
    is unknown or not positive, since the frame rate and the per-fragment
    durations are both computed by dividing by/through the duration.
    """
    # Reversed so that the URL template (first field) can be pop()ed off the end
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not (duration and duration > 0):
        # Callers may pass duration=None (e.g. when no length could be determined);
        # dividing by it below would raise and abort the whole extraction
        return
    # Since spec is reversed, entry i corresponds to level (L - i)
    L = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may be only partially filled, so clamp to the remaining time
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Fetch the watch page (unless skipped) and the player responses for a video.

    The watch page download is skipped when 'webpage' is listed in the
    `player_skip` extractor argument; `webpage` is None in that case.
    Returns a tuple (webpage, master_ytcfg, player_responses, player_url).
    """
    webpage = None
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')
    if not skip_webpage:
        player_params = self._configuration_arg('player_params', [None], casesense=True)[0]
        webpage_query = {'bpctr': '9999999999', 'has_verified': '1'}
        if player_params:
            webpage_query['pp'] = player_params
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=webpage_query)

    # Fall back to the default ytcfg when none can be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Determine the live status of the video and extract its formats and subtitles.

    Returns a tuple
    (live_broadcast_details, live_status, streaming_data, formats, subtitles).
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # First matching condition wins; the order of the checks is significant
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The generator yields every format first and the subtitles dict as its final item
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    # If there are no formats that definitely don't have DRM, all have DRM
    if not any(not fmt.get('has_drm') for fmt in formats):
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
def _real_extract(self, url):
    """Extract the info dict for a single video URL.

    Returns one of:
      - a url_result for the trailer, when a playability status carries a trailer video id
      - a playlist_result with one url_transparent entry per feed for multifeed videos
        (unless --no-playlist is given or 'force_singlefeed' was smuggled into the URL)
      - the info dict of the video itself
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict)

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict)

    translated_title = self._get_text(microformats, (..., 'title'))
    video_title = (self._preferred_lang and translated_title
                   or get_first(video_details, 'title')  # primary
                   or translated_title
                   or search_meta(['og:title', 'twitter:title', 'title']))
    translated_description = self._get_text(microformats, (..., 'description'))
    original_description = get_first(video_details, 'shortDescription')
    video_description = (
        self._preferred_lang and translated_description
        # If original description is blank, it will be an empty string.
        # Do not prefer translated description in this case.
        or original_description if original_description is not None else translated_description)

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Unquote should take place before split on comma (,) since textual
                # fields may contain comma as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    title += f' ({feed_title})'
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '{}watch?v={}'.format(base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format(
                    ', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
                or int_or_none(get_first(microformats, 'lengthSeconds'))
                or parse_duration(search_meta('duration')) or None)

    # NB: The subtitles returned alongside the formats are used as the automatic captions container
    live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
    if live_status == 'post_live':
        self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # reason may be None here; join_nonempty avoids concatenating onto None
            reason = join_nonempty(reason, subreason, delim='. ')
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes does not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3',
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position of the thumbnail name in thumbnail_names; unknown names rank last
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = self.ucid_or_none(str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId')))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    needs_live_processing = self._needs_live_processing(live_status, duration)

    def is_bad_format(fmt):
        if needs_live_processing and not fmt.get('is_from_start'):
            return True
        elif (live_status == 'is_live' and needs_live_processing != 'is_live'
                and fmt.get('protocol') == 'http_dash_segments'):
            return True

    for fmt in filter(is_bad_format, formats):
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')

    if needs_live_processing:
        self._prepare_live_from_start_formats(
            formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

    formats.extend(self._extract_storyboard(player_responses, duration))

    channel_handle = self.handle_from_url(owner_profile_url)

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'live_status': live_status,
        'release_timestamp': live_start_time,
        '_format_sort_fields': (  # source_preference is lower for potentially damaged formats
            'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
    }

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Appends one entry per subtitle format to container[lang_code]
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            base_url = caption_track.get('baseUrl')
            if not base_url:
                continue
            # Only parse the URL once it is known to be present
            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, base_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr' and trans_code != 'und':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                if lang_code == f'a-{orig_trans_code}':
                    # Set audio language based on original subtitles
                    for f in formats:
                        if f.get('acodec') != 'none' and not f.get('language'):
                            f['language'] = orig_trans_code
                    # Add an "-orig" label to the original language so that it can be distinguished.
                    # The subs are returned without "-orig" as well for compatibility
                    process_language(
                        automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, base_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

        info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

    # Start/end time from the URL; the fragment takes precedence over the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
        # XXX: Causes catastrophic backtracking if description has "·"
        # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
        # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
        # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
        # NB: The pattern is in verbose mode, so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?=(?P<track>[^\n·]+))(?P=track)·
                (?=(?P<artist>[^\n]+))(?P=artist)\n+
                (?=(?P<album>[^\n]+))(?P=album)\n
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*
                    (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
                )?.+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artists': ([a] if (a := mobj.group('clean_artist'))
                            else [a.strip() for a in mobj.group('artist').split('·')]),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
        if not traverse_obj(initial_data, 'contents'):
            self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
            initial_data = None
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query, check_get_keys='contents',
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
    ), expected_type=self._get_count, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
                         else 'youtube_live_chat_replay'),
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

        info['heatmap'] = self._extract_heatmap(initial_data)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, ('toggleButtonRenderer', ...),
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
            for tbr in tbrs:
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break

        info['like_count'] = traverse_obj(vpir, (
            'videoActions', 'menuRenderer', 'topLevelButtons', ...,
            'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
            'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
            'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)

        vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
        if vcr:
            vc = self._get_count(vcr, 'viewCount')
            # Upcoming premieres with waiting count are treated as live here
            if vcr.get('isLive'):
                info['concurrent_view_count'] = vc
            elif info.get('view_count') is None:
                info['view_count'] = vc

    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        if not channel_handle:
            channel_handle = self.handle_from_url(
                traverse_obj(vor, (
                    ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
                    (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
                    {str}), get_all=False))

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # Album/Artist/Song rows are only used when no row has a divider line
        multiple_songs = False
        for row in rows:
            if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                multiple_songs = True
                break
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artists'] = [mrr_contents_text] if mrr_contents_text else None
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text
        owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
        if self._has_badge(owner_badges, BadgeType.VERIFIED):
            info['channel_is_verified'] = True

    info.update({
        'uploader': info.get('channel'),
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
    })

    # We only want timestamp IF it has time precision AND a timezone
    # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
    timestamp = (
        parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT)
        or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT)
    )
    upload_date = (
        dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
        (
            unified_strdate(get_first(microformats, 'uploadDate'))
            or unified_strdate(search_meta('uploadDate'))
        ))

    # In the case we cannot get the timestamp:
    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    if not upload_date or (not timestamp and live_status in ('not_live', None)):
        # this should be in UTC, as configured in the cookie/client context
        upload_date = strftime_or_none(
            self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date

    info['upload_date'] = upload_date
    info['timestamp'] = timestamp

    if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
        # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
        upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
        if upload_datetime >= datetime_from_str('today-2days'):
            for fmt in info['formats']:
                if fmt.get('protocol') == 'm3u8_native':
                    fmt['__needs_testing'] = True

    # Mirror music metadata into the generic fields
    for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    badges = self._extract_badges(traverse_obj(vpir, 'badges'))

    is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                  or get_first(video_details, 'isPrivate', expected_type=bool))

    info['availability'] = (
        'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        else self._availability(
            is_private=is_private,
            needs_premium=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
                or False if initial_data and is_private is not None else None),
            needs_subscription=(
                self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
                or False if initial_data and is_private is not None else None),
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else (
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or get_first(microformats, 'isUnlisted', expected_type=bool))))

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
4790 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator for extraction methods taking (self, url, smuggled_data).

    The wrapper unsmuggles the incoming URL, flags music URLs in the smuggled
    data, calls `func`, and then re-smuggles the remaining data into the
    returned 'url'/'url_transparent' result and each of its entries.
    """
    youtube_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        # Only URL-type results are re-extracted, so only they can carry smuggled data
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                url_parts = urllib.parse.urlparse(info['url'])
                if url_parts.netloc in youtube_hosts:
                    # The music host encodes the flag, so it need not be smuggled as well
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(url_parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = func(self, url, smuggled_data)
        if not smuggled_data:
            return info_dict
        _smuggle(info_dict, smuggled_data)
        if info_dict.get('entries'):
            # Each entry gets its own copy since _smuggle may pop from the dict
            info_dict['entries'] = (
                _smuggle(entry, smuggled_data.copy()) for entry in info_dict['entries'])
        return info_dict
    return wrapper
4818 @staticmethod
4819 def _extract_basic_item_renderer(item):
4820 # Modified from _extract_grid_item_renderer
4821 known_basic_renderers = (
4822 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
4824 for key, renderer in item.items():
4825 if not isinstance(renderer, dict):
4826 continue
4827 elif key in known_basic_renderers:
4828 return renderer
4829 elif key.startswith('grid') and key.endswith('Renderer'):
4830 return renderer
def _extract_channel_renderer(self, renderer):
    """Build a 'url'-type result (handled by YoutubeTabIE) for the channel in `renderer`.

    `renderer` must contain a 'channelId' key.
    """
    channel_id = self.ucid_or_none(renderer['channelId'])
    title = self._get_text(renderer, 'title')
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)

    endpoint_url = traverse_obj(renderer, (
        'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                               ('browseEndpoint', 'canonicalBaseUrl')),
        {str}), get_all=False)
    channel_handle = (
        self.handle_from_url(endpoint_url)
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        or self.handle_or_none(self._get_text(renderer, 'subscriberCountText')))

    # videoCountText may be the subscriber count, so it is only trusted as the
    # video count when subscriberCountText itself parses as a count
    playlist_count = None
    if self._get_count(renderer, 'subscriberCountText') is not None:
        playlist_count = self._get_count(renderer, 'videoCountText')

    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))

    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
        # However, in feed/channels this is set correctly to the subscriber count
        'channel_follower_count': traverse_obj(
            renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'playlist_count': playlist_count,
        'description': self._get_text(renderer, 'descriptionSnippet'),
        'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None,
    }
def _grid_entries(self, grid_renderer):
    """Yield an entry for every recognised item in ``grid_renderer['items']``.

    Each item is resolved to a playlist, video or channel result; anything else
    falls back to its web endpoint URL if one of the YouTube extractors accepts it.
    """
    for item in grid_renderer['items']:
        renderer = self._extract_basic_item_renderer(item) if isinstance(item, dict) else None
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        playlist_id = renderer.get('playlistId')
        if playlist_id:
            # playlist
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={playlist_id}',
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            endpoint_path = try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str)
            ep_url = urljoin('https://www.youtube.com/', endpoint_path)
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)), None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
def _music_reponsive_list_entry(self, renderer):
    """Turn a music list-item renderer into a url result.

    Tried in order: a playlist item video, a watch endpoint carrying a playlist
    (with or without a video), then a browse endpoint. Returns None if none apply.
    """
    item_video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if item_video_id:
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(
            f'https://music.youtube.com/watch?v={item_video_id}',
            ie=YoutubeIE.ie_key(), video_id=item_video_id, title=title)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        if video_id:
            playlist_url = f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}'
        else:
            playlist_url = f'https://music.youtube.com/playlist?list={playlist_id}'
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if not browse_id:
        return None
    return self.url_result(
        f'https://music.youtube.com/browse/{browse_id}',
        ie=YoutubeTabIE.ie_key(), video_id=browse_id)
4929 def _shelf_entries_from_content(self, shelf_renderer):
4930 content = shelf_renderer.get('content')
4931 if not isinstance(content, dict):
4932 return
4933 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4934 if renderer:
4935 # TODO: add support for nested playlists so each shelf is processed
4936 # as separate playlist
4937 # TODO: this includes only first N items
4938 yield from self._grid_entries(renderer)
4939 renderer = content.get('horizontalListRenderer')
4940 if renderer:
4941 # TODO: handle case
4942 pass
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url result for the shelf endpoint (if any), then the shelf's inline entries.

    With `skip_channels` set, a shelf whose URL contains '/channels?' yields nothing at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    # Skip links to other channels. Note that checking for
    # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
    # will not work
    if shelf_url and skip_channels and '/channels?' in shelf_url:
        return
    if shelf_url:
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4960 def _playlist_entries(self, video_list_renderer):
4961 for content in video_list_renderer['contents']:
4962 if not isinstance(content, dict):
4963 continue
4964 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4965 if not isinstance(renderer, dict):
4966 continue
4967 video_id = renderer.get('videoId')
4968 if not video_id:
4969 continue
4970 yield self._extract_video(renderer)
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one entry (a video or a playlist link) for a rich item renderer."""
    supported_renderers = ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')
    renderer = traverse_obj(
        rich_grid_renderer, ('content', supported_renderers), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
    elif renderer.get('playlistId'):
        playlist_id = renderer['playlistId']
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))
4988 def _video_entry(self, video_renderer):
4989 video_id = video_renderer.get('videoId')
4990 if video_id:
4991 return self._extract_video(video_renderer)
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a tab url result for a hashtag tile, or None if it has no usable tap URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
def _post_thread_entries(self, post_thread_renderer):
    """Yield the videos and playlists referenced by a community post.

    Sources, in order: the attached video, the attached playlist, then video
    links inside the post text (excluding a link to the attached video itself).
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    entry = self._extract_video(attached_video) if video_id else None
    if entry:
        yield entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not (ep_url and YoutubeIE.suitable(ep_url)):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # The attached video has already been yielded above
        if ep_video_id != video_id:
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
5036 def _post_thread_continuation_entries(self, post_thread_continuation):
5037 contents = post_thread_continuation.get('contents')
5038 if not isinstance(contents, list):
5039 return
5040 for content in contents:
5041 renderer = content.get('backstagePostThreadRenderer')
5042 if isinstance(renderer, dict):
5043 yield from self._post_thread_entries(renderer)
5044 continue
5045 renderer = content.get('videoRenderer')
5046 if isinstance(renderer, dict):
5047 yield self._video_entry(renderer)
# Disabled helper: the definition below sits inside a bare raw-string literal,
# so it is never executed and `_rich_grid_entries` is not actually defined.
r'''  # unused
def _rich_grid_entries(self, contents):
    for content in contents:
        video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
        if video_renderer:
            entry = self._video_entry(video_renderer)
            if entry:
                yield entry
'''
def _report_history_entries(self, renderer):
    """Yield a video url result for every link found in the text cells of a report-history table."""
    link_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, link_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield the entries found under ``parent_renderer['contents']``.

    `continuation_list` is an out-parameter: it is reset to ``[None]`` and its
    single slot is overwritten in place with the continuation found while
    iterating, so the caller must read it only after consuming the generator.
    """
    # Handlers for the items found inside a section/shelf. The table does not
    # depend on the item being processed, so it is built once up front.
    section_item_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
        'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
    }

    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]
    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)

        if not section:
            # Not a section/shelf: only rich items and the report history table are handled
            rich_item = content.get('richItemRenderer')
            if rich_item:
                yield from self._rich_entries(rich_item)
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue
            # Only the first known renderer key of each item is processed
            for key, renderer in section_item.items():
                handler = section_item_handlers.get(key)
                if handler is None:
                    continue
                for entry in handler(renderer):
                    if entry:
                        yield entry
                continuation_list[0] = self._extract_continuation(renderer)
                break

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield all entries of `tab`, following continuations page by page.

    The first page comes from ``tab['content']``; every further page is
    requested through `_extract_response` with the current continuation as
    the query. Iteration stops when there is no continuation, a continuation
    token repeats, the response is empty, or the response contains no known
    renderer.
    """
    continuation_list = [None]
    # The lambda looks up `continuation_list` when it is called, so it always writes
    # into whichever list the name is bound to at that moment (it is rebound below)
    extract_entries = lambda x: self._extract_entries(x, continuation_list)
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]
    # Tokens already requested; seeing one again means the feed is looping
    seen_continuations = set()
    for page_num in itertools.count(1):
        if not continuation:
            break
        continuation_token = continuation.get('continuation')
        if continuation_token is not None and continuation_token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(continuation_token)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        # Maps the renderer key of the first continuation item to (handler, parent_key).
        # With a parent_key the item list is wrapped as {parent_key: items} before being
        # handed to the handler; with None the value is passed through unchanged
        known_renderers = {
            'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
            'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
            'playlistVideoListContinuation': (self._playlist_entries, None),
            'gridContinuation': (self._grid_entries, None),
            'itemSectionContinuation': (self._post_thread_continuation_entries, None),
            'sectionListContinuation': (extract_entries, None),  # for feeds
        }

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems',
        ), 'continuationContents', get_all=False)
        # First element when `continuation_items` is a list, otherwise the value itself
        # (if it is a dict); used only to detect which renderer type the page holds
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            # NOTE(review): when parent_key is None the handler receives `continuation_items`
            # itself rather than `continuation_item[key]` - confirm this is the intended shape
            video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
            # Fresh list for this page; `extract_entries` picks up the rebinding (see above).
            # Handlers that never touch it leave the slot None, which triggers the fallback below
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        # No known renderer in this page: nothing more can be extracted
        if not video_items_renderer:
            break
5188 @staticmethod
5189 def _extract_selected_tab(tabs, fatal=True):
5190 for tab_renderer in tabs:
5191 if tab_renderer.get('selected'):
5192 return tab_renderer
5193 if fatal:
5194 raise ExtractorError('Unable to find selected tab')
@staticmethod
def _extract_tab_renderers(response):
    """Collect the 'tabRenderer' / 'expandableTabRenderer' dicts of a two-column browse response."""
    tab_renderer_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tab_renderer_path, expected_type=dict)
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Return a playlist result for the selected tab of `tabs`.

    The playlist title is the page title with the selected tab's 'title' and
    'expandedText' appended (each as ' - <value>' when present).
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)
    selected_tab = self._extract_selected_tab(tabs)
    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
def _extract_metadata_from_tabs(self, item_id, data):
    """Build the playlist-level info dict for a channel tab or playlist page.

    @param item_id: fallback 'id' (and fallback title), replaced by the channel ID
                    when the channel metadata provides one
    @param data: page response to read the metadata from
    @returns: dict with id, title, availability, thumbnails, tags, counts and
              channel/uploader fields
    """
    info = {'id': item_id}

    # Channel pages carry a channelMetadataRenderer; otherwise fall back to the playlist metadata
    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if metadata_renderer:
        channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
                                  ('channelUrl', {self.ucid_from_url}))
        info.update({
            'channel': metadata_renderer.get('title'),
            'channel_id': channel_id,
        })
        if info['channel_id']:
            info['id'] = info['channel_id']
    else:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)

    # pageHeaderViewModel slow rollout began April 2024
    page_header_view_model = traverse_obj(data, (
        'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}))

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _get_uncropped(url):
        return url_or_none((url or '').split('=')[0] + '=s0')

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    if avatar_thumbnails:
        uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url'])
        if uncropped_avatar:
            avatar_thumbnails.append({
                'url': uncropped_avatar,
                'id': 'avatar_uncropped',
                'preference': 1,
            })

    # Banners from the header renderers, else from the page header view model
    channel_banners = (
        self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
        or self._extract_thumbnails(
            page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources'))
    for banner in channel_banners:
        banner['preference'] = -10

    if channel_banners:
        uncropped_banner = _get_uncropped(channel_banners[0]['url'])
        if uncropped_banner:
            channel_banners.append({
                'url': uncropped_banner,
                'id': 'banner_uncropped',
                'preference': -5,
            })

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        # Title preference: metadata title, then hashtag header text, then the ID
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': (
            self._get_count(data, ('header', ..., 'subscriberCountText'))
            or traverse_obj(page_header_view_model, (
                'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
                lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        # 'keywords' is a single string that is split with shell-like quoting rules
        'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
                 or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    channel_handle = (
        traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
        or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))

    if channel_handle:
        info.update({
            'uploader_id': channel_handle,
            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        })

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True
    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_unix = self._parse_time_text(
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(last_updated_unix)

    # View count: playlist stats, then the playlist header, then the channel "about" metadata
    info['view_count'] = self._get_count(playlist_stats, 1)
    if info['view_count'] is None:  # 0 is allowed
        info['view_count'] = self._get_count(playlist_header_renderer, 'viewCountText')
    if info['view_count'] is None:
        info['view_count'] = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))

    info['playlist_count'] = self._get_count(playlist_stats, 0)
    if info['playlist_count'] is None:  # 0 is allowed
        info['playlist_count'] = self._get_count(playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))

    # No channel ID from the channel metadata: derive the channel fields from the playlist owner.
    # This branch also guarantees that info['channel'] exists for the final update below
    if not info.get('channel_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            # Reduce "by <name> and N others" to "<name>"; any other text is used as-is
            'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
            'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))),
        })

    info.update({
        'uploader': info['channel'],
        'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
        'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
    })

    return info
def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
    """Yield the videos of an inline (watch page) playlist, fetching further pages via the 'next' endpoint.

    Every fetched page may repeat videos already seen; only the videos after the
    last yielded ID are emitted. Iteration ends when a page has no videos, adds
    nothing new, or the response carries no playlist.
    """
    last_id = response = None
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Resume right after the last video yielded from the previous page (0 if it is absent)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        yield from videos[start:]
        last_id = videos[-1]['id']
        # The last item is not necessarily a playlistPanelVideoRenderer with a watch
        # endpoint; fall back to an empty dict so the defaults below apply
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'],
            dict) or {}
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=self._extract_visitor_data(response, data, ytcfg))
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D',
        }
        response = self._extract_response(
            item_id=f'{playlist_id} page {page_num}',
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents',
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if not playlist:
            # The response has no playlist to continue from; stop instead of
            # failing on the next _playlist_entries() call
            return
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for an inline playlist found on a watch page.

    When the playlist links to a different, viewable playlist page, that URL is
    delegated to the tab extractor; otherwise the inline playlist is extracted here.
    """
    playlist_id = playlist.get('playlistId') or item_id
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_path = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, endpoint_path)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id) is not None

    if not playlist_url or playlist_url == url or is_known_unviewable:
        return self.playlist_result(
            self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
            playlist_id=playlist_id, playlist_title=title)

    return self.url_result(
        playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
        video_title=title)
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    Three sources are consulted: badges of the primary sidebar renderer, the
    'privacy' value of the playlist header, and the selected entry of the
    privacy dropdown. 'public' is returned directly if any of them says so;
    otherwise the result comes from self._availability().

    @param data: response
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    # NOTE(review): in is_private / is_unlisted below, `or` binds tighter than the
    # conditional expression, i.e. they parse as
    #     (badge or header == X) if header is not None else (icon == Y if icon is not None else ...)
    # so the badge check is only consulted when the header 'privacy' value is present.
    # Confirm whether `badge or (...)` was intended.
    return (
        'public' if (
            self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC')
        else self._availability(
            is_private=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
            is_unlisted=(
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
                else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None
                else microformats_is_unlisted if microformats_is_unlisted is not None else None),
            # A missing badge is reported as None (unknown) rather than False
            needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
            needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
            needs_auth=False))
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy `info_renderer` value (of `expected_type`) among the playlist sidebar items.

    Returns None when the sidebar is missing or no item has such a value.
    """
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(entry, lambda x: x[info_renderer], expected_type)
        for entry in sidebar_items)
    return next(filter(None, candidates), None)
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns None without making a request when `data` has neither playlist
    metadata nor a playlist header. The request itself is non-fatal.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None

    account_syncid = self._extract_account_syncid(ytcfg, data)
    visitor_data = self._extract_visitor_data(data, ytcfg)
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={'params': 'wgYCCAA=', 'browseId': f'VL{item_id}'},
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
@functools.cached_property
def skip_webpage(self):
    """Whether 'webpage' is among the `skip` extractor arguments of the tab extractor."""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
def _extract_webpage(self, url, item_id, fatal=True):
    """Download `url` and parse its initial data, retrying on transient failures.

    Returns a ``(webpage, data)`` tuple; either element may be None when the
    download or parsing did not succeed and `fatal` is False.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            # Retry network errors, except HTTP 403/429 responses
            should_retry = isinstance(cause, network_exceptions) and (
                not isinstance(cause, HTTPError) or cause.status not in (403, 429))
            if should_retry:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
            retry.error = ExtractorError('Incomplete yt initial data received')
            data = None

    return webpage, data
5505 def _report_playlist_authcheck(self, ytcfg, fatal=True):
5506 """Use if failed to extract ytcfg (and data) from initial webpage"""
5507 if not ytcfg and self.is_authenticated:
5508 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
5509 if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
5510 raise ExtractorError(
5511 f'{msg}. If you are not downloading private content, or '
5512 'your cookies are only for the first account and channel,'
5513 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
5514 expected=True)
5515 self.report_warning(msg, only_once=True)
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for `url`, from the webpage if possible, else from the API.

    The webpage is skipped entirely when `skip_webpage` is set. A redirect to
    the home page raises ExtractorError when `fatal`, else only warns.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)

        # Reject webpage data if redirected to home page without explicitly requesting
        tab_renderers = self._extract_tab_renderers(data)
        selected_tab = self._extract_selected_tab(tab_renderers, fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)

    if not data:
        # No usable webpage data: resolve the URL through the API instead
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve `url` through 'navigation/resolve_url' and fetch the endpoint it points to.

    A 'browseEndpoint' is fetched via 'browse' and a 'watchEndpoint' via 'next'.
    If neither is present, raise ExtractorError when `fatal`, else warn and return None.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for ep_key, ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
# Default `params` value for search requests: `_search_results` falls back to it when
# called without an explicit `params` argument. A falsy value means no 'params' field is sent.
_SEARCH_PARAMS = None
def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield the entries of a search for `query`, following continuations until none is left.

    `params` defaults to `_SEARCH_PARAMS`; a falsy value sends no 'params' field.
    """
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Locations of the result list in the different response layouts
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # A response is acceptable if it has any of the top-level keys used above
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # Merge the continuation of the previous page (if any) into the request body
        data.update(continuation_list[0] or {})
        visitor_data = self._extract_visitor_data(search)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=visitor_data, default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
        if not continuation_list[0]:
            break
5590 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5591 IE_DESC = 'YouTube Tabs'
5592 _VALID_URL = r'''(?x:
5593 https?://
5594 (?!consent\.)(?:\w+\.)?
5596 youtube(?:kids)?\.com|
5597 {invidious}
5600 (?P<channel_type>channel|c|user|browse)/|
5601 (?P<not_channel>
5602 feed/|hashtag/|
5603 (?:playlist|watch)\?.*?\blist=
5605 (?!(?:{reserved_names})\b) # Direct URLs
5607 (?P<id>[^/?\#&]+)
5608 )'''.format(
5609 reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES,
5610 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5612 IE_NAME = 'youtube:tab'
5614 _TESTS = [{
5615 'note': 'playlists, multipage',
5616 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5617 'playlist_mincount': 94,
5618 'info_dict': {
5619 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5620 'title': 'Igor Kleiner Ph.D. - Playlists',
5621 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5622 'uploader': 'Igor Kleiner Ph.D.',
5623 'uploader_id': '@IgorDataScience',
5624 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5625 'channel': 'Igor Kleiner Ph.D.',
5626 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5627 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5628 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5629 'channel_follower_count': int,
5631 }, {
5632 'note': 'playlists, multipage, different order',
5633 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5634 'playlist_mincount': 94,
5635 'info_dict': {
5636 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5637 'title': 'Igor Kleiner Ph.D. - Playlists',
5638 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5639 'uploader': 'Igor Kleiner Ph.D.',
5640 'uploader_id': '@IgorDataScience',
5641 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5642 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5643 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5644 'channel': 'Igor Kleiner Ph.D.',
5645 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5646 'channel_follower_count': int,
5648 }, {
5649 'note': 'playlists, series',
5650 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5651 'playlist_mincount': 5,
5652 'info_dict': {
5653 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5654 'title': '3Blue1Brown - Playlists',
5655 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5656 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5657 'channel': '3Blue1Brown',
5658 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5659 'uploader_id': '@3blue1brown',
5660 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5661 'uploader': '3Blue1Brown',
5662 'tags': ['Mathematics'],
5663 'channel_follower_count': int,
5664 'channel_is_verified': True,
5666 }, {
5667 'note': 'playlists, singlepage',
5668 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5669 'playlist_mincount': 4,
5670 'info_dict': {
5671 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5672 'title': 'ThirstForScience - Playlists',
5673 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5674 'uploader': 'ThirstForScience',
5675 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5676 'uploader_id': '@ThirstForScience',
5677 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5678 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5679 'tags': 'count:12',
5680 'channel': 'ThirstForScience',
5681 'channel_follower_count': int,
5683 }, {
5684 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5685 'only_matching': True,
5686 }, {
5687 'note': 'basic, single video playlist',
5688 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5689 'info_dict': {
5690 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5691 'title': 'youtube-dl public playlist',
5692 'description': '',
5693 'tags': [],
5694 'view_count': int,
5695 'modified_date': '20201130',
5696 'channel': 'Sergey M.',
5697 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5698 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5699 'availability': 'public',
5700 'uploader': 'Sergey M.',
5701 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5702 'uploader_id': '@sergeym.6173',
5704 'playlist_count': 1,
5705 }, {
5706 'note': 'empty playlist',
5707 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5708 'info_dict': {
5709 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5710 'title': 'youtube-dl empty playlist',
5711 'tags': [],
5712 'channel': 'Sergey M.',
5713 'description': '',
5714 'modified_date': '20230921',
5715 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5716 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5717 'availability': 'unlisted',
5718 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5719 'uploader_id': '@sergeym.6173',
5720 'uploader': 'Sergey M.',
5722 'playlist_count': 0,
5723 }, {
5724 'note': 'Home tab',
5725 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5726 'info_dict': {
5727 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5728 'title': 'lex will - Home',
5729 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5730 'uploader': 'lex will',
5731 'uploader_id': '@lexwill718',
5732 'channel': 'lex will',
5733 'tags': ['bible', 'history', 'prophesy'],
5734 'uploader_url': 'https://www.youtube.com/@lexwill718',
5735 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5736 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5737 'channel_follower_count': int,
5739 'playlist_mincount': 2,
5740 }, {
5741 'note': 'Videos tab',
5742 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5743 'info_dict': {
5744 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5745 'title': 'lex will - Videos',
5746 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5747 'uploader': 'lex will',
5748 'uploader_id': '@lexwill718',
5749 'tags': ['bible', 'history', 'prophesy'],
5750 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5751 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5752 'uploader_url': 'https://www.youtube.com/@lexwill718',
5753 'channel': 'lex will',
5754 'channel_follower_count': int,
5756 'playlist_mincount': 975,
5757 }, {
5758 'note': 'Videos tab, sorted by popular',
5759 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5760 'info_dict': {
5761 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5762 'title': 'lex will - Videos',
5763 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5764 'uploader': 'lex will',
5765 'uploader_id': '@lexwill718',
5766 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5767 'uploader_url': 'https://www.youtube.com/@lexwill718',
5768 'channel': 'lex will',
5769 'tags': ['bible', 'history', 'prophesy'],
5770 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5771 'channel_follower_count': int,
5773 'playlist_mincount': 199,
5774 }, {
5775 'note': 'Playlists tab',
5776 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5777 'info_dict': {
5778 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5779 'title': 'lex will - Playlists',
5780 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5781 'uploader': 'lex will',
5782 'uploader_id': '@lexwill718',
5783 'uploader_url': 'https://www.youtube.com/@lexwill718',
5784 'channel': 'lex will',
5785 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5786 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5787 'tags': ['bible', 'history', 'prophesy'],
5788 'channel_follower_count': int,
5790 'playlist_mincount': 17,
5791 }, {
5792 'note': 'Community tab',
5793 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5794 'info_dict': {
5795 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5796 'title': 'lex will - Community',
5797 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5798 'channel': 'lex will',
5799 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5800 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5801 'tags': ['bible', 'history', 'prophesy'],
5802 'channel_follower_count': int,
5803 'uploader_url': 'https://www.youtube.com/@lexwill718',
5804 'uploader_id': '@lexwill718',
5805 'uploader': 'lex will',
5807 'playlist_mincount': 18,
5808 }, {
5809 'note': 'Channels tab',
5810 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5811 'info_dict': {
5812 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5813 'title': 'lex will - Channels',
5814 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5815 'channel': 'lex will',
5816 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5817 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5818 'tags': ['bible', 'history', 'prophesy'],
5819 'channel_follower_count': int,
5820 'uploader_url': 'https://www.youtube.com/@lexwill718',
5821 'uploader_id': '@lexwill718',
5822 'uploader': 'lex will',
5824 'playlist_mincount': 12,
5825 }, {
5826 'note': 'Search tab',
5827 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5828 'playlist_mincount': 40,
5829 'info_dict': {
5830 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5831 'title': '3Blue1Brown - Search - linear algebra',
5832 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5833 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5834 'tags': ['Mathematics'],
5835 'channel': '3Blue1Brown',
5836 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5837 'channel_follower_count': int,
5838 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5839 'uploader_id': '@3blue1brown',
5840 'uploader': '3Blue1Brown',
5841 'channel_is_verified': True,
5843 }, {
5844 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5845 'only_matching': True,
5846 }, {
5847 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5848 'only_matching': True,
5849 }, {
5850 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5851 'only_matching': True,
5852 }, {
5853 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5854 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5855 'info_dict': {
5856 'title': '29C3: Not my department',
5857 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5858 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5859 'tags': [],
5860 'view_count': int,
5861 'modified_date': '20150605',
5862 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5863 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
5864 'channel': 'Christiaan008',
5865 'availability': 'public',
5866 'uploader_id': '@ChRiStIaAn008',
5867 'uploader': 'Christiaan008',
5868 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
5870 'playlist_count': 96,
5871 }, {
5872 'note': 'Large playlist',
5873 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5874 'info_dict': {
5875 'title': 'Uploads from Cauchemar',
5876 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5877 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
5878 'tags': [],
5879 'modified_date': r're:\d{8}',
5880 'channel': 'Cauchemar',
5881 'view_count': int,
5882 'description': '',
5883 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5884 'availability': 'public',
5885 'uploader_id': '@Cauchemar89',
5886 'uploader': 'Cauchemar',
5887 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
5889 'playlist_mincount': 1123,
5890 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5891 }, {
5892 'note': 'even larger playlist, 8832 videos',
5893 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5894 'only_matching': True,
5895 }, {
5896 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5897 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5898 'info_dict': {
5899 'title': 'Uploads from Interstellar Movie',
5900 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5901 'tags': [],
5902 'view_count': int,
5903 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5904 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
5905 'channel': 'Interstellar Movie',
5906 'description': '',
5907 'modified_date': r're:\d{8}',
5908 'availability': 'public',
5909 'uploader_id': '@InterstellarMovie',
5910 'uploader': 'Interstellar Movie',
5911 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
5913 'playlist_mincount': 21,
5914 }, {
5915 'note': 'Playlist with "show unavailable videos" button',
5916 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5917 'info_dict': {
5918 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5919 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5920 'view_count': int,
5921 'channel': 'Phim Siêu Nhân Nhật Bản',
5922 'tags': [],
5923 'description': '',
5924 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5925 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5926 'modified_date': r're:\d{8}',
5927 'availability': 'public',
5928 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5929 'uploader_id': '@phimsieunhannhatban',
5930 'uploader': 'Phim Siêu Nhân Nhật Bản',
5932 'playlist_mincount': 200,
5933 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5934 }, {
5935 'note': 'Playlist with unavailable videos in page 7',
5936 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5937 'info_dict': {
5938 'title': 'Uploads from BlankTV',
5939 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5940 'channel': 'BlankTV',
5941 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
5942 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5943 'view_count': int,
5944 'tags': [],
5945 'modified_date': r're:\d{8}',
5946 'description': '',
5947 'availability': 'public',
5948 'uploader_id': '@blanktv',
5949 'uploader': 'BlankTV',
5950 'uploader_url': 'https://www.youtube.com/@blanktv',
5952 'playlist_mincount': 1000,
5953 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5954 }, {
5955 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5956 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5957 'info_dict': {
5958 'title': 'Data Analysis with Dr Mike Pound',
5959 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5960 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5961 'tags': [],
5962 'view_count': int,
5963 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5964 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
5965 'channel': 'Computerphile',
5966 'availability': 'public',
5967 'modified_date': '20190712',
5968 'uploader_id': '@Computerphile',
5969 'uploader': 'Computerphile',
5970 'uploader_url': 'https://www.youtube.com/@Computerphile',
5972 'playlist_mincount': 11,
5973 }, {
5974 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5975 'only_matching': True,
5976 }, {
5977 'note': 'Playlist URL that does not actually serve a playlist',
5978 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5979 'info_dict': {
5980 'id': 'FqZTN594JQw',
5981 'ext': 'webm',
5982 'title': "Smiley's People 01 detective, Adventure Series, Action",
5983 'upload_date': '20150526',
5984 'license': 'Standard YouTube License',
5985 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5986 'categories': ['People & Blogs'],
5987 'tags': list,
5988 'view_count': int,
5989 'like_count': int,
5991 'params': {
5992 'skip_download': True,
5994 'skip': 'This video is not available.',
5995 'add_ie': [YoutubeIE.ie_key()],
5996 }, {
5997 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5998 'only_matching': True,
5999 }, {
6000 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
6001 'only_matching': True,
6002 }, {
6003 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
6004 'info_dict': {
6005 'id': 'hGkQjiJLjWQ', # This will keep changing
6006 'ext': 'mp4',
6007 'title': str,
6008 'upload_date': r're:\d{8}',
6009 'description': str,
6010 'categories': ['News & Politics'],
6011 'tags': list,
6012 'like_count': int,
6013 'release_timestamp': int,
6014 'channel': 'Sky News',
6015 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
6016 'age_limit': 0,
6017 'view_count': int,
6018 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
6019 'playable_in_embed': True,
6020 'release_date': r're:\d+',
6021 'availability': 'public',
6022 'live_status': 'is_live',
6023 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6024 'channel_follower_count': int,
6025 'concurrent_view_count': int,
6026 'uploader_url': 'https://www.youtube.com/@SkyNews',
6027 'uploader_id': '@SkyNews',
6028 'uploader': 'Sky News',
6029 'channel_is_verified': True,
6031 'params': {
6032 'skip_download': True,
6034 'expected_warnings': ['Ignoring subtitle tracks found in '],
6035 }, {
6036 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
6037 'info_dict': {
6038 'id': 'a48o2S1cPoo',
6039 'ext': 'mp4',
6040 'title': 'The Young Turks - Live Main Show',
6041 'upload_date': '20150715',
6042 'license': 'Standard YouTube License',
6043 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
6044 'categories': ['News & Politics'],
6045 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
6046 'like_count': int,
6048 'params': {
6049 'skip_download': True,
6051 'only_matching': True,
6052 }, {
6053 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
6054 'only_matching': True,
6055 }, {
6056 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
6057 'only_matching': True,
6058 }, {
6059 'note': 'A channel that is not live. Should raise error',
6060 'url': 'https://www.youtube.com/user/numberphile/live',
6061 'only_matching': True,
6062 }, {
6063 'url': 'https://www.youtube.com/feed/trending',
6064 'only_matching': True,
6065 }, {
6066 'url': 'https://www.youtube.com/feed/library',
6067 'only_matching': True,
6068 }, {
6069 'url': 'https://www.youtube.com/feed/history',
6070 'only_matching': True,
6071 }, {
6072 'url': 'https://www.youtube.com/feed/subscriptions',
6073 'only_matching': True,
6074 }, {
6075 'url': 'https://www.youtube.com/feed/watch_later',
6076 'only_matching': True,
6077 }, {
6078 'note': 'Recommended - redirects to home page.',
6079 'url': 'https://www.youtube.com/feed/recommended',
6080 'only_matching': True,
6081 }, {
6082 'note': 'inline playlist with not always working continuations',
6083 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
6084 'only_matching': True,
6085 }, {
6086 'url': 'https://www.youtube.com/course',
6087 'only_matching': True,
6088 }, {
6089 'url': 'https://www.youtube.com/zsecurity',
6090 'only_matching': True,
6091 }, {
6092 'url': 'http://www.youtube.com/NASAgovVideo/videos',
6093 'only_matching': True,
6094 }, {
6095 'url': 'https://www.youtube.com/TheYoungTurks/live',
6096 'only_matching': True,
6097 }, {
6098 'url': 'https://www.youtube.com/hashtag/cctv9',
6099 'info_dict': {
6100 'id': 'cctv9',
6101 'title': 'cctv9 - All',
6102 'tags': [],
6104 'playlist_mincount': 300, # not consistent but should be over 300
6105 }, {
6106 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
6107 'only_matching': True,
6108 }, {
6109 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
6110 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6111 'only_matching': True,
6112 }, {
6113 'note': '/browse/ should redirect to /channel/',
6114 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
6115 'only_matching': True,
6116 }, {
6117 'note': 'VLPL, should redirect to playlist?list=PL...',
6118 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6119 'info_dict': {
6120 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6121 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
6122 'title': 'NCS : All Releases 💿',
6123 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
6124 'modified_date': r're:\d{8}',
6125 'view_count': int,
6126 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
6127 'tags': [],
6128 'channel': 'NoCopyrightSounds',
6129 'availability': 'public',
6130 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
6131 'uploader': 'NoCopyrightSounds',
6132 'uploader_id': '@NoCopyrightSounds',
6134 'playlist_mincount': 166,
6135 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
6136 }, {
6137 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
6138 'note': 'Topic, should redirect to playlist?list=UU...',
6139 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6140 'info_dict': {
6141 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6142 'title': 'Uploads from Royalty Free Music - Topic',
6143 'tags': [],
6144 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6145 'channel': 'Royalty Free Music - Topic',
6146 'view_count': int,
6147 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6148 'modified_date': r're:\d{8}',
6149 'description': '',
6150 'availability': 'public',
6151 'uploader': 'Royalty Free Music - Topic',
6153 'playlist_mincount': 101,
6154 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6155 }, {
6156 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
6157 # Treat as a general feed
6158 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
6159 'info_dict': {
6160 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
6161 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
6162 'tags': [],
6164 'playlist_mincount': 9,
6165 }, {
6166 'note': 'Youtube music Album',
6167 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
6168 'info_dict': {
6169 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
6170 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
6171 'tags': [],
6172 'view_count': int,
6173 'description': '',
6174 'availability': 'unlisted',
6175 'modified_date': r're:\d{8}',
6177 'playlist_count': 50,
6178 'expected_warnings': ['YouTube Music is not directly supported'],
6179 }, {
6180 'note': 'unlisted single video playlist',
6181 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6182 'info_dict': {
6183 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6184 'title': 'yt-dlp unlisted playlist test',
6185 'availability': 'unlisted',
6186 'tags': [],
6187 'modified_date': '20220418',
6188 'channel': 'colethedj',
6189 'view_count': int,
6190 'description': '',
6191 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6192 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
6193 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6194 'uploader_id': '@colethedj1894',
6195 'uploader': 'colethedj',
6197 'playlist': [{
6198 'info_dict': {
6199 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6200 'id': 'BaW_jenozKc',
6201 '_type': 'url',
6202 'ie_key': 'Youtube',
6203 'duration': 10,
6204 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6205 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6206 'view_count': int,
6207 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6208 'channel': 'Philipp Hagemeister',
6209 'uploader_id': '@PhilippHagemeister',
6210 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6211 'uploader': 'Philipp Hagemeister',
6214 'playlist_count': 1,
6215 'params': {'extract_flat': True},
6216 }, {
6217 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6218 'url': 'https://www.youtube.com/feed/recommended',
6219 'info_dict': {
6220 'id': 'recommended',
6221 'title': 'recommended',
6222 'tags': [],
6224 'playlist_mincount': 50,
6225 'params': {
6226 'skip_download': True,
6227 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6229 }, {
6230 'note': 'API Fallback: /videos tab, sorted by oldest first',
6231 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6232 'info_dict': {
6233 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6234 'title': 'Cody\'sLab - Videos',
6235 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
6236 'channel': 'Cody\'sLab',
6237 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6238 'tags': [],
6239 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6240 'channel_follower_count': int,
6242 'playlist_mincount': 650,
6243 'params': {
6244 'skip_download': True,
6245 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6247 'skip': 'Query for sorting no longer works',
6248 }, {
6249 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6250 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6251 'info_dict': {
6252 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6253 'title': 'Uploads from Royalty Free Music - Topic',
6254 'modified_date': r're:\d{8}',
6255 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6256 'description': '',
6257 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6258 'tags': [],
6259 'channel': 'Royalty Free Music - Topic',
6260 'view_count': int,
6261 'availability': 'public',
6262 'uploader': 'Royalty Free Music - Topic',
6264 'playlist_mincount': 101,
6265 'params': {
6266 'skip_download': True,
6267 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6269 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6270 }, {
6271 'note': 'non-standard redirect to regional channel',
6272 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6273 'only_matching': True,
6274 }, {
6275 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6276 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6277 'info_dict': {
6278 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6279 'modified_date': '20220407',
6280 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6281 'tags': [],
6282 'availability': 'unlisted',
6283 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6284 'channel': 'pukkandan',
6285 'description': 'Test for collaborative playlist',
6286 'title': 'yt-dlp test - collaborative playlist',
6287 'view_count': int,
6288 'uploader_url': 'https://www.youtube.com/@pukkandan',
6289 'uploader_id': '@pukkandan',
6290 'uploader': 'pukkandan',
6292 'playlist_mincount': 2,
6293 }, {
6294 'note': 'translated tab name',
6295 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6296 'info_dict': {
6297 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6298 'tags': [],
6299 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6300 'description': 'test description',
6301 'title': 'cole-dlp-test-acc - 再生リスト',
6302 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6303 'channel': 'cole-dlp-test-acc',
6304 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6305 'uploader_id': '@coletdjnz',
6306 'uploader': 'cole-dlp-test-acc',
6308 'playlist_mincount': 1,
6309 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6310 'expected_warnings': ['Preferring "ja"'],
6311 }, {
6312 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6313 'note': 'preferred lang set with playlist with translated video titles',
6314 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6315 'info_dict': {
6316 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6317 'tags': [],
6318 'view_count': int,
6319 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6320 'channel': 'cole-dlp-test-acc',
6321 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6322 'description': 'test',
6323 'title': 'dlp test playlist',
6324 'availability': 'public',
6325 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6326 'uploader_id': '@coletdjnz',
6327 'uploader': 'cole-dlp-test-acc',
6329 'playlist_mincount': 1,
6330 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6331 'expected_warnings': ['Preferring "ja"'],
6332 }, {
6333 # shorts audio pivot for 2GtVksBMYFM.
6334 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6335 'info_dict': {
6336 'id': 'sfv_audio_pivot',
6337 'title': 'sfv_audio_pivot',
6338 'tags': [],
6340 'playlist_mincount': 50,
6342 }, {
6343 # Channel with a real live tab (not to be mistaken with streams tab)
6344 # Do not treat like it should redirect to live stream
6345 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6346 'info_dict': {
6347 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6348 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6349 'tags': [],
6351 'playlist_mincount': 20,
6352 }, {
6353 # Tab name is not the same as tab id
6354 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6355 'info_dict': {
6356 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6357 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6358 'tags': [],
6360 'playlist_mincount': 8,
6361 }, {
6362 # Home tab id is literally home. Not to get mistaken with featured
6363 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6364 'info_dict': {
6365 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6366 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6367 'tags': [],
6369 'playlist_mincount': 8,
6370 }, {
6371 # Should get three playlists for videos, shorts and streams tabs
6372 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6373 'info_dict': {
6374 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6375 'title': 'Polka Ch. 尾丸ポルカ',
6376 'channel_follower_count': int,
6377 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6378 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6379 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
6380 'channel': 'Polka Ch. 尾丸ポルカ',
6381 'tags': 'count:35',
6382 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6383 'uploader': 'Polka Ch. 尾丸ポルカ',
6384 'uploader_id': '@OmaruPolka',
6385 'channel_is_verified': True,
6387 'playlist_count': 3,
6388 }, {
6389 # Shorts tab with channel with handle
6390 # TODO: fix channel description
6391 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6392 'info_dict': {
6393 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6394 'title': 'Not Just Bikes - Shorts',
6395 'tags': 'count:10',
6396 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
6397 'description': 'md5:5e82545b3a041345927a92d0585df247',
6398 'channel_follower_count': int,
6399 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
6400 'channel': 'Not Just Bikes',
6401 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6402 'uploader': 'Not Just Bikes',
6403 'uploader_id': '@NotJustBikes',
6404 'channel_is_verified': True,
6406 'playlist_mincount': 10,
6407 }, {
6408 # Streams tab
6409 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6410 'info_dict': {
6411 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6412 'title': '中村悠一 - Live',
6413 'tags': 'count:7',
6414 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6415 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
6416 'channel': '中村悠一',
6417 'channel_follower_count': int,
6418 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
6419 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6420 'uploader_id': '@Yuichi-Nakamura',
6421 'uploader': '中村悠一',
6423 'playlist_mincount': 60,
6424 }, {
6425 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6426 # See test_youtube_lists
6427 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6428 'only_matching': True,
6429 }, {
6430 # No uploads and no UCID given. Should fail with no uploads error
6431 # See test_youtube_lists
6432 'url': 'https://www.youtube.com/news',
6433 'only_matching': True,
6434 }, {
6435 # No videos tab but has a shorts tab
6436 'url': 'https://www.youtube.com/c/TKFShorts',
6437 'info_dict': {
6438 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6439 'title': 'Shorts Break - Shorts',
6440 'tags': 'count:48',
6441 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6442 'channel': 'Shorts Break',
6443 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
6444 'channel_follower_count': int,
6445 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
6446 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6447 'uploader': 'Shorts Break',
6448 'uploader_id': '@ShortsBreak_Official',
6450 'playlist_mincount': 30,
6451 }, {
6452 # Trending Now Tab. tab id is empty
6453 'url': 'https://www.youtube.com/feed/trending',
6454 'info_dict': {
6455 'id': 'trending',
6456 'title': 'trending - Now',
6457 'tags': [],
6459 'playlist_mincount': 30,
6460 }, {
6461 # Trending Gaming Tab. tab id is empty
6462 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6463 'info_dict': {
6464 'id': 'trending',
6465 'title': 'trending - Gaming',
6466 'tags': [],
6468 'playlist_mincount': 30,
6469 }, {
6470 # Shorts url result in shorts tab
6471 # TODO: Fix channel id extraction
6472 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6473 'info_dict': {
6474 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6475 'title': 'cole-dlp-test-acc - Shorts',
6476 'channel': 'cole-dlp-test-acc',
6477 'description': 'test description',
6478 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6479 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6480 'tags': [],
6481 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6482 'uploader_id': '@coletdjnz',
6483 'uploader': 'cole-dlp-test-acc',
6485 'playlist': [{
6486 'info_dict': {
6487 # Channel data is not currently available for short renderers (as of 2023-03-01)
6488 '_type': 'url',
6489 'ie_key': 'Youtube',
6490 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6491 'id': 'sSM9J5YH_60',
6492 'title': 'SHORT short',
6493 'view_count': int,
6494 'thumbnails': list,
6497 'params': {'extract_flat': True},
6498 }, {
6499 # Live video status should be extracted
6500 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6501 'info_dict': {
6502 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6503 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
6504 'tags': [],
6506 'playlist': [{
6507 'info_dict': {
6508 '_type': 'url',
6509 'ie_key': 'Youtube',
6510 'url': 'startswith:https://www.youtube.com/watch?v=',
6511 'id': str,
6512 'title': str,
6513 'live_status': 'is_live',
6514 'channel_id': str,
6515 'channel_url': str,
6516 'concurrent_view_count': int,
6517 'channel': str,
6518 'uploader': str,
6519 'uploader_url': str,
6520 'uploader_id': str,
6521 'channel_is_verified': bool, # this will keep changing
6524 'params': {'extract_flat': True, 'playlist_items': '1'},
6525 'playlist_mincount': 1,
6526 }, {
6527 # Channel renderer metadata. Contains number of videos on the channel
6528 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6529 'info_dict': {
6530 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6531 'title': 'cole-dlp-test-acc - Channels',
6532 'channel': 'cole-dlp-test-acc',
6533 'description': 'test description',
6534 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6535 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6536 'tags': [],
6537 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6538 'uploader_id': '@coletdjnz',
6539 'uploader': 'cole-dlp-test-acc',
6541 'playlist': [{
6542 'info_dict': {
6543 '_type': 'url',
6544 'ie_key': 'YoutubeTab',
6545 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6546 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6547 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6548 'title': 'PewDiePie',
6549 'channel': 'PewDiePie',
6550 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6551 'thumbnails': list,
6552 'channel_follower_count': int,
6553 'playlist_count': int,
6554 'uploader': 'PewDiePie',
6555 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6556 'uploader_id': '@PewDiePie',
6557 'channel_is_verified': True,
6560 'params': {'extract_flat': True},
6561 }, {
6562 'url': 'https://www.youtube.com/@3blue1brown/about',
6563 'info_dict': {
6564 'id': '@3blue1brown',
6565 'tags': ['Mathematics'],
6566 'title': '3Blue1Brown',
6567 'channel_follower_count': int,
6568 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6569 'channel': '3Blue1Brown',
6570 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6571 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
6572 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6573 'uploader_id': '@3blue1brown',
6574 'uploader': '3Blue1Brown',
6575 'channel_is_verified': True,
6577 'playlist_count': 0,
6578 }, {
6579 # Podcasts tab, with rich entry playlistRenderers
6580 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6581 'info_dict': {
6582 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6583 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6584 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6585 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6586 'title': '99 Percent Invisible - Podcasts',
6587 'uploader': '99 Percent Invisible',
6588 'channel_follower_count': int,
6589 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6590 'tags': [],
6591 'channel': '99 Percent Invisible',
6592 'uploader_id': '@99percentinvisiblepodcast',
6594 'playlist_count': 0,
6595 }, {
6596 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6597 'url': 'https://www.youtube.com/@AHimitsu/releases',
6598 'info_dict': {
6599 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6600 'channel': 'A Himitsu',
6601 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6602 'title': 'A Himitsu - Releases',
6603 'uploader_id': '@AHimitsu',
6604 'uploader': 'A Himitsu',
6605 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6606 'tags': 'count:12',
6607 'description': 'I make music',
6608 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6609 'channel_follower_count': int,
6610 'channel_is_verified': True,
6612 'playlist_mincount': 10,
6613 }, {
6614 # Playlist with only shorts, shown as reel renderers
6615 # FIXME: future: YouTube currently doesn't give continuation for this,
6616 # may do in future.
6617 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6618 'info_dict': {
6619 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6620 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6621 'view_count': int,
6622 'uploader_id': '@BangyShorts',
6623 'description': '',
6624 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6625 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6626 'channel': 'Bangy Shorts',
6627 'uploader': 'Bangy Shorts',
6628 'tags': [],
6629 'availability': 'public',
6630 'modified_date': r're:\d{8}',
6631 'title': 'Uploads from Bangy Shorts',
6633 'playlist_mincount': 100,
6634 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6635 }, {
6636 'note': 'Tags containing spaces',
6637 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6638 'playlist_count': 3,
6639 'info_dict': {
6640 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6641 'channel': 'Markiplier',
6642 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6643 'title': 'Markiplier',
6644 'channel_follower_count': int,
6645 'description': 'md5:0c010910558658824402809750dc5d97',
6646 'uploader_id': '@markiplier',
6647 'uploader_url': 'https://www.youtube.com/@markiplier',
6648 'uploader': 'Markiplier',
6649 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6650 'channel_is_verified': True,
6651 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6652 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6653 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
6654 'mark fischbach'],
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle `url`.

    URLs that YoutubeIE considers suitable are always rejected here;
    everything else is decided by the parent class' `suitable()`.
    """
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
# Splits a URL into three named parts: `pre` (whatever _VALID_URL matches),
# an optional `tab` path segment (a "/" followed by characters other than
# "?", "#" and "/") and `post` (the rest of the string).
# The conditional group `(?(not_channel)|...)` matches nothing when the
# `not_channel` group took part in the match, so `tab` can only be captured
# when `not_channel` did not match.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6664 def _get_url_mobj(self, url):
6665 mobj = self._URL_RE.match(url).groupdict()
6666 mobj.update((k, '') for k, v in mobj.items() if v is None)
6667 return mobj
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` tuple for a tab renderer dict.

    `tab_name` is the lower-cased `title` of the tab ('' when missing).
    `tab_id` is taken, in order of preference, from:
      1. the tab path segment of the tab's endpoint URL (resolved against
         `base_url`),
      2. the tab's `tabIdentifier`,
      3. the tab name itself, as a last resort.

    A tuple is returned on every path (``('', '')`` when nothing could be
    determined), because callers in this class index and unpack the result.
    """
    tab_name = (tab.get('title') or '').lower()
    tab_url = urljoin(base_url, traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))

    # [1:] drops the leading "/" captured by the `tab` group
    tab_id = tab_url and self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)
    if tab_id:
        return {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    return {
        'home': 'featured',
        'live': 'streams',
    }.get(tab_name, tab_name), tab_name
6691 def _has_tab(self, tabs, tab_id):
6692 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
6694 def _empty_playlist(self, item_id, data):
6695 return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Extract a tab, channel or playlist page.

    Depending on the URL and on the downloaded page data this returns one of:
      * a `url_result` redirect (YouTube Music playlists/albums/browse URLs,
        `watch?list=...` URLs without a video id, a single video, or a
        conditional redirect announced by the page data),
      * an empty playlist (e.g. the `/about` pseudo-tab),
      * a single playlist extracted from the page's tabs,
      * a playlist made of several per-tab playlists (videos/streams/shorts),
      * an inline playlist taken from a watch page.

    Raises ExtractorError when a watch URL has neither video nor playlist id,
    when the requested tab does not exist, or when the page cannot be
    recognized; raises UserNotLive when `/live` was requested but a
    different tab was selected.
    """
    item_id = self._match_id(url)
    # Force the www.youtube.com host, whatever host the input URL used
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` includes its leading "/", hence the [1:]
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # No tab requested: start from the channel's /videos tab
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list'))
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of additional tabs to extract alongside the selected one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        # /about is no longer a tab
        if original_tab_id == 'about':
            return self._empty_playlist(item_id, data)

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    return self._empty_playlist(item_id, data)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    # `data` still refers to the channel page here, since the assignment above did not complete
                    return self._empty_playlist(item_id, data)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Re-read the tab renderers: `data` may have been replaced (or set to None) above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        uploads_url = 'the Uploads (UU) playlist URL'
        # Only suggest a concrete URL when a UC... channel id is known
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: fall back to a single video, preferring the id found in the page data
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
6854 class YoutubePlaylistIE(InfoExtractor):
6855 IE_DESC = 'YouTube playlists'
6856 _VALID_URL = r'''(?x)(?:
6857 (?:https?://)?
6858 (?:\w+\.)?
6861 youtube(?:kids)?\.com|
6862 {invidious}
6864 /.*?\?.*?\blist=
6866 (?P<id>{playlist_id})
6867 )'''.format(
6868 playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
6869 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
6871 IE_NAME = 'youtube:playlist'
6872 _TESTS = [{
6873 'note': 'issue #673',
6874 'url': 'PLBB231211A4F62143',
6875 'info_dict': {
6876 'title': '[OLD]Team Fortress 2 (Class-based LP)',
6877 'id': 'PLBB231211A4F62143',
6878 'uploader': 'Wickman',
6879 'uploader_id': '@WickmanVT',
6880 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
6881 'view_count': int,
6882 'uploader_url': 'https://www.youtube.com/@WickmanVT',
6883 'modified_date': r're:\d{8}',
6884 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
6885 'channel': 'Wickman',
6886 'tags': [],
6887 'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
6888 'availability': 'public',
6890 'playlist_mincount': 29,
6891 }, {
6892 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
6893 'info_dict': {
6894 'title': 'YDL_safe_search',
6895 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
6897 'playlist_count': 2,
6898 'skip': 'This playlist is private',
6899 }, {
6900 'note': 'embedded',
6901 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
6902 'playlist_count': 4,
6903 'info_dict': {
6904 'title': 'JODA15',
6905 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
6906 'uploader': 'milan',
6907 'uploader_id': '@milan5503',
6908 'description': '',
6909 'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
6910 'tags': [],
6911 'modified_date': '20140919',
6912 'view_count': int,
6913 'channel': 'milan',
6914 'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
6915 'uploader_url': 'https://www.youtube.com/@milan5503',
6916 'availability': 'public',
6918 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
6919 }, {
6920 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
6921 'playlist_mincount': 455,
6922 'info_dict': {
6923 'title': '2018 Chinese New Singles (11/6 updated)',
6924 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
6925 'uploader': 'LBK',
6926 'uploader_id': '@music_king',
6927 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
6928 'channel': 'LBK',
6929 'view_count': int,
6930 'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
6931 'tags': [],
6932 'uploader_url': 'https://www.youtube.com/@music_king',
6933 'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
6934 'modified_date': r're:\d{8}',
6935 'availability': 'public',
6937 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6938 }, {
6939 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
6940 'only_matching': True,
6941 }, {
6942 # music album playlist
6943 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
6944 'only_matching': True,
@classmethod
def suitable(cls, url):
    """Return whether this extractor should handle `url`.

    The URL is rejected when YoutubeTabIE already accepts it, or when its
    query string carries a non-empty `v` parameter. Otherwise the decision
    is left to the parent class' `suitable()`.
    """
    if YoutubeTabIE.suitable(url):
        return False
    # `parse_qs` is already imported from ..utils at module level, so no
    # function-scope import is needed here
    qs = parse_qs(url)
    if qs.get('v', [None])[0]:
        return False
    return super().suitable(url)
def _real_extract(self, url):
    """Rewrite the input into a /playlist URL and delegate to YoutubeTabIE.

    The query string of the input URL is carried over when there is one;
    otherwise `list=<playlist id>` is used. Music URLs are tagged through
    smuggled data.
    """
    playlist_id = self._match_id(url)
    # Must be evaluated on the original URL, before it is rewritten below
    from_music = YoutubeBaseInfoExtractor.is_music_url(url)

    query = parse_qs(url) or {'list': playlist_id}
    target = update_url_query('https://www.youtube.com/playlist', query)
    if from_music:
        target = smuggle_url(target, {'is_music_url': True})
    return self.url_result(target, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn a youtu.be link carrying `list=` into the equivalent
        /watch URL and hand it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
class YoutubeLivestreamEmbedIE(InfoExtractor):
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect a live_stream embed to the channel's /live URL (YoutubeTabIE)."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect `ytuser:<name>` to the matching /user/ URL (YoutubeTabIE)."""
        username = self._match_id(url)
        user_page = f'https://www.youtube.com/user/{username}'
        return self.url_result(user_page, YoutubeTabIE, username)
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect the keyword to the `LL` playlist URL, handled by YoutubeTabIE."""
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield one entry per usable notification found in `response`.

        `continuation_list` is a one-element list used as an out-parameter:
        slot 0 is reset to None, then set to the last
        `continuationItemRenderer` seen among the items (if any).
        """
        popup_items_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer',
            'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        appended_items_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(
            response, popup_items_path, appended_items_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_page = item.get('continuationItemRenderer')
            if next_page:
                continuation_list[0] = next_page

    def _extract_notification_renderer(self, notification):
        """Convert a notification renderer into a `url`-type result dict.

        Notifications with a watch endpoint point at the video. Otherwise the
        browse endpoint must provide both a channel id and a post id, and
        the result points at the channel's community post. Returns None when
        neither form applies.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification,
            ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)

        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The timestamp is only filled in when the `approximate_date` extractor argument is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        target_ie = YoutubeIE if video_id else YoutubeTabIE
        return {
            '_type': 'url',
            'url': url,
            'ie_key': target_ie.ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from successive notification-menu pages.

        Paging stops as soon as a page does not provide a continuation.
        """
        pending = [None]  # slot 0 holds the continuation renderer for the next page
        response = None
        page = 0
        while True:
            page += 1
            ctoken = traverse_obj(
                pending, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'),
                expected_type=str)
            # Visitor data comes from the previous response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, pending)
            if not pending[0]:
                return

    def _real_extract(self, url):
        """Return the notifications as a playlist with id and title 'notifications'."""
        display_id = 'notifications'
        if self.skip_webpage:
            ytcfg = {}
        else:
            ytcfg = self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Declarative search extractor: this class only sets attributes; the
    # extraction logic is expected to come from the base classes
    # (not visible in this part of the file).
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    # The tests below use this key as a prefix of the form `ytsearch<N>:<query>`
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Same shape as YoutubeSearchIE, but with its own search key and search
    # params; IE_NAME is derived from YoutubeSearchIE.IE_NAME.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    # The test below uses this key as a prefix of the form `ytsearchdate<N>:<query>`
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search described by a results/search URL.

        The query is read from `search_query` (or `q`), and the optional
        `sp` parameter is forwarded to `_search_results`. The query string
        doubles as playlist id and title.
        """
        params = parse_qs(url)
        query_values = params.get('search_query') or params.get('q')
        query = query_values[0]
        search_params = params.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    """Extractor for music.youtube.com search URLs; a section can be picked via ``sp`` or the URL fragment."""

    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (lower case) -> "sp" search parameter selecting that section
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return the search results as a playlist titled ``<query>`` or ``<query> - <section>``."""
        query_args = parse_qs(url)
        search_terms = (query_args.get('search_query') or query_args.get('q'))[0]
        sp_token = query_args.get('sp', (None,))[0]

        if sp_token:
            # Explicit "sp": label it with the matching section name,
            # falling back to the raw token when it is not a known section
            section_name = sp_token
            for known_name, known_token in self._SECTIONS.items():
                if known_token == sp_token:
                    section_name = known_name
                    break
        else:
            # No "sp": treat the text after the first "#" (if any) as a section name
            url_pieces = url.split('#')
            fragment = url_pieces[1] if len(url_pieces) > 1 else ''
            section_name = urllib.parse.unquote_plus(fragment).lower()
            sp_token = self._SECTIONS.get(section_name)
            if not sp_token:
                # Unknown or missing fragment: plain search without a section suffix
                section_name = None

        playlist_name = join_nonempty(search_terms, section_name, delim=' - ')
        entries = self._search_results(search_terms, sp_token, default_client='web_music')
        return self.playlist_result(entries, playlist_name, playlist_name)
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Shared base for the feed extractors
    Each subclass is expected to override the _FEED_NAME property.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        # Derived from the feed name so subclasses need not set IE_NAME themselves
        return f'youtube:{cls._FEED_NAME}'

    def _real_initialize(self):
        # This class does not inherit from YoutubeBaseInfoExtractor,
        # so its login check is invoked explicitly on this instance
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        # The input URL is ignored; extraction is delegated to the tab extractor for the feed page
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword extractor that maps ``:ytwatchlater`` onto the ``WL`` playlist URL."""

    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    IE_NAME = 'youtube:watchlater'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {'url': ':ytwatchlater', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # The keyword carries no data; always hand over to the tab extractor with the fixed playlist URL
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for ``:ytrec`` / ``:ytrecommended`` and the bare youtube.com front page URL."""

    _FEED_NAME = 'recommended'
    # Overrides the base class value of True
    _LOGIN_REQUIRED = False
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': ':ytrec', 'only_matching': True},
        {'url': ':ytrecommended', 'only_matching': True},
        {'url': 'https://youtube.com', 'only_matching': True},
    ]
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ytsubs`` keyword family."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': ':ytsubs', 'only_matching': True},
        {'url': ':ytsubscriptions', 'only_matching': True},
    ]
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``:ythis`` / ``:ythistory`` keywords."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
class YoutubeShortsAudioPivotIE(InfoExtractor):
    """Extractor for ``/source/<video id>/shorts`` URLs; delegates to the sfv_audio_pivot feed."""

    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts', 'only_matching': True},
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the URL-quoted, base64-encoded sfv_audio_pivot browse params for this video id
        """
        raw_id = video_id.encode()
        # The encoded id is embedded three times between fixed byte sequences.
        # NOTE(review): the fixed bytes look like hard-coded length prefixes for
        # an 11-byte id (0x0b before each copy) - confirm before reusing for other id lengths
        payload = b''.join((
            b'\xf2\x05+\n)\x12\'\n\x0b', raw_id,
            b'\x12\x0b', raw_id,
            b'\x1a\x0b', raw_id,
        ))
        encoded = base64.b64encode(payload).decode()
        return urllib.parse.quote(encoded)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch/attribution_link URLs that end right after a single
    non-video parameter and reports a quoting hint instead of extracting.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Anchored with "$": only URLs that stop after one of these parameters match
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining that the URL needs quoting."""
        # The example commands name this program (yt-dlp) so the hint can be copied as-is
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    """Extractor for ``/clip/<id>`` URLs: resolves the base video and the clip's start/end times."""

    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        },
    }]

    def _real_extract(self, url):
        """
        Return a url_transparent result for the clip's base video.

        The result keeps the clip id as 'id' and carries 'section_start' and
        'section_end' in seconds, converted from the millisecond values of the
        clip's loop command.

        Raises ExtractorError when the base video id or the clip's loop
        command cannot be found in the page data.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First loopCommand found below the clip attribution renderer, or None
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)
        if not clip_data:
            # Without this guard the subscripting below fails with a bare TypeError on None
            raise ExtractorError('Unable to find clip data')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            # Times are given in milliseconds
            'section_start': int(clip_data['startTimeMs']) / 1000,
            'section_end': int(clip_data['endTimeMs']) / 1000,
        }
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    """Extractor for consent.youtube.com redirect pages; follows their ``continue`` target."""

    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Hand the ``continue`` URL to whichever extractor matches it, or fail if there is none."""
        # When "continue" is given more than once, the last occurrence is used
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs ending in a video id of 1 to 10 characters and reports them as truncated."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Never extracts anything: always raises an expected error naming the partial id
        partial_id = self._match_id(url)
        message = f'Incomplete YouTube ID {partial_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)