[ie/youtube] Fix age-verification workaround (#10610)
[yt-dlp3.git] / yt_dlp / extractor / youtube.py
blob b20dfda417116eaeea3a2b955d8a069baef9bc50
1 import base64
2 import calendar
3 import collections
4 import copy
5 import datetime as dt
6 import enum
7 import functools
8 import hashlib
9 import itertools
10 import json
11 import math
12 import os.path
13 import random
14 import re
15 import shlex
16 import sys
17 import threading
18 import time
19 import traceback
20 import urllib.parse
22 from .common import InfoExtractor, SearchInfoExtractor
23 from .openload import PhantomJSwrapper
24 from ..jsinterp import JSInterpreter
25 from ..networking.exceptions import HTTPError, network_exceptions
26 from ..utils import (
27 NO_DEFAULT,
28 ExtractorError,
29 LazyList,
30 UserNotLive,
31 bug_reports_message,
32 classproperty,
33 clean_html,
34 datetime_from_str,
35 dict_get,
36 filesize_from_tbr,
37 filter_dict,
38 float_or_none,
39 format_field,
40 get_first,
41 int_or_none,
42 is_html,
43 join_nonempty,
44 js_to_json,
45 mimetype2ext,
46 orderedSet,
47 parse_codecs,
48 parse_count,
49 parse_duration,
50 parse_iso8601,
51 parse_qs,
52 qualities,
53 remove_start,
54 smuggle_url,
55 str_or_none,
56 str_to_int,
57 strftime_or_none,
58 traverse_obj,
59 try_call,
60 try_get,
61 unescapeHTML,
62 unified_strdate,
63 unified_timestamp,
64 unsmuggle_url,
65 update_url_query,
66 url_or_none,
67 urljoin,
68 variadic,
STREAMING_DATA_CLIENT_NAME = '__yt_dlp_client'

# Per-client Innertube defaults, keyed by client name.
# build_innertube_clients() fills in INNERTUBE_HOST, REQUIRE_JS_PLAYER,
# PLAYER_PARAMS, the client 'hl' and a 'priority' for every entry.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    # --- web clients ---
    'web': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240726.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
    'web_safari': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20240726.00.00',
                'userAgent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15,gzip(gfe)',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
    },
    'web_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20240723.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56,
    },
    'web_music': {
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20240724.00.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20240723.03.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    # --- Android clients ---
    'android': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '19.29.37',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/19.29.37 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_music': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '7.11.50',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.music/7.11.50 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '24.30.100',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.creator/24.30.100 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False,
    },
    # YouTube Kids videos aren't returned on this client for some reason
    'android_vr': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_VR',
                'clientVersion': '1.57.29',
                'deviceMake': 'Oculus',
                'deviceModel': 'Quest 3',
                'androidSdkVersion': 32,
                'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.57.29 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
                'osName': 'Android',
                'osVersion': '12L',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
        'REQUIRE_JS_PLAYER': False,
    },
    'android_testsuite': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_TESTSUITE',
                'clientVersion': '1.9',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 30,
        'REQUIRE_JS_PLAYER': False,
        'PLAYER_PARAMS': '2AMB',
    },
    # This client only has legacy formats and storyboards
    'android_producer': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_PRODUCER',
                'clientVersion': '0.111.1',
                'androidSdkVersion': 30,
                'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip',
                'osName': 'Android',
                'osVersion': '11',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 91,
        'REQUIRE_JS_PLAYER': False,
    },
    # --- iOS clients ---
    # iOS clients have HLS live streams. Setting device model to get 60fps formats.
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558
    'ios': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '19.29.1',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '17.5.1.21F90',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_music': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '7.08.2',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.youtubemusic/7.08.2 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '17.5.1.21F90',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False,
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '24.30.100',
                'deviceMake': 'Apple',
                'deviceModel': 'iPhone16,2',
                'userAgent': 'com.google.ios.ytcreator/24.30.100 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)',
                'osName': 'iPhone',
                'osVersion': '17.5.1.21F90',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False,
    },
    # --- other clients ---
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20240726.01.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
    },
    'tv': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5',
                'clientVersion': '7.20240724.13.00',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
    },
    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
                'clientVersion': '2.0',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
    },
    # This client has pre-merged video+audio 720p/1080p streams
    'mediaconnect': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MEDIA_CONNECT_FRONTEND',
                'clientVersion': '0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 95,
        'REQUIRE_JS_PLAYER': False,
    },
}
def _split_innertube_client(client_name):
    """Split a client name into a ``(name, base, variant)`` triple.

    A dotted name ``'<variant>.<base>'`` (split at the last dot) yields
    ``(variant, base, variant)``. Any other name is split at its first
    underscore and yields ``(client_name, base, variant)``, where
    ``variant`` is None when the name has no underscore.
    """
    dotted_variant, dot, dotted_base = client_name.rpartition('.')
    if dot:
        return dotted_variant, dotted_base, dotted_variant

    base, underscore, variant = client_name.partition('_')
    return client_name, base, (variant if underscore else None)
def short_client_name(client_name):
    """Build an upper-case abbreviation for a client name.

    The first component of `_split_innertube_client(client_name)` is split on
    underscores; the first segment is cut to four characters and each further
    segment contributes its first character. The two parts are combined with
    `join_nonempty` and upper-cased.
    """
    segments = _split_innertube_client(client_name)[0].split('_')
    prefix = segments[0][:4]
    initials = ''.join(segment[0] for segment in segments[1:])
    return join_nonempty(prefix, initials).upper()
def build_innertube_clients():
    """Fill in defaults and a `priority` for every INNERTUBE_CLIENTS entry.

    Each entry is mutated in place:
      * 'INNERTUBE_HOST', 'REQUIRE_JS_PLAYER', 'PLAYER_PARAMS' and the client
        'hl' get a default when not already set;
      * 'priority' is ten times the rank `qualities` assigns to the base
        client, minus 2 for the 'embedded' variant or minus 3 for any other
        variant;
      * 'embedded' variants also get a 'thirdParty' context entry.
    """
    third_party_context = {
        'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
    }
    base_clients = ('ios', 'web', 'tv', 'mweb', 'android')
    # The ranking is built from the reversed tuple
    # NOTE(review): presumably this ranks earlier names in base_clients higher - confirm against `qualities`
    base_rank = qualities(base_clients[::-1])
    top_level_defaults = (
        ('INNERTUBE_HOST', 'www.youtube.com'),
        ('REQUIRE_JS_PLAYER', True),
        ('PLAYER_PARAMS', None),
    )

    # Iterate over a snapshot of the items
    for client_name, config in tuple(INNERTUBE_CLIENTS.items()):
        for key, value in top_level_defaults:
            config.setdefault(key, value)
        config['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')

        base_client, variant = _split_innertube_client(client_name)[1:]
        rank = 10 * base_rank(base_client)

        if variant == 'embedded':
            config['INNERTUBE_CONTEXT']['thirdParty'] = third_party_context
            rank -= 2
        elif variant:
            rank -= 3
        config['priority'] = rank


build_innertube_clients()
class BadgeType(enum.Enum):
    """Kinds of badge that can be identified on a badge renderer."""

    # Availability badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Status badges
    LIVE_NOW = enum.auto()
    VERIFIED = enum.auto()
352 class YoutubeBaseInfoExtractor(InfoExtractor):
353 """Provide base functions for Youtube extractors"""
355 _RESERVED_NAMES = (
356 r'channel|c|user|playlist|watch|w|v|embed|e|live|watch_popup|clip|'
357 r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
358 r'browse|oembed|get_video_info|iframe_api|s/player|source|'
359 r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
361 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
363 # _NETRC_MACHINE = 'youtube'
365 # If True it will raise an error if no login info is provided
366 _LOGIN_REQUIRED = False
368 _INVIDIOUS_SITES = (
369 # invidious-redirect websites
370 r'(?:www\.)?redirect\.invidious\.io',
371 r'(?:(?:www|dev)\.)?invidio\.us',
372 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md
373 r'(?:www\.)?invidious\.pussthecat\.org',
374 r'(?:www\.)?invidious\.zee\.li',
375 r'(?:www\.)?invidious\.ethibox\.fr',
376 r'(?:www\.)?iv\.ggtyler\.dev',
377 r'(?:www\.)?inv\.vern\.i2p',
378 r'(?:www\.)?am74vkcrjp2d5v36lcdqgsj2m6x36tbrkhsruoegwfcizzabnfgf5zyd\.onion',
379 r'(?:www\.)?inv\.riverside\.rocks',
380 r'(?:www\.)?invidious\.silur\.me',
381 r'(?:www\.)?inv\.bp\.projectsegfau\.lt',
382 r'(?:www\.)?invidious\.g4c3eya4clenolymqbpgwz3q3tawoxw56yhzk4vugqrl6dtu3ejvhjid\.onion',
383 r'(?:www\.)?invidious\.slipfox\.xyz',
384 r'(?:www\.)?invidious\.esmail5pdn24shtvieloeedh7ehz3nrwcdivnfhfcedl7gf4kwddhkqd\.onion',
385 r'(?:www\.)?inv\.vernccvbvyi5qhfzyqengccj7lkove6bjot2xhh5kajhwvidqafczrad\.onion',
386 r'(?:www\.)?invidious\.tiekoetter\.com',
387 r'(?:www\.)?iv\.odysfvr23q5wgt7i456o5t3trw2cw5dgn56vbjfbq2m7xsc5vqbqpcyd\.onion',
388 r'(?:www\.)?invidious\.nerdvpn\.de',
389 r'(?:www\.)?invidious\.weblibre\.org',
390 r'(?:www\.)?inv\.odyssey346\.dev',
391 r'(?:www\.)?invidious\.dhusch\.de',
392 r'(?:www\.)?iv\.melmac\.space',
393 r'(?:www\.)?watch\.thekitty\.zone',
394 r'(?:www\.)?invidious\.privacydev\.net',
395 r'(?:www\.)?ng27owmagn5amdm7l5s3rsqxwscl5ynppnis5dqcasogkyxcfqn7psid\.onion',
396 r'(?:www\.)?invidious\.drivet\.xyz',
397 r'(?:www\.)?vid\.priv\.au',
398 r'(?:www\.)?euxxcnhsynwmfidvhjf6uzptsmh4dipkmgdmcmxxuo7tunp3ad2jrwyd\.onion',
399 r'(?:www\.)?inv\.vern\.cc',
400 r'(?:www\.)?invidious\.esmailelbob\.xyz',
401 r'(?:www\.)?invidious\.sethforprivacy\.com',
402 r'(?:www\.)?yt\.oelrichsgarcia\.de',
403 r'(?:www\.)?yt\.artemislena\.eu',
404 r'(?:www\.)?invidious\.flokinet\.to',
405 r'(?:www\.)?invidious\.baczek\.me',
406 r'(?:www\.)?y\.com\.sb',
407 r'(?:www\.)?invidious\.epicsite\.xyz',
408 r'(?:www\.)?invidious\.lidarshield\.cloud',
409 r'(?:www\.)?yt\.funami\.tech',
410 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
411 r'(?:www\.)?osbivz6guyeahrwp2lnwyjk2xos342h4ocsxyqrlaopqjuhwn2djiiyd\.onion',
412 r'(?:www\.)?u2cvlit75owumwpy4dj2hsmvkq7nvrclkpht7xgyye2pyoxhpmclkrad\.onion',
413 # youtube-dl invidious instances list
414 r'(?:(?:www|no)\.)?invidiou\.sh',
415 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
416 r'(?:www\.)?invidious\.kabi\.tk',
417 r'(?:www\.)?invidious\.mastodon\.host',
418 r'(?:www\.)?invidious\.zapashcanon\.fr',
419 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
420 r'(?:www\.)?invidious\.tinfoil-hat\.net',
421 r'(?:www\.)?invidious\.himiko\.cloud',
422 r'(?:www\.)?invidious\.reallyancient\.tech',
423 r'(?:www\.)?invidious\.tube',
424 r'(?:www\.)?invidiou\.site',
425 r'(?:www\.)?invidious\.site',
426 r'(?:www\.)?invidious\.xyz',
427 r'(?:www\.)?invidious\.nixnet\.xyz',
428 r'(?:www\.)?invidious\.048596\.xyz',
429 r'(?:www\.)?invidious\.drycat\.fr',
430 r'(?:www\.)?inv\.skyn3t\.in',
431 r'(?:www\.)?tube\.poal\.co',
432 r'(?:www\.)?tube\.connect\.cafe',
433 r'(?:www\.)?vid\.wxzm\.sx',
434 r'(?:www\.)?vid\.mint\.lgbt',
435 r'(?:www\.)?vid\.puffyan\.us',
436 r'(?:www\.)?yewtu\.be',
437 r'(?:www\.)?yt\.elukerio\.org',
438 r'(?:www\.)?yt\.lelux\.fi',
439 r'(?:www\.)?invidious\.ggc-project\.de',
440 r'(?:www\.)?yt\.maisputain\.ovh',
441 r'(?:www\.)?ytprivate\.com',
442 r'(?:www\.)?invidious\.13ad\.de',
443 r'(?:www\.)?invidious\.toot\.koeln',
444 r'(?:www\.)?invidious\.fdn\.fr',
445 r'(?:www\.)?watch\.nettohikari\.com',
446 r'(?:www\.)?invidious\.namazso\.eu',
447 r'(?:www\.)?invidious\.silkky\.cloud',
448 r'(?:www\.)?invidious\.exonip\.de',
449 r'(?:www\.)?invidious\.riverside\.rocks',
450 r'(?:www\.)?invidious\.blamefran\.net',
451 r'(?:www\.)?invidious\.moomoo\.de',
452 r'(?:www\.)?ytb\.trom\.tf',
453 r'(?:www\.)?yt\.cyberhost\.uk',
454 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
455 r'(?:www\.)?qklhadlycap4cnod\.onion',
456 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
457 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
458 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
459 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
460 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
461 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
462 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
463 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
464 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
465 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
466 # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances
467 r'(?:www\.)?piped\.kavin\.rocks',
468 r'(?:www\.)?piped\.tokhmi\.xyz',
469 r'(?:www\.)?piped\.syncpundit\.io',
470 r'(?:www\.)?piped\.mha\.fi',
471 r'(?:www\.)?watch\.whatever\.social',
472 r'(?:www\.)?piped\.garudalinux\.org',
473 r'(?:www\.)?piped\.rivo\.lol',
474 r'(?:www\.)?piped-libre\.kavin\.rocks',
475 r'(?:www\.)?yt\.jae\.fi',
476 r'(?:www\.)?piped\.mint\.lgbt',
477 r'(?:www\.)?il\.ax',
478 r'(?:www\.)?piped\.esmailelbob\.xyz',
479 r'(?:www\.)?piped\.projectsegfau\.lt',
480 r'(?:www\.)?piped\.privacydev\.net',
481 r'(?:www\.)?piped\.palveluntarjoaja\.eu',
482 r'(?:www\.)?piped\.smnz\.de',
483 r'(?:www\.)?piped\.adminforge\.de',
484 r'(?:www\.)?watch\.whatevertinfoil\.de',
485 r'(?:www\.)?piped\.qdi\.fi',
486 r'(?:(?:www|cf)\.)?piped\.video',
487 r'(?:www\.)?piped\.aeong\.one',
488 r'(?:www\.)?piped\.moomoo\.me',
489 r'(?:www\.)?piped\.chauvet\.pro',
490 r'(?:www\.)?watch\.leptons\.xyz',
491 r'(?:www\.)?pd\.vern\.cc',
492 r'(?:www\.)?piped\.hostux\.net',
493 r'(?:www\.)?piped\.lunar\.icu',
494 # Hyperpipe instances from https://hyperpipe.codeberg.page/
495 r'(?:www\.)?hyperpipe\.surge\.sh',
496 r'(?:www\.)?hyperpipe\.esmailelbob\.xyz',
497 r'(?:www\.)?listen\.whatever\.social',
498 r'(?:www\.)?music\.adminforge\.de',
501 # extracted from account/account_menu ep
502 # XXX: These are the supported YouTube UI and API languages,
503 # which is slightly different from languages supported for translation in YouTube studio
504 _SUPPORTED_LANG_CODES = [
505 'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
506 'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
507 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
508 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
509 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
510 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
513 _IGNORED_WARNINGS = {
514 'Unavailable videos will be hidden during playback',
515 'Unavailable videos are hidden',
518 _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
519 _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
def ucid_or_none(self, ucid):
    """Search `ucid` for a full-string match of `_YT_CHANNEL_UCID_RE`.

    Returns the result of `_search_regex`, which is given ``default=None``.
    """
    anchored_ucid_re = rf'^({self._YT_CHANNEL_UCID_RE})$'
    return self._search_regex(anchored_ucid_re, ucid, 'UC-id', default=None)
def handle_or_none(self, handle):
    """Search `handle` for a full-string match of `_YT_HANDLE_RE`.

    Returns the result of `_search_regex`, which is given ``default=None``.
    """
    anchored_handle_re = rf'^({self._YT_HANDLE_RE})$'
    return self._search_regex(anchored_handle_re, handle, '@-handle', default=None)
def handle_from_url(self, url):
    """Search for an @-handle at the start of a URL path.

    The pattern accepts an optional ``http(s)://[www.]youtube.com`` prefix
    followed by ``/`` and the handle; `_search_regex` is given
    ``default=None``.
    """
    handle_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})'
    return self._search_regex(handle_url_re, url, 'channel handle', default=None)
def ucid_from_url(self, url):
    """Search for a channel UC-id directly after the host part of a URL.

    The pattern accepts an optional ``http(s)://[www.]youtube.com`` prefix
    followed by ``/`` and the id; `_search_regex` is given ``default=None``.
    """
    ucid_url_re = rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})'
    return self._search_regex(ucid_url_re, url, 'channel id', default=None)
@functools.cached_property
def _preferred_lang(self):
    """
    Language code taken from the case-sensitive `lang` extractor argument
    (looked up under ie_key 'Youtube').

    Returns None when the argument is empty. Raises an expected
    ExtractorError when the code is not in `_SUPPORTED_LANG_CODES`, and
    emits a warning for any supported code other than 'en'.
    """
    preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
    if not preferred_lang:
        return None

    if preferred_lang not in self._SUPPORTED_LANG_CODES:
        raise ExtractorError(
            f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
            expected=True)

    if preferred_lang != 'en':
        self.report_warning(
            f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
    return preferred_lang
def _initialize_consent(self):
    """Set the SOCS consent cookie to 'CAI' where needed.

    Nothing is changed when a `__Secure-3PSID` cookie is present, or when an
    existing SOCS cookie has a value that does not start with 'CAA'.
    """
    jar = self._get_cookies('https://www.youtube.com/')
    if not jar.get('__Secure-3PSID'):
        socs_cookie = jar.get('SOCS')
        # A 'CAA...' value is treated as "not consented" and gets overwritten
        if not socs_cookie or socs_cookie.value.startswith('CAA'):
            # accept all (required for mixes)
            self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)
def _initialize_pref(self):
    """Rewrite the PREF cookie so that `hl` and `tz` are forced.

    Existing PREF values are kept when the cookie parses as a query string;
    `hl` becomes the preferred language (or 'en') and `tz` becomes 'UTC'.
    """
    existing = self._get_cookies('https://www.youtube.com/').get('PREF')
    settings = {}
    if existing:
        try:
            settings = dict(urllib.parse.parse_qsl(existing.value))
        except ValueError:
            self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())

    settings['hl'] = self._preferred_lang or 'en'
    settings['tz'] = 'UTC'
    self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(settings))
def _real_initialize(self):
    """Run the initialisation steps in order: PREF cookie, consent cookie, login check."""
    # 1. force language/timezone in the PREF cookie
    self._initialize_pref()
    # 2. set the consent cookie where needed
    self._initialize_consent()
    # 3. fail early when cookies are required but were not passed
    self._check_login_required()
def _check_login_required(self):
    """Call `raise_login_required` when `_LOGIN_REQUIRED` is set but no cookies were passed."""
    if not self._LOGIN_REQUIRED or self._cookies_passed:
        return
    self.raise_login_required('Login details are needed to download this content', method='cookies')
583 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*='
584 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*='
def _get_default_ytcfg(self, client='web'):
    """Return a deep copy of the INNERTUBE_CLIENTS entry for `client`.

    Raises KeyError when `client` is not a key of INNERTUBE_CLIENTS.
    """
    default_config = INNERTUBE_CLIENTS[client]
    return copy.deepcopy(default_config)
def _get_innertube_host(self, client='web'):
    """Return the 'INNERTUBE_HOST' value of the INNERTUBE_CLIENTS entry for `client`."""
    client_config = INNERTUBE_CLIENTS[client]
    return client_config['INNERTUBE_HOST']
def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
    """`try_get` on `ytcfg`, falling back to the default config of `default_client`.

    The fallback is used whenever the lookup on `ytcfg` gives a falsy result.
    """
    from_ytcfg = try_get(ytcfg, getter, expected_type)
    if from_ytcfg:
        return from_ytcfg
    return try_get(self._get_default_ytcfg(default_client), getter, expected_type)
def _extract_client_name(self, ytcfg, default_client='web'):
    """Look up the client name as a str, falling back to the defaults of `default_client`.

    Tries 'INNERTUBE_CLIENT_NAME' first, then the `clientName` inside
    'INNERTUBE_CONTEXT'.
    """
    name_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_NAME'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientName'],
    )
    return self._ytcfg_get_safe(ytcfg, name_getters, str, default_client)
def _extract_client_version(self, ytcfg, default_client='web'):
    """Look up the client version as a str, falling back to the defaults of `default_client`.

    Tries 'INNERTUBE_CLIENT_VERSION' first, then the `clientVersion` inside
    'INNERTUBE_CONTEXT'.
    """
    version_getters = (
        lambda cfg: cfg['INNERTUBE_CLIENT_VERSION'],
        lambda cfg: cfg['INNERTUBE_CONTEXT']['client']['clientVersion'],
    )
    return self._ytcfg_get_safe(ytcfg, version_getters, str, default_client)
def _select_api_hostname(self, req_api_hostname, default_client=None):
    """Pick the API hostname.

    Order of preference: the `innertube_host` extractor argument, then
    `req_api_hostname`, then the host of `default_client` (or of 'web').
    """
    configured_host = self._configuration_arg('innertube_host', [''], ie_key=YoutubeIE.ie_key())[0]
    if configured_host:
        return configured_host
    if req_api_hostname:
        return req_api_hostname
    return self._get_innertube_host(default_client or 'web')
def _extract_context(self, ytcfg=None, default_client='web'):
    """Return the 'INNERTUBE_CONTEXT' dict with language and timezone enforced.

    The context is taken from `ytcfg` when available, otherwise from the
    default config of `default_client`.
    """
    candidates = (ytcfg, self._get_default_ytcfg(default_client))
    context = get_first(candidates, 'INNERTUBE_CONTEXT', expected_type=dict)

    # Enforce language and tz for extraction
    # NOTE(review): presumably `client_settings` is the nested dict itself, so the
    # update is visible through `context` - confirm against `traverse_obj`
    client_settings = traverse_obj(context, 'client', expected_type=dict, default={})
    client_settings['hl'] = self._preferred_lang or 'en'
    client_settings['timeZone'] = 'UTC'
    client_settings['utcOffsetMinutes'] = 0
    return context
619 _SAPISID = None
def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
    """Build the ``SAPISIDHASH <time>_<sha1>`` authorization value for `origin`.

    The SAPISID value is read from the cookies the first time and stored on
    `self._SAPISID` (False when no usable cookie exists). Returns None when
    there is no SAPISID.
    """
    timestamp = round(time.time())

    if self._SAPISID is None:
        jar = self._get_cookies('https://www.youtube.com')
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        source_cookie = dict_get(jar, ('__Secure-3PAPISID', 'SAPISID'))
        if not (source_cookie and source_cookie.value):
            self._SAPISID = False
        else:
            self._SAPISID = source_cookie.value
            self.write_debug('Extracted SAPISID cookie')
            # SAPISID cookie is required if not already present
            if not jar.get('SAPISID'):
                self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                self._set_cookie(
                    '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=timestamp + 3600)

    if not self._SAPISID:
        return None

    # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
    hash_input = f'{timestamp} {self._SAPISID} {origin}'
    digest = hashlib.sha1(hash_input.encode()).hexdigest()
    return f'SAPISIDHASH {timestamp}_{digest}'
def _call_api(self, ep, query, video_id, fatal=True, headers=None,
              note='Downloading API JSON', errnote='Unable to download API page',
              context=None, api_key=None, api_hostname=None, default_client='web'):
    """POST `query` as JSON to the ``/youtubei/v1/<ep>`` endpoint and return the parsed response.

    The payload is `query` plus a 'context' entry: `context` when truthy,
    otherwise the context extracted for `default_client`. `headers`, when
    given, override the generated API headers. The `key` URL parameter comes
    from the `innertube_key` extractor argument, with `api_key` as its default.
    """
    payload = {'context': context or self._extract_context(default_client=default_client)}
    payload.update(query)

    request_headers = self.generate_api_headers(default_client=default_client)
    request_headers['content-type'] = 'application/json'
    if headers:
        request_headers.update(headers)

    api_url = f'https://{self._select_api_hostname(api_hostname, default_client)}/youtubei/v1/{ep}'
    body = json.dumps(payload).encode('utf8')
    url_params = {
        'key': self._configuration_arg(
            'innertube_key', [api_key], ie_key=YoutubeIE.ie_key(), casesense=True)[0],
        'prettyPrint': 'false',
    }
    return self._download_json(
        api_url, video_id=video_id, fatal=fatal, note=note, errnote=errnote,
        data=body, headers=request_headers,
        query=filter_dict(url_params, cndn=lambda _, v: v))
def extract_yt_initial_data(self, item_id, webpage, fatal=True):
    """Search `webpage` for the JSON that follows `_YT_INITIAL_DATA_RE`, via `_search_json`."""
    return self._search_json(
        self._YT_INITIAL_DATA_RE, webpage, 'yt initial data', item_id,
        fatal=fatal)
@staticmethod
def _extract_session_index(*data):
    """
    Index of current account in account list.
    See: https://github.com/yt-dlp/yt-dlp/pull/519

    Returns the first 'SESSION_INDEX' among `data` that `int_or_none` can
    convert, or None when there is none.
    """
    indices = (
        int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
        for ytcfg in data)
    return next((index for index in indices if index is not None), None)
# Deprecated?
def _extract_identity_token(self, ytcfg=None, webpage=None):
    """Look up the identity token.

    Uses the str 'ID_TOKEN' entry of `ytcfg` when present, otherwise the
    result of an ``ID_TOKEN`` regex search on `webpage`. Returns None when
    neither source is given.
    """
    token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) if ytcfg else None
    if token:
        return token
    if not webpage:
        return None
    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None, fatal=False)
@staticmethod
def _extract_account_syncid(*args):
    """
    Extract syncId required to download private playlists of secondary channels
    @params response and/or ytcfg

    Each argument is checked in turn; the first one that yields an id wins.
    Returns None when no argument yields one.
    """
    datasync_getters = (
        lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
        lambda x: x['DATASYNC_ID'],
    )
    for source in args:
        # ytcfg includes channel_syncid if on secondary channel
        delegated_sid = try_get(source, lambda x: x['DELEGATED_SESSION_ID'], str)
        if delegated_sid:
            return delegated_sid

        datasync_id = try_get(source, datasync_getters, str) or ''
        id_parts = datasync_id.split('||')
        # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
        # and just "user_syncid||" for primary channel. We only want the channel_syncid
        if len(id_parts) >= 2 and id_parts[1]:
            return id_parts[0]
@staticmethod
def _extract_visitor_data(*args):
    """
    Extracts visitorData from an API response or ytcfg
    Appears to be used to track session state

    The str value is looked up with `get_first` across `args`.
    """
    visitor_data_paths = (
        'VISITOR_DATA',
        ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
        ('responseContext', 'visitorData'),
    )
    return get_first(args, [visitor_data_paths], expected_type=str)
@functools.cached_property
def is_authenticated(self):
    """True when a SAPISIDHASH authorization header can be generated (cached after first access)."""
    auth_header = self._generate_sapisidhash_header()
    return bool(auth_header)
def extract_ytcfg(self, video_id, webpage):
    """Parse the argument of the first ``ytcfg.set({...});`` call found in `webpage`.

    Returns an empty dict when `webpage` is falsy; the final ``or {}`` also
    replaces any falsy parse result with an empty dict.
    """
    if not webpage:
        return {}
    raw_ytcfg = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    parsed = self._parse_json(raw_ytcfg, video_id, fatal=False)
    return parsed or {}
def generate_api_headers(
        self, *, ytcfg=None, account_syncid=None, session_index=None,
        visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
    """Build the HTTP headers for an Innertube API request.

    Values passed explicitly take precedence over those extracted from
    `ytcfg`. 'X-Goog-AuthUser' is added when there is an account sync id or
    a session index; 'Authorization' and 'X-Origin' are added when a
    SAPISIDHASH header can be generated. The result is passed through
    `filter_dict` before being returned.
    """
    origin = 'https://' + self._select_api_hostname(api_hostname, default_client)

    client_name = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)
    client_version = self._extract_client_version(ytcfg, default_client)
    token = identity_token or self._extract_identity_token(ytcfg)
    page_id = account_syncid or self._extract_account_syncid(ytcfg)
    visitor_id = visitor_data or self._extract_visitor_data(ytcfg)
    user_agent = self._ytcfg_get_safe(
        ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)

    headers = {
        'X-YouTube-Client-Name': str(client_name),
        'X-YouTube-Client-Version': client_version,
        'Origin': origin,
        'X-Youtube-Identity-Token': token,
        'X-Goog-PageId': page_id,
        'X-Goog-Visitor-Id': visitor_id,
        'User-Agent': user_agent,
    }

    if session_index is None:
        session_index = self._extract_session_index(ytcfg)
    if account_syncid or session_index is not None:
        # Index 0 is used when only the account sync id is known
        headers['X-Goog-AuthUser'] = 0 if session_index is None else session_index

    auth_header = self._generate_sapisidhash_header(origin)
    if auth_header is not None:
        headers['Authorization'] = auth_header
        headers['X-Origin'] = origin
    return filter_dict(headers)
def _download_ytcfg(self, client, video_id):
    """Download the config page of `client` and return the ytcfg extracted from it.

    Only 'web', 'web_music' and 'web_embedded' have a config page; any other
    client yields an empty dict. The download is non-fatal.
    """
    config_pages = {
        'web': 'https://www.youtube.com',
        'web_music': 'https://music.youtube.com',
        'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1',
    }
    page_url = config_pages.get(client)
    if not page_url:
        return {}

    display_name = client.replace('_', ' ').strip()
    webpage = self._download_webpage(
        page_url, video_id, fatal=False, note=f'Downloading {display_name} client config')
    return self.extract_ytcfg(video_id, webpage) or {}
@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
    """Build the query dict for a continuation request.

    The result always has 'continuation'; a truthy `ctp` is added as
    ``{'clickTracking': {'clickTrackingParams': ctp}}``.
    """
    # TODO: Inconsistency with clickTrackingParams.
    # Currently we have a fixed ctp contained within context (from ytcfg)
    # and a ctp in root query for continuation.
    if not ctp:
        return {'continuation': continuation}
    return {
        'continuation': continuation,
        'clickTracking': {'clickTrackingParams': ctp},
    }
@classmethod
def _extract_next_continuation_data(cls, renderer):
    """Build a continuation query from a renderer's next/reload continuation data.

    Returns None when the renderer has no such data or the data has no
    'continuation' token.
    """
    continuation_data = try_get(
        renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                   lambda x: x['continuation']['reloadContinuationData']), dict)
    token = continuation_data.get('continuation') if continuation_data else None
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_data.get('clickTrackingParams'))
@classmethod
def _extract_continuation_ep_data(cls, continuation_ep: dict):
    """Build a continuation query from a continuation endpoint dict.

    Returns None when `continuation_ep` is not a dict or has no str token
    under ``continuationCommand.token``.
    """
    if not isinstance(continuation_ep, dict):
        return None
    token = try_get(
        continuation_ep, lambda x: x['continuationCommand']['token'], str)
    if not token:
        return None
    return cls._build_api_continuation_query(token, continuation_ep.get('clickTrackingParams'))
@classmethod
def _extract_continuation(cls, renderer):
    """Find a continuation query for `renderer`.

    Next/reload continuation data is preferred; otherwise the
    `continuationItemRenderer` entries under 'contents', 'items' or 'rows'
    are searched for an endpoint or button command.
    """
    from_next_data = cls._extract_next_continuation_data(renderer)
    if from_next_data:
        return from_next_data

    endpoint_path = (
        ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
        ('continuationEndpoint', ('button', 'buttonRenderer', 'command')),
    )
    return traverse_obj(
        renderer, endpoint_path, get_all=False,
        expected_type=cls._extract_continuation_ep_data)
@classmethod
def _extract_alerts(cls, data):
    """Yield ``(alert_type, message)`` pairs from the 'alerts' list of `data`.

    Entries of the list that are not dicts are skipped, as are alert values
    that are not dicts, have no 'type', or have no text.
    """
    for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
        if not isinstance(alert_dict, dict):
            continue
        for alert in alert_dict.values():
            # A non-dict value has no `.get`; skip it instead of letting
            # AttributeError abort the whole generator
            if not isinstance(alert, dict):
                continue
            alert_type = alert.get('type')
            if not alert_type:
                continue
            message = cls._get_text(alert, 'text')
            if message:
                yield alert_type, message
def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
    """Report `(type, message)` alerts as warnings and raise on the last error.

    With `fatal` set, alerts whose type is 'error' (case-insensitive) are
    errors. Everything else is a warning unless its message is in
    `_IGNORED_WARNINGS`. All warnings and all but the last error are
    reported with `report_warning`; the last error is raised as
    ExtractorError.
    """
    warning_alerts, error_alerts = [], []
    for kind, text in alerts:
        if kind.lower() == 'error' and fatal:
            error_alerts.append((kind, text))
        elif text not in self._IGNORED_WARNINGS:
            warning_alerts.append((kind, text))

    # The last error is raised below, so only the earlier ones are reported here
    for kind, text in warning_alerts + error_alerts[:-1]:
        self.report_warning(f'YouTube said: {kind} - {text}', only_once=only_once)

    if error_alerts:
        last_error_text = error_alerts[-1][1]
        raise ExtractorError(f'YouTube said: {last_error_text}', expected=expected)
def _extract_and_report_alerts(self, data, *args, **kwargs):
    """Extract the alerts of `data` and pass them to `_report_alerts` with the remaining arguments."""
    alerts = self._extract_alerts(data)
    return self._report_alerts(alerts, *args, **kwargs)
def _extract_badges(self, badge_list: list):
    """
    Extract known BadgeType's from a list of badge renderers.
    @returns [{'type': BadgeType}]

    A badge is identified by its icon type, then by its style, and finally
    by looking for a known English phrase in its label text.
    """
    by_icon_type = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
        'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
        'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
        'CHECK': BadgeType.VERIFIED,
    }

    by_style = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
        'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
        'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
    }

    # Checked in insertion order; the first phrase contained in the label wins
    by_label_phrase = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM,
        'verified': BadgeType.VERIFIED,
        'official artist channel': BadgeType.VERIFIED,
    }

    found = []
    renderers = traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key)))
    for renderer in renderers:
        known_type = (
            by_icon_type.get(traverse_obj(renderer, ('icon', 'iconType'), expected_type=str))
            or by_style.get(traverse_obj(renderer, 'style'))
        )
        if not known_type:
            # fallback, won't work in some languages
            label_text = traverse_obj(
                renderer, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip',
                get_all=False, expected_type=str, default='').lower()
            known_type = next(
                (badge_type for phrase, badge_type in by_label_phrase.items() if phrase in label_text),
                None)
        if known_type:
            found.append({'type': known_type})

    return found
@staticmethod
def _has_badge(badges, badge_type):
    """Tell whether `badges` holds an entry whose 'type' equals `badge_type`."""
    def is_wanted(_, badge):
        return badge['type'] == badge_type

    matching = traverse_obj(badges, is_wanted)
    return bool(matching)
@staticmethod
def _get_text(data, *path_list, max_runs=None):
    """
    Return the first non-empty text found in `data` under any of `path_list`.

    Each candidate is read either from its 'simpleText' string or by joining
    the 'text' of its 'runs' (a bare list is treated as the runs themselves).
    @param path_list: traversal paths tried in order; with none given, `data`
                      itself is the only candidate
    @param max_runs:  if truthy, join at most this many runs
    @returns the text, or None when nothing non-empty was found
    """
    for path in path_list or [None]:
        if path is None:
            candidates = [data]
        else:
            candidates = traverse_obj(data, path, default=[])
            is_branching = any(
                key is ... or isinstance(key, (list, tuple))
                for key in variadic(path))
            if not is_branching:
                # A non-branching path gives a single object; wrap it so the loop below is uniform
                candidates = [candidates]

        for candidate in candidates:
            simple_text = try_get(candidate, lambda x: x['simpleText'], str)
            if simple_text:
                return simple_text

            runs = try_get(candidate, lambda x: x['runs'], list) or []
            if not runs and isinstance(candidate, list):
                runs = candidate

            limit = max_runs or len(runs)
            runs = runs[:min(len(runs), limit)]
            joined = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str))
            if joined:
                return joined
def _get_count(self, data, *path_list):
    """
    Read a count from the text found in `data` under `path_list`.

    `parse_count` is tried first; if it yields None, the leading run of
    digits/commas of the whitespace-stripped text is converted instead.
    """
    text = self._get_text(data, *path_list) or ''
    parsed = parse_count(text)
    if parsed is not None:
        return parsed

    compact = re.sub(r'\s', '', text)
    leading_digits = self._search_regex(r'^([\d,]+)', compact, 'count', default=None)
    return str_to_int(leading_digits)
@staticmethod
def _extract_thumbnails(data, *path_list, final_key='thumbnails'):
    """
    Extract thumbnails from thumbnails dict
    @param path_list: path list to level that contains the `final_key` key
    @param final_key: key holding the list of thumbnail entries
    @returns list of {'url', 'height', 'width'} dicts; entries without a
             valid URL are left out
    """
    collected = []
    for base_path in path_list or [()]:
        entries_path = (*variadic(base_path), final_key, ...)
        for entry in traverse_obj(data, entries_path):
            url = url_or_none(entry.get('url'))
            if url:
                # Sometimes youtube gives a wrong thumbnail URL. See:
                # https://github.com/yt-dlp/yt-dlp/issues/233
                # https://github.com/ytdl-org/youtube-dl/issues/28023
                if 'maxresdefault' in url:
                    url = url.partition('?')[0]
                collected.append({
                    'url': url,
                    'height': int_or_none(entry.get('height')),
                    'width': int_or_none(entry.get('width')),
                })
    return collected
955 @staticmethod
956 def extract_relative_time(relative_time_text):
958 Extracts a relative time from string and converts to dt object
959 e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
962 # XXX: this could be moved to a general function in utils/_utils.py
963 # The relative time text strings are roughly the same as what
964 # Javascript's Intl.RelativeTimeFormat function generates.
965 # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
966 mobj = re.search(
967 r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
968 relative_time_text)
969 if mobj:
970 start = mobj.group('start')
971 if start:
972 return datetime_from_str(start)
973 try:
974 return datetime_from_str('now-{}{}'.format(mobj.group('time'), mobj.group('unit')))
975 except ValueError:
976 return None
978 def _parse_time_text(self, text):
979 if not text:
980 return
981 dt_ = self.extract_relative_time(text)
982 timestamp = None
983 if isinstance(dt_, dt.datetime):
984 timestamp = calendar.timegm(dt_.timetuple())
986 if timestamp is None:
987 timestamp = (
988 unified_timestamp(text) or unified_timestamp(
989 self._search_regex(
990 (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
991 text.lower(), 'time text', default=None)))
993 if text and timestamp is None and self._preferred_lang in (None, 'en'):
994 self.report_warning(
995 f'Cannot parse localized time text "{text}"', only_once=True)
996 return timestamp
def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                      ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                      default_client='web'):
    """
    Call the API endpoint `ep` and return its response, retrying on selected failures.

    Two retry managers are driven by hand: `main_rm` for network/HTTP errors and
    "unknown error" alerts, and `icd_rm` for responses that lack the expected keys.

    @param check_get_keys: path(s) passed to `traverse_obj`; a falsy lookup result
                           marks the response as incomplete
    @param fatal:          forwarded to `_error_or_warning` for non-retried errors
    @returns the response; None when incomplete-data retries run out without raising;
             otherwise whatever `_error_or_warning` returns
    """
    # The `raise_incomplete_data` extractor arg decides whether the incomplete-data manager is fatal
    raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
    # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
    icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
    icd_rm = next(icd_retries)
    main_retries = iter(self.RetryManager())
    main_rm = next(main_retries)
    # Manual retry loop for multiple RetryManagers
    # The proper RetryManager MUST be advanced after an error
    # and its result MUST be checked if the manager is non fatal
    while True:
        try:
            # Always fatal here so failures surface as ExtractorError and are triaged below
            response = self._call_api(
                ep=ep, fatal=True, headers=headers,
                video_id=item_id, query=query, note=note,
                context=self._extract_context(ytcfg, default_client),
                api_hostname=api_hostname, default_client=default_client)
        except ExtractorError as e:
            if not isinstance(e.cause, network_exceptions):
                # Not a network problem: do not retry
                return self._error_or_warning(e, fatal=fatal)
            elif not isinstance(e.cause, HTTPError):
                # Network error without an HTTP response: retry
                main_rm.error = e
                next(main_retries)
                continue

            # HTTP error: peek at the body and surface the API's own error message, if any
            first_bytes = e.cause.response.read(512)
            if not is_html(first_bytes):
                yt_error = try_get(
                    self._parse_json(
                        self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                    lambda x: x['error']['message'], str)
                if yt_error:
                    self._report_alerts([('ERROR', yt_error)], fatal=False)
            # Downloading page may result in intermittent 5xx HTTP error
            # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
            # We also want to catch all other network exceptions since errors in later pages can be troublesome
            # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
            if e.cause.status not in (403, 429):
                main_rm.error = e
                next(main_retries)
                continue
            # 403 and 429 are not retried
            return self._error_or_warning(e, fatal=fatal)

        try:
            self._extract_and_report_alerts(response, only_once=True)
        except ExtractorError as e:
            # YouTube's servers may return errors we want to retry on in a 200 OK response
            # See: https://github.com/yt-dlp/yt-dlp/issues/839
            if 'unknown error' in e.msg.lower():
                main_rm.error = e
                next(main_retries)
                continue
            return self._error_or_warning(e, fatal=fatal)
        # Youtube sometimes sends incomplete data
        # See: https://github.com/ytdl-org/youtube-dl/issues/28194
        if not traverse_obj(response, *variadic(check_get_keys)):
            icd_rm.error = ExtractorError('Incomplete data received', expected=True)
            # NOTE(review): presumably the iterator is exhausted (yielding the None default)
            # once the non-fatal manager gives up - confirm against RetryManager
            should_retry = next(icd_retries, None)
            if not should_retry:
                return None
            continue

        return response
1064 @staticmethod
1065 def is_music_url(url):
1066 return re.match(r'(https?://)?music\.youtube\.com/', url) is not None
def _extract_video(self, renderer):
    """
    Build a `url`-type result (handled by YoutubeIE) from a video renderer dict.

    Metadata is read from `renderer`, with several fields falling back to the
    reel (shorts) player header nested under its navigation endpoint.
    @param renderer: dict describing one video entry
    @returns dict with id, url, title and whatever other metadata could be found
    """
    video_id = renderer.get('videoId')

    # Header of the reel player overlay; used as a fallback source below
    reel_header_renderer = traverse_obj(renderer, (
        'navigationEndpoint', 'reelWatchEndpoint', 'overlay', 'reelPlayerOverlayRenderer',
        'reelPlayerHeaderSupportedRenderers', 'reelPlayerHeaderRenderer'))

    title = self._get_text(renderer, 'title', 'headline') or self._get_text(reel_header_renderer, 'reelTitleText')
    description = self._get_text(renderer, 'descriptionSnippet')

    # Duration: numeric field first, then the length text, then the accessibility label
    duration = int_or_none(renderer.get('lengthSeconds'))
    if duration is None:
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
    if duration is None:
        # XXX: should write a parser to be more general to support more cases (e.g. shorts in shorts tab)
        duration = parse_duration(self._search_regex(
            r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
            traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
            video_id, default=None, group='duration'))

    channel_id = traverse_obj(
        renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
        expected_type=str, get_all=False)
    if not channel_id:
        channel_id = traverse_obj(reel_header_renderer, ('channelNavigationEndpoint', 'browseEndpoint', 'browseId'))

    channel_id = self.ucid_or_none(channel_id)

    overlay_style = traverse_obj(
        renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
        get_all=False, expected_type=str)
    badges = self._extract_badges(traverse_obj(renderer, 'badges'))
    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
        renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
        expected_type=str)) or ''
    # Use a /shorts/ URL when either the overlay style or the navigation URL says it is a short
    url = f'https://www.youtube.com/watch?v={video_id}'
    if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
        url = f'https://www.youtube.com/shorts/{video_id}'

    time_text = (self._get_text(renderer, 'publishedTimeText', 'videoInfo')
                 or self._get_text(reel_header_renderer, 'timestampText') or '')
    scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))

    # Precedence: a scheduled start time, then "streamed" in the time text, then live overlay/badge
    live_status = (
        'is_upcoming' if scheduled_timestamp is not None
        else 'was_live' if 'streamed' in time_text.lower()
        else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
        else None)

    # videoInfo is a string like '50K views • 10 years ago'.
    view_count_text = self._get_text(renderer, 'viewCountText', 'shortViewCountText', 'videoInfo') or ''
    view_count = (0 if 'no views' in view_count_text.lower()
                  else self._get_count({'simpleText': view_count_text}))
    # Live and upcoming entries report the count under 'concurrent_view_count' instead of 'view_count'
    view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'

    channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
               or self._get_text(reel_header_renderer, 'channelTitleText'))

    channel_handle = traverse_obj(renderer, (
        'shortBylineText', 'runs', ..., 'navigationEndpoint',
        (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
        expected_type=self.handle_from_url, get_all=False)
    return {
        '_type': 'url',
        'ie_key': YoutubeIE.ie_key(),
        'id': video_id,
        'url': url,
        'title': title,
        'description': description,
        'duration': duration,
        'channel_id': channel_id,
        'channel': channel,
        'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
        'uploader': channel,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        # The time text is only parsed when the `approximate_date` extractor arg is set for YoutubeTabIE
        'timestamp': (self._parse_time_text(time_text)
                      if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
                      else None),
        'release_timestamp': scheduled_timestamp,
        # A public badge wins outright; otherwise each absent badge is passed as None rather than False
        'availability':
            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            else self._availability(
                is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
        view_count_field: view_count,
        'live_status': live_status,
        # None (not False) when no verified badge is present
        'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None,
    }
1164 class YoutubeIE(YoutubeBaseInfoExtractor):
1165 IE_DESC = 'YouTube'
1166 _VALID_URL = r'''(?x)^
1168 (?:https?://|//) # http(s):// or protocol-independent URL
1169 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
1170 (?:www\.)?deturl\.com/www\.youtube\.com|
1171 (?:www\.)?pwnyoutube\.com|
1172 (?:www\.)?hooktube\.com|
1173 (?:www\.)?yourepeat\.com|
1174 tube\.majestyc\.net|
1175 {invidious}|
1176 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
1177 (?:.*?\#/)? # handle anchor (#/) redirect urls
1178 (?: # the various things that can precede the ID:
1179 (?:(?:v|embed|e|shorts|live)/(?!videoseries|live_stream)) # v/ or embed/ or e/ or shorts/
1180 |(?: # or the v= param in all its forms
1181 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
1182 (?:\?|\#!?) # the params delimiter ? or # or #!
1183 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
1187 |(?:
1188 youtu\.be| # just youtu.be/xxxx
1189 vid\.plus| # or vid.plus/xxxx
1190 zwearz\.com/watch| # or zwearz.com/watch/xxxx
1191 {invidious}
1193 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
1195 )? # all until now is optional -> you can pass the naked ID
1196 (?P<id>[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID
1197 (?(1).+)? # if we found the ID, everything can follow
1198 (?:\#|$)'''.format(
1199 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
1201 _EMBED_REGEX = [
1202 r'''(?x)
1204 <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
1205 data-video-url=|
1206 <embed[^>]+?src=|
1207 embedSWF\(?:\s*|
1208 <object[^>]+data=|
1209 new\s+SWFObject\(
1211 (["\'])
1212 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1213 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1214 \1''',
1215 # https://wordpress.org/plugins/lazy-load-for-videos/
1216 r'''(?xs)
1217 <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
1218 \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
1220 _RETURN_TYPE = 'video' # XXX: How to handle multifeed?
1222 _PLAYER_INFO_RE = (
1223 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
1224 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
1225 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
1227 _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE
1228 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1229 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1230 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1231 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1232 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1233 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1234 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1235 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1236 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1237 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1238 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1239 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1240 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1241 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1242 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1243 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1244 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1245 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1248 # 3D videos
1249 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1250 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1251 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1252 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1253 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1254 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1255 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1257 # Apple HTTP Live Streaming
1258 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1259 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1260 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1261 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1262 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1263 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1264 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1265 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1267 # DASH mp4 video
1268 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1269 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1270 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1271 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1272 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1273 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1274 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1275 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1276 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1277 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1278 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1279 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1281 # Dash mp4 audio
1282 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1283 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1284 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1285 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1286 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1287 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1288 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1290 # Dash webm
1291 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1292 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1293 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1294 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1295 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1296 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1297 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1298 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1299 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1300 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1301 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1302 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1303 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1304 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1305 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1306 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1307 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1308 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1309 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1310 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1311 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1312 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1314 # Dash webm audio
1315 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1316 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1318 # Dash webm audio with opus inside
1319 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1320 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1321 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1323 # RTMP (unnamed)
1324 '_rtmp': {'protocol': 'rtmp'},
1326 # av01 video only formats sometimes served with "unknown" codecs
1327 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1328 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1329 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1330 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1331 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1332 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1333 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1334 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1336 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1337 _POTOKEN_EXPERIMENTS = ('51217476', '51217102')
1338 _BROKEN_CLIENTS = {
1339 short_client_name(client): client
1340 for client in ('android', 'android_creator', 'android_music')
1343 _GEO_BYPASS = False
1345 IE_NAME = 'youtube'
1346 _TESTS = [
1348 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1349 'info_dict': {
1350 'id': 'BaW_jenozKc',
1351 'ext': 'mp4',
1352 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1353 'channel': 'Philipp Hagemeister',
1354 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1355 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1356 'upload_date': '20121002',
1357 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1358 'categories': ['Science & Technology'],
1359 'tags': ['youtube-dl'],
1360 'duration': 10,
1361 'view_count': int,
1362 'like_count': int,
1363 'availability': 'public',
1364 'playable_in_embed': True,
1365 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1366 'live_status': 'not_live',
1367 'age_limit': 0,
1368 'start_time': 1,
1369 'end_time': 9,
1370 'comment_count': int,
1371 'channel_follower_count': int,
1372 'uploader': 'Philipp Hagemeister',
1373 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1374 'uploader_id': '@PhilippHagemeister',
1375 'heatmap': 'count:100',
1376 'timestamp': 1349198244,
1380 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1381 'note': 'Embed-only video (#1746)',
1382 'info_dict': {
1383 'id': 'yZIXLfi8CZQ',
1384 'ext': 'mp4',
1385 'upload_date': '20120608',
1386 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1387 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1388 'age_limit': 18,
1390 'skip': 'Private video',
1393 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1394 'note': 'Use the first video ID in the URL',
1395 'info_dict': {
1396 'id': 'BaW_jenozKc',
1397 'ext': 'mp4',
1398 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1399 'channel': 'Philipp Hagemeister',
1400 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1401 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1402 'upload_date': '20121002',
1403 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
1404 'categories': ['Science & Technology'],
1405 'tags': ['youtube-dl'],
1406 'duration': 10,
1407 'view_count': int,
1408 'like_count': int,
1409 'availability': 'public',
1410 'playable_in_embed': True,
1411 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
1412 'live_status': 'not_live',
1413 'age_limit': 0,
1414 'comment_count': int,
1415 'channel_follower_count': int,
1416 'uploader': 'Philipp Hagemeister',
1417 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
1418 'uploader_id': '@PhilippHagemeister',
1419 'heatmap': 'count:100',
1420 'timestamp': 1349198244,
1422 'params': {
1423 'skip_download': True,
1427 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1428 'note': '256k DASH audio (format 141) via DASH manifest',
1429 'info_dict': {
1430 'id': 'a9LDPn-MO4I',
1431 'ext': 'm4a',
1432 'upload_date': '20121002',
1433 'description': '',
1434 'title': 'UHDTV TEST 8K VIDEO.mp4',
1436 'params': {
1437 'youtube_include_dash_manifest': True,
1438 'format': '141',
1440 'skip': 'format 141 not served anymore',
1442 # DASH manifest with encrypted signature
1444 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1445 'info_dict': {
1446 'id': 'IB3lcPjvWLA',
1447 'ext': 'm4a',
1448 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1449 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1450 'duration': 244,
1451 'upload_date': '20131011',
1452 'abr': 129.495,
1453 'like_count': int,
1454 'channel_id': 'UChuZAo1RKL85gev3Eal9_zg',
1455 'playable_in_embed': True,
1456 'channel_url': 'https://www.youtube.com/channel/UChuZAo1RKL85gev3Eal9_zg',
1457 'view_count': int,
1458 'track': 'The Spark',
1459 'live_status': 'not_live',
1460 'thumbnail': 'https://i.ytimg.com/vi_webp/IB3lcPjvWLA/maxresdefault.webp',
1461 'channel': 'Afrojack',
1462 'tags': 'count:19',
1463 'availability': 'public',
1464 'categories': ['Music'],
1465 'age_limit': 0,
1466 'alt_title': 'The Spark',
1467 'channel_follower_count': int,
1468 'uploader': 'Afrojack',
1469 'uploader_url': 'https://www.youtube.com/@Afrojack',
1470 'uploader_id': '@Afrojack',
1472 'params': {
1473 'youtube_include_dash_manifest': True,
1474 'format': '141/bestaudio[ext=m4a]',
1477 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1479 'note': 'Embed allowed age-gate video',
1480 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1481 'info_dict': {
1482 'id': 'HtVdAasjOgU',
1483 'ext': 'mp4',
1484 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1485 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1486 'duration': 142,
1487 'upload_date': '20140605',
1488 'age_limit': 18,
1489 'categories': ['Gaming'],
1490 'thumbnail': 'https://i.ytimg.com/vi_webp/HtVdAasjOgU/maxresdefault.webp',
1491 'availability': 'needs_auth',
1492 'channel_url': 'https://www.youtube.com/channel/UCzybXLxv08IApdjdN0mJhEg',
1493 'like_count': int,
1494 'channel': 'The Witcher',
1495 'live_status': 'not_live',
1496 'tags': 'count:17',
1497 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg',
1498 'playable_in_embed': True,
1499 'view_count': int,
1500 'channel_follower_count': int,
1501 'uploader': 'The Witcher',
1502 'uploader_url': 'https://www.youtube.com/@thewitcher',
1503 'uploader_id': '@thewitcher',
1504 'comment_count': int,
1505 'channel_is_verified': True,
1506 'heatmap': 'count:100',
1507 'timestamp': 1401991663,
1511 'note': 'Age-gate video with embed allowed in public site',
1512 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1513 'info_dict': {
1514 'id': 'HsUATh_Nc2U',
1515 'ext': 'mp4',
1516 'title': 'Godzilla 2 (Official Video)',
1517 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1518 'upload_date': '20200408',
1519 'age_limit': 18,
1520 'availability': 'needs_auth',
1521 'channel_id': 'UCYQT13AtrJC0gsM1far_zJg',
1522 'channel': 'FlyingKitty',
1523 'channel_url': 'https://www.youtube.com/channel/UCYQT13AtrJC0gsM1far_zJg',
1524 'view_count': int,
1525 'categories': ['Entertainment'],
1526 'live_status': 'not_live',
1527 'tags': ['Flyingkitty', 'godzilla 2'],
1528 'thumbnail': 'https://i.ytimg.com/vi/HsUATh_Nc2U/maxresdefault.jpg',
1529 'like_count': int,
1530 'duration': 177,
1531 'playable_in_embed': True,
1532 'channel_follower_count': int,
1533 'uploader': 'FlyingKitty',
1534 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
1535 'uploader_id': '@FlyingKitty900',
1536 'comment_count': int,
1537 'channel_is_verified': True,
1541 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1542 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1543 'info_dict': {
1544 'id': 'Tq92D6wQ1mg',
1545 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1546 'ext': 'mp4',
1547 'upload_date': '20191228',
1548 'description': 'md5:17eccca93a786d51bc67646756894066',
1549 'age_limit': 18,
1550 'like_count': int,
1551 'availability': 'needs_auth',
1552 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1553 'view_count': int,
1554 'thumbnail': 'https://i.ytimg.com/vi_webp/Tq92D6wQ1mg/sddefault.webp',
1555 'channel': 'Projekt Melody',
1556 'live_status': 'not_live',
1557 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
1558 'playable_in_embed': True,
1559 'categories': ['Entertainment'],
1560 'duration': 106,
1561 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
1562 'comment_count': int,
1563 'channel_follower_count': int,
1564 'uploader': 'Projekt Melody',
1565 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
1566 'uploader_id': '@ProjektMelody',
1567 'timestamp': 1577508724,
1571 'note': 'Non-Agegated non-embeddable video',
1572 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1573 'info_dict': {
1574 'id': 'MeJVWBSsPAY',
1575 'ext': 'mp4',
1576 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1577 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1578 'upload_date': '20130730',
1579 'track': 'Such mich find mich',
1580 'age_limit': 0,
1581 'tags': ['oomph', 'such mich find mich', 'lyrics', 'german industrial', 'musica industrial'],
1582 'like_count': int,
1583 'playable_in_embed': False,
1584 'creator': 'OOMPH!',
1585 'thumbnail': 'https://i.ytimg.com/vi/MeJVWBSsPAY/sddefault.jpg',
1586 'view_count': int,
1587 'alt_title': 'Such mich find mich',
1588 'duration': 210,
1589 'channel': 'Herr Lurik',
1590 'channel_id': 'UCdR3RSDPqub28LjZx0v9-aA',
1591 'categories': ['Music'],
1592 'availability': 'public',
1593 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA',
1594 'live_status': 'not_live',
1595 'artist': 'OOMPH!',
1596 'channel_follower_count': int,
1597 'uploader': 'Herr Lurik',
1598 'uploader_url': 'https://www.youtube.com/@HerrLurik',
1599 'uploader_id': '@HerrLurik',
1603 'note': 'Non-bypassable age-gated video',
1604 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1605 'only_matching': True,
1607 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1608 # YouTube Red ad is not captured for creator
1610 'url': '__2ABJjxzNo',
1611 'info_dict': {
1612 'id': '__2ABJjxzNo',
1613 'ext': 'mp4',
1614 'duration': 266,
1615 'upload_date': '20100430',
1616 'creator': 'deadmau5',
1617 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1618 'title': 'Deadmau5 - Some Chords (HD)',
1619 'alt_title': 'Some Chords',
1620 'availability': 'public',
1621 'tags': 'count:14',
1622 'channel_id': 'UCYEK6xds6eo-3tr4xRdflmQ',
1623 'view_count': int,
1624 'live_status': 'not_live',
1625 'channel': 'deadmau5',
1626 'thumbnail': 'https://i.ytimg.com/vi_webp/__2ABJjxzNo/maxresdefault.webp',
1627 'like_count': int,
1628 'track': 'Some Chords',
1629 'artist': 'deadmau5',
1630 'playable_in_embed': True,
1631 'age_limit': 0,
1632 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ',
1633 'categories': ['Music'],
1634 'album': 'Some Chords',
1635 'channel_follower_count': int,
1636 'uploader': 'deadmau5',
1637 'uploader_url': 'https://www.youtube.com/@deadmau5',
1638 'uploader_id': '@deadmau5',
1640 'expected_warnings': [
1641 'DASH manifest missing',
1644 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1646 'url': 'lqQg6PlCWgI',
1647 'info_dict': {
1648 'id': 'lqQg6PlCWgI',
1649 'ext': 'mp4',
1650 'duration': 6085,
1651 'upload_date': '20150827',
1652 'description': 'md5:04bbbf3ccceb6795947572ca36f45904',
1653 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1654 'like_count': int,
1655 'release_timestamp': 1343767800,
1656 'playable_in_embed': True,
1657 'categories': ['Sports'],
1658 'release_date': '20120731',
1659 'channel': 'Olympics',
1660 'tags': ['Hockey', '2012-07-31', '31 July 2012', 'Riverbank Arena', 'Session', 'Olympics', 'Olympic Games', 'London 2012', '2012 Summer Olympics', 'Summer Games'],
1661 'channel_id': 'UCTl3QQTvqHFjurroKxexy2Q',
1662 'thumbnail': 'https://i.ytimg.com/vi/lqQg6PlCWgI/maxresdefault.jpg',
1663 'age_limit': 0,
1664 'availability': 'public',
1665 'live_status': 'was_live',
1666 'view_count': int,
1667 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q',
1668 'channel_follower_count': int,
1669 'uploader': 'Olympics',
1670 'uploader_url': 'https://www.youtube.com/@Olympics',
1671 'uploader_id': '@Olympics',
1672 'channel_is_verified': True,
1673 'timestamp': 1440707674,
1675 'params': {
1676 'skip_download': 'requires avconv',
1679 # Non-square pixels
1681 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1682 'info_dict': {
1683 'id': '_b-2C3KPAM0',
1684 'ext': 'mp4',
1685 'stretched_ratio': 16 / 9.,
1686 'duration': 85,
1687 'upload_date': '20110310',
1688 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1689 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1690 'playable_in_embed': True,
1691 'channel': '孫ᄋᄅ',
1692 'age_limit': 0,
1693 'tags': 'count:11',
1694 'channel_url': 'https://www.youtube.com/channel/UCS-xxCmRaA6BFdmgDPA_BIw',
1695 'channel_id': 'UCS-xxCmRaA6BFdmgDPA_BIw',
1696 'thumbnail': 'https://i.ytimg.com/vi/_b-2C3KPAM0/maxresdefault.jpg',
1697 'view_count': int,
1698 'categories': ['People & Blogs'],
1699 'like_count': int,
1700 'live_status': 'not_live',
1701 'availability': 'unlisted',
1702 'comment_count': int,
1703 'channel_follower_count': int,
1704 'uploader': '孫ᄋᄅ',
1705 'uploader_url': 'https://www.youtube.com/@AllenMeow',
1706 'uploader_id': '@AllenMeow',
1707 'timestamp': 1299776999,
1710 # url_encoded_fmt_stream_map is empty string
1712 'url': 'qEJwOuvDf7I',
1713 'info_dict': {
1714 'id': 'qEJwOuvDf7I',
1715 'ext': 'webm',
1716 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1717 'description': '',
1718 'upload_date': '20150404',
1720 'params': {
1721 'skip_download': 'requires avconv',
1723 'skip': 'This live event has ended.',
1725 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1727 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1728 'info_dict': {
1729 'id': 'FIl7x6_3R5Y',
1730 'ext': 'webm',
1731 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1732 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1733 'duration': 220,
1734 'upload_date': '20150625',
1735 'formats': 'mincount:31',
1737 'skip': 'not actual anymore',
1739 # DASH manifest with segment_list
1741 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1742 'md5': '8ce563a1d667b599d21064e982ab9e31',
1743 'info_dict': {
1744 'id': 'CsmdDsKjzN8',
1745 'ext': 'mp4',
1746 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1747 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1748 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1750 'params': {
1751 'youtube_include_dash_manifest': True,
1752 'format': '135', # bestvideo
1754 'skip': 'This live event has ended.',
1757 # Multifeed videos (multiple cameras), URL can be of any Camera
1758 # TODO: fix multifeed titles
1759 'url': 'https://www.youtube.com/watch?v=zaPI8MvL8pg',
1760 'info_dict': {
1761 'id': 'zaPI8MvL8pg',
1762 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04',
1763 'description': 'md5:563ccbc698b39298481ca3c571169519',
1765 'playlist': [{
1766 'info_dict': {
1767 'id': 'j5yGuxZ8lLU',
1768 'ext': 'mp4',
1769 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Chris)',
1770 'description': 'md5:563ccbc698b39298481ca3c571169519',
1771 'duration': 10120,
1772 'channel_follower_count': int,
1773 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1774 'availability': 'public',
1775 'playable_in_embed': True,
1776 'upload_date': '20131105',
1777 'categories': ['Gaming'],
1778 'live_status': 'was_live',
1779 'tags': 'count:24',
1780 'release_timestamp': 1383701910,
1781 'thumbnail': 'https://i.ytimg.com/vi/j5yGuxZ8lLU/maxresdefault.jpg',
1782 'comment_count': int,
1783 'age_limit': 0,
1784 'like_count': int,
1785 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1786 'channel': 'WiiLikeToPlay',
1787 'view_count': int,
1788 'release_date': '20131106',
1789 'uploader': 'WiiLikeToPlay',
1790 'uploader_id': '@WLTP',
1791 'uploader_url': 'https://www.youtube.com/@WLTP',
1793 }, {
1794 'info_dict': {
1795 'id': 'zaPI8MvL8pg',
1796 'ext': 'mp4',
1797 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Tyson)',
1798 'availability': 'public',
1799 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1800 'channel': 'WiiLikeToPlay',
1801 'channel_follower_count': int,
1802 'description': 'md5:563ccbc698b39298481ca3c571169519',
1803 'duration': 10108,
1804 'age_limit': 0,
1805 'like_count': int,
1806 'tags': 'count:24',
1807 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1808 'release_timestamp': 1383701915,
1809 'comment_count': int,
1810 'upload_date': '20131105',
1811 'thumbnail': 'https://i.ytimg.com/vi/zaPI8MvL8pg/maxresdefault.jpg',
1812 'release_date': '20131106',
1813 'playable_in_embed': True,
1814 'live_status': 'was_live',
1815 'categories': ['Gaming'],
1816 'view_count': int,
1817 'uploader': 'WiiLikeToPlay',
1818 'uploader_id': '@WLTP',
1819 'uploader_url': 'https://www.youtube.com/@WLTP',
1821 }, {
1822 'info_dict': {
1823 'id': 'R7r3vfO7Hao',
1824 'ext': 'mp4',
1825 'title': 'Terraria 1.2 Live Stream | Let\'s Play - Part 04 (Spencer)',
1826 'thumbnail': 'https://i.ytimg.com/vi/R7r3vfO7Hao/maxresdefault.jpg',
1827 'channel_id': 'UCN2XePorRokPB9TEgRZpddg',
1828 'like_count': int,
1829 'availability': 'public',
1830 'playable_in_embed': True,
1831 'upload_date': '20131105',
1832 'description': 'md5:563ccbc698b39298481ca3c571169519',
1833 'channel_follower_count': int,
1834 'tags': 'count:24',
1835 'release_date': '20131106',
1836 'comment_count': int,
1837 'channel_url': 'https://www.youtube.com/channel/UCN2XePorRokPB9TEgRZpddg',
1838 'channel': 'WiiLikeToPlay',
1839 'categories': ['Gaming'],
1840 'release_timestamp': 1383701914,
1841 'live_status': 'was_live',
1842 'age_limit': 0,
1843 'duration': 10128,
1844 'view_count': int,
1845 'uploader': 'WiiLikeToPlay',
1846 'uploader_id': '@WLTP',
1847 'uploader_url': 'https://www.youtube.com/@WLTP',
1850 'params': {'skip_download': True},
1851 'skip': 'Not multifeed anymore',
1854 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1855 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1856 'info_dict': {
1857 'id': 'gVfLd0zydlo',
1858 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1860 'playlist_count': 2,
1861 'skip': 'Not multifeed anymore',
1864 'url': 'https://vid.plus/FlRa-iH7PGw',
1865 'only_matching': True,
1868 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1869 'only_matching': True,
1872 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1873 # Also tests cut-off URL expansion in video description (see
1874 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1875 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1876 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1877 'info_dict': {
1878 'id': 'lsguqyKfVQg',
1879 'ext': 'mp4',
1880 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1881 'alt_title': 'Dark Walk',
1882 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1883 'duration': 133,
1884 'upload_date': '20151119',
1885 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1886 'track': 'Dark Walk',
1887 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1888 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1889 'thumbnail': 'https://i.ytimg.com/vi_webp/lsguqyKfVQg/maxresdefault.webp',
1890 'categories': ['Film & Animation'],
1891 'view_count': int,
1892 'live_status': 'not_live',
1893 'channel_url': 'https://www.youtube.com/channel/UCTSRgz5jylBvFt_S7wnsqLQ',
1894 'channel_id': 'UCTSRgz5jylBvFt_S7wnsqLQ',
1895 'tags': 'count:13',
1896 'availability': 'public',
1897 'channel': 'IronSoulElf',
1898 'playable_in_embed': True,
1899 'like_count': int,
1900 'age_limit': 0,
1901 'channel_follower_count': int,
1903 'params': {
1904 'skip_download': True,
1908 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1909 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1910 'only_matching': True,
1913 # Video with yt:stretch=17:0
1914 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1915 'info_dict': {
1916 'id': 'Q39EVAstoRM',
1917 'ext': 'mp4',
1918 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1919 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1920 'upload_date': '20151107',
1922 'params': {
1923 'skip_download': True,
1925 'skip': 'This video does not exist.',
1928 # Video with incomplete 'yt:stretch=16:'
1929 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1930 'only_matching': True,
1933 # Video licensed under Creative Commons
1934 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1935 'info_dict': {
1936 'id': 'M4gD1WSo5mA',
1937 'ext': 'mp4',
1938 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1939 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1940 'duration': 721,
1941 'upload_date': '20150128',
1942 'license': 'Creative Commons Attribution license (reuse allowed)',
1943 'channel_id': 'UCuLGmD72gJDBwmLw06X58SA',
1944 'channel_url': 'https://www.youtube.com/channel/UCuLGmD72gJDBwmLw06X58SA',
1945 'like_count': int,
1946 'age_limit': 0,
1947 'tags': ['Copyright (Legal Subject)', 'Law (Industry)', 'William W. Fisher (Author)'],
1948 'channel': 'The Berkman Klein Center for Internet & Society',
1949 'availability': 'public',
1950 'view_count': int,
1951 'categories': ['Education'],
1952 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp',
1953 'live_status': 'not_live',
1954 'playable_in_embed': True,
1955 'channel_follower_count': int,
1956 'chapters': list,
1957 'uploader': 'The Berkman Klein Center for Internet & Society',
1958 'uploader_id': '@BKCHarvard',
1959 'uploader_url': 'https://www.youtube.com/@BKCHarvard',
1960 'timestamp': 1422422076,
1962 'params': {
1963 'skip_download': True,
1967 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1968 'info_dict': {
1969 'id': 'eQcmzGIKrzg',
1970 'ext': 'mp4',
1971 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1972 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1973 'duration': 4060,
1974 'upload_date': '20151120',
1975 'license': 'Creative Commons Attribution license (reuse allowed)',
1976 'playable_in_embed': True,
1977 'tags': 'count:12',
1978 'like_count': int,
1979 'channel_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1980 'age_limit': 0,
1981 'availability': 'public',
1982 'categories': ['News & Politics'],
1983 'channel': 'Bernie Sanders',
1984 'thumbnail': 'https://i.ytimg.com/vi_webp/eQcmzGIKrzg/maxresdefault.webp',
1985 'view_count': int,
1986 'live_status': 'not_live',
1987 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1988 'comment_count': int,
1989 'channel_follower_count': int,
1990 'chapters': list,
1991 'uploader': 'Bernie Sanders',
1992 'uploader_url': 'https://www.youtube.com/@BernieSanders',
1993 'uploader_id': '@BernieSanders',
1994 'channel_is_verified': True,
1995 'heatmap': 'count:100',
1996 'timestamp': 1447987198,
1998 'params': {
1999 'skip_download': True,
2003 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
2004 'only_matching': True,
2007 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
2008 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
2009 'only_matching': True,
2012 # Rental video preview
2013 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
2014 'info_dict': {
2015 'id': 'uGpuVWrhIzE',
2016 'ext': 'mp4',
2017 'title': 'Piku - Trailer',
2018 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
2019 'upload_date': '20150811',
2020 'license': 'Standard YouTube License',
2022 'params': {
2023 'skip_download': True,
2025 'skip': 'This video is not available.',
2028 # YouTube Red video with episode data
2029 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
2030 'info_dict': {
2031 'id': 'iqKdEhx-dD4',
2032 'ext': 'mp4',
2033 'title': 'Isolation - Mind Field (Ep 1)',
2034 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
2035 'duration': 2085,
2036 'upload_date': '20170118',
2037 'series': 'Mind Field',
2038 'season_number': 1,
2039 'episode_number': 1,
2040 'thumbnail': 'https://i.ytimg.com/vi_webp/iqKdEhx-dD4/maxresdefault.webp',
2041 'tags': 'count:12',
2042 'view_count': int,
2043 'availability': 'public',
2044 'age_limit': 0,
2045 'channel': 'Vsauce',
2046 'episode': 'Episode 1',
2047 'categories': ['Entertainment'],
2048 'season': 'Season 1',
2049 'channel_id': 'UC6nSFpj9HTCZ5t-N3Rm3-HA',
2050 'channel_url': 'https://www.youtube.com/channel/UC6nSFpj9HTCZ5t-N3Rm3-HA',
2051 'like_count': int,
2052 'playable_in_embed': True,
2053 'live_status': 'not_live',
2054 'channel_follower_count': int,
2055 'uploader': 'Vsauce',
2056 'uploader_url': 'https://www.youtube.com/@Vsauce',
2057 'uploader_id': '@Vsauce',
2058 'comment_count': int,
2059 'channel_is_verified': True,
2060 'timestamp': 1484761047,
2062 'params': {
2063 'skip_download': True,
2065 'expected_warnings': [
2066 'Skipping DASH manifest',
2070 # The following content has been identified by the YouTube community
2071 # as inappropriate or offensive to some audiences.
2072 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
2073 'info_dict': {
2074 'id': '6SJNVb0GnPI',
2075 'ext': 'mp4',
2076 'title': 'Race Differences in Intelligence',
2077 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
2078 'duration': 965,
2079 'upload_date': '20140124',
2081 'params': {
2082 'skip_download': True,
2084 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
2087 # itag 212
2088 'url': '1t24XAntNCY',
2089 'only_matching': True,
2092 # geo restricted to JP
2093 'url': 'sJL6WA-aGkQ',
2094 'only_matching': True,
2097 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
2098 'only_matching': True,
2101 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
2102 'only_matching': True,
2105 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
2106 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
2107 'only_matching': True,
2110 # DRM protected
2111 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
2112 'only_matching': True,
2115 # Video with unsupported adaptive stream type formats
2116 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
2117 'info_dict': {
2118 'id': 'Z4Vy8R84T1U',
2119 'ext': 'mp4',
2120 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
2121 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
2122 'duration': 433,
2123 'upload_date': '20130923',
2124 'formats': 'maxcount:10',
2126 'params': {
2127 'skip_download': True,
2128 'youtube_include_dash_manifest': False,
2130 'skip': 'not actual anymore',
2133 # YouTube Music auto-generated description
2134 # TODO: fix metadata extraction
2135 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2136 'info_dict': {
2137 'id': 'MgNrAu2pzNs',
2138 'ext': 'mp4',
2139 'title': 'Voyeur Girl',
2140 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
2141 'upload_date': '20190312',
2142 'artists': ['Stephen'],
2143 'creators': ['Stephen'],
2144 'track': 'Voyeur Girl',
2145 'album': 'it\'s too much love to know my dear',
2146 'release_date': '20190313',
2147 'alt_title': 'Voyeur Girl',
2148 'view_count': int,
2149 'playable_in_embed': True,
2150 'like_count': int,
2151 'categories': ['Music'],
2152 'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
2153 'channel': 'Stephen', # TODO: should be "Stephen - Topic"
2154 'uploader': 'Stephen',
2155 'availability': 'public',
2156 'duration': 169,
2157 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
2158 'age_limit': 0,
2159 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
2160 'tags': 'count:11',
2161 'live_status': 'not_live',
2162 'channel_follower_count': int,
2164 'params': {
2165 'skip_download': True,
2169 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
2170 'only_matching': True,
2173 # invalid -> valid video id redirection
2174 'url': 'DJztXj2GPfl',
2175 'info_dict': {
2176 'id': 'DJztXj2GPfk',
2177 'ext': 'mp4',
2178 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
2179 'description': 'md5:bf577a41da97918e94fa9798d9228825',
2180 'upload_date': '20090125',
2181 'artist': 'Panjabi MC',
2182 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
2183 'album': 'Beware of the Boys (Mundian To Bach Ke)',
2185 'params': {
2186 'skip_download': True,
2188 'skip': 'Video unavailable',
2191 # empty description results in an empty string
2192 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
2193 'info_dict': {
2194 'id': 'x41yOUIvK2k',
2195 'ext': 'mp4',
2196 'title': 'IMG 3456',
2197 'description': '',
2198 'upload_date': '20170613',
2199 'view_count': int,
2200 'thumbnail': 'https://i.ytimg.com/vi_webp/x41yOUIvK2k/maxresdefault.webp',
2201 'like_count': int,
2202 'channel_id': 'UCo03ZQPBW5U4UC3regpt1nw',
2203 'tags': [],
2204 'channel_url': 'https://www.youtube.com/channel/UCo03ZQPBW5U4UC3regpt1nw',
2205 'availability': 'public',
2206 'age_limit': 0,
2207 'categories': ['Pets & Animals'],
2208 'duration': 7,
2209 'playable_in_embed': True,
2210 'live_status': 'not_live',
2211 'channel': 'l\'Or Vert asbl',
2212 'channel_follower_count': int,
2213 'uploader': 'l\'Or Vert asbl',
2214 'uploader_url': 'https://www.youtube.com/@ElevageOrVert',
2215 'uploader_id': '@ElevageOrVert',
2216 'timestamp': 1497343210,
2218 'params': {
2219 'skip_download': True,
2223 # with '};' inside yt initial data (see [1])
2224 # see [2] for an example with '};' inside ytInitialPlayerResponse
2225 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
2226 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
2227 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
2228 'info_dict': {
2229 'id': 'CHqg6qOn4no',
2230 'ext': 'mp4',
2231 'title': 'Part 77 Sort a list of simple types in c#',
2232 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
2233 'upload_date': '20130831',
2234 'channel_id': 'UCCTVrRB5KpIiK6V2GGVsR1Q',
2235 'like_count': int,
2236 'channel_url': 'https://www.youtube.com/channel/UCCTVrRB5KpIiK6V2GGVsR1Q',
2237 'live_status': 'not_live',
2238 'categories': ['Education'],
2239 'availability': 'public',
2240 'thumbnail': 'https://i.ytimg.com/vi/CHqg6qOn4no/sddefault.jpg',
2241 'tags': 'count:12',
2242 'playable_in_embed': True,
2243 'age_limit': 0,
2244 'view_count': int,
2245 'duration': 522,
2246 'channel': 'kudvenkat',
2247 'comment_count': int,
2248 'channel_follower_count': int,
2249 'chapters': list,
2250 'uploader': 'kudvenkat',
2251 'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
2252 'uploader_id': '@Csharp-video-tutorialsBlogspot',
2253 'channel_is_verified': True,
2254 'heatmap': 'count:100',
2255 'timestamp': 1377976349,
2257 'params': {
2258 'skip_download': True,
2262 # another example of '};' in ytInitialData
2263 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
2264 'only_matching': True,
2267 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
2268 'only_matching': True,
2271 # https://github.com/ytdl-org/youtube-dl/pull/28094
2272 'url': 'OtqTfy26tG0',
2273 'info_dict': {
2274 'id': 'OtqTfy26tG0',
2275 'ext': 'mp4',
2276 'title': 'Burn Out',
2277 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
2278 'upload_date': '20141120',
2279 'artist': 'The Cinematic Orchestra',
2280 'track': 'Burn Out',
2281 'album': 'Every Day',
2282 'like_count': int,
2283 'live_status': 'not_live',
2284 'alt_title': 'Burn Out',
2285 'duration': 614,
2286 'age_limit': 0,
2287 'view_count': int,
2288 'channel_url': 'https://www.youtube.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
2289 'creator': 'The Cinematic Orchestra',
2290 'channel': 'The Cinematic Orchestra',
2291 'tags': ['The Cinematic Orchestra', 'Every Day', 'Burn Out'],
2292 'channel_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
2293 'availability': 'public',
2294 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg',
2295 'categories': ['Music'],
2296 'playable_in_embed': True,
2297 'channel_follower_count': int,
2298 'uploader': 'The Cinematic Orchestra',
2299 'comment_count': int,
2301 'params': {
2302 'skip_download': True,
2306 # controversial video, only works with bpctr when authenticated with cookies
2307 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
2308 'only_matching': True,
2311 # controversial video, requires bpctr/contentCheckOk
2312 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
2313 'info_dict': {
2314 'id': 'SZJvDhaSDnc',
2315 'ext': 'mp4',
2316 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
2317 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
2318 'upload_date': '20140716',
2319 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
2320 'duration': 170,
2321 'categories': ['News & Politics'],
2322 'view_count': int,
2323 'channel': 'CBS Mornings',
2324 'tags': ['suicide', 'bullying', 'video', 'cbs', 'news'],
2325 'thumbnail': 'https://i.ytimg.com/vi/SZJvDhaSDnc/hqdefault.jpg',
2326 'age_limit': 18,
2327 'availability': 'needs_auth',
2328 'channel_url': 'https://www.youtube.com/channel/UC-SJ6nODDmufqBzPBwCvYvQ',
2329 'like_count': int,
2330 'live_status': 'not_live',
2331 'playable_in_embed': True,
2332 'channel_follower_count': int,
2333 'uploader': 'CBS Mornings',
2334 'uploader_url': 'https://www.youtube.com/@CBSMornings',
2335 'uploader_id': '@CBSMornings',
2336 'comment_count': int,
2337 'channel_is_verified': True,
2338 'timestamp': 1405513526,
2342 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
2343 'url': 'cBvYw8_A0vQ',
2344 'info_dict': {
2345 'id': 'cBvYw8_A0vQ',
2346 'ext': 'mp4',
2347 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
2348 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
2349 'upload_date': '20201120',
2350 'duration': 1456,
2351 'categories': ['Travel & Events'],
2352 'channel_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
2353 'view_count': int,
2354 'channel': 'Walk around Japan',
2355 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'],
2356 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg',
2357 'age_limit': 0,
2358 'availability': 'public',
2359 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
2360 'live_status': 'not_live',
2361 'playable_in_embed': True,
2362 'channel_follower_count': int,
2363 'uploader': 'Walk around Japan',
2364 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124',
2365 'uploader_id': '@walkaroundjapan7124',
2366 'timestamp': 1605884416,
2368 'params': {
2369 'skip_download': True,
2371 }, {
2372 # Has multiple audio streams
2373 'url': 'WaOKSUlf4TM',
2374 'only_matching': True,
2375 }, {
2376 # Requires Premium: has format 141 when requested using YTM url
2377 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
2378 'only_matching': True,
2379 }, {
2380 # multiple subtitles with same lang_code
2381 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
2382 'only_matching': True,
2383 }, {
2384 # Force use of the android client fallback
2385 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
2386 'info_dict': {
2387 'id': 'YOelRv7fMxY',
2388 'title': 'DIGGING A SECRET TUNNEL Part 1',
2389 'ext': '3gp',
2390 'upload_date': '20210624',
2391 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
2392 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
2393 'description': 'md5:5d5991195d599b56cd0c4148907eec50',
2394 'duration': 596,
2395 'categories': ['Entertainment'],
2396 'view_count': int,
2397 'channel': 'colinfurze',
2398 'tags': ['Colin', 'furze', 'Terry', 'tunnel', 'underground', 'bunker'],
2399 'thumbnail': 'https://i.ytimg.com/vi/YOelRv7fMxY/maxresdefault.jpg',
2400 'age_limit': 0,
2401 'availability': 'public',
2402 'like_count': int,
2403 'live_status': 'not_live',
2404 'playable_in_embed': True,
2405 'channel_follower_count': int,
2406 'chapters': list,
2407 'uploader': 'colinfurze',
2408 'uploader_url': 'https://www.youtube.com/@colinfurze',
2409 'uploader_id': '@colinfurze',
2410 'comment_count': int,
2411 'channel_is_verified': True,
2412 'heatmap': 'count:100',
2414 'params': {
2415 'format': '17', # 3gp format available on android
2416 'extractor_args': {'youtube': {'player_client': ['android']}},
2418 'skip': 'android client broken',
2421 # Skip download of additional client configs (remix client config in this case)
2422 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
2423 'only_matching': True,
2424 'params': {
2425 'extractor_args': {'youtube': {'player_skip': ['configs']}},
2427 }, {
2428 # shorts
2429 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
2430 'only_matching': True,
2431 }, {
2432 'note': 'Storyboards',
2433 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
2434 'info_dict': {
2435 'id': '5KLPxDtMqe8',
2436 'ext': 'mhtml',
2437 'format_id': 'sb0',
2438 'title': 'Your Brain is Plastic',
2439 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
2440 'upload_date': '20140324',
2441 'like_count': int,
2442 'channel_id': 'UCZYTClx2T1of7BRZ86-8fow',
2443 'channel_url': 'https://www.youtube.com/channel/UCZYTClx2T1of7BRZ86-8fow',
2444 'view_count': int,
2445 'thumbnail': 'https://i.ytimg.com/vi/5KLPxDtMqe8/maxresdefault.jpg',
2446 'playable_in_embed': True,
2447 'tags': 'count:12',
2448 'availability': 'public',
2449 'channel': 'SciShow',
2450 'live_status': 'not_live',
2451 'duration': 248,
2452 'categories': ['Education'],
2453 'age_limit': 0,
2454 'channel_follower_count': int,
2455 'chapters': list,
2456 'uploader': 'SciShow',
2457 'uploader_url': 'https://www.youtube.com/@SciShow',
2458 'uploader_id': '@SciShow',
2459 'comment_count': int,
2460 'channel_is_verified': True,
2461 'heatmap': 'count:100',
2462 'timestamp': 1395685455,
2463 }, 'params': {'format': 'mhtml', 'skip_download': True},
2464 }, {
2465 # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
2466 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
2467 'info_dict': {
2468 'id': '2NUZ8W2llS4',
2469 'ext': 'mp4',
2470 'title': 'The NP that test your phone performance 🙂',
2471 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
2472 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
2473 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
2474 'duration': 21,
2475 'view_count': int,
2476 'age_limit': 0,
2477 'categories': ['Gaming'],
2478 'tags': 'count:23',
2479 'playable_in_embed': True,
2480 'live_status': 'not_live',
2481 'upload_date': '20220103',
2482 'like_count': int,
2483 'availability': 'public',
2484 'channel': 'Leon Nguyen',
2485 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
2486 'comment_count': int,
2487 'channel_follower_count': int,
2488 'uploader': 'Leon Nguyen',
2489 'uploader_url': 'https://www.youtube.com/@LeonNguyen',
2490 'uploader_id': '@LeonNguyen',
2491 'heatmap': 'count:100',
2492 'timestamp': 1641170939,
2494 }, {
2495 # Date text is that of a premiered video; ensure upload date is in UTC (published 1641172509)
2496 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
2497 'info_dict': {
2498 'id': 'mzZzzBU6lrM',
2499 'ext': 'mp4',
2500 'title': 'I Met GeorgeNotFound In Real Life...',
2501 'description': 'md5:978296ec9783a031738b684d4ebf302d',
2502 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q',
2503 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q',
2504 'duration': 955,
2505 'view_count': int,
2506 'age_limit': 0,
2507 'categories': ['Entertainment'],
2508 'tags': 'count:26',
2509 'playable_in_embed': True,
2510 'live_status': 'not_live',
2511 'release_timestamp': 1641172509,
2512 'release_date': '20220103',
2513 'upload_date': '20220103',
2514 'like_count': int,
2515 'availability': 'public',
2516 'channel': 'Quackity',
2517 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg',
2518 'channel_follower_count': int,
2519 'uploader': 'Quackity',
2520 'uploader_id': '@Quackity',
2521 'uploader_url': 'https://www.youtube.com/@Quackity',
2522 'comment_count': int,
2523 'channel_is_verified': True,
2524 'heatmap': 'count:100',
2525 'timestamp': 1641172509,
2528 { # continuous livestream.
2529 # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00
2530 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk',
2531 'info_dict': {
2532 'id': 'jfKfPfyJRdk',
2533 'ext': 'mp4',
2534 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow',
2535 'like_count': int,
2536 'uploader': 'Lofi Girl',
2537 'categories': ['Music'],
2538 'concurrent_view_count': int,
2539 'playable_in_embed': True,
2540 'timestamp': 1657627949,
2541 'release_date': '20220712',
2542 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow',
2543 'description': 'md5:13a6f76df898f5674f9127139f3df6f7',
2544 'age_limit': 0,
2545 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg',
2546 'release_timestamp': 1657641570,
2547 'uploader_url': 'https://www.youtube.com/@LofiGirl',
2548 'channel_follower_count': int,
2549 'channel_is_verified': True,
2550 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to',
2551 'view_count': int,
2552 'live_status': 'is_live',
2553 'tags': 'count:32',
2554 'channel': 'Lofi Girl',
2555 'availability': 'public',
2556 'upload_date': '20220712',
2557 'uploader_id': '@LofiGirl',
2559 'params': {'skip_download': True},
2560 }, {
2561 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
2562 'info_dict': {
2563 'id': 'tjjjtzRLHvA',
2564 'ext': 'mp4',
2565 'title': 'ハッシュタグ無し };if window.ytcsi',
2566 'upload_date': '20220323',
2567 'like_count': int,
2568 'availability': 'unlisted',
2569 'channel': 'Lesmiscore',
2570 'thumbnail': r're:^https?://.*\.jpg',
2571 'age_limit': 0,
2572 'categories': ['Music'],
2573 'view_count': int,
2574 'description': '',
2575 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
2576 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
2577 'live_status': 'not_live',
2578 'playable_in_embed': True,
2579 'channel_follower_count': int,
2580 'duration': 6,
2581 'tags': [],
2582 'uploader_id': '@lesmiscore',
2583 'uploader': 'Lesmiscore',
2584 'uploader_url': 'https://www.youtube.com/@lesmiscore',
2585 'timestamp': 1648005313,
2587 }, {
2588 # Prefer primary title+description language metadata by default
2589 # Do not prefer translated description if primary is empty
2590 'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
2591 'info_dict': {
2592 'id': 'el3E4MbxRqQ',
2593 'ext': 'mp4',
2594 'title': 'dlp test video 2 - primary sv no desc',
2595 'description': '',
2596 'channel': 'cole-dlp-test-acc',
2597 'tags': [],
2598 'view_count': int,
2599 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2600 'like_count': int,
2601 'playable_in_embed': True,
2602 'availability': 'unlisted',
2603 'thumbnail': r're:^https?://.*\.jpg',
2604 'age_limit': 0,
2605 'duration': 5,
2606 'live_status': 'not_live',
2607 'upload_date': '20220908',
2608 'categories': ['People & Blogs'],
2609 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2610 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2611 'uploader_id': '@coletdjnz',
2612 'uploader': 'cole-dlp-test-acc',
2613 'timestamp': 1662677394,
2615 'params': {'skip_download': True},
2616 }, {
2617 # Extractor argument: prefer translated title+description
2618 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
2619 'info_dict': {
2620 'id': 'gHKT4uU8Zng',
2621 'ext': 'mp4',
2622 'channel': 'cole-dlp-test-acc',
2623 'tags': [],
2624 'duration': 5,
2625 'live_status': 'not_live',
2626 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
2627 'upload_date': '20220729',
2628 'view_count': int,
2629 'categories': ['People & Blogs'],
2630 'thumbnail': r're:^https?://.*\.jpg',
2631 'title': 'dlp test video title translated (fr)',
2632 'availability': 'public',
2633 'age_limit': 0,
2634 'description': 'dlp test video description translated (fr)',
2635 'playable_in_embed': True,
2636 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
2637 'uploader_url': 'https://www.youtube.com/@coletdjnz',
2638 'uploader_id': '@coletdjnz',
2639 'uploader': 'cole-dlp-test-acc',
2640 'timestamp': 1659073275,
2641 'like_count': int,
2643 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
2644 'expected_warnings': [r'Preferring "fr" translated fields'],
2645 }, {
2646 'note': '6 channel audio',
2647 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
2648 'only_matching': True,
2649 }, {
2650 'note': 'Multiple HLS formats with same itag',
2651 'url': 'https://www.youtube.com/watch?v=kX3nB4PpJko',
2652 'info_dict': {
2653 'id': 'kX3nB4PpJko',
2654 'ext': 'mp4',
2655 'categories': ['Entertainment'],
2656 'description': 'md5:e8031ff6e426cdb6a77670c9b81f6fa6',
2657 'live_status': 'not_live',
2658 'duration': 937,
2659 'channel_follower_count': int,
2660 'thumbnail': 'https://i.ytimg.com/vi_webp/kX3nB4PpJko/maxresdefault.webp',
2661 'title': 'Last To Take Hand Off Jet, Keeps It!',
2662 'channel': 'MrBeast',
2663 'playable_in_embed': True,
2664 'view_count': int,
2665 'upload_date': '20221112',
2666 'channel_url': 'https://www.youtube.com/channel/UCX6OQ3DkcsbYNE6H8uQQuVA',
2667 'age_limit': 0,
2668 'availability': 'public',
2669 'channel_id': 'UCX6OQ3DkcsbYNE6H8uQQuVA',
2670 'like_count': int,
2671 'tags': [],
2672 'uploader': 'MrBeast',
2673 'uploader_url': 'https://www.youtube.com/@MrBeast',
2674 'uploader_id': '@MrBeast',
2675 'comment_count': int,
2676 'channel_is_verified': True,
2677 'heatmap': 'count:100',
2679 'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
2680 }, {
2681 'note': 'Audio formats with Dynamic Range Compression',
2682 'url': 'https://www.youtube.com/watch?v=Tq92D6wQ1mg',
2683 'info_dict': {
2684 'id': 'Tq92D6wQ1mg',
2685 'ext': 'webm',
2686 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
2687 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ',
2688 'channel_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
2689 'channel_follower_count': int,
2690 'description': 'md5:17eccca93a786d51bc67646756894066',
2691 'upload_date': '20191228',
2692 'tags': ['mmd', 'dance', 'mikumikudance', 'kpop', 'vtuber'],
2693 'playable_in_embed': True,
2694 'like_count': int,
2695 'categories': ['Entertainment'],
2696 'thumbnail': 'https://i.ytimg.com/vi/Tq92D6wQ1mg/sddefault.jpg',
2697 'age_limit': 18,
2698 'channel': 'Projekt Melody',
2699 'view_count': int,
2700 'availability': 'needs_auth',
2701 'comment_count': int,
2702 'live_status': 'not_live',
2703 'duration': 106,
2704 'uploader': 'Projekt Melody',
2705 'uploader_id': '@ProjektMelody',
2706 'uploader_url': 'https://www.youtube.com/@ProjektMelody',
2707 'timestamp': 1577508724,
2709 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'},
2712 'url': 'https://www.youtube.com/live/qVv6vCqciTM',
2713 'info_dict': {
2714 'id': 'qVv6vCqciTM',
2715 'ext': 'mp4',
2716 'age_limit': 0,
2717 'comment_count': int,
2718 'chapters': 'count:13',
2719 'upload_date': '20221223',
2720 'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
2721 'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
2722 'like_count': int,
2723 'release_date': '20221223',
2724 'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
2725 'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
2726 'view_count': int,
2727 'playable_in_embed': True,
2728 'duration': 4438,
2729 'availability': 'public',
2730 'channel_follower_count': int,
2731 'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
2732 'categories': ['Entertainment'],
2733 'live_status': 'was_live',
2734 'release_timestamp': 1671793345,
2735 'channel': 'さなちゃんねる',
2736 'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
2737 'uploader': 'さなちゃんねる',
2738 'uploader_url': 'https://www.youtube.com/@sana_natori',
2739 'uploader_id': '@sana_natori',
2740 'channel_is_verified': True,
2741 'heatmap': 'count:100',
2742 'timestamp': 1671798112,
2746 # Fallbacks when webpage and web client is unavailable
2747 'url': 'https://www.youtube.com/watch?v=wSSmNUl9Snw',
2748 'info_dict': {
2749 'id': 'wSSmNUl9Snw',
2750 'ext': 'mp4',
2751 # 'categories': ['Science & Technology'],
2752 'view_count': int,
2753 'chapters': 'count:2',
2754 'channel': 'Scott Manley',
2755 'like_count': int,
2756 'age_limit': 0,
2757 # 'availability': 'public',
2758 'channel_follower_count': int,
2759 'live_status': 'not_live',
2760 'upload_date': '20170831',
2761 'duration': 682,
2762 'tags': 'count:8',
2763 'uploader_url': 'https://www.youtube.com/@scottmanley',
2764 'description': 'md5:f4bed7b200404b72a394c2f97b782c02',
2765 'uploader': 'Scott Manley',
2766 'uploader_id': '@scottmanley',
2767 'title': 'The Computer Hack That Saved Apollo 14',
2768 'channel_id': 'UCxzC4EngIsMrPmbm6Nxvb-A',
2769 'thumbnail': r're:^https?://.*\.webp',
2770 'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
2771 'playable_in_embed': True,
2772 'comment_count': int,
2773 'channel_is_verified': True,
2774 'heatmap': 'count:100',
2776 'params': {
2777 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
# Test fixtures for third-party pages that embed a YouTube video.
# NOTE(review): presumably consumed by the project's test runner - it is not visible here.
_WEBPAGE_TESTS = [
    # YouTube <object> embed
    {
        'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
        'md5': '873c81d308b979f0e23ee7e620b312a3',
        'info_dict': {
            'id': 'msN87y-iEx0',
            'ext': 'mp4',
            'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
            'upload_date': '20080526',
            'description': 'md5:873c81d308b979f0e23ee7e620b312a3',
            'age_limit': 0,
            'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'],
            'channel_id': 'UCCeo--lls1vna5YJABWAcVA',
            'playable_in_embed': True,
            'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg',
            'like_count': int,
            'comment_count': int,
            'channel': 'Christopher Sykes',
            'live_status': 'not_live',
            'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA',
            'availability': 'public',
            'duration': 195,
            'view_count': int,
            'categories': ['Science & Technology'],
            'channel_follower_count': int,
            'uploader': 'Christopher Sykes',
            'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
            'uploader_id': '@ChristopherSykesDocumentaries',
            'heatmap': 'count:100',
            'timestamp': 1211825920,
        },
        'params': {
            'skip_download': True,
        },
    },
]
@classmethod
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    A URL carrying a non-empty ``list`` query parameter is rejected here;
    every other URL is decided by the parent class.
    """
    # NOTE(review): parse_qs is imported locally although the module header
    # imports it as well - presumably this method must stay self-contained;
    # confirm before removing the local import.
    from ..utils import parse_qs

    playlist_id = parse_qs(url).get('list', [None])[0]
    return False if playlist_id else super().suitable(url)
def __init__(self, *args, **kwargs):
    """Initialise the parent extractor and start with two empty per-instance caches."""
    super().__init__(*args, **kwargs)
    # _code_cache is filled by _load_player, _player_cache by _cached
    self._code_cache, self._player_cache = {}, {}
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
    """Attach fragment generators to the formats flagged ``is_from_start``.

    Only formats with a truthy ``is_from_start`` are touched, and their dicts
    are modified in place: ``is_live`` and ``fragments`` are always set,
    ``protocol`` is set while the stream is live, and ``is_from_start`` is
    removed once it is no longer live.
    """
    # Serialises the manifest refreshes triggered through mpd_feed
    lock = threading.Lock()
    start_time = time.time()
    formats = [f for f in formats if f.get('is_from_start')]

    def refetch_manifest(format_id, delay):
        nonlocal formats, start_time, is_live
        # Nothing to do until more than `delay` seconds have passed since the last refresh
        if time.time() <= start_time + delay:
            return

        _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
        video_details = traverse_obj(prs, (..., 'videoDetails'), expected_type=dict)
        microformats = traverse_obj(
            prs, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict)
        # Replaces the shared format list and live flag with freshly extracted values
        _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
        is_live = live_status == 'is_live'
        start_time = time.time()

    def mpd_feed(format_id, delay):
        """
        @returns (manifest_url, manifest_stream_number, is_live) or None
        """
        for retry in self.RetryManager(fatal=False):
            with lock:
                refetch_manifest(format_id, delay)

            f = next((f for f in formats if f['format_id'] == format_id), None)
            if not f:
                # Record why this attempt failed and let the retry manager decide what happens next
                if not is_live:
                    retry.error = f'{video_id}: Video is no longer live'
                else:
                    retry.error = f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}'
                continue
            return f['manifest_url'], f['manifest_stream_number'], is_live
        return None

    for f in formats:
        f['is_live'] = is_live
        # The fifth argument is a copy of the format when the stream is not live, False otherwise
        gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
                                live_start_time, mpd_feed, not is_live and f.copy())
        if is_live:
            # Still live: hand over the generator factory itself
            f['fragments'] = gen
            f['protocol'] = 'http_dash_segments_generator'
        else:
            # No longer live: fragments are produced lazily from a single generator run
            f['fragments'] = LazyList(gen({}))
            del f['is_from_start']
2882 def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
2883 FETCH_SPAN, MAX_DURATION = 5, 432000
2885 mpd_url, stream_number, is_live = None, None, True
2887 begin_index = 0
2888 download_start_time = ctx.get('start') or time.time()
2890 lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
2891 if lack_early_segments:
2892 self.report_warning(bug_reports_message(
2893 'Starting download from the last 120 hours of the live stream since '
2894 'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
2895 lack_early_segments = True
2897 known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
2898 fragments, fragment_base_url = None, None
2900 def _extract_sequence_from_mpd(refresh_sequence, immediate):
2901 nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
2902 # Obtain from MPD's maximum seq value
2903 old_mpd_url = mpd_url
2904 last_error = ctx.pop('last_error', None)
2905 expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
2906 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
2907 or (mpd_url, stream_number, False))
2908 if not refresh_sequence:
2909 if expire_fast and not is_live:
2910 return False, last_seq
2911 elif old_mpd_url == mpd_url:
2912 return True, last_seq
2913 if manifestless_orig_fmt:
2914 fmt_info = manifestless_orig_fmt
2915 else:
2916 try:
2917 fmts, _ = self._extract_mpd_formats_and_subtitles(
2918 mpd_url, None, note=False, errnote=False, fatal=False)
2919 except ExtractorError:
2920 fmts = None
2921 if not fmts:
2922 no_fragment_score += 2
2923 return False, last_seq
2924 fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
2925 fragments = fmt_info['fragments']
2926 fragment_base_url = fmt_info['fragment_base_url']
2927 assert fragment_base_url
2929 _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
2930 return True, _last_seq
2932 self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
2933 while is_live:
2934 fetch_time = time.time()
2935 if no_fragment_score > 30:
2936 return
2937 if last_segment_url:
2938 # Obtain from "X-Head-Seqnum" header value from each segment
2939 try:
2940 urlh = self._request_webpage(
2941 last_segment_url, None, note=False, errnote=False, fatal=False)
2942 except ExtractorError:
2943 urlh = None
2944 last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
2945 if last_seq is None:
2946 no_fragment_score += 2
2947 last_segment_url = None
2948 continue
2949 else:
2950 should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15)
2951 no_fragment_score += 2
2952 if not should_continue:
2953 continue
2955 if known_idx > last_seq:
2956 last_segment_url = None
2957 continue
2959 last_seq += 1
2961 if begin_index < 0 and known_idx < 0:
2962 # skip from the start when it's negative value
2963 known_idx = last_seq + begin_index
2964 if lack_early_segments:
2965 known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
2966 try:
2967 for idx in range(known_idx, last_seq):
2968 # do not update sequence here or you'll get skipped some part of it
2969 should_continue, _ = _extract_sequence_from_mpd(False, False)
2970 if not should_continue:
2971 known_idx = idx - 1
2972 raise ExtractorError('breaking out of outer loop')
2973 last_segment_url = urljoin(fragment_base_url, f'sq/{idx}')
2974 yield {
2975 'url': last_segment_url,
2976 'fragment_count': last_seq,
2978 if known_idx == last_seq:
2979 no_fragment_score += 5
2980 else:
2981 no_fragment_score = 0
2982 known_idx = last_seq
2983 except ExtractorError:
2984 continue
2986 if manifestless_orig_fmt:
2987 # Stop at the first iteration if running for post-live manifestless;
2988 # fragment count no longer increase since it starts
2989 break
2991 time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
def _extract_player_url(self, *ytcfgs, webpage=None):
    """Return the absolute player JS URL found in the given ytcfg dicts, or None.

    ``PLAYER_JS_URL`` is looked up first, then ``jsUrl`` under
    ``WEB_PLAYER_CONTEXT_CONFIGS``. ``webpage`` is accepted but not used.
    """
    candidate = traverse_obj(
        ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
        get_all=False, expected_type=str)
    return urljoin('https://www.youtube.com', candidate) if candidate else None
3001 def _download_player_url(self, video_id, fatal=False):
3002 res = self._download_webpage(
3003 'https://www.youtube.com/iframe_api',
3004 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
3005 if res:
3006 player_version = self._search_regex(
3007 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
3008 if player_version:
3009 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
3011 def _signature_cache_id(self, example_sig):
3012 """ Return a string representation of a signature """
3013 return '.'.join(str(len(part)) for part in example_sig.split('.'))
@classmethod
def _extract_player_info(cls, player_url):
    """Return the ``id`` group of the first ``_PLAYER_INFO_RE`` pattern matching ``player_url``.

    Raises ExtractorError when none of the patterns match.
    """
    # Lazy on purpose: patterns after the first hit are never tried
    attempts = (re.search(pattern, player_url) for pattern in cls._PLAYER_INFO_RE)
    id_m = next(filter(None, attempts), None)
    if id_m is None:
        raise ExtractorError(f'Cannot identify player {player_url!r}')
    return id_m.group('id')
3025 def _load_player(self, video_id, player_url, fatal=True):
3026 player_id = self._extract_player_info(player_url)
3027 if player_id not in self._code_cache:
3028 code = self._download_webpage(
3029 player_url, video_id, fatal=fatal,
3030 note='Downloading player ' + player_id,
3031 errnote=f'Download of {player_url} failed')
3032 if code:
3033 self._code_cache[player_id] = code
3034 return self._code_cache.get(player_id)
3036 def _extract_signature_function(self, video_id, player_url, example_sig):
3037 player_id = self._extract_player_info(player_url)
3039 # Read from filesystem cache
3040 func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
3041 assert os.path.basename(func_id) == func_id
3043 self.write_debug(f'Extracting signature function {func_id}')
3044 cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
3046 if not cache_spec:
3047 code = self._load_player(video_id, player_url)
3048 if code:
3049 res = self._parse_sig_js(code)
3050 test_string = ''.join(map(chr, range(len(example_sig))))
3051 cache_spec = [ord(c) for c in res(test_string)]
3052 self.cache.store('youtube-sigfuncs', func_id, cache_spec)
3054 return lambda s: ''.join(s[i] for i in cache_spec)
3056 def _print_sig_code(self, func, example_sig):
3057 if not self.get_param('youtube_print_sig_code'):
3058 return
3060 def gen_sig_code(idxs):
3061 def _genslice(start, end, step):
3062 starts = '' if start == 0 else str(start)
3063 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
3064 steps = '' if step == 1 else (':%d' % step)
3065 return f's[{starts}{ends}{steps}]'
3067 step = None
3068 # Quelch pyflakes warnings - start will be set when step is set
3069 start = '(Never used)'
3070 for i, prev in zip(idxs[1:], idxs[:-1]):
3071 if step is not None:
3072 if i - prev == step:
3073 continue
3074 yield _genslice(start, prev, step)
3075 step = None
3076 continue
3077 if i - prev in [-1, 1]:
3078 step = i - prev
3079 start = prev
3080 continue
3081 else:
3082 yield 's[%d]' % prev
3083 if step is None:
3084 yield 's[%d]' % i
3085 else:
3086 yield _genslice(start, i, step)
3088 test_string = ''.join(map(chr, range(len(example_sig))))
3089 cache_res = func(test_string)
3090 cache_spec = [ord(c) for c in cache_res]
3091 expr_code = ' + '.join(gen_sig_code(cache_spec))
3092 signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.')))
3093 code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n'
3094 f' return {expr_code}\n')
3095 self.to_screen('Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
    """Locate the signature function in the player JS and return it as a Python callable.

    The returned callable takes the signature string and passes it to the
    interpreted JS function as its single argument.
    """
    sig_name_patterns = (
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
        r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Obsolete patterns
        r'("|\')signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    )
    funcname = self._search_regex(
        sig_name_patterns, jscode, 'Initial JS player signature function name', group='sig')

    initial_function = JSInterpreter(jscode).extract_function(funcname)

    def run_signature_function(s):
        return initial_function([s])

    return run_signature_function
3118 def _cached(self, func, *cache_id):
3119 def inner(*args, **kwargs):
3120 if cache_id not in self._player_cache:
3121 try:
3122 self._player_cache[cache_id] = func(*args, **kwargs)
3123 except ExtractorError as e:
3124 self._player_cache[cache_id] = e
3125 except Exception as e:
3126 self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
3128 ret = self._player_cache[cache_id]
3129 if isinstance(ret, Exception):
3130 raise ret
3131 return ret
3132 return inner
3134 def _decrypt_signature(self, s, video_id, player_url):
3135 """Turn the encrypted s field into a working signature"""
3136 extract_sig = self._cached(
3137 self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
3138 func = extract_sig(video_id, player_url, s)
3139 self._print_sig_code(func, s)
3140 return func(s)
def _decrypt_nsig(self, s, video_id, player_url):
    """Turn the encrypted n field into its decrypted value

    Raises ExtractorError when player_url is None or when the nsig function
    code cannot be extracted. If the native interpreter fails, PhantomJS is
    tried; when PhantomJS cannot be set up the interpreter error is re-raised.
    """
    if player_url is None:
        raise ExtractorError('Cannot decrypt nsig without player_url')
    player_url = urljoin('https://www.youtube.com', player_url)

    try:
        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
    except ExtractorError as e:
        raise ExtractorError('Unable to extract nsig function code', cause=e)
    if self.get_param('youtube_print_sig_code'):
        self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')

    try:
        # The built function is cached per player URL
        extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
        ret = extract_nsig(jsi, func_code)(s)
    except JSInterpreter.Exception as e:
        try:
            jsi = PhantomJSwrapper(self, timeout=5000)
        except ExtractorError:
            # No PhantomJS fallback available: surface the original interpreter error
            raise e
        # NOTE(review): whitespace inside the second literal is reproduced as seen in the
        # reviewed text, which may have had runs of spaces collapsed - confirm against the file.
        self.report_warning(
            f'Native nsig extraction failed: Trying with PhantomJS\n'
            f' n = {s} ; player = {player_url}', video_id)
        self.write_debug(e, only_once=True)

        # func_code is (argument names, function body); run it as an immediately invoked function
        args, func_body = func_code
        ret = jsi.execute(
            f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
            video_id=video_id, note='Executing signature code').strip()

    self.write_debug(f'Decrypted nsig {s} => {ret}')
    return ret
3176 def _extract_n_function_name(self, jscode):
3177 # Examples (with placeholders nfunc, narray, idx):
3178 # * .get("n"))&&(b=nfunc(b)
3179 # * .get("n"))&&(b=narray[idx](b)
3180 # * b=String.fromCharCode(110),c=a.get(b))&&c=narray[idx](c)
3181 # * a.D&&(b="nn"[+a.D],c=a.get(b))&&(c=narray[idx](c),a.set(b,c),narray.length||nfunc("")
3182 # * a.D&&(PL(a),b=a.j.n||null)&&(b=narray[0](b),a.set("n",b),narray.length||nfunc("")
3183 funcname, idx = self._search_regex(
3184 r'''(?x)
3186 \.get\("n"\)\)&&\(b=|
3188 b=String\.fromCharCode\(110\)|
3189 (?P<str_idx>[a-zA-Z0-9_$.]+)&&\(b="nn"\[\+(?P=str_idx)\]
3190 ),c=a\.get\(b\)\)&&\(c=|
3191 \b(?P<var>[a-zA-Z0-9_$]+)=
3192 )(?P<nfunc>[a-zA-Z0-9_$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z]\)
3193 (?(var),[a-zA-Z0-9_$]+\.set\("n"\,(?P=var)\),(?P=nfunc)\.length)''',
3194 jscode, 'n function name', group=('nfunc', 'idx'), default=(None, None))
3195 if not funcname:
3196 self.report_warning('Falling back to generic n function search')
3197 return self._search_regex(
3198 r'''(?xs)
3199 ;\s*(?P<name>[a-zA-Z0-9_$]+)\s*=\s*function\([a-zA-Z0-9_$]+\)
3200 \s*\{(?:(?!};).)+?["']enhanced_except_''',
3201 jscode, 'Initial JS player n function name', group='name')
3202 elif not idx:
3203 return funcname
3205 return json.loads(js_to_json(self._search_regex(
3206 rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
3207 f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
def _extract_n_function_code(self, video_id, player_url):
    """Return ``(interpreter, player_id, func_code)`` for the player's n function.

    The function code is read from the ``youtube-nsig`` cache section when
    present; otherwise the player is loaded, the function is extracted and
    the result is stored in that cache.
    """
    player_id = self._extract_player_info(player_url)
    cached_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09')
    if cached_code:
        # On a cache hit the interpreter is constructed from the cached value itself
        return JSInterpreter(cached_code), player_id, cached_code

    jscode = self._load_player(video_id, player_url)
    jsi = JSInterpreter(jscode)
    func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))

    self.cache.store('youtube-nsig', player_id, func_code)
    return jsi, player_id, func_code
3225 def _extract_n_function_from_code(self, jsi, func_code):
3226 func = jsi.extract_function_from_code(*func_code)
3228 def extract_nsig(s):
3229 try:
3230 ret = func([s])
3231 except JSInterpreter.Exception:
3232 raise
3233 except Exception as e:
3234 raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
3236 if ret.startswith('enhanced_except_'):
3237 raise JSInterpreter.Exception('Signature function returned an exception')
3238 return ret
3240 return extract_nsig
3242 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
3244 Extract signatureTimestamp (sts)
3245 Required to tell API what sig/player version is in use.
3247 sts = None
3248 if isinstance(ytcfg, dict):
3249 sts = int_or_none(ytcfg.get('STS'))
3251 if not sts:
3252 # Attempt to extract from player
3253 if player_url is None:
3254 error_msg = 'Cannot extract signature timestamp without player_url.'
3255 if fatal:
3256 raise ExtractorError(error_msg)
3257 self.report_warning(error_msg)
3258 return
3259 code = self._load_player(video_id, player_url, fatal=fatal)
3260 if code:
3261 sts = int_or_none(self._search_regex(
3262 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
3263 'JS player signature timestamp', group='sts', fatal=fatal))
3264 return sts
def _mark_watched(self, video_id, player_responses):
    """Request the playback and watchtime tracking URLs so the video counts as watched.

    The two tracking URLs are taken from ``playbackTracking`` in the player
    responses. If either is missing a warning is reported and the method
    stops; download failures are not fatal.
    """
    # cpn generation algorithm is reverse engineered from base.js.
    # In fact it works even with dummy cpn.
    CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'

    for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
        label = 'fully ' if is_full else ''
        url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
                        expected_type=url_or_none)
        if not url:
            self.report_warning(f'Unable to mark {label}watched')
            return
        parsed_url = urllib.parse.urlparse(url)
        qs = urllib.parse.parse_qs(parsed_url.query)

        # Draw each of the 16 characters uniformly from the alphabet
        cpn = ''.join(random.choices(CPN_ALPHABET, k=16))

        # # more consistent results setting it to right before the end
        video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]

        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
            'cmt': video_length,
            'el': 'detailpage',  # otherwise defaults to "shorts"
        })

        if is_full:
            # these seem to mark watchtime "history" in the real world
            # they're required, so send in a single value
            qs.update({
                'st': 0,
                'et': video_length,
            })

        url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        self._download_webpage(
            url, video_id, f'Marking {label}watched',
            'Unable to mark watched', fatal=False)
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield url_result entries for YouTube videos referenced by ``webpage``.

    A ``<link rel="alternate">`` pointing at a YouTube watch URL takes
    priority: it is yielded alone and extraction stops. Otherwise the parent
    class results are yielded, followed by lazyYT embeds and embeds from the
    Wordpress "YouTube Video Importer" plugin.
    """
    # Invidious Instances
    # https://github.com/yt-dlp/yt-dlp/issues/195
    # https://github.com/iv-org/invidious/pull/1730
    alternate_link = re.search(
        r'<link rel="alternate" href="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"',
        webpage)
    if alternate_link:
        yield cls.url_result(alternate_link.group('url'), cls)
        raise cls.StopExtraction

    yield from super()._extract_from_webpage(url, webpage)

    # lazyYT YouTube embed
    for lazy_embed in re.finditer(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage):
        raw_id = lazy_embed.group(1)
        yield cls.url_result(unescapeHTML(raw_id), cls, raw_id)

    # Wordpress "YouTube Video Importer" plugin
    for importer_embed in re.finditer(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage):
        # The video id is the third (last) group of the pattern
        embedded_id = importer_embed.group(3)
        yield cls.url_result(embedded_id, cls, embedded_id)
@classmethod
def extract_id(cls, url):
    """Return the video id for ``url``; raise ExtractorError when none can be derived."""
    if video_id := cls.get_temp_id(url):
        return video_id
    raise ExtractorError(f'Invalid URL: {url}')
def _extract_chapters_from_json(self, data, duration):
    """Build chapters from the chaptered player bar found in ``data``.

    Start times come from ``timeRangeStartMillis`` (converted to seconds) and
    titles from ``title.simpleText`` of each ``chapterRenderer``.
    """
    chapters_path = (
        'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
        'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters',
    )
    chapter_list = traverse_obj(data, chapters_path, expected_type=list)

    def chapter_start(chapter):
        start_ms = traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis'))
        return float_or_none(start_ms, scale=1000)

    def chapter_title(chapter):
        return traverse_obj(
            chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str)

    return self._extract_chapters_helper(
        chapter_list,
        start_function=chapter_start,
        title_function=chapter_title,
        duration=duration)
def _extract_chapters_from_engagement_panel(self, data, duration):
    """Return the first non-empty chapter list found in the engagement panels, else ``[]``.

    Each panel's ``macroMarkersListRenderer`` contents are tried in order.
    """
    content_list = traverse_obj(
        data,
        ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
        expected_type=list)

    def chapter_time(chapter):
        return parse_duration(self._get_text(chapter, 'timeDescription'))

    def chapter_title(chapter):
        return self._get_text(chapter, 'title')

    for contents in content_list:
        chapters = self._extract_chapters_helper(
            traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
            chapter_time, chapter_title, duration)
        if chapters:
            # Later panels are not examined once one produced chapters
            return chapters
    return []
def _extract_heatmap(self, data):
    """Return heatmap markers as dicts with ``start_time``, ``end_time`` and ``value``, or None.

    Only mutations whose markers list has type ``MARKER_TYPE_HEATMAP`` are used.
    """
    def is_heatmap_mutation(_, mutation):
        markers_list = mutation['payload']['macroMarkersListEntity']['markersList']
        return markers_list['markerType'] == 'MARKER_TYPE_HEATMAP'

    def marker_end_seconds(marker):
        return (int(marker['startMillis']) + int(marker['durationMillis'])) / 1000

    marker_fields = {
        'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
        'end_time': {marker_end_seconds},
        'value': ('intensityScoreNormalized', {float_or_none}),
    }
    heatmap = traverse_obj(data, (
        'frameworkUpdates', 'entityBatchUpdate', 'mutations',
        is_heatmap_mutation,
        'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., marker_fields))
    return heatmap or None
def _extract_comment(self, entities, parent=None):
    """Build a comment dict from entity payloads; return None when no comment id is present.

    ``parent`` defaults to ``'root'``. ``is_favorited`` is None when no
    toolbar state payload is available.
    """
    payload = get_first(entities, ('payload', 'commentEntityPayload', {dict}))
    comment_id = traverse_obj(payload, ('properties', 'commentId', {str}))
    if not comment_id:
        return

    toolbar_state = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict}))
    time_text = traverse_obj(payload, ('properties', 'publishedTime', {str})) or ''

    payload_fields = traverse_obj(payload, {
        'text': ('properties', 'content', 'content', {str}),
        'like_count': ('toolbar', 'likeCountA11y', {parse_count}),
        'author_id': ('author', 'channelId', {self.ucid_or_none}),
        'author': ('author', 'displayName', {str}),
        'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}),
        'author_is_uploader': ('author', 'isCreator', {bool}),
        'author_is_verified': ('author', 'isVerified', {bool}),
        'author_url': ('author', 'channelCommand', 'innertubeCommand', (
            ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'),
        ), {lambda x: urljoin('https://www.youtube.com', x)}),
    }, get_all=False)

    if toolbar_state is None:
        is_favorited = None
    else:
        is_favorited = toolbar_state.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'

    return {
        'id': comment_id,
        'parent': parent or 'root',
        **payload_fields,
        'is_favorited': is_favorited,
        '_time_text': time_text,  # FIXME: non-standard, but we need a way of showing that it is an estimate.
        'timestamp': self._parse_time_text(time_text),
    }
def _extract_comment_old(self, comment_renderer, parent=None):
    """Build a comment dict from a `commentRenderer` (old comment format).

    Returns None when the renderer has no `commentId`. Optional keys
    (`author_is_uploader`, `is_favorited`, `author_is_verified`,
    `is_pinned`) are only set when the renderer provides them.
    """
    if not (comment_id := comment_renderer.get('commentId')):
        return

    info = {
        'id': comment_id,
        'text': self._get_text(comment_renderer, 'contentText'),
        'like_count': self._get_count(comment_renderer, 'voteCount'),
        'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
        'author': self._get_text(comment_renderer, 'authorText'),
        'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
        'parent': parent or 'root',
    }

    # Timestamp is an estimate calculated from the current time and time_text
    time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
    # FIXME: non-standard, but we need a way of showing that it is an estimate.
    info['_time_text'] = time_text
    info['timestamp'] = self._parse_time_text(time_text)

    author_path = traverse_obj(
        comment_renderer, ('authorEndpoint', (
            ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
        expected_type=str, get_all=False)
    info['author_url'] = urljoin('https://www.youtube.com', author_path)

    if (is_channel_owner := traverse_obj(comment_renderer, 'authorIsChannelOwner')) is not None:
        info['author_is_uploader'] = is_channel_owner

    action_buttons = traverse_obj(
        comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
    if action_buttons is not None:
        info['is_favorited'] = 'creatorHeart' in action_buttons

    author_badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
    if self._has_badge(author_badges, BadgeType.VERIFIED):
        info['author_is_verified'] = True

    if traverse_obj(comment_renderer, 'pinnedCommentBadge'):
        info['is_pinned'] = True

    return info
def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
    """Yield comment dicts for a comment section, or for one reply thread.

    A call with `parent=None` walks the top-level comment section; the
    nested `extract_thread` calls this method again with `parent` set to a
    comment id and the shared `tracker` dict in order to walk that
    comment's replies.

    Raises `self.CommentsDisabled` when `root_continuation_data` carries a
    message renderer text, this is a top-level call and no comment has been
    counted in the tracker.
    """

    def get_single_config_arg(name):
        return self._configuration_arg(name, [''])[0]

    def extract_header(contents):
        """Read the estimated comment count and pick the sort continuation."""
        sort_continuation = None
        for content in contents:
            header = traverse_obj(content, 'commentsHeaderRenderer')
            est_count = self._get_count(header, 'countText', 'commentsCount')
            if est_count is not None:
                tracker['est_total'] = est_count
                self.to_screen(f'Downloading ~{est_count} comments')

            # 1 = new, 0 = top
            sort_index = int(get_single_config_arg('comment_sort') != 'top')
            menu_item = try_get(
                header,
                lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][sort_index], dict) or {}

            sort_continuation = (
                self._extract_continuation_ep_data(menu_item.get('serviceEndpoint') or {})
                or self._extract_continuation(menu_item))
            if sort_continuation:
                sort_text = str_or_none(menu_item.get('title')) or (
                    'top comments' if sort_index == 0 else 'newest first')
                self.to_screen(f'Sorting comments by {sort_text.lower()}')
                break
        return sort_continuation

    def extract_thread(contents, entity_payloads):
        """Yield the comments of one page; a bare yield (None) means "stop"."""
        if not parent:
            tracker['current_page_thread'] = 0
        for content in contents:
            if not parent and tracker['total_parent_comments'] >= max_parents:
                yield
            thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])

            if entity_payloads:
                # new comment format
                view_model = (
                    traverse_obj(thread_renderer, ('commentViewModel', 'commentViewModel', {dict}))
                    or traverse_obj(content, ('commentViewModel', {dict})))
                entity_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str}))
                if not entity_keys:
                    continue
                entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in entity_keys)
                comment = self._extract_comment(entities, parent)
                if comment:
                    comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None
            else:
                # old comment format
                renderer = get_first(
                    (thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]],
                    expected_type=dict, default={})
                comment = self._extract_comment_old(renderer, parent)

            if not comment:
                continue
            comment_id = comment['id']
            pinned = comment.get('is_pinned')

            if pinned:
                tracker['pinned_comment_ids'].add(comment_id)
            # Sometimes YouTube may break and give us infinite looping comments.
            # See: https://github.com/yt-dlp/yt-dlp/issues/6290
            if comment_id not in tracker['seen_comment_ids']:
                tracker['seen_comment_ids'].add(comment_id)
            else:
                if comment_id in tracker['pinned_comment_ids'] and not pinned:
                    # Pinned comments may appear a second time in newest first sort
                    # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                    continue
                self.report_warning(
                    'Detected YouTube comments looping. Stopping comment extraction '
                    f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                yield

            tracker['running_total'] += 1
            tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
            yield comment

            # Attempt to get the replies
            replies_renderer = try_get(
                thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
            if not replies_renderer:
                continue

            tracker['current_page_thread'] += 1
            reply_entries = self._comment_entries(
                replies_renderer, ytcfg, video_id,
                parent=comment.get('id'), tracker=tracker)
            reply_budget = min(
                max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))
            yield from itertools.islice(reply_entries, reply_budget)

    # Keeps track of counts across recursive calls
    if not tracker:
        tracker = dict(
            running_total=0,
            est_total=None,
            current_page_thread=0,
            total_parent_comments=0,
            total_reply_comments=0,
            seen_comment_ids=set(),
            pinned_comment_ids=set(),
        )

    # TODO: Deprecated
    # YouTube comments have a max depth of 2
    max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
    if max_depth:
        self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
                                            'Set max replies in the max-comments extractor argument instead')
    if max_depth == 1 and parent:
        return

    # max_comments arg: total, parents, replies, replies-per-thread.
    # Missing or non-integer entries become sys.maxsize; the total is not used here.
    limits = [
        int_or_none(value, default=sys.maxsize)
        for value in self._configuration_arg('max_comments') + [''] * 4]
    max_parents, max_replies, max_replies_per_thread = limits[1:4]

    continuation = self._extract_continuation(root_continuation_data)

    response = None
    is_forced_continuation = False
    is_first_continuation = parent is None
    if is_first_continuation and not continuation:
        # Sometimes you can get comments by generating the continuation yourself,
        # even if YouTube initially reports them being disabled - e.g. stories comments.
        # Note: if the comment section is actually disabled, YouTube may return a response with
        # required check_get_keys missing. So we will disable that check initially in this case.
        continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
        is_forced_continuation = True

    continuation_items_path = (
        'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
    for page_num in itertools.count(0):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
        comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
        if page_num != 0:
            note_prefix = '{}Downloading comment{} API JSON page {} {}'.format(
                ' ' if parent else '', ' replies' if parent else '',
                page_num, comment_prog_str)
        elif is_first_continuation:
            note_prefix = 'Downloading comment section API JSON'
        else:
            note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                tracker['current_page_thread'], comment_prog_str)

        # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
        # Ignore check if YouTube says the comment count is 0.
        no_comments_expected = tracker['est_total'] == 0 and tracker['running_total'] == 0
        check_get_keys = None
        if not (is_forced_continuation or no_comments_expected):
            expected_renderer = (
                'commentsHeaderRenderer' if is_first_continuation
                else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))
            check_get_keys = [[*continuation_items_path, ..., expected_renderer]]
        try:
            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=check_get_keys)
        except ExtractorError as e:
            # Ignore incomplete data error for replies if retries didn't work.
            # This is to allow any other parent comments and comment threads to be downloaded.
            # See: https://github.com/yt-dlp/yt-dlp/issues/4669
            if not parent or 'incomplete data' not in str(e).lower():
                raise
            if self.get_param('ignoreerrors') not in (True, 'only_download'):
                raise ExtractorError(
                    'Incomplete data received for comment reply thread. '
                    'Pass --ignore-errors to ignore and allow rest of comments to download.',
                    expected=True)
            self.report_warning(
                'Received incomplete data for a comment reply thread and retrying did not help. '
                'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
            return

        is_forced_continuation = False
        continuation = None
        mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict}))
        for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
            if is_first_continuation:
                # The first response only carries the header (count + sort menu)
                continuation = extract_header(continuation_items)
                is_first_continuation = False
                if continuation:
                    break
                continue

            for entry in extract_thread(continuation_items, mutations):
                if not entry:
                    # extract_thread signalled that extraction must stop
                    return
                yield entry
            continuation = self._extract_continuation({'contents': continuation_items})
            if continuation:
                break

    message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
    if message and not parent and tracker['running_total'] == 0:
        self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True)
        raise self.CommentsDisabled
3657 @staticmethod
3658 def _generate_comment_continuation(video_id):
3660 Generates initial comment section continuation token from given video id
3662 token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section'
3663 return base64.b64encode(token.encode()).decode()
def _get_comments(self, ytcfg, video_id, contents, webpage):
    """Entry for comment extraction

    Returns a lazy iterator over the comments of the item section whose
    `sectionIdentifier` is 'comment-item-section', capped at the first
    value of the `max_comments` extractor argument (no cap when that
    value is not an integer).
    """
    def iter_comments():
        # Nothing is looked up until the first comment is requested
        comment_section = None
        for section in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}):
            if section.get('sectionIdentifier') == 'comment-item-section':
                comment_section = section
                break
        yield from self._comment_entries(comment_section, ytcfg, video_id)

    limit = int_or_none(self._configuration_arg('max_comments', [''])[0])
    return itertools.islice(iter_comments(), limit)
3676 @staticmethod
3677 def _get_checkok_params():
3678 return {'contentCheckOk': True, 'racyCheckOk': True}
3680 @classmethod
3681 def _generate_player_context(cls, sts=None):
3682 context = {
3683 'html5Preference': 'HTML5_PREF_WANTS',
3685 if sts is not None:
3686 context['signatureTimestamp'] = sts
3687 return {
3688 'playbackContext': {
3689 'contentPlaybackContext': context,
3691 **cls._get_checkok_params(),
@staticmethod
def _is_agegated(player_response):
    """Tell whether a player response indicates an age gate.

    True when `playabilityStatus` has a truthy `desktopLegacyAgeGateReason`,
    or when its `status` or `reason` contains one of the known age-gate
    markers. Matching is case-insensitive and only string values are
    considered.
    """
    if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
        return True

    reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason'), {str}))
    AGE_GATE_REASONS = (
        'confirm your age', 'age-restricted', 'inappropriate',  # reason
        'age_verification_required', 'age_check_required',  # status
    )
    # The markers are lower-case, while `status` is compared against the
    # upper-case 'UNPLAYABLE' in `_is_unplayable`; a case-sensitive test
    # could therefore never match the status markers.
    return any(
        expected in reason.lower()
        for reason in reasons
        for expected in AGE_GATE_REASONS)
@staticmethod
def _is_unplayable(player_response):
    """Tell whether the response's `playabilityStatus.status` is 'UNPLAYABLE'."""
    status = traverse_obj(player_response, ('playabilityStatus', 'status'))
    return status == 'UNPLAYABLE'
def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
    """Request the player API response for `video_id` using `client`.

    The signature timestamp is only looked up when `player_url` is given.
    Player params come from the `player_params` extractor argument, with
    the client's `PLAYER_PARAMS` as default. Returns the response, or
    None when it is falsy. `smuggled_data` is not used here.
    """
    session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
    syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
    sts = None
    if player_url:
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
    headers = self.generate_api_headers(
        ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)

    query = {'videoId': video_id}

    client_default_params = traverse_obj(
        INNERTUBE_CLIENTS, (_split_innertube_client(client)[0], 'PLAYER_PARAMS', {str}))
    player_params = self._configuration_arg('player_params', [client_default_params], casesense=True)[0]
    if player_params:
        query['params'] = player_params

    query.update(self._generate_player_context(sts))

    readable_client = client.replace('_', ' ').strip()
    response = self._extract_response(
        item_id=video_id, ep='player', query=query,
        ytcfg=player_ytcfg, headers=headers, fatal=True,
        default_client=client,
        note='Downloading {} player API JSON'.format(readable_client),
    )
    return response or None
def _get_requested_clients(self, url, smuggled_data):
    """Resolve the `player_client` extractor argument into an ordered client list.

    'default' expands to ios + web and 'all' to every client whose name
    does not start with '_' (highest priority first). Unsupported names
    are skipped with a warning, and clients listed as values of
    `_BROKEN_CLIENTS` are moved to the end. For music URLs the matching
    `<base>_music` clients are appended. Duplicates are dropped through
    `orderedSet`.
    """
    default = ['ios', 'web']
    allowed_clients = sorted(
        (name for name in INNERTUBE_CLIENTS if not name.startswith('_')),
        key=lambda name: INNERTUBE_CLIENTS[name]['priority'], reverse=True)

    requested_clients, broken_clients = [], []
    for client in self._configuration_arg('player_client'):
        if client == 'default':
            requested_clients += default
        elif client == 'all':
            requested_clients += allowed_clients
        elif client not in allowed_clients:
            self.report_warning(f'Skipping unsupported client {client}')
        elif client in self._BROKEN_CLIENTS.values():
            broken_clients.append(client)
        else:
            requested_clients.append(client)

    # Force deprioritization of _BROKEN_CLIENTS for format de-duplication
    requested_clients += broken_clients
    if not requested_clients:
        requested_clients = default

    if smuggled_data.get('is_music_url') or self.is_music_url(url):
        # The list grows while it is walked; appended names are visited as well
        for requested_client in requested_clients:
            _, base_client, variant = _split_innertube_client(requested_client)
            if variant == 'music':
                continue
            music_client = f'{base_client}_music'
            if music_client in INNERTUBE_CLIENTS:
                requested_clients.append(music_client)

    return orderedSet(requested_clients)
def _invalid_player_response(self, pr, video_id):
    """Return the video id found in `pr` when it differs from `video_id`, else None."""
    # YouTube may return a different video player response than expected.
    # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
    pr_id = traverse_obj(pr, ('videoDetails', 'videoId'))
    return pr_id if pr_id != video_id else None
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
    """Collect player responses for `video_id` from the requested clients.

    Clients are tried in the given order; further clients may be queued
    while looping to work around age-gates and age-verification.
    Returns `(prs, player_url)`. Raises ExtractorError when no usable
    player response could be obtained.
    """
    def has_potoken_experiment(experiment_ids):
        return all(x in experiment_ids for x in self._POTOKEN_EXPERIMENTS)

    initial_pr = ignore_initial_response = None
    if webpage:
        if 'web' in clients:
            webpage_experiments = traverse_obj(master_ytcfg, (
                'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {lambda x: x.split(',')}, ...))
            if has_potoken_experiment(webpage_experiments):
                self.report_warning(
                    'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response')
                ignore_initial_response = True
        initial_pr = self._search_json(
            self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False)

    prs = []
    if initial_pr and not self._invalid_player_response(initial_pr, video_id):
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        prs.append({**initial_pr, 'streamingData': None})

    all_clients = set(clients)
    # Reversed so that list.pop() hands the clients out in their original order
    clients = clients[::-1]

    def append_client(*client_names):
        """ Append the first client name that exists but not already used """
        for candidate in client_names:
            actual_client = _split_innertube_client(candidate)[0]
            if actual_client not in INNERTUBE_CLIENTS or actual_client in all_clients:
                continue
            clients.append(candidate)
            all_clients.add(actual_client)
            return

    tried_iframe_fallback = False
    player_url = None
    skipped_clients = {}
    while clients:
        client, base_client, variant = _split_innertube_client(clients.pop())

        if client == 'web':
            if ignore_initial_response:
                player_ytcfg = self._get_default_ytcfg()
            else:
                player_ytcfg = master_ytcfg
        elif 'configs' in self._configuration_arg('player_skip'):
            player_ytcfg = {}
        else:
            player_ytcfg = self._download_ytcfg(client, video_id) or {}

        player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
        require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
        if 'js' in self._configuration_arg('player_skip'):
            require_js_player = False
            player_url = None

        if require_js_player and not player_url and not tried_iframe_fallback:
            player_url = self._download_player_url(video_id)
            tried_iframe_fallback = True

        # The webpage's own response is reused for the web client unless it is ignored
        if client == 'web' and not ignore_initial_response:
            pr = initial_pr
        else:
            pr = None
        for retry in self.RetryManager(fatal=False):
            try:
                pr = pr or self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg,
                    player_url if require_js_player else None, initial_pr, smuggled_data)
            except ExtractorError as e:
                self.report_warning(e)
                break
            api_experiments = traverse_obj(pr, (
                'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK',
                'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ...))
            if has_potoken_experiment(api_experiments):
                pr = None
                retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True)
        if not pr:
            continue

        pr_id = self._invalid_player_response(pr, video_id)
        if pr_id:
            skipped_clients[client] = pr_id
        else:
            # Save client name for introspection later
            name = short_client_name(client)
            sd = traverse_obj(pr, ('streamingData', {dict})) or {}
            sd[STREAMING_DATA_CLIENT_NAME] = name
            for fmt in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
                fmt[STREAMING_DATA_CLIENT_NAME] = name
            prs.append(pr)

        is_agegated = self._is_agegated(pr)

        # tv_embedded can work around age-gate and age-verification IF the video is embeddable
        if is_agegated and variant != 'tv_embedded':
            append_client(f'tv_embedded.{base_client}')

        # Unauthenticated users will only get tv_embedded client formats if age-gated
        if is_agegated and not self.is_authenticated:
            self.to_screen(
                f'{video_id}: This video is age-restricted; some formats may be missing '
                f'without authentication. {self._login_hint()}', only_once=True)

        # EU countries require age-verification for accounts to access age-restricted videos
        # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients
        # If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded
        embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr)
        if self.is_authenticated and (is_agegated or embedding_is_disabled):
            self.to_screen(
                f'{video_id}: This video is age-restricted and YouTube is requiring '
                'account age-verification; some formats may be missing', only_once=True)
            # web_creator and mediaconnect can work around the age-verification requirement
            # _producer, _testsuite, & _vr variants can also work around age-verification
            append_client('web_creator', 'mediaconnect')

    if skipped_clients:
        self.report_warning(
            f'Skipping player responses from {"/".join(skipped_clients)} clients '
            f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")')
        if not prs:
            raise ExtractorError(
                'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True)
    elif not prs:
        raise ExtractorError('Failed to extract any player response')
    return prs, player_url
3890 def _needs_live_processing(self, live_status, duration):
3891 if (live_status == 'is_live' and self.get_param('live_from_start')
3892 or live_status == 'post_live' and (duration or 0) > 2 * 3600):
3893 return live_status
3895 def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
3896 CHUNK_SIZE = 10 << 20
3897 PREFERRED_LANG_VALUE = 10
3898 original_language = None
3899 itags, stream_ids = collections.defaultdict(set), []
3900 itag_qualities, res_qualities = {}, {0: None}
3901 q = qualities([
3902 # Normally tiny is the smallest video-only formats. But
3903 # audio-only formats with unknown quality may get tagged as tiny
3904 'tiny',
3905 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
3906 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres',
3908 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
3909 format_types = self._configuration_arg('formats')
3910 all_formats = 'duplicate' in format_types
3911 if self._configuration_arg('include_duplicate_formats'):
3912 all_formats = True
3913 self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
3914 'Use formats=duplicate extractor argument instead')
3916 def build_fragments(f):
3917 return LazyList({
3918 'url': update_url_query(f['url'], {
3919 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}',
3921 } for range_start in range(0, f['filesize'], CHUNK_SIZE))
3923 for fmt in streaming_formats:
3924 if fmt.get('targetDurationSec'):
3925 continue
3927 itag = str_or_none(fmt.get('itag'))
3928 audio_track = fmt.get('audioTrack') or {}
3929 stream_id = (itag, audio_track.get('id'), fmt.get('isDrc'))
3930 if not all_formats:
3931 if stream_id in stream_ids:
3932 continue
3934 quality = fmt.get('quality')
3935 height = int_or_none(fmt.get('height'))
3936 if quality == 'tiny' or not quality:
3937 quality = fmt.get('audioQuality', '').lower() or quality
3938 # The 3gp format (17) in android client has a quality of "small",
3939 # but is actually worse than other formats
3940 if itag == '17':
3941 quality = 'tiny'
3942 if quality:
3943 if itag:
3944 itag_qualities[itag] = quality
3945 if height:
3946 res_qualities[height] = quality
3948 is_default = audio_track.get('audioIsDefault')
3949 is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
3950 language_code = audio_track.get('id', '').split('.')[0]
3951 if language_code and is_default:
3952 original_language = language_code
3954 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
3955 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
3956 # number of fragment that would subsequently requested with (`&sq=N`)
3957 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
3958 continue
3960 fmt_url = fmt.get('url')
3961 if not fmt_url:
3962 sc = urllib.parse.parse_qs(fmt.get('signatureCipher'))
3963 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
3964 encrypted_sig = try_get(sc, lambda x: x['s'][0])
3965 if not all((sc, fmt_url, player_url, encrypted_sig)):
3966 continue
3967 try:
3968 fmt_url += '&{}={}'.format(
3969 traverse_obj(sc, ('sp', -1)) or 'signature',
3970 self._decrypt_signature(encrypted_sig, video_id, player_url),
3972 except ExtractorError as e:
3973 self.report_warning('Signature extraction failed: Some formats may be missing',
3974 video_id=video_id, only_once=True)
3975 self.write_debug(e, only_once=True)
3976 continue
3978 query = parse_qs(fmt_url)
3979 if query.get('n'):
3980 try:
3981 decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
3982 fmt_url = update_url_query(fmt_url, {
3983 'n': decrypt_nsig(query['n'][0], video_id, player_url),
3985 except ExtractorError as e:
3986 phantomjs_hint = ''
3987 if isinstance(e, JSInterpreter.Exception):
3988 phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
3989 f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
3990 if player_url:
3991 self.report_warning(
3992 f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
3993 f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
3994 self.write_debug(e, only_once=True)
3995 else:
3996 self.report_warning(
3997 'Cannot decrypt nsig without player_url: Some formats may be missing',
3998 video_id=video_id, only_once=True)
3999 continue
4001 tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
4002 format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
4003 # Some formats may have much smaller duration than others (possibly damaged during encoding)
4004 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
4005 # Make sure to avoid false positives with small duration differences.
4006 # E.g. __2ABJjxzNo, ySuUZEjARPY
4007 is_damaged = try_call(lambda: format_duration < duration // 2)
4008 if is_damaged:
4009 self.report_warning(
4010 f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
4012 client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
4013 # _BROKEN_CLIENTS return videoplayback URLs that expire after 30 seconds
4014 # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
4015 is_broken = client_name in self._BROKEN_CLIENTS
4016 if is_broken:
4017 self.report_warning(
4018 f'{video_id}: {self._BROKEN_CLIENTS[client_name]} client formats are broken '
4019 'and may yield HTTP Error 403. They will be deprioritized', only_once=True)
4021 name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
4022 fps = int_or_none(fmt.get('fps')) or 0
4023 dct = {
4024 'asr': int_or_none(fmt.get('audioSampleRate')),
4025 'filesize': int_or_none(fmt.get('contentLength')),
4026 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
4027 'format_note': join_nonempty(
4028 join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
4029 name, fmt.get('isDrc') and 'DRC',
4030 try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
4031 try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
4032 is_damaged and 'DAMAGED', is_broken and 'BROKEN',
4033 (self.get_param('verbose') or all_formats) and client_name,
4034 delim=', '),
4035 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
4036 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
4037 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
4038 'audio_channels': fmt.get('audioChannels'),
4039 'height': height,
4040 'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
4041 'has_drm': bool(fmt.get('drmFamilies')),
4042 'tbr': tbr,
4043 'filesize_approx': filesize_from_tbr(tbr, format_duration),
4044 'url': fmt_url,
4045 'width': int_or_none(fmt.get('width')),
4046 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
4047 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
4048 # Strictly de-prioritize broken, damaged and 3gp formats
4049 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
4051 mime_mobj = re.match(
4052 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
4053 if mime_mobj:
4054 dct['ext'] = mimetype2ext(mime_mobj.group(1))
4055 dct.update(parse_codecs(mime_mobj.group(2)))
4056 if itag:
4057 itags[itag].add(('https', dct.get('language')))
4058 stream_ids.append(stream_id)
4059 single_stream = 'none' in (dct.get('acodec'), dct.get('vcodec'))
4060 if single_stream and dct.get('ext'):
4061 dct['container'] = dct['ext'] + '_dash'
4063 if (all_formats or 'dashy' in format_types) and dct['filesize']:
4064 yield {
4065 **dct,
4066 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
4067 'protocol': 'http_dash_segments',
4068 'fragments': build_fragments(dct),
4070 if all_formats or 'dashy' not in format_types:
4071 dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
4072 yield dct
4074 needs_live_processing = self._needs_live_processing(live_status, duration)
4075 skip_bad_formats = 'incomplete' not in format_types
4076 if self._configuration_arg('include_incomplete_formats'):
4077 skip_bad_formats = False
4078 self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
4079 'Use formats=incomplete extractor argument instead')
4081 skip_manifests = set(self._configuration_arg('skip'))
4082 if (not self.get_param('youtube_include_hls_manifest', True)
4083 or needs_live_processing == 'is_live' # These will be filtered out by YoutubeDL anyway
4084 or needs_live_processing and skip_bad_formats):
4085 skip_manifests.add('hls')
4087 if not self.get_param('youtube_include_dash_manifest', True):
4088 skip_manifests.add('dash')
4089 if self._configuration_arg('include_live_dash'):
4090 self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
4091 'Use formats=incomplete extractor argument instead')
4092 elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
4093 skip_manifests.add('dash')
4095 def process_manifest_format(f, proto, client_name, itag):
4096 key = (proto, f.get('language'))
4097 if not all_formats and key in itags[itag]:
4098 return False
4099 itags[itag].add(key)
4101 if itag and all_formats:
4102 f['format_id'] = f'{itag}-{proto}'
4103 elif any(p != proto for p, _ in itags[itag]):
4104 f['format_id'] = f'{itag}-{proto}'
4105 elif itag:
4106 f['format_id'] = itag
4108 if original_language and f.get('language') == original_language:
4109 f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
4110 f['language_preference'] = PREFERRED_LANG_VALUE
4112 if f.get('source_preference') is None:
4113 f['source_preference'] = -1
4115 if itag in ('616', '235'):
4116 f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
4117 f['source_preference'] += 100
4119 f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
4120 if f['quality'] == -1 and f.get('height'):
4121 f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
4122 if self.get_param('verbose') or all_formats:
4123 f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
4124 if f.get('fps') and f['fps'] <= 1:
4125 del f['fps']
4127 if proto == 'hls' and f.get('has_drm'):
4128 f['has_drm'] = 'maybe'
4129 f['source_preference'] -= 5
4130 return True
4132 subtitles = {}
4133 for sd in streaming_data:
4134 client_name = sd.get(STREAMING_DATA_CLIENT_NAME)
4136 hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
4137 if hls_manifest_url:
4138 fmts, subs = self._extract_m3u8_formats_and_subtitles(
4139 hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
4140 subtitles = self._merge_subtitles(subs, subtitles)
4141 for f in fmts:
4142 if process_manifest_format(f, 'hls', client_name, self._search_regex(
4143 r'/itag/(\d+)', f['url'], 'itag', default=None)):
4144 yield f
4146 dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
4147 if dash_manifest_url:
4148 formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
4149 subtitles = self._merge_subtitles(subs, subtitles) # Prioritize HLS subs over DASH
4150 for f in formats:
4151 if process_manifest_format(f, 'dash', client_name, f['format_id']):
4152 f['filesize'] = int_or_none(self._search_regex(
4153 r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
4154 if needs_live_processing:
4155 f['is_from_start'] = True
4157 yield f
4158 yield subtitles
def _extract_storyboard(self, player_responses, duration):
    """Yield one mhtml "storyboard" format dict per level of the storyboard spec.

    The spec string is taken from ``storyboards.playerStoryboardSpecRenderer.spec``
    of the player responses. It is ``|``-separated: the first field is the URL
    template (joined onto ``https://i.ytimg.com/``) and every following field
    describes one storyboard level as 8 ``#``-separated values. Of those, fields
    0-4 are read as width, height, frame count, columns and rows, and fields
    6-7 as the ``$N`` replacement and the ``sigh`` query value.

    @param player_responses  Player responses to search for the spec
    @param duration          Video duration; used to derive fps and the
                             per-fragment durations

    Nothing is yielded when no usable URL template is found, or when the
    duration is unknown/zero (the timings cannot be computed without it).
    Malformed levels are reported with a warning and skipped.
    """
    # Reversed so that the URL template (first field of the spec) can be pop()ed off the end
    spec = get_first(
        player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
    base_url = url_or_none(urljoin('https://i.ytimg.com/', spec.pop() or None))
    if not base_url:
        return
    if not duration:
        # Both `duration / fragment_count` and `frame_count / duration` below
        # would raise (TypeError for None, ZeroDivisionError for 0)
        self.write_debug('Skipping storyboards since the video duration is unknown')
        return

    # `spec` is reversed, so index i corresponds to level (last_level - i) in the URL template
    last_level = len(spec) - 1
    for i, args in enumerate(spec):
        args = args.split('#')
        counts = list(map(int_or_none, args[:5]))
        if len(args) != 8 or not all(counts):
            self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
            continue
        width, height, frame_count, cols, rows = counts
        N, sigh = args[6:]

        url = base_url.replace('$L', str(last_level - i)).replace('$N', N) + f'&sigh={sigh}'
        # Each fragment is one image holding a cols x rows grid of frames
        fragment_count = frame_count / (cols * rows)
        fragment_duration = duration / fragment_count
        yield {
            'format_id': f'sb{i}',
            'format_note': 'storyboard',
            'ext': 'mhtml',
            'protocol': 'mhtml',
            'acodec': 'none',
            'vcodec': 'none',
            'url': url,
            'width': width,
            'height': height,
            'fps': frame_count / duration,
            'rows': rows,
            'columns': cols,
            'fragments': [{
                'url': url.replace('$M', str(j)),
                # The last fragment may cover less than a full fragment_duration
                'duration': min(fragment_duration, duration - (j * fragment_duration)),
            } for j in range(math.ceil(fragment_count))],
        }
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
    """Download the watch page (unless skipped) and the player responses.

    The watch page is requested non-fatally with the ``bpctr``/``has_verified``
    query (plus ``pp`` when the ``player_params`` extractor argument is set),
    unless ``webpage`` is listed in the ``player_skip`` extractor argument.

    Returns a tuple ``(webpage, master_ytcfg, player_responses, player_url)``;
    ``webpage`` is None when the download was skipped.
    """
    skip_webpage = 'webpage' in self._configuration_arg('player_skip')

    webpage = None
    if not skip_webpage:
        watch_query = {'bpctr': '9999999999', 'has_verified': '1'}
        player_params = self._configuration_arg('player_params', [None], casesense=True)[0]
        if player_params:
            watch_query['pp'] = player_params
        webpage = self._download_webpage(webpage_url, video_id, fatal=False, query=watch_query)

    # Fall back to the default ytcfg when none could be extracted from the webpage
    master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

    requested_clients = self._get_requested_clients(url, smuggled_data)
    player_responses, player_url = self._extract_player_responses(
        requested_clients, video_id, webpage, master_ytcfg, smuggled_data)

    return webpage, master_ytcfg, player_responses, player_url
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
    """Work out the live status and extract the formats and subtitles.

    Returns a tuple
    ``(live_broadcast_details, live_status, streaming_data, formats, subtitles)``
    where ``live_status`` is one of 'post_live', 'is_live', 'is_upcoming',
    'was_live', 'not_live' or None.
    """
    live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))

    is_live = get_first(video_details, 'isLive')
    if is_live is None:
        is_live = get_first(live_broadcast_details, 'isLiveNow')
    live_content = get_first(video_details, 'isLiveContent')
    is_upcoming = get_first(video_details, 'isUpcoming')
    post_live = get_first(video_details, 'isPostLiveDvr')

    # First matching condition wins
    if post_live:
        live_status = 'post_live'
    elif is_live:
        live_status = 'is_live'
    elif is_upcoming:
        live_status = 'is_upcoming'
    elif live_content:
        live_status = 'was_live'
    elif False in (is_live, live_content):
        live_status = 'not_live'
    else:
        live_status = None

    streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
    # The extractor yields every format followed by the subtitles dict as the final item
    *formats, subtitles = self._extract_formats_and_subtitles(
        streaming_data, video_id, player_url, live_status, duration)

    # If there are no formats that definitely don't have DRM, all have DRM
    if not any(not fmt.get('has_drm') for fmt in formats):
        for fmt in formats:
            fmt['has_drm'] = True

    return live_broadcast_details, live_status, streaming_data, formats, subtitles
def _real_extract(self, url):
    """Extract the info dict for a single YouTube video.

    Downloads the watch page and player responses, then assembles formats,
    thumbnails, subtitles/automatic captions, chapters and the remaining
    metadata from the player responses, the webpage ``<meta>`` tags and the
    "initial data" (embedded in the webpage or fetched from the ``next`` API).

    Returns the info dict. Two cases return early instead:
      * a ``url_result`` for the trailer when the playability status carries a
        ``trailerVideoId``;
      * a ``playlist_result`` of the individual feeds for multifeed videos,
        unless ``--no-playlist`` is given or ``force_singlefeed`` was smuggled.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    video_id = self._match_id(url)

    base_url = self.http_scheme() + '//www.youtube.com/'
    webpage_url = base_url + 'watch?v=' + video_id

    webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(
        url, smuggled_data, video_id, webpage_url)

    playability_statuses = traverse_obj(
        player_responses, (..., 'playabilityStatus'), expected_type=dict)

    trailer_video_id = get_first(
        playability_statuses,
        ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
        expected_type=str)
    if trailer_video_id:
        return self.url_result(
            trailer_video_id, self.ie_key(), trailer_video_id)

    # Without a webpage there are no <meta> tags to search
    search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                   if webpage else (lambda x: None))

    video_details = traverse_obj(player_responses, (..., 'videoDetails'), expected_type=dict)
    microformats = traverse_obj(
        player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
        expected_type=dict)

    translated_title = self._get_text(microformats, (..., 'title'))
    video_title = (self._preferred_lang and translated_title
                   or get_first(video_details, 'title')  # primary
                   or translated_title
                   or search_meta(['og:title', 'twitter:title', 'title']))
    translated_description = self._get_text(microformats, (..., 'description'))
    original_description = get_first(video_details, 'shortDescription')
    video_description = (
        self._preferred_lang and translated_description
        # If original description is blank, it will be an empty string.
        # Do not prefer translated description in this case.
        or original_description if original_description is not None else translated_description)

    multifeed_metadata_list = get_first(
        player_responses,
        ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
        expected_type=str)
    if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
        if self.get_param('noplaylist'):
            self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
        else:
            entries = []
            feed_ids = []
            for feed in multifeed_metadata_list.split(','):
                # Split on comma (,) first and unquote each feed afterwards, since
                # the unquoted textual fields may contain commas as well (see
                # https://github.com/ytdl-org/youtube-dl/issues/8536)
                feed_data = urllib.parse.parse_qs(
                    urllib.parse.unquote_plus(feed))

                def feed_entry(name):
                    return try_get(
                        feed_data, lambda x: x[name][0], str)

                feed_id = feed_entry('id')
                if not feed_id:
                    continue
                feed_title = feed_entry('title')
                title = video_title
                if feed_title:
                    # video_title may be None; `+=` on it would raise TypeError
                    title = join_nonempty(video_title, f'({feed_title})', delim=' ')
                entries.append({
                    '_type': 'url_transparent',
                    'ie_key': 'Youtube',
                    'url': smuggle_url(
                        '{}watch?v={}'.format(base_url, feed_data['id'][0]),
                        {'force_singlefeed': True}),
                    'title': title,
                })
                feed_ids.append(feed_id)
            self.to_screen(
                'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format(
                    ', '.join(feed_ids), video_id))
            return self.playlist_result(
                entries, video_id, video_title, video_description)

    duration = (int_or_none(get_first(video_details, 'lengthSeconds'))
                or int_or_none(get_first(microformats, 'lengthSeconds'))
                or parse_duration(search_meta('duration')) or None)

    live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
        self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
    if live_status == 'post_live':
        self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')

    if not formats:
        if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
            self.report_drm(video_id)
        pemr = get_first(
            playability_statuses,
            ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
        reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
        subreason = clean_html(self._get_text(pemr, 'subreason') or '')
        if subreason:
            if subreason == 'The uploader has not made this video available in your country.':
                countries = get_first(microformats, 'availableCountries')
                if not countries:
                    regions_allowed = search_meta('regionsAllowed')
                    countries = regions_allowed.split(',') if regions_allowed else None
                self.raise_geo_restricted(subreason, countries, metadata_available=True)
            # `reason` can be None even when a subreason is present
            reason = f'{reason}. {subreason}' if reason else subreason
        if reason:
            self.raise_no_formats(reason, expected=True)

    keywords = get_first(video_details, 'keywords', expected_type=list) or []
    if not keywords and webpage:
        keywords = [
            unescapeHTML(m.group('content'))
            for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
    for keyword in keywords:
        if keyword.startswith('yt:stretch='):
            mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
            if mobj:
                # NB: float is intentional for forcing float division
                w, h = (float(v) for v in mobj.groups())
                if w > 0 and h > 0:
                    ratio = w / h
                    for f in formats:
                        if f.get('vcodec') != 'none':
                            f['stretched_ratio'] = ratio
                    break
    thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
    thumbnail_url = search_meta(['og:image', 'twitter:image'])
    if thumbnail_url:
        thumbnails.append({
            'url': thumbnail_url,
        })
    # Snapshot of the thumbnails found in the metadata, before the guessed URLs are added
    original_thumbnails = thumbnails.copy()

    # The best resolution thumbnails sometimes do not appear in the webpage
    # See: https://github.com/yt-dlp/yt-dlp/issues/340
    # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
    thumbnail_names = [
        # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants
        # in resolution, these are not the custom thumbnail. So de-prioritize them
        'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default',
        'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3',
    ]
    n_thumbnail_names = len(thumbnail_names)
    thumbnails.extend({
        'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
            video_id=video_id, name=name, ext=ext,
            webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
    } for name in thumbnail_names for ext in ('webp', 'jpg'))
    for thumb in thumbnails:
        # Rank by position in thumbnail_names (unknown names last); webp is preferred over jpg
        i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
        thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
    self._remove_duplicate_formats(thumbnails)
    self._downloader._sort_thumbnails(original_thumbnails)

    category = get_first(microformats, 'category') or search_meta('genre')
    channel_id = self.ucid_or_none(str_or_none(
        get_first(video_details, 'channelId')
        or get_first(microformats, 'externalChannelId')
        or search_meta('channelId')))
    owner_profile_url = get_first(microformats, 'ownerProfileUrl')

    live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
    live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
    if not duration and live_end_time and live_start_time:
        duration = live_end_time - live_start_time

    needs_live_processing = self._needs_live_processing(live_status, duration)

    def is_bad_format(fmt):
        if needs_live_processing and not fmt.get('is_from_start'):
            return True
        elif (live_status == 'is_live' and needs_live_processing != 'is_live'
                and fmt.get('protocol') == 'http_dash_segments'):
            return True

    for fmt in filter(is_bad_format, formats):
        fmt['preference'] = (fmt.get('preference') or -1) - 10
        fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')

    if needs_live_processing:
        self._prepare_live_from_start_formats(
            formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')

    formats.extend(self._extract_storyboard(player_responses, duration))

    channel_handle = self.handle_from_url(owner_profile_url)

    info = {
        'id': video_id,
        'title': video_title,
        'formats': formats,
        'thumbnails': thumbnails,
        # The best thumbnail that we are sure exists. Prevents unnecessary
        # URL checking if user don't care about getting the best possible thumbnail
        'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
        'description': video_description,
        'channel_id': channel_id,
        'channel_url': format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None),
        'duration': duration,
        'view_count': int_or_none(
            get_first((video_details, microformats), (..., 'viewCount'))
            or search_meta('interactionCount')),
        'average_rating': float_or_none(get_first(video_details, 'averageRating')),
        'age_limit': 18 if (
            get_first(microformats, 'isFamilySafe') is False
            or search_meta('isFamilyFriendly') == 'false'
            or search_meta('og:restrictions:age') == '18+') else 0,
        'webpage_url': webpage_url,
        'categories': [category] if category else None,
        'tags': keywords,
        'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
        'live_status': live_status,
        'release_timestamp': live_start_time,
        '_format_sort_fields': (  # source_preference is lower for potentially damaged formats
            'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
    }

    subtitles = {}
    pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
    if pctr:
        def get_lang_code(track):
            return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
                    or track.get('languageCode'))

        # Converted into dicts to remove duplicates
        captions = {
            get_lang_code(sub): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...))}
        translation_languages = {
            lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...))}

        def process_language(container, base_url, lang_code, sub_name, query):
            # Append one entry per supported subtitle format for `lang_code`
            lang_subs = container.setdefault(lang_code, [])
            for fmt in self._SUBTITLE_FORMATS:
                query.update({
                    'fmt': fmt,
                })
                lang_subs.append({
                    'ext': fmt,
                    'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)),
                    'name': sub_name,
                })

        # NB: Constructing the full subtitle dictionary is slow
        get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
            self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
        for lang_code, caption_track in captions.items():
            caption_url = caption_track.get('baseUrl')
            # Check for a URL before trying to parse its query
            if not caption_url:
                continue
            orig_lang = parse_qs(caption_url).get('lang', [None])[-1]
            lang_name = self._get_text(caption_track, 'name', max_runs=1)
            if caption_track.get('kind') != 'asr':
                if not lang_code:
                    continue
                process_language(
                    subtitles, caption_url, lang_code, lang_name, {})
                if not caption_track.get('isTranslatable'):
                    continue
            for trans_code, trans_name in translation_languages.items():
                if not trans_code:
                    continue
                orig_trans_code = trans_code
                if caption_track.get('kind') != 'asr' and trans_code != 'und':
                    if not get_translated_subs:
                        continue
                    trans_code += f'-{lang_code}'
                    trans_name += format_field(lang_name, None, ' from %s')
                if lang_code == f'a-{orig_trans_code}':
                    # Set audio language based on original subtitles
                    for f in formats:
                        if f.get('acodec') != 'none' and not f.get('language'):
                            f['language'] = orig_trans_code
                    # Add an "-orig" label to the original language so that it can be distinguished.
                    # The subs are returned without "-orig" as well for compatibility
                    process_language(
                        automatic_captions, caption_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                # Setting tlang=lang returns damaged subtitles.
                process_language(automatic_captions, caption_url, trans_code, trans_name,
                                 {} if orig_lang == orig_trans_code else {'tlang': trans_code})

    info['automatic_captions'] = automatic_captions
    info['subtitles'] = subtitles

    # Pick up start/end times from the URL fragment first, then from the query
    parsed_url = urllib.parse.urlparse(url)
    for component in [parsed_url.fragment, parsed_url.query]:
        query = urllib.parse.parse_qs(component)
        for k, v in query.items():
            for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                d_k += '_time'
                if d_k not in info and k in s_ks:
                    info[d_k] = parse_duration(v[0])

    # Youtube Music Auto-generated description
    if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
        # XXX: Causes catastrophic backtracking if description has "·"
        # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
        # Simulating atomic groups: (?P<a>[^xy]+)x => (?=(?P<a>[^xy]+))(?P=a)x
        # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
        # NB: The pattern is in verbose mode, so literal spaces must be escaped
        mobj = re.search(
            r'''(?xs)
                (?=(?P<track>[^\n·]+))(?P=track)·
                (?=(?P<artist>[^\n]+))(?P=artist)\n+
                (?=(?P<album>[^\n]+))(?P=album)\n
                (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                (?:.+?Released\ on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
                (.+?\nArtist\s*:\s*
                    (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
                )?.+\nAuto-generated\ by\ YouTube\.\s*$
            ''', video_description)
        if mobj:
            release_year = mobj.group('release_year')
            release_date = mobj.group('release_date')
            if release_date:
                release_date = release_date.replace('-', '')
                if not release_year:
                    release_year = release_date[:4]
            info.update({
                'album': mobj.group('album').strip(),
                'artists': ([a] if (a := mobj.group('clean_artist'))
                            else [a.strip() for a in mobj.group('artist').split('·')]),
                'track': mobj.group('track').strip(),
                'release_date': release_date,
                'release_year': int_or_none(release_year),
            })

    initial_data = None
    if webpage:
        initial_data = self.extract_yt_initial_data(video_id, webpage, fatal=False)
        if not traverse_obj(initial_data, 'contents'):
            self.report_warning('Incomplete data received in embedded initial data; re-fetching using API.')
            initial_data = None
    if not initial_data:
        query = {'videoId': video_id}
        query.update(self._get_checkok_params())
        initial_data = self._extract_response(
            item_id=video_id, ep='next', fatal=False,
            ytcfg=master_ytcfg, query=query, check_get_keys='contents',
            headers=self.generate_api_headers(ytcfg=master_ytcfg),
            note='Downloading initial data API JSON')

    info['comment_count'] = traverse_obj(initial_data, (
        'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer',
        'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount',
    ), (
        'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section',
        'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo',
    ), expected_type=self._get_count, get_all=False)

    try:  # This will error if there is no livechat
        initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
    except (KeyError, IndexError, TypeError):
        pass
    else:
        info.setdefault('subtitles', {})['live_chat'] = [{
            # url is needed to set cookies
            'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
            'video_id': video_id,
            'ext': 'json',
            'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
                         else 'youtube_live_chat_replay'),
        }]

    if initial_data:
        info['chapters'] = (
            self._extract_chapters_from_json(initial_data, duration)
            or self._extract_chapters_from_engagement_panel(initial_data, duration)
            or self._extract_chapters_from_description(video_description, duration)
            or None)

        info['heatmap'] = self._extract_heatmap(initial_data)

    contents = traverse_obj(
        initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
        expected_type=list, default=[])

    vpir = get_first(contents, 'videoPrimaryInfoRenderer')
    if vpir:
        stl = vpir.get('superTitleLink')
        if stl:
            stl = self._get_text(stl)
            if try_get(
                    vpir,
                    lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                info['location'] = stl
            else:
                mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl)
                if mobj:
                    info.update({
                        'series': mobj.group(1),
                        'season_number': int(mobj.group(2)),
                        'episode_number': int(mobj.group(3)),
                    })
        for tlb in (try_get(
                vpir,
                lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                list) or []):
            tbrs = variadic(
                traverse_obj(
                    tlb, ('toggleButtonRenderer', ...),
                    ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer')))
            for tbr in tbrs:
                for getter, regex in [(
                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                            lambda x: x['accessibility'],
                            lambda x: x['accessibilityData']['accessibilityData'],
                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                    label = (try_get(tbr, getter, dict) or {}).get('label')
                    if label:
                        mobj = re.match(regex, label)
                        if mobj:
                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                            break

        like_count = traverse_obj(vpir, (
            'videoActions', 'menuRenderer', 'topLevelButtons', ...,
            'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel',
            'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel',
            'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False)
        # Do not clobber a count already parsed from the toggle buttons above with None
        if like_count is not None or 'like_count' not in info:
            info['like_count'] = like_count

        vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
        if vcr:
            vc = self._get_count(vcr, 'viewCount')
            # Upcoming premieres with waiting count are treated as live here
            if vcr.get('isLive'):
                info['concurrent_view_count'] = vc
            elif info.get('view_count') is None:
                info['view_count'] = vc

    vsir = get_first(contents, 'videoSecondaryInfoRenderer')
    if vsir:
        vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
        info.update({
            'channel': self._get_text(vor, 'title'),
            'channel_follower_count': self._get_count(vor, 'subscriberCountText')})

        if not channel_handle:
            channel_handle = self.handle_from_url(
                traverse_obj(vor, (
                    ('navigationEndpoint', ('title', 'runs', ..., 'navigationEndpoint')),
                    (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl')),
                    {str}), get_all=False))

        rows = try_get(
            vsir,
            lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
            list) or []
        # A divider line between rows means several songs are listed;
        # the per-song fields are then not attributed to the video
        multiple_songs = any(
            try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True
            for row in rows)
        for row in rows:
            mrr = row.get('metadataRowRenderer') or {}
            mrr_title = mrr.get('title')
            if not mrr_title:
                continue
            mrr_title = self._get_text(mrr, 'title')
            mrr_contents_text = self._get_text(mrr, ('contents', 0))
            if mrr_title == 'License':
                info['license'] = mrr_contents_text
            elif not multiple_songs:
                if mrr_title == 'Album':
                    info['album'] = mrr_contents_text
                elif mrr_title == 'Artist':
                    info['artists'] = [mrr_contents_text] if mrr_contents_text else None
                elif mrr_title == 'Song':
                    info['track'] = mrr_contents_text
        owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
        if self._has_badge(owner_badges, BadgeType.VERIFIED):
            info['channel_is_verified'] = True

    info.update({
        'uploader': info.get('channel'),
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
    })

    # We only want timestamp IF it has time precision AND a timezone
    # Currently the uploadDate in microformats appears to be in US/Pacific timezone.
    timestamp = (
        parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT)
        or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT)
    )
    upload_date = (
        dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else
        (
            unified_strdate(get_first(microformats, 'uploadDate'))
            or unified_strdate(search_meta('uploadDate'))
        ))

    # In the case we cannot get the timestamp:
    # The upload date for scheduled, live and past live streams / premieres in microformats
    # may be different from the stream date. Although not in UTC, we will prefer it in this case.
    # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139
    if not upload_date or (not timestamp and live_status in ('not_live', None)):
        # this should be in UTC, as configured in the cookie/client context
        upload_date = strftime_or_none(
            self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date

    info['upload_date'] = upload_date
    info['timestamp'] = timestamp

    if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
        # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
        upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc)
        if upload_datetime >= datetime_from_str('today-2days'):
            for fmt in info['formats']:
                if fmt.get('protocol') == 'm3u8_native':
                    fmt['__needs_testing'] = True

    # Mirror the music fields into their generic counterparts
    for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]:
        v = info.get(s_k)
        if v:
            info[d_k] = v

    badges = self._extract_badges(traverse_obj(vpir, 'badges'))

    is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                  or get_first(video_details, 'isPrivate', expected_type=bool))

    info['availability'] = (
        'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
        else self._availability(
            is_private=is_private,
            # NB: parsed as `(badge or False) if (initial_data and is_private is not None) else None`
            needs_premium=(
                self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
                or False if initial_data and is_private is not None else None),
            needs_subscription=(
                self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
                or False if initial_data and is_private is not None else None),
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else (
                self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
                or get_first(microformats, 'isUnlisted', expected_type=bool))))

    info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)

    self.mark_watched(video_id, player_responses)

    return info
4788 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod
def passthrough_smuggled_data(func):
    """Decorator for ``(self, url)`` methods that carries smuggled data through.

    The wrapper unsmuggles the data from ``url`` (adding ``is_music_url`` when
    ``self.is_music_url(url)`` is true), calls ``func(self, url, smuggled_data)``
    and smuggles whatever data remains into the returned result and, lazily,
    into each of its ``entries``. Only results of type ``url`` or
    ``url_transparent`` are modified.
    """
    youtube_hosts = ('www.youtube.com', 'music.youtube.com')

    def _smuggle(info, smuggled_data):
        if info.get('_type') in ('url', 'url_transparent'):
            if smuggled_data.get('is_music_url'):
                url_parts = urllib.parse.urlparse(info['url'])
                if url_parts.netloc in youtube_hosts:
                    # The music host itself carries the flag, so drop it from the data
                    smuggled_data.pop('is_music_url')
                    info['url'] = urllib.parse.urlunparse(url_parts._replace(netloc='music.youtube.com'))
            if smuggled_data:
                info['url'] = smuggle_url(info['url'], smuggled_data)
        return info

    @functools.wraps(func)
    def wrapper(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        result = func(self, url, smuggled_data)
        if not smuggled_data:
            return result
        _smuggle(result, smuggled_data)
        if result.get('entries'):
            # Each entry gets its own copy, since _smuggle() may pop from the dict
            result['entries'] = (_smuggle(entry, smuggled_data.copy()) for entry in result['entries'])
        return result
    return wrapper
4816 @staticmethod
4817 def _extract_basic_item_renderer(item):
4818 # Modified from _extract_grid_item_renderer
4819 known_basic_renderers = (
4820 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer',
4822 for key, renderer in item.items():
4823 if not isinstance(renderer, dict):
4824 continue
4825 elif key in known_basic_renderers:
4826 return renderer
4827 elif key.startswith('grid') and key.endswith('Renderer'):
4828 return renderer
def _extract_channel_renderer(self, renderer):
    """Build a ``url`` result for YoutubeTabIE from a channel renderer dict.

    ``renderer`` must have a ``channelId`` key (a KeyError is raised otherwise).
    """
    channel_id = self.ucid_or_none(renderer['channelId'])
    channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
    title = self._get_text(renderer, 'title')

    handle_url = traverse_obj(renderer, (
        'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                               ('browseEndpoint', 'canonicalBaseUrl')),
        {str}), get_all=False)
    channel_handle = self.handle_from_url(handle_url)
    if not channel_handle:
        # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
        channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))

    # Because of the above, YouTube sets videoCountText to the subscriber text in
    # search channel renderers. However, in feed/channels subscriberCountText is
    # set correctly to the subscriber count
    follower_count = traverse_obj(
        renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count)

    # videoCountText may be the subscriber count, so only trust it
    # when subscriberCountText itself holds a count
    if self._get_count(renderer, 'subscriberCountText') is not None:
        playlist_count = self._get_count(renderer, 'videoCountText')
    else:
        playlist_count = None

    owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
    is_verified = True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None

    return {
        '_type': 'url',
        'url': channel_url,
        'id': channel_id,
        'ie_key': YoutubeTabIE.ie_key(),
        'channel': title,
        'uploader': title,
        'channel_id': channel_id,
        'channel_url': channel_url,
        'title': title,
        'uploader_id': channel_handle,
        'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        'channel_follower_count': follower_count,
        'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
        'playlist_count': playlist_count,
        'description': self._get_text(renderer, 'descriptionSnippet'),
        'channel_is_verified': is_verified,
    }
def _grid_entries(self, grid_renderer):
    """Yield one result per usable item in ``grid_renderer['items']``.

    Each item is resolved to a basic renderer and dispatched, in this order,
    on ``playlistId``, ``videoId``, ``channelId`` and finally on the
    navigation endpoint URL.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        if renderer.get('playlistId'):
            # playlist
            playlist_id = renderer['playlistId']
            yield self.url_result(
                f'https://www.youtube.com/playlist?list={playlist_id}',
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
        elif renderer.get('videoId'):
            # video
            yield self._extract_video(renderer)
        elif renderer.get('channelId'):
            # channel
            yield self._extract_channel_renderer(renderer)
        else:
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                str))
            if not ep_url:
                continue
            # Only the first extractor that accepts the URL is used
            matching_ie = next(
                (ie for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE) if ie.suitable(ep_url)), None)
            if matching_ie is not None:
                yield self.url_result(
                    ep_url, ie=matching_ie.ie_key(), video_id=matching_ie._match_id(ep_url), video_title=title)
def _music_reponsive_list_entry(self, renderer):
    """Map a music list item renderer to a url result.

    Tries, in order: a direct video, a playlist (optionally anchored at a
    video), and a browse page. Returns ``None`` if none of them is present.
    """
    video_id = traverse_obj(renderer, ('playlistItemData', 'videoId'))
    if video_id:
        title = traverse_obj(renderer, (
            'flexColumns', 0, 'musicResponsiveListItemFlexColumnRenderer',
            'text', 'runs', 0, 'text'))
        return self.url_result(f'https://music.youtube.com/watch?v={video_id}',
                               ie=YoutubeIE.ie_key(), video_id=video_id, title=title)

    playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId'))
    if playlist_id:
        video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId'))
        playlist_url = (
            f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}' if video_id
            else f'https://music.youtube.com/playlist?list={playlist_id}')
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId'))
    if browse_id:
        return self.url_result(f'https://music.youtube.com/browse/{browse_id}',
                               ie=YoutubeTabIE.ie_key(), video_id=browse_id)
    return None
4927 def _shelf_entries_from_content(self, shelf_renderer):
4928 content = shelf_renderer.get('content')
4929 if not isinstance(content, dict):
4930 return
4931 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
4932 if renderer:
4933 # TODO: add support for nested playlists so each shelf is processed
4934 # as separate playlist
4935 # TODO: this includes only first N items
4936 yield from self._grid_entries(renderer)
4937 renderer = content.get('horizontalListRenderer')
4938 if renderer:
4939 # TODO: handle case
4940 pass
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a url result for the shelf endpoint (if any), then the shelf's inline entries.

    When ``skip_channels`` is true and the shelf URL contains ``/channels?``,
    nothing is yielded at all.
    """
    endpoint_path = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    shelf_url = urljoin('https://www.youtube.com', endpoint_path)
    if shelf_url:
        # Skip links to other channels. Note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        yield self.url_result(shelf_url, video_title=self._get_text(shelf_renderer, 'title'))
    # Shelf may not contain shelf URL, fallback to extraction from content
    yield from self._shelf_entries_from_content(shelf_renderer)
4958 def _playlist_entries(self, video_list_renderer):
4959 for content in video_list_renderer['contents']:
4960 if not isinstance(content, dict):
4961 continue
4962 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
4963 if not isinstance(renderer, dict):
4964 continue
4965 video_id = renderer.get('videoId')
4966 if not video_id:
4967 continue
4968 yield self._extract_video(renderer)
def _rich_entries(self, rich_grid_renderer):
    """Yield at most one entry (video, else playlist) from a rich item's ``content``."""
    renderer = traverse_obj(
        rich_grid_renderer,
        ('content', ('videoRenderer', 'reelItemRenderer', 'playlistRenderer')), get_all=False) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
    elif renderer.get('playlistId'):
        playlist_id = renderer['playlistId']
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=self._get_text(renderer, 'title'))
4986 def _video_entry(self, video_renderer):
4987 video_id = video_renderer.get('videoId')
4988 if video_id:
4989 return self._extract_video(video_renderer)
def _hashtag_tile_entry(self, hashtag_tile_renderer):
    """Return a YoutubeTabIE url result for a hashtag tile, or ``None`` without a tap URL."""
    tap_url = traverse_obj(
        hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))
    url = urljoin('https://youtube.com', tap_url)
    if not url:
        return None
    return self.url_result(
        url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag'))
def _post_thread_entries(self, post_thread_renderer):
    """Yield entries referenced by a community post.

    Covers, in order: the attached video, the attached playlist, and any
    YouTube video links in the post text (skipping a link to the attached video).
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return

    # video attachment
    attached_video = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = attached_video.get('videoId')
    if video_id:
        entry = self._extract_video(attached_video)
        if entry:
            yield entry

    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str)
    if playlist_id:
        yield self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}',
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)

    # inline video links
    for run in try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], str)
        if not ep_url or not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if ep_video_id == video_id:
            # Already covered by the video attachment above
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
5034 def _post_thread_continuation_entries(self, post_thread_continuation):
5035 contents = post_thread_continuation.get('contents')
5036 if not isinstance(contents, list):
5037 return
5038 for content in contents:
5039 renderer = content.get('backstagePostThreadRenderer')
5040 if isinstance(renderer, dict):
5041 yield from self._post_thread_entries(renderer)
5042 continue
5043 renderer = content.get('videoRenderer')
5044 if isinstance(renderer, dict):
5045 yield self._video_entry(renderer)
5047 r''' # unused
5048 def _rich_grid_entries(self, contents):
5049 for content in contents:
5050 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
5051 if video_renderer:
5052 entry = self._video_entry(video_renderer)
5053 if entry:
5054 yield entry
def _report_history_entries(self, renderer):
    """Yield a YoutubeIE url result for every linked URL in a report-history table."""
    url_path = (
        'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
        'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
        'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')
    for relative_url in traverse_obj(renderer, url_path):
        yield self.url_result(urljoin('https://www.youtube.com', relative_url), YoutubeIE)
def _extract_entries(self, parent_renderer, continuation_list):
    """Yield entries from ``parent_renderer['contents']`` and record the continuation.

    ``continuation_list`` is a one-element list used as an out-parameter:
    it is reset to ``[None]`` in place and, as extraction proceeds, its only
    slot is set to the most recently found continuation.
    """
    # continuation_list is modified in-place with continuation_list = [continuation_token]
    continuation_list[:] = [None]

    # Maps a renderer key to a callable returning an iterable of entries
    renderer_handlers = {
        'playlistVideoListRenderer': self._playlist_entries,
        'gridRenderer': self._grid_entries,
        'reelShelfRenderer': self._grid_entries,
        'shelfRenderer': self._shelf_entries,
        'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)],
        'backstagePostThreadRenderer': self._post_thread_entries,
        'videoRenderer': lambda x: [self._video_entry(x)],
        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
        'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
        'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
    }

    for content in try_get(parent_renderer, lambda x: x['contents'], list) or []:
        if not isinstance(content, dict):
            continue
        section = traverse_obj(
            content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
            expected_type=dict)
        if not section:
            if content.get('richItemRenderer'):
                yield from self._rich_entries(content['richItemRenderer'])
                continuation_list[0] = self._extract_continuation(parent_renderer)
            elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
                table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
                yield from self._report_history_entries(table)
                continuation_list[0] = self._extract_continuation(table)
            continue

        for section_item in try_get(section, lambda x: x['contents'], list) or []:
            if not isinstance(section_item, dict):
                continue

            for key, renderer in section_item.items():
                if key not in renderer_handlers:
                    continue
                yield from (entry for entry in renderer_handlers[key](renderer) if entry)
                continuation_list[0] = self._extract_continuation(renderer)
                # Only the first known renderer of each item is processed
                break

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(section)

    if not continuation_list[0]:
        continuation_list[0] = self._extract_continuation(parent_renderer)
def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
    """Yield every entry of a tab, following continuations page by page.

    The first page comes from ``tab['content']``. Each following page is
    fetched with ``_extract_response`` using the last continuation found.
    Paging stops when there is no continuation, when a continuation token
    repeats, when the response is empty, or when the response contains no
    renderer this method knows how to handle.
    """
    continuation_list = [None]

    def extract_entries(renderer):
        # ``continuation_list`` is looked up at call time, so the re-binding
        # done for each page below is what _extract_entries receives
        return self._extract_entries(renderer, continuation_list)

    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    yield from extract_entries(parent_renderer)
    continuation = continuation_list[0]

    # key found in the first continuation item -> (handler, parent key).
    # With a parent key the handler gets ``{parent_key: continuation_items}``;
    # with ``None`` it gets the renderer stored under the matched key.
    known_renderers = {
        'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
        'gridPlaylistRenderer': (self._grid_entries, 'items'),
        'gridVideoRenderer': (self._grid_entries, 'items'),
        'gridChannelRenderer': (self._grid_entries, 'items'),
        'playlistVideoRenderer': (self._playlist_entries, 'contents'),
        'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
        'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
        'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
        'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
        'playlistVideoListContinuation': (self._playlist_entries, None),
        'gridContinuation': (self._grid_entries, None),
        'itemSectionContinuation': (self._post_thread_continuation_entries, None),
        'sectionListContinuation': (extract_entries, None),  # for feeds
    }

    seen_continuations = set()
    for page_num in itertools.count(1):
        if not continuation:
            break
        continuation_token = continuation.get('continuation')
        if continuation_token is not None and continuation_token in seen_continuations:
            self.write_debug('Detected YouTube feed looping - assuming end of feed.')
            break
        seen_continuations.add(continuation_token)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
        response = self._extract_response(
            item_id=f'{item_id} page {page_num}',
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
        # See: https://github.com/ytdl-org/youtube-dl/issues/28702
        visitor_data = self._extract_visitor_data(response) or visitor_data

        continuation_items = traverse_obj(response, (
            ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
            'appendContinuationItemsAction', 'continuationItems',
        ), 'continuationContents', get_all=False)
        # First list element, or the object itself when it is already a dict
        continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})

        video_items_renderer = None
        for key in continuation_item:
            if key not in known_renderers:
                continue
            func, parent_key = known_renderers[key]
            if parent_key:
                video_items_renderer = {parent_key: continuation_items}
            else:
                # The handler expects the renderer itself (e.g. the dict that
                # holds 'items'/'contents'), not the wrapper keyed by ``key``
                video_items_renderer = continuation_item[key]
            continuation_list = [None]
            yield from func(video_items_renderer)
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)

        if not video_items_renderer:
            break
5186 @staticmethod
5187 def _extract_selected_tab(tabs, fatal=True):
5188 for tab_renderer in tabs:
5189 if tab_renderer.get('selected'):
5190 return tab_renderer
5191 if fatal:
5192 raise ExtractorError('Unable to find selected tab')
@staticmethod
def _extract_tab_renderers(response):
    """Collect every ``tabRenderer``/``expandableTabRenderer`` dict from a browse response."""
    tabs_path = (
        'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ...,
        ('tabRenderer', 'expandableTabRenderer'))
    return traverse_obj(response, tabs_path, expected_type=dict)
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
    """Build the playlist result for the selected tab.

    The title from the tab metadata is suffixed with the selected tab's
    ``title`` and ``expandedText`` (each as `` - <value>``) as formatted by
    ``format_field``.
    """
    metadata = self._extract_metadata_from_tabs(item_id, data)

    selected_tab = self._extract_selected_tab(tabs)
    for tab_field in ('title', 'expandedText'):
        metadata['title'] += format_field(selected_tab, tab_field, ' - %s')

    entries = self._entries(
        selected_tab, metadata['id'], ytcfg,
        self._extract_account_syncid(ytcfg, data),
        self._extract_visitor_data(data, ytcfg))
    return self.playlist_result(entries, **metadata)
def _extract_metadata_from_tabs(self, item_id, data):
    """Assemble the playlist/channel level info dict from a browse response.

    Starts from ``{'id': item_id}`` and fills in channel, thumbnail, count
    and date fields from the channel or playlist metadata renderer, the page
    header and the (deprecated) sidebar.
    """
    info = {'id': item_id}

    metadata_renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), expected_type=dict)
    if not metadata_renderer:
        metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
    else:
        channel_id = traverse_obj(metadata_renderer, ('externalId', {self.ucid_or_none}),
                                  ('channelUrl', {self.ucid_from_url}))
        info.update({
            'channel': metadata_renderer.get('title'),
            'channel_id': channel_id,
        })
        if info['channel_id']:
            info['id'] = info['channel_id']

    # pageHeaderViewModel slow rollout began April 2024
    page_header_view_model = traverse_obj(data, (
        'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}))

    # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
    # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
    def _append_uncropped(thumbnails, thumbnail_id, preference):
        if not thumbnails:
            return
        uncropped_url = url_or_none((thumbnails[0]['url'] or '').split('=')[0] + '=s0')
        if uncropped_url:
            thumbnails.append({
                'url': uncropped_url,
                'id': thumbnail_id,
                'preference': preference,
            })

    avatar_thumbnails = self._extract_thumbnails(metadata_renderer, 'avatar')
    _append_uncropped(avatar_thumbnails, 'avatar_uncropped', 1)

    channel_banners = (
        self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
        or self._extract_thumbnails(
            page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources'))
    for banner in channel_banners:
        banner['preference'] = -10
    _append_uncropped(channel_banners, 'banner_uncropped', -5)

    # Deprecated - remove primary_sidebar_renderer when layout discontinued
    primary_sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer'), expected_type=dict)

    primary_thumbnails = self._extract_thumbnails(
        primary_sidebar_renderer,
        ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail'))
    playlist_thumbnails = self._extract_thumbnails(
        playlist_header_renderer, ('playlistHeaderBanner', 'heroPlaylistThumbnailRenderer', 'thumbnail'))

    info.update({
        'title': (traverse_obj(metadata_renderer, 'title')
                  or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
                  or info['id']),
        'availability': self._extract_availability(data),
        'channel_follower_count': (
            self._get_count(data, ('header', ..., 'subscriberCountText'))
            or traverse_obj(page_header_view_model, (
                'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
                lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))),
        'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
        'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
                 or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),
        'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners,
    })

    channel_handle = (
        traverse_obj(metadata_renderer, (('vanityChannelUrl', ('ownerUrls', ...)), {self.handle_from_url}), get_all=False)
        or traverse_obj(data, ('header', ..., 'channelHandleText', {self.handle_or_none}), get_all=False))
    if channel_handle:
        info.update({
            'uploader_id': channel_handle,
            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
        })

    channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
    if self._has_badge(channel_badges, BadgeType.VERIFIED):
        info['channel_is_verified'] = True

    # Playlist stats is a text runs array containing [video count, view count, last updated].
    # last updated or (view count and last updated) may be missing.
    playlist_stats = get_first(
        (primary_sidebar_renderer, playlist_header_renderer), (('stats', 'briefStats', 'numVideosText'), ))

    last_updated_text = (
        self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
        or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
    info['modified_date'] = strftime_or_none(self._parse_time_text(last_updated_text))

    # Each count falls back only on None, because 0 is a valid count
    view_count = self._get_count(playlist_stats, 1)
    if view_count is None:
        view_count = self._get_count(playlist_header_renderer, 'viewCountText')
    if view_count is None:
        view_count = self._get_count(data, (
            'contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., 'tabRenderer', 'content', 'sectionListRenderer',
            'contents', ..., 'itemSectionRenderer', 'contents', ..., 'channelAboutFullMetadataRenderer', 'viewCountText'))
    info['view_count'] = view_count

    playlist_count = self._get_count(playlist_stats, 0)
    if playlist_count is None:
        playlist_count = self._get_count(
            playlist_header_renderer, ('byline', 0, 'playlistBylineRenderer', 'text'))
    info['playlist_count'] = playlist_count

    if not info.get('channel_id'):
        owner = traverse_obj(playlist_header_renderer, 'ownerText')
        if not owner:  # Deprecated
            owner = traverse_obj(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer'),
                ('videoOwner', 'videoOwnerRenderer', 'title'))
        owner_text = self._get_text(owner)
        browse_ep = traverse_obj(owner, ('runs', 0, 'navigationEndpoint', 'browseEndpoint')) or {}
        info.update({
            'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text),
            'channel_id': self.ucid_or_none(browse_ep.get('browseId')),
            'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))),
        })

    info.update({
        'uploader': info['channel'],
        'channel_url': format_field(info.get('channel_id'), None, 'https://www.youtube.com/channel/%s', default=None),
        'uploader_url': format_field(info.get('uploader_id'), None, 'https://www.youtube.com/%s', default=None),
    })

    return info
5346 def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg):
5347 first_id = last_id = response = None
5348 for page_num in itertools.count(1):
5349 videos = list(self._playlist_entries(playlist))
5350 if not videos:
5351 return
5352 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
5353 if start >= len(videos):
5354 return
5355 yield from videos[start:]
5356 first_id = first_id or videos[0]['id']
5357 last_id = videos[-1]['id']
5358 watch_endpoint = try_get(
5359 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
5360 headers = self.generate_api_headers(
5361 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
5362 visitor_data=self._extract_visitor_data(response, data, ytcfg))
5363 query = {
5364 'playlistId': playlist_id,
5365 'videoId': watch_endpoint.get('videoId') or last_id,
5366 'index': watch_endpoint.get('index') or len(videos),
5367 'params': watch_endpoint.get('params') or 'OAE%3D',
5369 response = self._extract_response(
5370 item_id=f'{playlist_id} page {page_num}',
5371 query=query, ep='next', headers=headers, ytcfg=ytcfg,
5372 check_get_keys='contents',
5374 playlist = try_get(
5375 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
    """Return a result for a watch-page playlist.

    If the playlist links to a different URL and is not a known-unviewable
    id, that URL is delegated to YoutubeTabIE; otherwise the entries are
    extracted inline.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        str)
    playlist_url = urljoin(url, endpoint_url)

    # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1]
    # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg
    is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id)

    should_delegate = playlist_url and playlist_url != url and not is_known_unviewable
    if should_delegate:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    return self.playlist_result(
        self._extract_inline_playlist(playlist, playlist_id, data, ytcfg),
        playlist_id=playlist_id, playlist_title=title)
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public

    The privacy state is read from three sources: sidebar badges, the
    playlist header ``privacy`` value and the selected item of the privacy
    dropdown. A badge is honoured on its own; between header and dropdown,
    the header wins whenever it is present.

    @param data: response
    @returns 'public', or whatever ``self._availability`` returns
    """
    sidebar_renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
    player_header_privacy = playlist_header_renderer.get('privacy')

    badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_setting_icon = get_first(
        (playlist_header_renderer, sidebar_renderer),
        ('privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
         lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
        expected_type=str)

    microformats_is_unlisted = traverse_obj(
        data, ('microformat', 'microformatDataRenderer', 'unlisted'), expected_type=bool)

    if (self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
            or player_header_privacy == 'PUBLIC'
            or privacy_setting_icon == 'PRIVACY_PUBLIC'):
        return 'public'

    def _from_header_or_icon(header_value, icon_value, fallback=None):
        # Header privacy takes precedence over the dropdown icon; ``fallback``
        # is used only when neither source is present
        if player_header_privacy is not None:
            return player_header_privacy == header_value
        if privacy_setting_icon is not None:
            return privacy_setting_icon == icon_value
        return fallback

    # The badge check is deliberately OR-ed with the *whole* header/icon chain.
    # Written inline as ``badge or a if c else b`` it would parse as
    # ``(badge or a) if c else b`` and drop the badge whenever the header is absent.
    return self._availability(
        is_private=(
            self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
            or _from_header_or_icon('PRIVATE', 'PRIVACY_PRIVATE')),
        is_unlisted=(
            self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
            or _from_header_or_icon('UNLISTED', 'PRIVACY_UNLISTED', microformats_is_unlisted)),
        needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
        needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
        needs_auth=False)
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first truthy ``info_renderer`` value among the playlist sidebar items, else ``None``."""
    sidebar_items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    candidates = (
        try_get(item, lambda x: x[info_renderer], expected_type)
        for item in sidebar_items)
    return next((renderer for renderer in candidates if renderer), None)
def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
    """
    Reload playlists with unavailable videos (e.g. private videos, region blocked, etc.)

    Returns ``None`` without any request when ``data`` has neither playlist
    metadata nor a playlist header; otherwise returns the non-fatal result
    of ``_extract_response``.
    """
    playlist_marker = traverse_obj(
        data, ('metadata', 'playlistMetadataRenderer'), ('header', 'playlistHeaderRenderer'))
    if not playlist_marker:
        return None
    headers = self.generate_api_headers(
        ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        visitor_data=self._extract_visitor_data(data, ytcfg))
    return self._extract_response(
        item_id=item_id, headers=headers,
        query={
            'params': 'wgYCCAA=',
            'browseId': f'VL{item_id}',
        },
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Redownloading playlist API JSON with unavailable videos')
@functools.cached_property
def skip_webpage(self):
    """Whether ``webpage`` is listed in the youtubetab ``skip`` extractor argument."""
    skipped_steps = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    return 'webpage' in skipped_steps
def _extract_webpage(self, url, item_id, fatal=True):
    """Download the page and its initial data, retrying through ``RetryManager``.

    Returns ``(webpage, data)``; either may be ``None`` if the download or
    extraction did not succeed. Network errors other than HTTP 403/429, and
    incomplete initial data, are handed to the retry manager; other
    extractor errors and reported alerts go through ``_error_or_warning``
    and end the loop.
    """
    webpage = data = None
    for retry in self.RetryManager(fatal=fatal):
        try:
            webpage = self._download_webpage(url, item_id, note='Downloading webpage')
            data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
        except ExtractorError as e:
            cause = e.cause
            retryable = isinstance(cause, network_exceptions) and (
                not isinstance(cause, HTTPError) or cause.status not in (403, 429))
            if retryable:
                retry.error = e
                continue
            self._error_or_warning(e, fatal=fatal)
            break

        try:
            self._extract_and_report_alerts(data)
        except ExtractorError as e:
            self._error_or_warning(e, fatal=fatal)
            break

        # Sometimes youtube returns a webpage with incomplete ytInitialData
        # See: https://github.com/yt-dlp/yt-dlp/issues/116
        has_usable_data = traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions')
        if not has_usable_data:
            retry.error = ExtractorError('Incomplete yt initial data received')
            data = None
            continue

    return webpage, data
def _report_playlist_authcheck(self, ytcfg, fatal=True):
    """Use if failed to extract ytcfg (and data) from initial webpage"""
    # Only relevant for authenticated sessions that ended up without a ytcfg
    if ytcfg or not self.is_authenticated:
        return
    msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
    authcheck_skipped = 'authcheck' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
    if not authcheck_skipped and fatal:
        raise ExtractorError(
            f'{msg}. If you are not downloading private content, or '
            'your cookies are only for the first account and channel,'
            ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
            expected=True)
    self.report_warning(msg, only_once=True)
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
    """Return ``(data, ytcfg)`` for ``url``.

    The webpage is tried first unless ``skip_webpage`` is set; if that
    leaves no data, the tab endpoint API is used instead.
    """
    data = None
    if not self.skip_webpage:
        webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
        ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
        # Reject webpage data if redirected to home page without explicitly requesting
        selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
        redirected_to_home = (
            url != 'https://www.youtube.com/feed/recommended'
            and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch'  # Home page
            and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', []))
        if redirected_to_home:
            msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page'
            if fatal:
                raise ExtractorError(msg, expected=True)
            self.report_warning(msg, only_once=True)
    if not data:
        self._report_playlist_authcheck(ytcfg, fatal=fatal)
        data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
    return data, ytcfg
def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
    """Resolve ``url`` through ``navigation/resolve_url`` and fetch the endpoint it maps to.

    A ``browseEndpoint`` is fetched from ``browse`` and a ``watchEndpoint``
    from ``next``, checked in that order. If neither is present, raises
    ExtractorError when ``fatal``, else warns and returns ``None``.
    """
    headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
    resolve_response = self._extract_response(
        item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
        ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)

    for endpoint_key, api_ep in (('browseEndpoint', 'browse'), ('watchEndpoint', 'next')):
        params = try_get(resolve_response, lambda x: x['endpoint'][endpoint_key], dict)
        if not params:
            continue
        return self._extract_response(
            item_id=item_id, query=params, ep=api_ep, headers=headers,
            ytcfg=ytcfg, fatal=fatal, default_client=default_client,
            check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions'))

    err_note = 'Failed to resolve url (does the playlist exist?)'
    if fatal:
        raise ExtractorError(err_note, expected=True)
    self.report_warning(err_note, item_id)
# Search params used by _search_results when the caller does not pass ``params``
_SEARCH_PARAMS = None

def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
    """Yield search result entries for ``query``, following continuations until none is left."""
    if params is NO_DEFAULT:
        params = self._SEARCH_PARAMS
    data = {'query': query}
    if params:
        data['params'] = params

    # Alternative locations of the result list, tried in this order
    content_keys = (
        ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'),
        ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'),
        # ytmusic search
        ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
        ('continuationContents', ),
    )
    display_id = f'query "{query}"'
    # De-duplicated top-level keys of the paths above
    check_get_keys = tuple({keys[0] for keys in content_keys})
    ytcfg = {} if self.skip_webpage else self._download_ytcfg(default_client, display_id)
    self._report_playlist_authcheck(ytcfg, fatal=False)

    continuation_list = [None]
    search = None
    for page_num in itertools.count(1):
        # Merge the continuation of the previous page (if any) into the query
        data.update(continuation_list[0] or {})
        headers = self.generate_api_headers(
            ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
        search = self._extract_response(
            item_id=f'{display_id} page {page_num}', ep='search', query=data,
            default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
        slr_contents = traverse_obj(search, *content_keys)
        page = {'contents': list(variadic(slr_contents))}
        yield from self._extract_entries(page, continuation_list)
        if not continuation_list[0]:
            break
5588 class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
5589 IE_DESC = 'YouTube Tabs'
5590 _VALID_URL = r'''(?x:
5591 https?://
5592 (?!consent\.)(?:\w+\.)?
5594 youtube(?:kids)?\.com|
5595 {invidious}
5598 (?P<channel_type>channel|c|user|browse)/|
5599 (?P<not_channel>
5600 feed/|hashtag/|
5601 (?:playlist|watch)\?.*?\blist=
5603 (?!(?:{reserved_names})\b) # Direct URLs
5605 (?P<id>[^/?\#&]+)
5606 )'''.format(
5607 reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES,
5608 invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
5610 IE_NAME = 'youtube:tab'
5612 _TESTS = [{
5613 'note': 'playlists, multipage',
5614 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
5615 'playlist_mincount': 94,
5616 'info_dict': {
5617 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5618 'title': 'Igor Kleiner Ph.D. - Playlists',
5619 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5620 'uploader': 'Igor Kleiner Ph.D.',
5621 'uploader_id': '@IgorDataScience',
5622 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5623 'channel': 'Igor Kleiner Ph.D.',
5624 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5625 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5626 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5627 'channel_follower_count': int,
5629 }, {
5630 'note': 'playlists, multipage, different order',
5631 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
5632 'playlist_mincount': 94,
5633 'info_dict': {
5634 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
5635 'title': 'Igor Kleiner Ph.D. - Playlists',
5636 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a',
5637 'uploader': 'Igor Kleiner Ph.D.',
5638 'uploader_id': '@IgorDataScience',
5639 'uploader_url': 'https://www.youtube.com/@IgorDataScience',
5640 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'],
5641 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg',
5642 'channel': 'Igor Kleiner Ph.D.',
5643 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg',
5644 'channel_follower_count': int,
5646 }, {
5647 'note': 'playlists, series',
5648 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
5649 'playlist_mincount': 5,
5650 'info_dict': {
5651 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5652 'title': '3Blue1Brown - Playlists',
5653 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5654 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5655 'channel': '3Blue1Brown',
5656 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5657 'uploader_id': '@3blue1brown',
5658 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5659 'uploader': '3Blue1Brown',
5660 'tags': ['Mathematics'],
5661 'channel_follower_count': int,
5662 'channel_is_verified': True,
5664 }, {
5665 'note': 'playlists, singlepage',
5666 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
5667 'playlist_mincount': 4,
5668 'info_dict': {
5669 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5670 'title': 'ThirstForScience - Playlists',
5671 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
5672 'uploader': 'ThirstForScience',
5673 'uploader_url': 'https://www.youtube.com/@ThirstForScience',
5674 'uploader_id': '@ThirstForScience',
5675 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
5676 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ',
5677 'tags': 'count:12',
5678 'channel': 'ThirstForScience',
5679 'channel_follower_count': int,
5681 }, {
5682 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
5683 'only_matching': True,
5684 }, {
5685 'note': 'basic, single video playlist',
5686 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5687 'info_dict': {
5688 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5689 'title': 'youtube-dl public playlist',
5690 'description': '',
5691 'tags': [],
5692 'view_count': int,
5693 'modified_date': '20201130',
5694 'channel': 'Sergey M.',
5695 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5696 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5697 'availability': 'public',
5698 'uploader': 'Sergey M.',
5699 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5700 'uploader_id': '@sergeym.6173',
5702 'playlist_count': 1,
5703 }, {
5704 'note': 'empty playlist',
5705 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5706 'info_dict': {
5707 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
5708 'title': 'youtube-dl empty playlist',
5709 'tags': [],
5710 'channel': 'Sergey M.',
5711 'description': '',
5712 'modified_date': '20230921',
5713 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
5714 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5715 'availability': 'unlisted',
5716 'uploader_url': 'https://www.youtube.com/@sergeym.6173',
5717 'uploader_id': '@sergeym.6173',
5718 'uploader': 'Sergey M.',
5720 'playlist_count': 0,
5721 }, {
5722 'note': 'Home tab',
5723 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
5724 'info_dict': {
5725 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5726 'title': 'lex will - Home',
5727 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5728 'uploader': 'lex will',
5729 'uploader_id': '@lexwill718',
5730 'channel': 'lex will',
5731 'tags': ['bible', 'history', 'prophesy'],
5732 'uploader_url': 'https://www.youtube.com/@lexwill718',
5733 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5734 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5735 'channel_follower_count': int,
5737 'playlist_mincount': 2,
5738 }, {
5739 'note': 'Videos tab',
5740 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
5741 'info_dict': {
5742 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5743 'title': 'lex will - Videos',
5744 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5745 'uploader': 'lex will',
5746 'uploader_id': '@lexwill718',
5747 'tags': ['bible', 'history', 'prophesy'],
5748 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5749 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5750 'uploader_url': 'https://www.youtube.com/@lexwill718',
5751 'channel': 'lex will',
5752 'channel_follower_count': int,
5754 'playlist_mincount': 975,
5755 }, {
5756 'note': 'Videos tab, sorted by popular',
5757 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
5758 'info_dict': {
5759 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5760 'title': 'lex will - Videos',
5761 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5762 'uploader': 'lex will',
5763 'uploader_id': '@lexwill718',
5764 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5765 'uploader_url': 'https://www.youtube.com/@lexwill718',
5766 'channel': 'lex will',
5767 'tags': ['bible', 'history', 'prophesy'],
5768 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5769 'channel_follower_count': int,
5771 'playlist_mincount': 199,
5772 }, {
5773 'note': 'Playlists tab',
5774 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
5775 'info_dict': {
5776 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5777 'title': 'lex will - Playlists',
5778 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5779 'uploader': 'lex will',
5780 'uploader_id': '@lexwill718',
5781 'uploader_url': 'https://www.youtube.com/@lexwill718',
5782 'channel': 'lex will',
5783 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5784 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5785 'tags': ['bible', 'history', 'prophesy'],
5786 'channel_follower_count': int,
5788 'playlist_mincount': 17,
5789 }, {
5790 'note': 'Community tab',
5791 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
5792 'info_dict': {
5793 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5794 'title': 'lex will - Community',
5795 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5796 'channel': 'lex will',
5797 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5798 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5799 'tags': ['bible', 'history', 'prophesy'],
5800 'channel_follower_count': int,
5801 'uploader_url': 'https://www.youtube.com/@lexwill718',
5802 'uploader_id': '@lexwill718',
5803 'uploader': 'lex will',
5805 'playlist_mincount': 18,
5806 }, {
5807 'note': 'Channels tab',
5808 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
5809 'info_dict': {
5810 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5811 'title': 'lex will - Channels',
5812 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
5813 'channel': 'lex will',
5814 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
5815 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
5816 'tags': ['bible', 'history', 'prophesy'],
5817 'channel_follower_count': int,
5818 'uploader_url': 'https://www.youtube.com/@lexwill718',
5819 'uploader_id': '@lexwill718',
5820 'uploader': 'lex will',
5822 'playlist_mincount': 12,
5823 }, {
5824 'note': 'Search tab',
5825 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
5826 'playlist_mincount': 40,
5827 'info_dict': {
5828 'id': 'UCYO_jab_esuFRV4b17AJtAw',
5829 'title': '3Blue1Brown - Search - linear algebra',
5830 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
5831 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
5832 'tags': ['Mathematics'],
5833 'channel': '3Blue1Brown',
5834 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
5835 'channel_follower_count': int,
5836 'uploader_url': 'https://www.youtube.com/@3blue1brown',
5837 'uploader_id': '@3blue1brown',
5838 'uploader': '3Blue1Brown',
5839 'channel_is_verified': True,
5841 }, {
5842 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5843 'only_matching': True,
5844 }, {
5845 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5846 'only_matching': True,
5847 }, {
5848 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
5849 'only_matching': True,
5850 }, {
5851 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
5852 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5853 'info_dict': {
5854 'title': '29C3: Not my department',
5855 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
5856 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
5857 'tags': [],
5858 'view_count': int,
5859 'modified_date': '20150605',
5860 'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
5861 'channel_url': 'https://www.youtube.com/channel/UCEPzS1rYsrkqzSLNp76nrcg',
5862 'channel': 'Christiaan008',
5863 'availability': 'public',
5864 'uploader_id': '@ChRiStIaAn008',
5865 'uploader': 'Christiaan008',
5866 'uploader_url': 'https://www.youtube.com/@ChRiStIaAn008',
5868 'playlist_count': 96,
5869 }, {
5870 'note': 'Large playlist',
5871 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
5872 'info_dict': {
5873 'title': 'Uploads from Cauchemar',
5874 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
5875 'channel_url': 'https://www.youtube.com/channel/UCBABnxM4Ar9ten8Mdjj1j0Q',
5876 'tags': [],
5877 'modified_date': r're:\d{8}',
5878 'channel': 'Cauchemar',
5879 'view_count': int,
5880 'description': '',
5881 'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
5882 'availability': 'public',
5883 'uploader_id': '@Cauchemar89',
5884 'uploader': 'Cauchemar',
5885 'uploader_url': 'https://www.youtube.com/@Cauchemar89',
5887 'playlist_mincount': 1123,
5888 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5889 }, {
5890 'note': 'even larger playlist, 8832 videos',
5891 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
5892 'only_matching': True,
5893 }, {
5894 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
5895 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
5896 'info_dict': {
5897 'title': 'Uploads from Interstellar Movie',
5898 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
5899 'tags': [],
5900 'view_count': int,
5901 'channel_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
5902 'channel_url': 'https://www.youtube.com/channel/UCXw-G3eDE9trcvY2sBMM_aA',
5903 'channel': 'Interstellar Movie',
5904 'description': '',
5905 'modified_date': r're:\d{8}',
5906 'availability': 'public',
5907 'uploader_id': '@InterstellarMovie',
5908 'uploader': 'Interstellar Movie',
5909 'uploader_url': 'https://www.youtube.com/@InterstellarMovie',
5911 'playlist_mincount': 21,
5912 }, {
5913 'note': 'Playlist with "show unavailable videos" button',
5914 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
5915 'info_dict': {
5916 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
5917 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
5918 'view_count': int,
5919 'channel': 'Phim Siêu Nhân Nhật Bản',
5920 'tags': [],
5921 'description': '',
5922 'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
5923 'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
5924 'modified_date': r're:\d{8}',
5925 'availability': 'public',
5926 'uploader_url': 'https://www.youtube.com/@phimsieunhannhatban',
5927 'uploader_id': '@phimsieunhannhatban',
5928 'uploader': 'Phim Siêu Nhân Nhật Bản',
5930 'playlist_mincount': 200,
5931 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5932 }, {
5933 'note': 'Playlist with unavailable videos in page 7',
5934 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
5935 'info_dict': {
5936 'title': 'Uploads from BlankTV',
5937 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
5938 'channel': 'BlankTV',
5939 'channel_url': 'https://www.youtube.com/channel/UC8l9frL61Yl5KFOl87nIm2w',
5940 'channel_id': 'UC8l9frL61Yl5KFOl87nIm2w',
5941 'view_count': int,
5942 'tags': [],
5943 'modified_date': r're:\d{8}',
5944 'description': '',
5945 'availability': 'public',
5946 'uploader_id': '@blanktv',
5947 'uploader': 'BlankTV',
5948 'uploader_url': 'https://www.youtube.com/@blanktv',
5950 'playlist_mincount': 1000,
5951 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
5952 }, {
5953 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
5954 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5955 'info_dict': {
5956 'title': 'Data Analysis with Dr Mike Pound',
5957 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
5958 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
5959 'tags': [],
5960 'view_count': int,
5961 'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
5962 'channel_url': 'https://www.youtube.com/channel/UC9-y-6csu5WGm29I7JiwpnA',
5963 'channel': 'Computerphile',
5964 'availability': 'public',
5965 'modified_date': '20190712',
5966 'uploader_id': '@Computerphile',
5967 'uploader': 'Computerphile',
5968 'uploader_url': 'https://www.youtube.com/@Computerphile',
5970 'playlist_mincount': 11,
5971 }, {
5972 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
5973 'only_matching': True,
5974 }, {
5975 'note': 'Playlist URL that does not actually serve a playlist',
5976 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
5977 'info_dict': {
5978 'id': 'FqZTN594JQw',
5979 'ext': 'webm',
5980 'title': "Smiley's People 01 detective, Adventure Series, Action",
5981 'upload_date': '20150526',
5982 'license': 'Standard YouTube License',
5983 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
5984 'categories': ['People & Blogs'],
5985 'tags': list,
5986 'view_count': int,
5987 'like_count': int,
5989 'params': {
5990 'skip_download': True,
5992 'skip': 'This video is not available.',
5993 'add_ie': [YoutubeIE.ie_key()],
5994 }, {
5995 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
5996 'only_matching': True,
5997 }, {
5998 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
5999 'only_matching': True,
6000 }, {
6001 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
6002 'info_dict': {
6003 'id': 'hGkQjiJLjWQ', # This will keep changing
6004 'ext': 'mp4',
6005 'title': str,
6006 'upload_date': r're:\d{8}',
6007 'description': str,
6008 'categories': ['News & Politics'],
6009 'tags': list,
6010 'like_count': int,
6011 'release_timestamp': int,
6012 'channel': 'Sky News',
6013 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
6014 'age_limit': 0,
6015 'view_count': int,
6016 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
6017 'playable_in_embed': True,
6018 'release_date': r're:\d+',
6019 'availability': 'public',
6020 'live_status': 'is_live',
6021 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
6022 'channel_follower_count': int,
6023 'concurrent_view_count': int,
6024 'uploader_url': 'https://www.youtube.com/@SkyNews',
6025 'uploader_id': '@SkyNews',
6026 'uploader': 'Sky News',
6027 'channel_is_verified': True,
6029 'params': {
6030 'skip_download': True,
6032 'expected_warnings': ['Ignoring subtitle tracks found in '],
6033 }, {
6034 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
6035 'info_dict': {
6036 'id': 'a48o2S1cPoo',
6037 'ext': 'mp4',
6038 'title': 'The Young Turks - Live Main Show',
6039 'upload_date': '20150715',
6040 'license': 'Standard YouTube License',
6041 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
6042 'categories': ['News & Politics'],
6043 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
6044 'like_count': int,
6046 'params': {
6047 'skip_download': True,
6049 'only_matching': True,
6050 }, {
6051 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
6052 'only_matching': True,
6053 }, {
6054 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
6055 'only_matching': True,
6056 }, {
6057 'note': 'A channel that is not live. Should raise error',
6058 'url': 'https://www.youtube.com/user/numberphile/live',
6059 'only_matching': True,
6060 }, {
6061 'url': 'https://www.youtube.com/feed/trending',
6062 'only_matching': True,
6063 }, {
6064 'url': 'https://www.youtube.com/feed/library',
6065 'only_matching': True,
6066 }, {
6067 'url': 'https://www.youtube.com/feed/history',
6068 'only_matching': True,
6069 }, {
6070 'url': 'https://www.youtube.com/feed/subscriptions',
6071 'only_matching': True,
6072 }, {
6073 'url': 'https://www.youtube.com/feed/watch_later',
6074 'only_matching': True,
6075 }, {
6076 'note': 'Recommended - redirects to home page.',
6077 'url': 'https://www.youtube.com/feed/recommended',
6078 'only_matching': True,
6079 }, {
6080 'note': 'inline playlist with not always working continuations',
6081 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
6082 'only_matching': True,
6083 }, {
6084 'url': 'https://www.youtube.com/course',
6085 'only_matching': True,
6086 }, {
6087 'url': 'https://www.youtube.com/zsecurity',
6088 'only_matching': True,
6089 }, {
6090 'url': 'http://www.youtube.com/NASAgovVideo/videos',
6091 'only_matching': True,
6092 }, {
6093 'url': 'https://www.youtube.com/TheYoungTurks/live',
6094 'only_matching': True,
6095 }, {
6096 'url': 'https://www.youtube.com/hashtag/cctv9',
6097 'info_dict': {
6098 'id': 'cctv9',
6099 'title': 'cctv9 - All',
6100 'tags': [],
6102 'playlist_mincount': 300, # not consistent but should be over 300
6103 }, {
6104 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
6105 'only_matching': True,
6106 }, {
6107 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
6108 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6109 'only_matching': True,
6110 }, {
6111 'note': '/browse/ should redirect to /channel/',
6112 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
6113 'only_matching': True,
6114 }, {
6115 'note': 'VLPL, should redirect to playlist?list=PL...',
6116 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6117 'info_dict': {
6118 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
6119 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
6120 'title': 'NCS : All Releases 💿',
6121 'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
6122 'modified_date': r're:\d{8}',
6123 'view_count': int,
6124 'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
6125 'tags': [],
6126 'channel': 'NoCopyrightSounds',
6127 'availability': 'public',
6128 'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
6129 'uploader': 'NoCopyrightSounds',
6130 'uploader_id': '@NoCopyrightSounds',
6132 'playlist_mincount': 166,
6133 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden', 'YouTube Music is not directly supported'],
6134 }, {
6135 # TODO: fix 'unviewable' issue with this playlist when reloading with unavailable videos
6136 'note': 'Topic, should redirect to playlist?list=UU...',
6137 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6138 'info_dict': {
6139 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6140 'title': 'Uploads from Royalty Free Music - Topic',
6141 'tags': [],
6142 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6143 'channel': 'Royalty Free Music - Topic',
6144 'view_count': int,
6145 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6146 'modified_date': r're:\d{8}',
6147 'description': '',
6148 'availability': 'public',
6149 'uploader': 'Royalty Free Music - Topic',
6151 'playlist_mincount': 101,
6152 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6153 }, {
6154 # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
6155 # Treat as a general feed
6156 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
6157 'info_dict': {
6158 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
6159 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
6160 'tags': [],
6162 'playlist_mincount': 9,
6163 }, {
6164 'note': 'Youtube music Album',
6165 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
6166 'info_dict': {
6167 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
6168 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
6169 'tags': [],
6170 'view_count': int,
6171 'description': '',
6172 'availability': 'unlisted',
6173 'modified_date': r're:\d{8}',
6175 'playlist_count': 50,
6176 'expected_warnings': ['YouTube Music is not directly supported'],
6177 }, {
6178 'note': 'unlisted single video playlist',
6179 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6180 'info_dict': {
6181 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
6182 'title': 'yt-dlp unlisted playlist test',
6183 'availability': 'unlisted',
6184 'tags': [],
6185 'modified_date': '20220418',
6186 'channel': 'colethedj',
6187 'view_count': int,
6188 'description': '',
6189 'channel_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
6190 'channel_url': 'https://www.youtube.com/channel/UC9zHu_mHU96r19o-wV5Qs1Q',
6191 'uploader_url': 'https://www.youtube.com/@colethedj1894',
6192 'uploader_id': '@colethedj1894',
6193 'uploader': 'colethedj',
6195 'playlist': [{
6196 'info_dict': {
6197 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
6198 'id': 'BaW_jenozKc',
6199 '_type': 'url',
6200 'ie_key': 'Youtube',
6201 'duration': 10,
6202 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
6203 'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
6204 'view_count': int,
6205 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
6206 'channel': 'Philipp Hagemeister',
6207 'uploader_id': '@PhilippHagemeister',
6208 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
6209 'uploader': 'Philipp Hagemeister',
6212 'playlist_count': 1,
6213 'params': {'extract_flat': True},
6214 }, {
6215 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
6216 'url': 'https://www.youtube.com/feed/recommended',
6217 'info_dict': {
6218 'id': 'recommended',
6219 'title': 'recommended',
6220 'tags': [],
6222 'playlist_mincount': 50,
6223 'params': {
6224 'skip_download': True,
6225 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6227 }, {
6228 'note': 'API Fallback: /videos tab, sorted by oldest first',
6229 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
6230 'info_dict': {
6231 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6232 'title': 'Cody\'sLab - Videos',
6233 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
6234 'channel': 'Cody\'sLab',
6235 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
6236 'tags': [],
6237 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw',
6238 'channel_follower_count': int,
6240 'playlist_mincount': 650,
6241 'params': {
6242 'skip_download': True,
6243 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6245 'skip': 'Query for sorting no longer works',
6246 }, {
6247 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
6248 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
6249 'info_dict': {
6250 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
6251 'title': 'Uploads from Royalty Free Music - Topic',
6252 'modified_date': r're:\d{8}',
6253 'channel_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
6254 'description': '',
6255 'channel_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
6256 'tags': [],
6257 'channel': 'Royalty Free Music - Topic',
6258 'view_count': int,
6259 'availability': 'public',
6260 'uploader': 'Royalty Free Music - Topic',
6262 'playlist_mincount': 101,
6263 'params': {
6264 'skip_download': True,
6265 'extractor_args': {'youtubetab': {'skip': ['webpage']}},
6267 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'],
6268 }, {
6269 'note': 'non-standard redirect to regional channel',
6270 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
6271 'only_matching': True,
6272 }, {
6273 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
6274 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6275 'info_dict': {
6276 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
6277 'modified_date': '20220407',
6278 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
6279 'tags': [],
6280 'availability': 'unlisted',
6281 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
6282 'channel': 'pukkandan',
6283 'description': 'Test for collaborative playlist',
6284 'title': 'yt-dlp test - collaborative playlist',
6285 'view_count': int,
6286 'uploader_url': 'https://www.youtube.com/@pukkandan',
6287 'uploader_id': '@pukkandan',
6288 'uploader': 'pukkandan',
6290 'playlist_mincount': 2,
6291 }, {
6292 'note': 'translated tab name',
6293 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
6294 'info_dict': {
6295 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6296 'tags': [],
6297 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6298 'description': 'test description',
6299 'title': 'cole-dlp-test-acc - 再生リスト',
6300 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6301 'channel': 'cole-dlp-test-acc',
6302 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6303 'uploader_id': '@coletdjnz',
6304 'uploader': 'cole-dlp-test-acc',
6306 'playlist_mincount': 1,
6307 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6308 'expected_warnings': ['Preferring "ja"'],
6309 }, {
6310 # XXX: this should really check flat playlist entries, but the test suite doesn't support that
6311 'note': 'preferred lang set with playlist with translated video titles',
6312 'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6313 'info_dict': {
6314 'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
6315 'tags': [],
6316 'view_count': int,
6317 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6318 'channel': 'cole-dlp-test-acc',
6319 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6320 'description': 'test',
6321 'title': 'dlp test playlist',
6322 'availability': 'public',
6323 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6324 'uploader_id': '@coletdjnz',
6325 'uploader': 'cole-dlp-test-acc',
6327 'playlist_mincount': 1,
6328 'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
6329 'expected_warnings': ['Preferring "ja"'],
6330 }, {
6331 # shorts audio pivot for 2GtVksBMYFM.
6332 'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
6333 'info_dict': {
6334 'id': 'sfv_audio_pivot',
6335 'title': 'sfv_audio_pivot',
6336 'tags': [],
6338 'playlist_mincount': 50,
6340 }, {
6341 # Channel with a real live tab (not to be mistaken with streams tab)
6342 # Do not treat like it should redirect to live stream
6343 'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
6344 'info_dict': {
6345 'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
6346 'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
6347 'tags': [],
6349 'playlist_mincount': 20,
6350 }, {
6351 # Tab name is not the same as tab id
6352 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
6353 'info_dict': {
6354 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6355 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
6356 'tags': [],
6358 'playlist_mincount': 8,
6359 }, {
6360 # Home tab id is literally home. Not to get mistaken with featured
6361 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
6362 'info_dict': {
6363 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6364 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
6365 'tags': [],
6367 'playlist_mincount': 8,
6368 }, {
6369 # Should get three playlists for videos, shorts and streams tabs
6370 'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6371 'info_dict': {
6372 'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6373 'title': 'Polka Ch. 尾丸ポルカ',
6374 'channel_follower_count': int,
6375 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA',
6376 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
6377 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2',
6378 'channel': 'Polka Ch. 尾丸ポルカ',
6379 'tags': 'count:35',
6380 'uploader_url': 'https://www.youtube.com/@OmaruPolka',
6381 'uploader': 'Polka Ch. 尾丸ポルカ',
6382 'uploader_id': '@OmaruPolka',
6383 'channel_is_verified': True,
6385 'playlist_count': 3,
6386 }, {
6387 # Shorts tab with channel with handle
6388 # TODO: fix channel description
6389 'url': 'https://www.youtube.com/@NotJustBikes/shorts',
6390 'info_dict': {
6391 'id': 'UC0intLFzLaudFG-xAvUEO-A',
6392 'title': 'Not Just Bikes - Shorts',
6393 'tags': 'count:10',
6394 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
6395 'description': 'md5:5e82545b3a041345927a92d0585df247',
6396 'channel_follower_count': int,
6397 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
6398 'channel': 'Not Just Bikes',
6399 'uploader_url': 'https://www.youtube.com/@NotJustBikes',
6400 'uploader': 'Not Just Bikes',
6401 'uploader_id': '@NotJustBikes',
6402 'channel_is_verified': True,
6404 'playlist_mincount': 10,
6405 }, {
6406 # Streams tab
6407 'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
6408 'info_dict': {
6409 'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6410 'title': '中村悠一 - Live',
6411 'tags': 'count:7',
6412 'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
6413 'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
6414 'channel': '中村悠一',
6415 'channel_follower_count': int,
6416 'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
6417 'uploader_url': 'https://www.youtube.com/@Yuichi-Nakamura',
6418 'uploader_id': '@Yuichi-Nakamura',
6419 'uploader': '中村悠一',
6421 'playlist_mincount': 60,
6422 }, {
6423 # Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
6424 # See test_youtube_lists
6425 'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
6426 'only_matching': True,
6427 }, {
6428 # No uploads and no UCID given. Should fail with no uploads error
6429 # See test_youtube_lists
6430 'url': 'https://www.youtube.com/news',
6431 'only_matching': True,
6432 }, {
6433 # No videos tab but has a shorts tab
6434 'url': 'https://www.youtube.com/c/TKFShorts',
6435 'info_dict': {
6436 'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6437 'title': 'Shorts Break - Shorts',
6438 'tags': 'count:48',
6439 'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
6440 'channel': 'Shorts Break',
6441 'description': 'md5:6de33c5e7ba686e5f3efd4e19c7ef499',
6442 'channel_follower_count': int,
6443 'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
6444 'uploader_url': 'https://www.youtube.com/@ShortsBreak_Official',
6445 'uploader': 'Shorts Break',
6446 'uploader_id': '@ShortsBreak_Official',
6448 'playlist_mincount': 30,
6449 }, {
6450 # Trending Now Tab. tab id is empty
6451 'url': 'https://www.youtube.com/feed/trending',
6452 'info_dict': {
6453 'id': 'trending',
6454 'title': 'trending - Now',
6455 'tags': [],
6457 'playlist_mincount': 30,
6458 }, {
6459 # Trending Gaming Tab. tab id is empty
6460 'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
6461 'info_dict': {
6462 'id': 'trending',
6463 'title': 'trending - Gaming',
6464 'tags': [],
6466 'playlist_mincount': 30,
6467 }, {
6468 # Shorts url result in shorts tab
6469 # TODO: Fix channel id extraction
6470 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/shorts',
6471 'info_dict': {
6472 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6473 'title': 'cole-dlp-test-acc - Shorts',
6474 'channel': 'cole-dlp-test-acc',
6475 'description': 'test description',
6476 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6477 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6478 'tags': [],
6479 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6480 'uploader_id': '@coletdjnz',
6481 'uploader': 'cole-dlp-test-acc',
6483 'playlist': [{
6484 'info_dict': {
6485 # Channel data is not currently available for short renderers (as of 2023-03-01)
6486 '_type': 'url',
6487 'ie_key': 'Youtube',
6488 'url': 'https://www.youtube.com/shorts/sSM9J5YH_60',
6489 'id': 'sSM9J5YH_60',
6490 'title': 'SHORT short',
6491 'view_count': int,
6492 'thumbnails': list,
6495 'params': {'extract_flat': True},
6496 }, {
6497 # Live video status should be extracted
6498 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live',
6499 'info_dict': {
6500 'id': 'UCQvWX73GQygcwXOTSf_VDVg',
6501 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live
6502 'tags': [],
6504 'playlist': [{
6505 'info_dict': {
6506 '_type': 'url',
6507 'ie_key': 'Youtube',
6508 'url': 'startswith:https://www.youtube.com/watch?v=',
6509 'id': str,
6510 'title': str,
6511 'live_status': 'is_live',
6512 'channel_id': str,
6513 'channel_url': str,
6514 'concurrent_view_count': int,
6515 'channel': str,
6516 'uploader': str,
6517 'uploader_url': str,
6518 'uploader_id': str,
6519 'channel_is_verified': bool, # this will keep changing
6522 'params': {'extract_flat': True, 'playlist_items': '1'},
6523 'playlist_mincount': 1,
6524 }, {
6525 # Channel renderer metadata. Contains number of videos on the channel
6526 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels',
6527 'info_dict': {
6528 'id': 'UCiu-3thuViMebBjw_5nWYrA',
6529 'title': 'cole-dlp-test-acc - Channels',
6530 'channel': 'cole-dlp-test-acc',
6531 'description': 'test description',
6532 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
6533 'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
6534 'tags': [],
6535 'uploader_url': 'https://www.youtube.com/@coletdjnz',
6536 'uploader_id': '@coletdjnz',
6537 'uploader': 'cole-dlp-test-acc',
6539 'playlist': [{
6540 'info_dict': {
6541 '_type': 'url',
6542 'ie_key': 'YoutubeTab',
6543 'url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6544 'id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6545 'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6546 'title': 'PewDiePie',
6547 'channel': 'PewDiePie',
6548 'channel_url': 'https://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
6549 'thumbnails': list,
6550 'channel_follower_count': int,
6551 'playlist_count': int,
6552 'uploader': 'PewDiePie',
6553 'uploader_url': 'https://www.youtube.com/@PewDiePie',
6554 'uploader_id': '@PewDiePie',
6555 'channel_is_verified': True,
6558 'params': {'extract_flat': True},
6559 }, {
6560 'url': 'https://www.youtube.com/@3blue1brown/about',
6561 'info_dict': {
6562 'id': '@3blue1brown',
6563 'tags': ['Mathematics'],
6564 'title': '3Blue1Brown',
6565 'channel_follower_count': int,
6566 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw',
6567 'channel': '3Blue1Brown',
6568 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw',
6569 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9',
6570 'uploader_url': 'https://www.youtube.com/@3blue1brown',
6571 'uploader_id': '@3blue1brown',
6572 'uploader': '3Blue1Brown',
6573 'channel_is_verified': True,
6575 'playlist_count': 0,
6576 }, {
6577 # Podcasts tab, with rich entry playlistRenderers
6578 'url': 'https://www.youtube.com/@99percentinvisiblepodcast/podcasts',
6579 'info_dict': {
6580 'id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6581 'channel_id': 'UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6582 'uploader_url': 'https://www.youtube.com/@99percentinvisiblepodcast',
6583 'description': 'md5:3a0ed38f1ad42a68ef0428c04a15695c',
6584 'title': '99 Percent Invisible - Podcasts',
6585 'uploader': '99 Percent Invisible',
6586 'channel_follower_count': int,
6587 'channel_url': 'https://www.youtube.com/channel/UCVMF2HD4ZgC0QHpU9Yq5Xrw',
6588 'tags': [],
6589 'channel': '99 Percent Invisible',
6590 'uploader_id': '@99percentinvisiblepodcast',
6592 'playlist_count': 0,
6593 }, {
6594 # Releases tab, with rich entry playlistRenderers (same as Podcasts tab)
6595 'url': 'https://www.youtube.com/@AHimitsu/releases',
6596 'info_dict': {
6597 'id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6598 'channel': 'A Himitsu',
6599 'uploader_url': 'https://www.youtube.com/@AHimitsu',
6600 'title': 'A Himitsu - Releases',
6601 'uploader_id': '@AHimitsu',
6602 'uploader': 'A Himitsu',
6603 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A',
6604 'tags': 'count:12',
6605 'description': 'I make music',
6606 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
6607 'channel_follower_count': int,
6608 'channel_is_verified': True,
6610 'playlist_mincount': 10,
6611 }, {
6612 # Playlist with only shorts, shown as reel renderers
6613 # FIXME: future: YouTube currently doesn't give continuation for this,
6614 # may do in future.
6615 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
6616 'info_dict': {
6617 'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
6618 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
6619 'view_count': int,
6620 'uploader_id': '@BangyShorts',
6621 'description': '',
6622 'uploader_url': 'https://www.youtube.com/@BangyShorts',
6623 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
6624 'channel': 'Bangy Shorts',
6625 'uploader': 'Bangy Shorts',
6626 'tags': [],
6627 'availability': 'public',
6628 'modified_date': r're:\d{8}',
6629 'title': 'Uploads from Bangy Shorts',
6631 'playlist_mincount': 100,
6632 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
6633 }, {
6634 'note': 'Tags containing spaces',
6635 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6636 'playlist_count': 3,
6637 'info_dict': {
6638 'id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6639 'channel': 'Markiplier',
6640 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ',
6641 'title': 'Markiplier',
6642 'channel_follower_count': int,
6643 'description': 'md5:0c010910558658824402809750dc5d97',
6644 'uploader_id': '@markiplier',
6645 'uploader_url': 'https://www.youtube.com/@markiplier',
6646 'uploader': 'Markiplier',
6647 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ',
6648 'channel_is_verified': True,
6649 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments',
6650 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious',
6651 'challenges', 'sketches', 'scary games', 'funny games', 'rage games',
6652 'mark fischbach'],
@classmethod
def suitable(cls, url):
    """Reject every URL that YoutubeIE accepts; otherwise use the inherited matcher."""
    if YoutubeIE.suitable(url):
        return False
    return super().suitable(url)
# Splits a URL into three named parts: 'pre' (whatever _VALID_URL matches), an optional
# 'tab' (a single '/<segment>' containing no '?', '#' or '/'), and 'post' (the remainder).
# `(?(not_channel)|...)` is a conditional group: when the `not_channel` group took part in
# the match nothing is consumed, otherwise the tab segment is attempted.
# NOTE(review): `not_channel` is presumably a named group inside _VALID_URL - confirm there.
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
6662 def _get_url_mobj(self, url):
6663 mobj = self._URL_RE.match(url).groupdict()
6664 mobj.update((k, '') for k, v in mobj.items() if v is None)
6665 return mobj
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a `(tab_id, tab_name)` pair for a tab renderer dict.

    The id is looked up, in order, from the tab segment of the tab's endpoint URL
    (resolved against `base_url`), from the `tabIdentifier` field, and finally from
    the lowercased title. `tab_name` is always the lowercased title ('' if absent).
    """
    tab_name = (tab.get('title') or '').lower()
    endpoint_path = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    tab_url = urljoin(base_url, endpoint_path)

    tab_id = None
    if tab_url:
        # The captured tab segment starts with '/'; drop it
        tab_id = self._get_url_mobj(tab_url)['tab'][1:]
    if not tab_id:
        tab_id = traverse_obj(tab, 'tabIdentifier', expected_type=str)

    if tab_id:
        id_aliases = {
            'TAB_ID_SPONSORSHIPS': 'membership',
        }
        return id_aliases.get(tab_id, tab_id), tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in case of 'let's play' tab example on special gaming channel
    # Note that in the case of translated tab name this may result in an empty string, which we don't want.
    if tab_name:
        self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_aliases = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_aliases.get(tab_name, tab_name), tab_name
6689 def _has_tab(self, tabs, tab_id):
6690 return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
6692 def _empty_playlist(self, item_id, data):
6693 return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data))
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data):
    """Resolve a channel/tab, playlist or watch-with-list URL.

    Depending on what the page turns out to be, this returns a playlist built from
    the page's tabs (for a bare channel URL, together with the extra streams/shorts
    tabs), an inline watch-page playlist, or a `url_result` pointing at another URL
    or at a single video.

    Raises ExtractorError when a watch URL carries neither a video nor a playlist id,
    when a music album cannot be resolved to a playlist, when the requested tab does
    not exist, or when nothing recognizable is found. Raises UserNotLive when a
    `/live` request ends up on a different tab.

    NOTE(review): `smuggled_data` is presumably supplied by the
    `passthrough_smuggled_data` decorator - confirm against its definition.
    """
    item_id = self._match_id(url)
    # Force the host to www.youtube.com; the original host only survives via smuggled_data
    url = urllib.parse.urlunparse(
        urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
    compat_opts = self.get_param('compat_opts', [])

    mobj = self._get_url_mobj(url)
    pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
    if is_channel and smuggled_data.get('is_music_url'):
        if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
            return self.url_result(
                f'https://music.youtube.com/playlist?list={item_id[2:]}', YoutubeTabIE, item_id[2:])
        elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
            mdata = self._extract_tab_endpoint(
                f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
            murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
                                get_all=False, expected_type=str)
            if not murl:
                raise ExtractorError('Failed to resolve album to playlist')
            return self.url_result(murl, YoutubeTabIE)
        elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
            return self.url_result(
                f'https://music.youtube.com/channel/{item_id}{tab}{post}', YoutubeTabIE, item_id)

    # `tab` keeps its leading '/', so strip it to get the bare requested tab id
    original_tab_id, display_id = tab[1:], f'{item_id}{tab}'
    if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
        # A channel URL without an explicit tab is pointed at its /videos tab
        url = f'{pre}/videos{post}'
    if smuggled_data.get('is_music_url'):
        self.report_warning(f'YouTube Music is not directly supported. Redirecting to {url}')

    # Handle both video/playlist URLs
    qs = parse_qs(url)
    video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list'))
    if not video_id and mobj['not_channel'].startswith('watch'):
        if not playlist_id:
            # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
            raise ExtractorError('A video URL was given without video ID', expected=True)
        # Common mistake: https://www.youtube.com/watch?list=playlist_id
        self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
        return self.url_result(
            f'https://www.youtube.com/playlist?list={playlist_id}', YoutubeTabIE, playlist_id)

    # When only the single video is wanted, hand the URL over to the video extractor
    if not self._yes_playlist(playlist_id, video_id):
        return self.url_result(
            f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    data, ytcfg = self._extract_data(url, display_id)

    # YouTube may provide a non-standard redirect to the regional channel
    # See: https://github.com/yt-dlp/yt-dlp/issues/2694
    # https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
    redirect_url = traverse_obj(
        data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
    if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
        # Keep the originally requested tab and trailing part on the redirect target
        redirect_url = ''.join((urljoin('https://www.youtube.com', redirect_url), tab, post))
        self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
        return self.url_result(redirect_url, YoutubeTabIE)

    # `extra_tabs` collects URLs of additional tabs that are extracted after the main one
    tabs, extra_tabs = self._extract_tab_renderers(data), []
    if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
        selected_tab = self._extract_selected_tab(tabs)
        selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url)  # NB: Name may be translated
        self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')

        # /about is no longer a tab
        if original_tab_id == 'about':
            return self._empty_playlist(item_id, data)

        if not original_tab_id and selected_tab_name:
            self.to_screen('Downloading all uploads of the channel. '
                           'To download only the videos in a specific tab, pass the tab\'s URL')
            if self._has_tab(tabs, 'streams'):
                extra_tabs.append(''.join((pre, '/streams', post)))
            if self._has_tab(tabs, 'shorts'):
                extra_tabs.append(''.join((pre, '/shorts', post)))
            # XXX: Members-only tab should also be extracted

            if not extra_tabs and selected_tab_id != 'videos':
                # Channel does not have streams, shorts or videos tabs
                if item_id[:2] != 'UC':
                    return self._empty_playlist(item_id, data)

                # Topic channels don't have /videos. Use the equivalent playlist instead
                pl_id = f'UU{item_id[2:]}'
                pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                try:
                    data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
                except ExtractorError:
                    # `data` is still the channel page here: the failed call never rebound it
                    return self._empty_playlist(item_id, data)
                else:
                    item_id, url = pl_id, pl_url
                    self.to_screen(
                        f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')

            elif extra_tabs and selected_tab_id != 'videos':
                # When there are shorts/live tabs but not videos tab
                url, data = f'{pre}{post}', None

        elif (original_tab_id or 'videos') != selected_tab_id:
            if original_tab_id == 'live':
                # Live tab should have redirected to the video
                # Except in the case the channel has an actual live tab
                # Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
                raise UserNotLive(video_id=item_id)
            elif selected_tab_name:
                raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)

            # For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
            url = f'{pre}{post}'

    # YouTube sometimes provides a button to reload playlist with unavailable videos.
    if 'no-youtube-unavailable-videos' not in compat_opts:
        data = self._reload_with_unavailable_videos(display_id, data, ytcfg) or data
    self._extract_and_report_alerts(data, only_once=True)

    # Re-read the tabs: `data` may have been replaced (or reset to None) above
    tabs, entries = self._extract_tab_renderers(data), []
    if tabs:
        entries = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
        entries[0].update({
            'extractor_key': YoutubeTabIE.ie_key(),
            'extractor': YoutubeTabIE.IE_NAME,
            'webpage_url': url,
        })
    if self.get_param('playlist_items') == '0':
        entries.extend(self.url_result(u, YoutubeTabIE) for u in extra_tabs)
    else:  # Users expect to get all `video_id`s even with `--flat-playlist`. So don't return `url_result`
        entries.extend(map(self._real_extract, extra_tabs))

    if len(entries) == 1:
        return entries[0]
    elif entries:
        metadata = self._extract_metadata_from_tabs(item_id, data)
        # Generic wording, replaced by the concrete UU playlist URL when the channel id is known
        uploads_url = 'the Uploads (UU) playlist URL'
        if try_get(metadata, lambda x: x['channel_id'].startswith('UC')):
            uploads_url = f'https://www.youtube.com/playlist?list=UU{metadata["channel_id"][2:]}'
        self.to_screen(
            'Downloading as multiple playlists, separated by tabs. '
            f'To download as a single playlist instead, pass {uploads_url}')
        return self.playlist_result(entries, item_id, **metadata)

    # Inline playlist
    playlist = traverse_obj(
        data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
    if playlist:
        return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

    # Last resort: fall back to the single video the page points at (or the `v` query value)
    video_id = traverse_obj(
        data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
    if video_id:
        if tab != '/live':  # live tab is expected to redirect to video
            self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
        return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)

    raise ExtractorError('Unable to recognize tab page')
# Accepts bare playlist ids and `...?list=<id>` URLs and forwards them to YoutubeTabIE
# as a canonical https://www.youtube.com/playlist URL.
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                {invidious}
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>{playlist_id})
                     )'''.format(
        playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    )
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickman',
            'uploader_id': '@WickmanVT',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            'view_count': int,
            'uploader_url': 'https://www.youtube.com/@WickmanVT',
            'modified_date': r're:\d{8}',
            'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'channel': 'Wickman',
            'tags': [],
            'channel_url': 'https://www.youtube.com/channel/UCKSpbfbl5kRQpTdL7kMc-1Q',
            'availability': 'public',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': '@milan5503',
            'description': '',
            'channel_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
            'tags': [],
            'modified_date': '20140919',
            'view_count': int,
            'channel': 'milan',
            'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            'uploader_url': 'https://www.youtube.com/@milan5503',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'],
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 455,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': '@music_king',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            'channel': 'LBK',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UC21nz3_MesPLqtDqwdvnoxA',
            'tags': [],
            'uploader_url': 'https://www.youtube.com/@music_king',
            'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'modified_date': r're:\d{8}',
            'availability': 'public',
        },
        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match only when YoutubeTabIE does not and the URL has no non-empty `v` parameter."""
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): the function-scope import looks deliberate (presumably so this method
        # stays self-contained if its source is copied elsewhere) - confirm before hoisting it.
        from ..utils import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return False if has_video_id else super().suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a www.youtube.com/playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Must be decided before `url` is replaced by the canonical playlist URL
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        # Carry over the original query string; a bare id has none, so synthesize `list=`
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
# Handles youtu.be short links that also carry a `list=` playlist id.
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = rf'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P<playlist_id>{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})'
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': '@backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/@backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'age_limit': 0,
            'playable_in_embed': True,
            'thumbnail': r're:^https?://.*\.webp',
            'channel': 'Backus-Page House Museum',
            'channel_id': 'UCEfMCQ9bs3tjvjy1s451zaw',
            'live_status': 'not_live',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/channel/UCEfMCQ9bs3tjvjy1s451zaw',
            'availability': 'public',
            'duration': 59,
            'comment_count': int,
            'channel_follower_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Expand the short link into a full watch URL (video + playlist) for YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
# Maps a `/embed/live_stream?channel=<id>` URL to that channel's /live page.
class YoutubeLivestreamEmbedIE(InfoExtractor):
    IE_DESC = 'YouTube livestream embeds'
    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P<id>[^&#]+)'
    _TESTS = [{
        'url': 'https://www.youtube.com/embed/live_stream?channel=UC2_KI6RB__jGdlnK6dvFEZA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the channel's /live URL."""
        channel_id = self._match_id(url)
        live_url = f'https://www.youtube.com/channel/{channel_id}/live'
        return self.url_result(live_url, ie=YoutubeTabIE.ie_key(), video_id=channel_id)
# Expands the `ytuser:<name>` shorthand into the matching /user/<name> URL.
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    IE_NAME = 'youtube:user'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE using the user's page URL."""
        user_id = self._match_id(url)
        user_url = f'https://www.youtube.com/user/{user_id}'
        return self.url_result(user_url, YoutubeTabIE, user_id)
# Keyword extractor: `:ytfav` and its spelling variants resolve to the `LL` playlist.
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=LL` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
# Keyword extractor: `:ytnotif` pages through the notification menu and yields one
# url entry per video or community-post notification.
class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
    IE_NAME = 'youtube:notif'
    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
    _VALID_URL = r':ytnotif(?:ication)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytnotif',
        'only_matching': True,
    }, {
        'url': ':ytnotifications',
        'only_matching': True,
    }]

    def _extract_notification_menu(self, response, continuation_list):
        """Yield an entry for each notification in `response`.

        `continuation_list` is a one-element list used as an out-parameter: slot 0 is
        reset to None and then set to the last continuation renderer seen, if any.
        """
        first_page_path = (
            'actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items')
        next_page_path = ('actions', 0, 'appendContinuationItemsAction', 'continuationItems')
        items = traverse_obj(response, first_page_path, next_page_path, expected_type=list) or []

        continuation_list[0] = None
        for item in items:
            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
            if entry:
                yield entry
            next_renderer = item.get('continuationItemRenderer')
            if next_renderer:
                continuation_list[0] = next_renderer

    def _extract_notification_renderer(self, notification):
        """Turn one notification renderer into a url-type entry dict.

        Returns None when the notification points at neither a video nor a community
        post that can be identified by both channel id and post id.
        """
        video_id = traverse_obj(
            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
        if video_id:
            channel_id = None
            url = f'https://www.youtube.com/watch?v={video_id}'
        else:
            browse_ep = traverse_obj(
                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
            channel_id = self.ucid_or_none(traverse_obj(browse_ep, 'browseId', expected_type=str))
            post_id = self._search_regex(
                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str),
                'post id', default=None)
            if not (channel_id and post_id):
                return None
            # The direct /post url redirects to this in the browser
            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'

        channel = traverse_obj(
            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
            expected_type=str)
        notification_title = self._get_text(notification, 'shortMessage')
        if notification_title:
            notification_title = notification_title.replace('\xad', '')  # remove soft hyphens
        # TODO: handle recommended videos
        title = self._search_regex(
            rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
            'video title', default=None)

        # The sent time is only parsed when the `approximate_date` extractor arg is set
        timestamp = None
        if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE):
            timestamp = self._parse_time_text(self._get_text(notification, 'sentTimeText'))

        return {
            '_type': 'url',
            'url': url,
            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
            'video_id': video_id,
            'title': title,
            'channel_id': channel_id,
            'channel': channel,
            'uploader': channel,
            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
            'timestamp': timestamp,
        }

    def _notification_menu_entries(self, ytcfg):
        """Yield entries from every notification page, following continuations until none is left."""
        continuation_list = [None]
        response = None
        for page in itertools.count(1):
            ctoken = traverse_obj(
                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
            query = {'ctoken': ctoken} if ctoken else {}
            # Visitor data is read from the previous page's response (None on the first page)
            headers = self.generate_api_headers(
                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
            response = self._extract_response(
                item_id=f'page {page}', query=query, ytcfg=ytcfg,
                ep='notification/get_notification_menu', check_get_keys='actions',
                headers=headers)
            yield from self._extract_notification_menu(response, continuation_list)
            if not continuation_list[0]:
                break

    def _real_extract(self, url):
        """Return the notifications as a lazily evaluated playlist named 'notifications'."""
        display_id = 'notifications'
        ytcfg = {} if self.skip_webpage else self._download_ytcfg('web', display_id)
        self._report_playlist_authcheck(ytcfg)
        entries = self._notification_menu_entries(ytcfg)
        return self.playlist_result(entries, display_id, display_id)
# Declarative search extractor: this class only sets attributes and test cases;
# all behaviour is inherited from its two base classes.
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    # Query prefix, as used by the test URLs below ('ytsearch5:...', 'ytsearch1:...')
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]
# Declarative variant of the search extractor that uses a different `_SEARCH_PARAMS`
# value; all behaviour is inherited from its two base classes.
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Derived from the plain search extractor's name so the two stay in sync
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    # Query prefix, as used by the test URL below ('ytsearchdate5:...')
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]
# Handles youtube.com /results and /search URLs, honouring the `sp` filter parameter.
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Run the search named in the URL; the query text is both playlist id and title."""
        qs = parse_qs(url)
        # `search_query` wins over `q` when both are present
        query = (qs.get('search_query') or qs.get('q'))[0]
        search_params = qs.get('sp', (None,))[0]
        results = self._search_results(query, search_params)
        return self.playlist_result(results, query, query)
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles music.youtube.com search URLs. A result section can be chosen
    # either through the "sp" URL parameter or through a URL fragment naming
    # one of the keys of _SECTIONS (e.g. "#songs").
    IE_NAME = 'youtube:music:search_url'
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name -> search params value sent for that section
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of YouTube Music search results for *url*.

        The playlist id/title is the query, suffixed with " - <section>" when
        a section could be determined.
        """
        parsed_query = parse_qs(url)
        query = (parsed_query.get('search_query') or parsed_query.get('q'))[0]
        params = parsed_query.get('sp', (None,))[0]

        if params:
            # Explicit "sp": label it with the matching section name, or with
            # the raw params value when it is not one of the known sections.
            section = params
            for section_name, section_params in self._SECTIONS.items():
                if section_params == params:
                    section = section_name
                    break
        else:
            # No "sp": take the text between the first and second "#" (if any)
            # as the section name.
            url_pieces = url.split('#')
            fragment = url_pieces[1] if len(url_pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                # Unknown or absent section name: search without a section
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)
class YoutubeFeedsInfoExtractor(InfoExtractor):
    """
    Common base for the feed extractors.
    Each subclass has to override _FEED_NAME; it is used both in IE_NAME
    and in the feed URL that extraction is delegated to.
    """
    _FEED_NAME = 'feeds'
    _LOGIN_REQUIRED = True

    @classproperty
    def IE_NAME(cls):
        """Extractor name derived from the feed name ("youtube:<feed>")."""
        return 'youtube:' + cls._FEED_NAME

    def _real_initialize(self):
        # Borrow the login check from YoutubeBaseInfoExtractor; this class
        # does not inherit from it, so the method is called unbound.
        YoutubeBaseInfoExtractor._check_login_required(self)

    def _real_extract(self, url):
        """Hand off extraction of the feed page to YoutubeTabIE."""
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword extractor: ":ytwatchlater" is forwarded to the "WL" playlist URL.
    _VALID_URL = r':ytwatchlater'
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the watch later playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ":ytrec" and
    # ":ytrecommended" keywords.
    _FEED_NAME = 'recommended'
    # Overrides the base class default of True
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor for the subscriptions feed; the pattern accepts
    # ":ytsub", ":ytsubs", ":ytsubscription" and ":ytsubscriptions".
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Keyword extractor for the watch history feed; the pattern accepts
    # ":ythis" and ":ythistory".
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
class YoutubeShortsAudioPivotIE(InfoExtractor):
    # Maps /source/<video id>/shorts URLs to the "sfv_audio_pivot" feed URL,
    # which is then handled by YoutubeTabIE.
    IE_NAME = 'youtube:shorts:pivot:audio'
    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _generate_audio_pivot_params(video_id):
        """
        Build the "bp" query value for the sfv_audio_pivot feed.

        The encoded video id is inserted three times into a fixed binary
        template; the result is base64-encoded and then percent-quoted.
        """
        encoded_id = video_id.encode()
        # NOTE(review): the template looks like a serialized protobuf message - confirm
        payload = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % (encoded_id, encoded_id, encoded_id)
        b64_payload = base64.b64encode(payload).decode()
        return urllib.parse.quote(b64_payload)

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the audio pivot feed URL for this video."""
        video_id = self._match_id(url)
        browse_params = self._generate_audio_pivot_params(video_id)
        return self.url_result(
            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={browse_params}',
            ie=YoutubeTabIE)
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch/attribution_link URLs that end right after a single
    # non-video parameter (or after "watch?" itself). Such URLs typically
    # result from an unquoted "&" on the command line, so the extractor only
    # raises a helpful error instead of extracting anything.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError explaining how to quote the URL."""
        # The suggested command names yt-dlp, the program this extractor ships
        # with, rather than the legacy "youtube-dl" name.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
    # Resolves a /clip/<id> URL to its underlying video and returns a
    # url_transparent result carrying the clip's start/end offsets.
    IE_NAME = 'youtube:clip'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/(?P<id>[^/?#]+)'
    _TESTS = [{
        # FIXME: Other metadata should be extracted from the clip, not from the base video
        'url': 'https://www.youtube.com/clip/UgytZKpehg-hEMBSn3F4AaABCQ',
        'info_dict': {
            'id': 'UgytZKpehg-hEMBSn3F4AaABCQ',
            'ext': 'mp4',
            'section_start': 29.0,
            'section_end': 39.7,
            'duration': 10.7,
            'age_limit': 0,
            'availability': 'public',
            'categories': ['Gaming'],
            'channel': 'Scott The Woz',
            'channel_id': 'UC4rqhyiTs7XyuODcECvuiiQ',
            'channel_url': 'https://www.youtube.com/channel/UC4rqhyiTs7XyuODcECvuiiQ',
            'description': 'md5:7a4517a17ea9b4bd98996399d8bb36e7',
            'like_count': int,
            'playable_in_embed': True,
            'tags': 'count:17',
            'thumbnail': 'https://i.ytimg.com/vi_webp/ScPX26pdQik/maxresdefault.webp',
            'title': 'Mobile Games on Console - Scott The Woz',
            'upload_date': '20210920',
            'uploader': 'Scott The Woz',
            'uploader_id': '@ScottTheWoz',
            'uploader_url': 'https://www.youtube.com/@ScottTheWoz',
            'view_count': int,
            'live_status': 'not_live',
            'channel_follower_count': int,
            'chapters': 'count:20',
            'comment_count': int,
            'heatmap': 'count:100',
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result for the video behind a clip URL.

        Raises:
            ExtractorError: if the page data contains no video id, or if the
                clip's start/end times cannot be found.
        """
        clip_id = self._match_id(url)
        _, data = self._extract_webpage(url, clip_id)

        video_id = traverse_obj(data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'))
        if not video_id:
            raise ExtractorError('Unable to find video ID')

        # First "loopCommand" found below the clip engagement panel; it holds
        # the clip boundaries in milliseconds.
        clip_data = traverse_obj(data, (
            'engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'clipSectionRenderer',
            'contents', ..., 'clipAttributionRenderer', 'onScrubExit', 'commandExecutorCommand', 'commands', ...,
            'openPopupAction', 'popup', 'notificationActionRenderer', 'actionButton', 'buttonRenderer', 'command',
            'commandExecutorCommand', 'commands', ..., 'loopCommand'), get_all=False)

        # Fail with a proper extractor error instead of a TypeError/KeyError
        # when the loop command (or one of its timestamps) is missing.
        start_ms = traverse_obj(clip_data, 'startTimeMs', expected_type=int_or_none)
        end_ms = traverse_obj(clip_data, 'endTimeMs', expected_type=int_or_none)
        if start_ms is None or end_ms is None:
            raise ExtractorError('Unable to find clip start and end times')

        return {
            '_type': 'url_transparent',
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'ie_key': YoutubeIE.ie_key(),
            'id': clip_id,
            'section_start': start_ms / 1000,
            'section_end': end_ms / 1000,
        }
class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
    # Handles consent.youtube.com redirect pages by forwarding to the URL
    # carried in their "continue" query parameter.
    IE_NAME = 'youtube:consent'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://consent\.youtube\.com/m\?'
    _TESTS = [{
        'url': 'https://consent.youtube.com/m?continue=https%3A%2F%2Fwww.youtube.com%2Flive%2FqVv6vCqciTM%3Fcbrd%3D1&gl=NL&m=0&pc=yt&hl=en&src=1',
        'info_dict': {
            'id': 'qVv6vCqciTM',
            'ext': 'mp4',
            'age_limit': 0,
            'uploader_id': '@sana_natori',
            'comment_count': int,
            'chapters': 'count:13',
            'upload_date': '20221223',
            'thumbnail': 'https://i.ytimg.com/vi/qVv6vCqciTM/maxresdefault.jpg',
            'channel_url': 'https://www.youtube.com/channel/UCIdEIHpS0TdkqRkHL5OkLtA',
            'uploader_url': 'https://www.youtube.com/@sana_natori',
            'like_count': int,
            'release_date': '20221223',
            'tags': ['Vtuber', '月ノ美兎', '名取さな', 'にじさんじ', 'クリスマス', '3D配信'],
            'title': '【 #インターネット女クリスマス 】3Dで歌ってはしゃぐインターネットの女たち【月ノ美兎/名取さな】',
            'view_count': int,
            'playable_in_embed': True,
            'duration': 4438,
            'availability': 'public',
            'channel_follower_count': int,
            'channel_id': 'UCIdEIHpS0TdkqRkHL5OkLtA',
            'categories': ['Entertainment'],
            'live_status': 'was_live',
            'release_timestamp': 1671793345,
            'channel': 'さなちゃんねる',
            'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
            'uploader': 'さなちゃんねる',
            'channel_is_verified': True,
            'heatmap': 'count:100',
        },
        'add_ie': ['Youtube'],
        'params': {'skip_download': 'Youtube'},
    }]

    def _real_extract(self, url):
        """Forward to the URL from the last "continue" parameter.

        Raises an expected ExtractorError when that parameter is missing or
        is not a valid URL.
        """
        continue_values = parse_qs(url).get('continue', [None])
        redirect_url = url_or_none(continue_values[-1])
        if redirect_url:
            return self.url_result(redirect_url)
        raise ExtractorError('Invalid cookie consent redirect URL', expected=True)
class YoutubeTruncatedIDIE(InfoExtractor):
    # Matches watch URLs whose "v" value is only 1-10 characters long and
    # reports them as truncated instead of attempting extraction.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the incomplete id."""
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)